12 lines
421 B
Python
12 lines
421 B
Python
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
from langchain_community.document_loaders import PyPDFLoader
|
|
|
|
|
|
def load_pdf(file_path, *, chunk_size: int = 500, chunk_overlap: int = 50) -> list:
    """Load a PDF and split its contents into chunked documents.

    The file is read with ``PyPDFLoader`` and the loaded documents are then
    split with a ``RecursiveCharacterTextSplitter``. A one-line summary of
    the number of resulting chunks is printed to stdout.

    Args:
        file_path: Location of the PDF, passed straight to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` handed to the text splitter. Defaults to
            500, the value this function previously hard-coded.
        chunk_overlap: ``chunk_overlap`` handed to the text splitter.
            Defaults to 50, the value this function previously hard-coded.

    Returns:
        The list of split documents produced by the text splitter.
    """
    loader = PyPDFLoader(file_path)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # Load, then split explicitly. This is what ``loader.load_and_split``
    # does under the hood, but that convenience method is documented by
    # LangChain as one that "should be considered to be deprecated".
    loaded = loader.load()
    documents = splitter.split_documents(loaded)

    print(f"Loaded and Split into {len(documents)} documents from {file_path}")

    return documents
|