from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader


def load_pdf(file_path, *, chunk_size=500, chunk_overlap=50):
    """Load a PDF and split its text into overlapping chunks.

    The file is read with ``PyPDFLoader`` and the resulting documents are
    split with ``RecursiveCharacterTextSplitter``. The number of loaded
    documents is printed to stdout before splitting.

    Args:
        file_path: Path of the PDF file, passed straight to ``PyPDFLoader``.
        chunk_size: Maximum size of each chunk, forwarded to the splitter
            (measured by the splitter's length function, characters by
            default). Defaults to 500.
        chunk_overlap: Amount of overlap between consecutive chunks,
            forwarded to the splitter. Defaults to 50.

    Returns:
        A ``(documents, metadatas)`` tuple of two parallel lists:
        ``documents[i]`` is the ``page_content`` of the i-th chunk and
        ``metadatas[i]`` is that same chunk's ``metadata``.
    """
    loader = PyPDFLoader(file_path)
    # NOTE(review): the loader presumably yields one Document per PDF page,
    # so this count is a page count rather than a chunk count -- confirm.
    pages = loader.load()
    print(f"Loaded {len(pages)} documents from {file_path}")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    splits = splitter.split_documents(pages)

    # Build the two lists from the same sequence so indices stay aligned.
    documents = [split.page_content for split in splits]
    metadatas = [split.metadata for split in splits]
    return (documents, metadatas)