# Listing metadata: 16 lines, 501 B, Python
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
from langchain_community.document_loaders import PyPDFLoader
|
|
|
|
import logging
|
|
|
|
# Module-level logger registered under the fixed name "pdf_loader".
logger: logging.Logger = logging.getLogger(name="pdf_loader")
|
|
|
|
|
|
def load_pdf(file_path, *, chunk_size: int = 500, chunk_overlap: int = 50) -> list:
    """Load a PDF and split its contents into overlapping chunks.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this function always used.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            50, the value this function always used.

    Returns:
        The list of split documents produced by the splitter.
    """
    loader = PyPDFLoader(file_path)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # ``load_and_split`` is flagged as deprecated upstream; loading first and
    # then splitting explicitly is the equivalent two-step spelling.
    documents = splitter.split_documents(loader.load())

    # Lazy %-style arguments: the message is only rendered when INFO is
    # enabled, and the emitted text matches the previous f-string.
    logger.info(
        "Loaded and Split into %s documents from %s", len(documents), file_path
    )
    return documents
|