refactored project to use poetry
This commit is contained in:
11
rag_system/loaders/pdf_loader.py
Normal file
11
rag_system/loaders/pdf_loader.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
|
||||
|
||||
def load_pdf(file_path, *, chunk_size: int = 500, chunk_overlap: int = 50):
    """Load a PDF file and split its contents into chunked documents.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 50, the value
            that was previously hard-coded.

    Returns:
        Whatever ``PyPDFLoader.load_and_split`` returns for the configured
        splitter (a sized collection of document chunks).
    """
    loader = PyPDFLoader(file_path)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    documents = loader.load_and_split(splitter)
    # Progress message kept on stdout, exactly as before.
    print(f"Loaded and Split into {len(documents)} documents from {file_path}")

    return documents
|
||||
Reference in New Issue
Block a user