Added logger
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
# Standard library
import logging

# Third-party: LangChain text splitting and PDF loading
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

# Module-level logger. Named explicitly ("pdf_loader") rather than
# __name__ to preserve the original logger hierarchy.
logger: logging.Logger = logging.getLogger("pdf_loader")
||||
def load_pdf(file_path):
    """Load a PDF file and split it into chunked documents.

    Parameters
    ----------
    file_path : str
        Path to the PDF file on disk.

    Returns
    -------
    list
        Documents produced by PyPDFLoader.load_and_split using a
        RecursiveCharacterTextSplitter (chunk_size=500, chunk_overlap=50).
    """
    loader = PyPDFLoader(file_path)
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    documents = loader.load_and_split(splitter)
    # Fix: the original emitted this message twice (once via print, once via
    # logger.info). Log it exactly once through the module logger, and use
    # lazy %-style arguments so the message is only formatted when the INFO
    # level is actually enabled.
    logger.info(
        "Loaded and Split into %d documents from %s", len(documents), file_path
    )
    return documents
||||
# Standard library
import json
import logging
import os

# Third-party: LangChain loaders and splitters
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

# Project-local FireCrawl loader
from rag_system.loaders.firecrawl import FireCrawlLoader

# Module-level logger. Named explicitly ("web_loader") rather than
# __name__ to preserve the original logger hierarchy.
logger: logging.Logger = logging.getLogger("web_loader")
|
||||
@@ -11,9 +13,6 @@ from dotenv import load_dotenv
|
||||
|
||||
# Pull environment variables from a local .env file, if one exists.
load_dotenv()

# Configure root logging once at import time (INFO level).
logging.basicConfig(level=logging.INFO)

# FireCrawl connection settings, all read from the environment
# (each is None when the corresponding variable is unset).
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
firecrawl_api_url = os.getenv("FIRECRAWL_API_URL")
firecrawl_mode = os.getenv("FIRECRAWL_MODE")
|
||||
@@ -27,14 +26,14 @@ if firecrawl_params["scrape_options"]:
|
||||
)
|
||||
|
||||
|
||||
logging.info(f"web_loader firecrawl_params: {firecrawl_params}")
|
||||
logging.info(f"web_loader firecrawl_api_url: {firecrawl_api_url}")
|
||||
logging.info(f"web_loader firecrawl_mode: {firecrawl_mode}")
|
||||
logging.info(f"web_loader firecrawl_params: {firecrawl_params}")
|
||||
logger.info(f"web_loader firecrawl_params: {firecrawl_params}")
|
||||
logger.info(f"web_loader firecrawl_api_url: {firecrawl_api_url}")
|
||||
logger.info(f"web_loader firecrawl_mode: {firecrawl_mode}")
|
||||
logger.info(f"web_loader firecrawl_params: {firecrawl_params}")
|
||||
|
||||
|
||||
def load_web_crawl(url):
|
||||
logging.info(f"load_web_crawl url: {url}")
|
||||
logger.info(f"load_web_crawl url: {url}")
|
||||
|
||||
loader = FireCrawlLoader(
|
||||
url=url,
|
||||
@@ -46,9 +45,7 @@ def load_web_crawl(url):
|
||||
docs = []
|
||||
docs_lazy = loader.load()
|
||||
for doc in docs_lazy:
|
||||
print(".", end="")
|
||||
docs.append(doc)
|
||||
print()
|
||||
|
||||
# Load documents from the URLs
|
||||
# docs = [WebBaseLoader(url).load() for url in urls]
|
||||
|
||||
Reference in New Issue
Block a user