diff --git a/rag_system/loaders/web_loader.py b/rag_system/loaders/web_loader.py
index 02cb5c8..33a3ce5 100644
--- a/rag_system/loaders/web_loader.py
+++ b/rag_system/loaders/web_loader.py
@@ -5,6 +5,7 @@ import logging
 from langchain_community.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from rag_system.loaders.firecrawl import FireCrawlLoader
+from firecrawl import ScrapeOptions
 
 from dotenv import load_dotenv
 
@@ -20,6 +21,13 @@ firecrawl_params = os.getenv("FIRECRAWL_PARAMS")
 if firecrawl_params:
     firecrawl_params = json.loads(firecrawl_params)
 
+# FIRECRAWL_PARAMS may be unset (None), empty, or lack "scrape_options";
+# only wrap the raw mapping in ScrapeOptions when it is actually present.
+if isinstance(firecrawl_params, dict) and firecrawl_params.get("scrape_options"):
+    firecrawl_params["scrape_options"] = ScrapeOptions(
+        **firecrawl_params["scrape_options"]
+    )
+
 logging.info(f"web_loader firecrawl_api_url: {firecrawl_api_url}")
 logging.info(f"web_loader firecrawl_mode: {firecrawl_mode}")
 logging.info(f"web_loader firecrawl_params: {firecrawl_params}")