Fix company field not being applied to documents

2025-05-30 17:31:09 -05:00
parent 4717cb35ab
commit f0503e7aac
2 changed files with 5 additions and 2 deletions
--- a/rag_system/crawler.py
+++ b/rag_system/crawler.py
@@ -45,8 +45,6 @@ def main():
        from rag_system.loaders.web_loader import load_web_crawl

        documents += load_web_crawl(url)
-        for doc in documents:
-            doc.metadata["company"] = os.getenv("CRAWLER_COMPANY_NAME")

    logger.info("[2/2] Generating and storing embeddings...")
    if not documents:
@@ -56,6 +54,10 @@ def main():
    from rag_system.vectordb.azure_search import add_documents

    logger.info(f"Total documents to process: {len(documents)}")
+
+    for doc in documents:
+        doc.metadata["company"] = os.getenv("CRAWLER_COMPANY_NAME")
+
    logger.info("Storing embeddings in Azure Search...")
    # Add the documents to the vector database
    # This function should handle the embedding generation and storage