Fix company field not being applied to documents
This commit is contained in:
parent
4717cb35ab
commit
f0503e7aac
@@ -13,6 +13,7 @@ RETRY_TOTAL=4
|
||||
# Crawler settings
|
||||
CRAWLER_LOADER=web # web or file
|
||||
CRAWLER_COMPANY_NAME=Morton
|
||||
CRAWLER_COMPANY_PDF_DIR=files
|
||||
CRAWLER_COMPANY_URL=https://blog.mortons.site/
|
||||
CRAWLER_COMPANY_FILE=data/verint-responsible-ethical-ai.pdf
|
||||
FIRECRAWL_API_KEY=your-firecrawl-api-key
|
||||
|
||||
@@ -45,8 +45,6 @@ def main():
|
||||
from rag_system.loaders.web_loader import load_web_crawl
|
||||
|
||||
documents += load_web_crawl(url)
|
||||
for doc in documents:
|
||||
doc.metadata["company"] = os.getenv("CRAWLER_COMPANY_NAME")
|
||||
|
||||
logger.info("[2/2] Generating and storing embeddings...")
|
||||
if not documents:
|
||||
@@ -56,6 +54,10 @@ def main():
|
||||
from rag_system.vectordb.azure_search import add_documents
|
||||
|
||||
logger.info(f"Total documents to process: {len(documents)}")
|
||||
|
||||
for doc in documents:
|
||||
doc.metadata["company"] = os.getenv("CRAWLER_COMPANY_NAME")
|
||||
|
||||
logger.info("Storing embeddings in Azure Search...")
|
||||
# Add the documents to the vector database
|
||||
# This function should handle the embedding generation and storage
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user