Refactored and moved all keys and configuration into .env files and provided samples
This commit is contained in:
@@ -1,17 +1,23 @@
|
||||
import os
|
||||
from rag_system.loaders.pdf_loader import load_pdf
|
||||
from rag_system.loaders.web_loader import load_web_crawl
|
||||
from rag_system.vectordb.azure_search import add_documents
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv() # load settings from a .env file into the process environment so the os.getenv() calls in main() can read them
|
||||
|
||||
|
||||
# Ingestion entry point: loads documents, tags the crawled ones with a company
# name, and passes them to add_documents, printing progress along the way.
#
# NOTE(review): this text comes from a rendered commit diff, so removed and
# added lines appear side by side. The live-looking `pdf_documents = load_pdf(...)`
# assignment and the `add_documents(pdf_documents)` call each have a
# commented-out twin below them and are presumably the pre-change side of the
# diff — confirm against the actual file before relying on them.
def main():
|
||||
print("[1/2] Splitting and processing documents...")
|
||||
pdf_documents = load_pdf("data/verint-responsible-ethical-ai.pdf")
|
||||
# Earlier hard-coded sources, kept commented out for reference:
# web_documents = load_web_crawl(["https://excalibur.mgmresorts.com/en.html"])
|
||||
# web_documents = load_web_crawl(["https://www.verint.com"])
|
||||
# web_documents = load_web_crawl("https://firecrawl.dev")
|
||||
# pdf_documents = load_pdf("data/verint-responsible-ethical-ai.pdf")
|
||||
|
||||
# The crawl target is read from the CRAWLER_COMPANY_URL environment variable.
# os.getenv returns None when the variable is unset, and that value is passed
# straight to load_web_crawl without a check.
documents = load_web_crawl(os.getenv("CRAWLER_COMPANY_URL"))
|
||||
for doc in documents:
|
||||
# Tag each crawled document with the CRAWLER_COMPANY_NAME value (None if unset);
# the variable is looked up again on every iteration.
doc.metadata["company"] = os.getenv("CRAWLER_COMPANY_NAME")
|
||||
print("[2/2] Generating and storing embeddings...")
|
||||
add_documents(pdf_documents)
|
||||
# add_documents(web_documents)
|
||||
# add_documents(pdf_documents)
|
||||
# Hand the tagged crawl results to add_documents
# (imported from rag_system.vectordb.azure_search).
add_documents(documents)
|
||||
print("Embeddings stored. You can now run the Streamlit app with:\n")
|
||||
print(" streamlit run rag_system/app/streamlit_app.py")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user