27 lines
854 B
Python
27 lines
854 B
Python
import os
|
|
from rag_system.loaders.pdf_loader import load_pdf
|
|
from rag_system.loaders.web_loader import load_web_crawl
|
|
from rag_system.vectordb.azure_search import add_documents
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
# Load variables from a local .env file (if one is found) into the process
# environment so that main() can read CRAWLER_COMPANY_URL and
# CRAWLER_COMPANY_NAME through os.getenv.
load_dotenv()
|
|
|
|
|
|
def main():
    """Crawl the configured company website and pass the result to the vector store.

    Reads two environment variables:

    * ``CRAWLER_COMPANY_URL`` -- handed to ``load_web_crawl``; required.
    * ``CRAWLER_COMPANY_NAME`` -- written to ``metadata["company"]`` of every
      returned document; stored as ``None`` when the variable is unset.

    The tagged documents are then passed to ``add_documents`` and progress
    messages are printed to stdout.

    Raises:
        RuntimeError: If ``CRAWLER_COMPANY_URL`` is unset or empty.
    """
    company_url = os.getenv("CRAWLER_COMPANY_URL")
    if not company_url:
        # Fail before doing any work, with an actionable message, instead of
        # handing None to the crawler.
        raise RuntimeError(
            "CRAWLER_COMPANY_URL is not set; define it in the environment "
            "or in a .env file before running this script."
        )

    print("[1/2] Splitting and processing documents...")
    # NOTE: PDF ingestion via load_pdf is currently disabled; only the web
    # crawl result is processed.
    documents = load_web_crawl(company_url)

    # The company name does not change between documents, so read it once.
    company_name = os.getenv("CRAWLER_COMPANY_NAME")
    for doc in documents:
        doc.metadata["company"] = company_name

    print("[2/2] Generating and storing embeddings...")
    add_documents(documents)

    print("Embeddings stored. You can now run the Streamlit app with:\n")
    print(" streamlit run rag_system/app/streamlit_app.py")
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: run the ingestion pipeline only when this file is executed
    # directly, not when it is imported.
    main()
|