refactored project to use poetry

This commit is contained in:
2025-05-20 22:19:30 -05:00
parent 3beb160c22
commit b3da128396
20 changed files with 5113 additions and 23 deletions

20
rag_system/crawler.py Normal file
View File

@@ -0,0 +1,20 @@
from rag_system.loaders.pdf_loader import load_pdf
from rag_system.loaders.web_loader import load_web_crawl
from rag_system.vectordb.azure_search import add_documents
def main(pdf_path: str = "data/verint-responsible-ethical-ai.pdf") -> None:
    """Load one PDF and hand its documents to the vector store.

    Step 1 calls ``load_pdf`` on *pdf_path*; step 2 passes the result to
    ``add_documents``. Progress messages are printed to stdout.

    Args:
        pdf_path: Path of the PDF to ingest. Defaults to the sample
            document that used to be hard-coded here, so ``main()``
            with no arguments behaves exactly as before.
    """
    print("[1/2] Splitting and processing documents...")
    pdf_documents = load_pdf(pdf_path)

    # Only the PDF is ingested. Web ingestion through ``load_web_crawl``
    # is imported by this module but intentionally not called here.

    print("[2/2] Generating and storing embeddings...")
    add_documents(pdf_documents)

    print("Embeddings stored. You can now run the Streamlit app with:\n")
    print(" streamlit run rag_system/app/streamlit_app.py")
# Run the ingestion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()