refactored project to use poetry
This commit is contained in:
parent
3beb160c22
commit
b3da128396
15
README.md
15
README.md
@ -1,2 +1,17 @@
|
|||||||
# rag-system
|
# rag-system
|
||||||
|
|
||||||
|
## Initialization
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry install
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
### Crawler
|
||||||
|
|
||||||
|
### Clear Index
|
||||||
|
|
||||||
|
```bash
|
||||||
|
poetry run clear-index
|
||||||
|
```
|
||||||
|
|||||||
5057
poetry.lock
generated
Normal file
5057
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
34
pyproject.toml
Normal file
34
pyproject.toml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "rag-system"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "A simple RAG (Retrieval-Augmented Generation) system"
|
||||||
|
authors = ["Peter Morton <Peter.Morton@verint.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.12"
|
||||||
|
black = "^25.1.0"
|
||||||
|
flake8 = "^7.2.0"
|
||||||
|
python-dotenv = "^1.1.0"
|
||||||
|
azure-identity = "^1.23.0"
|
||||||
|
azure-search-documents = "^11.5.2"
|
||||||
|
firecrawl-py = "^2.7.0"
|
||||||
|
tiktoken = "^0.9.0"
|
||||||
|
bs4 = "^0.0.2"
|
||||||
|
langchain-ollama = "^0.3.3"
|
||||||
|
langchain-openai = "^0.3.17"
|
||||||
|
langchain-chroma = "^0.2.4"
|
||||||
|
langchain-community = "^0.3.24"
|
||||||
|
langchain = "^0.3.25"
|
||||||
|
chromadb = "^1.0.9"
|
||||||
|
pypdf = "^5.5.0"
|
||||||
|
streamlit = "^1.45.1"
|
||||||
|
ollama = "^0.4.8"
|
||||||
|
|
||||||
|
[tool.poetry.scripts]
|
||||||
|
crawler = "rag_system.crawler:main"
|
||||||
|
clear-index = "rag_system.clear_index:main"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
@ -1,5 +1,5 @@
|
|||||||
import streamlit as st
|
import streamlit as st
|
||||||
from app.rag_chain import get_rag_response
|
from rag_system.app.rag_chain import get_rag_response
|
||||||
|
|
||||||
st.title("RAG System")
|
st.title("RAG System")
|
||||||
query = st.text_input("Ask a question:")
|
query = st.text_input("Ask a question:")
|
||||||
@ -1,4 +1,4 @@
|
|||||||
from vectordb.azure_search import delete_all_documents
|
from rag_system.vectordb.azure_search import delete_all_documents
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -1,6 +1,6 @@
|
|||||||
from loaders.pdf_loader import load_pdf
|
from rag_system.loaders.pdf_loader import load_pdf
|
||||||
from loaders.web_loader import load_web_crawl
|
from rag_system.loaders.web_loader import load_web_crawl
|
||||||
from vectordb.azure_search import add_documents
|
from rag_system.vectordb.azure_search import add_documents
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -13,7 +13,7 @@ def main():
|
|||||||
add_documents(pdf_documents)
|
add_documents(pdf_documents)
|
||||||
# add_documents(web_documents)
|
# add_documents(web_documents)
|
||||||
print("Embeddings stored. You can now run the Streamlit app with:\n")
|
print("Embeddings stored. You can now run the Streamlit app with:\n")
|
||||||
print(" streamlit run app/streamlit_app.py")
|
print(" streamlit run rag_system/app/streamlit_app.py")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -1,6 +1,6 @@
|
|||||||
from langchain_community.document_loaders import WebBaseLoader
|
from langchain_community.document_loaders import WebBaseLoader
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from loaders.firecrawl import FireCrawlLoader
|
from rag_system.loaders.firecrawl import FireCrawlLoader
|
||||||
|
|
||||||
|
|
||||||
def load_web_crawl(url):
|
def load_web_crawl(url):
|
||||||
0
rag_system/vectordb/__init__.py
Normal file
0
rag_system/vectordb/__init__.py
Normal file
@ -1,16 +0,0 @@
|
|||||||
langchain
|
|
||||||
langchain-community
|
|
||||||
langchain-chroma
|
|
||||||
langchain-openai
|
|
||||||
chromadb
|
|
||||||
pypdf
|
|
||||||
streamlit
|
|
||||||
ollama
|
|
||||||
langchain_ollama
|
|
||||||
bs4
|
|
||||||
tiktoken
|
|
||||||
firecrawl-py
|
|
||||||
azure-search-documents
|
|
||||||
azure-identity
|
|
||||||
python-dotenv
|
|
||||||
black
|
|
||||||
Loading…
x
Reference in New Issue
Block a user