Added logger

parent 548307102d
commit 2552d867da
@@ -1,11 +1,15 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 
+import logging
+
+logger: logging.Logger = logging.getLogger("pdf_loader")
+
 
 def load_pdf(file_path):
     loader = PyPDFLoader(file_path)
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     documents = loader.load_and_split(splitter)
-    print(f"Loaded and Split into {len(documents)} documents from {file_path}")
+    logger.info(f"Loaded and Split into {len(documents)} documents from {file_path}")
 
     return documents
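The module-level logger emits nothing until the application configures logging. A minimal sketch of an entry point, assuming a hypothetical script and the module path rag_system.loaders.pdf_loader (inferred from the logger name, not shown in this commit):

    import logging

    from rag_system.loaders.pdf_loader import load_pdf  # assumed module path

    logging.basicConfig(level=logging.INFO)  # configure handlers once, at the entry point
    docs = load_pdf("sample.pdf")            # hypothetical file path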
@@ -2,6 +2,8 @@ import json
 import os
 import logging
 
+logger: logging.Logger = logging.getLogger("web_loader")
+
 from langchain_community.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from rag_system.loaders.firecrawl import FireCrawlLoader
@@ -11,9 +13,6 @@ from dotenv import load_dotenv
 
 load_dotenv()  # take environment variables
 
-# Configure the logging
-logging.basicConfig(level=logging.INFO)
-
 firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
 firecrawl_api_url = os.getenv("FIRECRAWL_API_URL")
 firecrawl_mode = os.getenv("FIRECRAWL_MODE")
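Dropping the module-level logging.basicConfig(level=logging.INFO) means importing web_loader no longer configures logging globally as a side effect; that choice now belongs to the caller. With the named loggers this commit introduces, levels can be tuned per module, e.g.:

    import logging

    logging.basicConfig(level=logging.WARNING)                   # default for everything
    logging.getLogger("web_loader").setLevel(logging.INFO)       # keep the firecrawl params visible
    logging.getLogger("azure_search").setLevel(logging.DEBUG)    # surface the delete-loop debug lines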
@@ -27,14 +26,14 @@ if firecrawl_params["scrape_options"]:
 )
 
 
-logging.info(f"web_loader firecrawl_params: {firecrawl_params}")
-logging.info(f"web_loader firecrawl_api_url: {firecrawl_api_url}")
-logging.info(f"web_loader firecrawl_mode: {firecrawl_mode}")
-logging.info(f"web_loader firecrawl_params: {firecrawl_params}")
+logger.info(f"web_loader firecrawl_params: {firecrawl_params}")
+logger.info(f"web_loader firecrawl_api_url: {firecrawl_api_url}")
+logger.info(f"web_loader firecrawl_mode: {firecrawl_mode}")
+logger.info(f"web_loader firecrawl_params: {firecrawl_params}")
 
 
 def load_web_crawl(url):
-    logging.info(f"load_web_crawl url: {url}")
+    logger.info(f"load_web_crawl url: {url}")
 
     loader = FireCrawlLoader(
         url=url,
@@ -46,9 +45,7 @@ def load_web_crawl(url):
     docs = []
     docs_lazy = loader.load()
     for doc in docs_lazy:
-        print(".", end="")
         docs.append(doc)
-    print()
 
     # Load documents from the URLs
     # docs = [WebBaseLoader(url).load() for url in urls]
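The per-document progress dots are removed rather than migrated to the logger. If progress visibility is still wanted, a logger-based equivalent for the loop above (an assumption, not part of this commit) would be:

    for i, doc in enumerate(docs_lazy):
        logger.debug("loaded document %d from %s", i, url)  # debug-level progress instead of print dots
        docs.append(doc)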
@@ -1,4 +1,7 @@
 import os
+import logging
 
+logger: logging.Logger = logging.getLogger("azure_search")
+
 from typing import Tuple
 from langchain_community.vectorstores.azuresearch import (
@@ -106,24 +109,24 @@ def get_document_id(document):
     raise ValueError("Document does not have a valid ID.")
 
 
-def delete_all_documents():
+def delete_documents(filters: str = None):
     """
-    Delete all documents from the AzureSearch vector store.
+    Delete documents from the AzureSearch vector store.
     """
     try:
 
         docs_to_delete = []
         while True:
             # Delete all documents in the index
-            docs_to_delete = retrieve("", 10)
-
+            docs_to_delete = vector_store.vector_search("*", 20, filters=filters)
+            logger.debug("Deleting documents: %s", docs_to_delete)
             vector_store.delete(list(map(get_document_id, docs_to_delete)))
             if len(docs_to_delete) > 0:
                 continue
             else:
                 break
 
-        print("All documents deleted successfully.")
+        logger.debug("All documents deleted successfully.")
     except Exception as e:
         print(f"Error deleting documents: {str(e)}")
 
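A usage sketch for the new filters parameter of delete_documents; the OData expression and the filterable source field are illustrative assumptions about the index schema, not part of this commit:

    # Remove everything in the index.
    delete_documents()

    # Remove only documents from one source (assumes a filterable "source" field in the index).
    delete_documents(filters="source eq 'https://example.com/source'")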
@@ -134,7 +137,7 @@ def add_documents(documents):
     try:
         vector_store.add_documents(documents)
     except Exception as e:
-        print(f"Error adding document to vector store: {str(e)}")
+        logger.error(f"Error adding document to vector store: {str(e)}")
 
 
 def retrieve(query_text, n_results=1):
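One stdlib nuance worth noting: logger.error(...) records the message but not the traceback. Inside an except block, logger.exception(...) logs at ERROR level and appends the traceback automatically; a possible alternative here (not what this commit does):

    try:
        vector_store.add_documents(documents)
    except Exception:
        logger.exception("Error adding document to vector store")  # logs message plus traceback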
@@ -145,46 +148,3 @@ def retrieve(query_text, n_results=1):
         search_type="similarity",
     )
     return docs
-
-
-# def add_document_to_vector_store(document):
-#     """
-#     Add a document to the AzureSearch vector store.
-
-#     Args:
-#         vector_store: The initialized AzureSearch vector store instance.
-#         document: A dictionary or object representing the document to be added.
-#             Example format:
-#             {
-#                 "id": "unique_document_id",
-#                 "content": "The text content of the document",
-#                 "metadata": {
-#                     "source": "source_url",
-#                     "created": "2025-03-04T14:14:40.421666",
-#                     "modified": "2025-03-04T14:14:40.421666"
-#                 }
-#             }
-#     """
-#     try:
-
-#         # Add the document to the vector store
-#         vector_store.add_documents([document])
-#         print(f"Document with ID {document['id']} added successfully.")
-#     except Exception as e:
-#         print(f"Error adding document to vector store: {str(e)}")
-
-# add_document_to_vector_store("https://api.python.langchain.com/en/latest/langchain_api_reference.html",None)
-# Example document to add
-
-# doc = Document(
-#     page_content="This is the content of the document.For testing IVA demo integration ",
-#     metadata= {
-#         "source": "https://example.com/source",
-#         "created": "2025-03-04T14:14:40.421666",
-#         "modified": "2025-03-04T14:14:40.421666"
-#     }
-# )
-# Add the document to the vector store
-# add_document_to_vector_store( doc)
-
-# result = retrieve("iva",1)
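The commit hard-codes the logger names ("pdf_loader", "web_loader", "azure_search"). A common stdlib alternative is logging.getLogger(__name__), which keeps the logger name in sync with the module path automatically; a one-line sketch, not what this commit does:

    logger: logging.Logger = logging.getLogger(__name__)  # e.g. "rag_system.loaders.pdf_loader"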