refactored project to use poetry
This commit is contained in:
55
rag_system/vectordb/chromadb.py
Normal file
55
rag_system/vectordb/chromadb.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from typing import Tuple
|
||||
import chromadb
|
||||
from langchain_chroma import Chroma
|
||||
from uuid import uuid4
|
||||
|
||||
# from chromadb.utils.embedding_functions.ollama_embedding_function import (
|
||||
# OllamaEmbeddingFunction,
|
||||
# )
|
||||
from langchain_ollama import OllamaEmbeddings
|
||||
from chromadb.api.types import Metadata, Document, OneOrMany
|
||||
|
||||
|
||||
# Define a custom embedding function for ChromaDB using Ollama
|
||||
class ChromaDBEmbeddingFunction:
    """Callable adapter that embeds text through a LangChain embeddings object.

    The wrapped object only has to provide an ``embed_documents`` method;
    calling the adapter forwards the texts to that method and returns
    whatever it produces.
    """

    def __init__(self, langchain_embeddings):
        # Backend that actually computes the vectors.
        self.langchain_embeddings = langchain_embeddings

    def __call__(self, input):
        # NOTE(review): `input` shadows the builtin; presumably the name is
        # kept because ChromaDB inspects the callable's signature -- confirm
        # before renaming.
        # A bare string is treated as a one-element batch; any other value
        # is forwarded unchanged.
        texts = [input] if isinstance(input, str) else input
        return self.langchain_embeddings.embed_documents(texts)
|
||||
|
||||
|
||||
# Wrap the Ollama embeddings in the adapter so the collection below can call
# them directly. Change base_url if your Ollama server is not on localhost.
embedding = ChromaDBEmbeddingFunction(
    OllamaEmbeddings(
        base_url="http://localhost:11434",
        model="nomic-embed-text",
    )
)

# Create the persistent Chroma client (default settings) and fetch the
# collection, creating it if it does not exist yet.
persistent_client = chromadb.PersistentClient()
collection = persistent_client.get_or_create_collection(
    name="collection_name",
    embedding_function=embedding,  # embed with the adapter defined above
    metadata={"description": "A collection for RAG with Ollama - Demo1"},
)
|
||||
|
||||
|
||||
def add_documents(documents: Tuple[OneOrMany[Document], OneOrMany[Metadata]]):
    """Add documents and their metadata to the module-level collection.

    Args:
        documents: A ``(docs, metas)`` pair. Each side may be a single item
            or a list of items; a lone document string or a lone metadata
            dict is treated as a one-element list.

    Every document is stored under a freshly generated random UUID string.
    Nothing is sent to the collection when ``docs`` is empty.
    """
    docs, metas = documents

    # A bare string is ONE document. Without this normalization, the id
    # count would be derived from the string's characters instead.
    if isinstance(docs, str):
        docs = [docs]
    if isinstance(metas, dict):
        metas = [metas]

    # Nothing to store; skip the call rather than pass empty lists along.
    if not docs:
        return

    # One id per document.
    uuids = [str(uuid4()) for _ in docs]
    collection.add(documents=docs, ids=uuids, metadatas=metas)
|
||||
|
||||
|
||||
def retrieve(query_text, n_results=1):
    """Query the module-level collection with a single text.

    Args:
        query_text: Text to search for; it is sent as a one-element list of
            query texts.
        n_results: Number of results requested from the collection
            (defaults to 1).

    Returns:
        A ``(documents, metadatas)`` tuple taken from the ``"documents"``
        and ``"metadatas"`` entries of the query result.
    """
    hits = collection.query(n_results=n_results, query_texts=[query_text])
    matched_documents = hits["documents"]
    matched_metadatas = hits["metadatas"]
    return matched_documents, matched_metadatas
|
||||
Reference in New Issue
Block a user