initial commit

This commit is contained in:
2025-05-01 12:21:47 -05:00
parent 2b9c4289e7
commit 226b51a6a1
18 changed files with 13479 additions and 0 deletions

0
vectordb/__init__.py Normal file
View File

53
vectordb/vector_store.py Normal file
View File

@@ -0,0 +1,53 @@
from typing import Tuple
import chromadb
from langchain_chroma import Chroma
from uuid import uuid4
# from chromadb.utils.embedding_functions.ollama_embedding_function import (
# OllamaEmbeddingFunction,
# )
from langchain_ollama import OllamaEmbeddings
from chromadb.api.types import (Metadata,Document,OneOrMany)
# Adapter that lets ChromaDB call a LangChain-style embeddings object.
class ChromaDBEmbeddingFunction:
    """Callable wrapper around an embeddings object.

    The wrapped object must provide ``embed_documents(list_of_texts)``;
    calling this wrapper forwards the texts to that method and returns
    whatever it returns.
    """

    def __init__(self, langchain_embeddings):
        # Kept as a public attribute so callers can reach the backend.
        self.langchain_embeddings = langchain_embeddings

    def __call__(self, input):
        # ``input`` shadows the builtin, but the parameter name is part of
        # the callable's interface, so it is kept unchanged.
        # A lone string is treated as a one-element batch.
        texts = [input] if isinstance(input, str) else input
        return self.langchain_embeddings.embed_documents(texts)
# Build the Ollama-backed embeddings first, then wrap them for ChromaDB.
_ollama_embeddings = OllamaEmbeddings(
    base_url="http://localhost:11434",  # adjust to match your Ollama server
    model="nomic-embed-text",
)
embedding = ChromaDBEmbeddingFunction(_ollama_embeddings)

# Client created with the library's default settings (no explicit path).
persistent_client = chromadb.PersistentClient()

# Reuse the collection when it already exists, otherwise create it, and
# attach the custom embedding function defined above.
collection = persistent_client.get_or_create_collection(
    name="collection_name",
    embedding_function=embedding,
    metadata={"description": "A collection for RAG with Ollama - Demo1"},
)
def add_documents(documents: Tuple[OneOrMany[Document], OneOrMany[Metadata]]):
    """Add documents and their metadata to the module-level collection.

    Args:
        documents: A ``(docs, metas)`` pair. As the ``OneOrMany`` annotation
            allows, each element may be a single item or a list of items.

    Every document is stored under a freshly generated UUID4 string id.
    Nothing is returned.
    """
    docs, metas = documents

    # A bare string is ONE document. Without wrapping it, the id list would
    # be sized by the string's character count instead of the document count.
    if isinstance(docs, str):
        docs = [docs]
    # Likewise, a single metadata mapping belongs to a single document.
    if isinstance(metas, dict):
        metas = [metas]

    # Nothing to store; skip the call rather than sending an empty batch.
    if not docs:
        return

    uuids = [str(uuid4()) for _ in docs]
    collection.add(documents=docs, ids=uuids, metadatas=metas)
def retrieve(query_text, n_results=1):
    """Query the module-level collection with a single text.

    Args:
        query_text: The text to search with; sent as a one-element batch.
        n_results: Number of results requested from the collection.

    Returns:
        A ``(documents, metadatas)`` tuple taken from the ``"documents"``
        and ``"metadatas"`` entries of the query result.
    """
    hits = collection.query(n_results=n_results, query_texts=[query_text])
    matched_documents = hits["documents"]
    matched_metadatas = hits["metadatas"]
    return matched_documents, matched_metadatas