"""Azure AI Search vector store setup plus add/delete/retrieve helpers.

Importing this module loads environment variables (via ``python-dotenv``),
validates that the required settings are present, and builds the module-level
``embeddings`` and ``vector_store`` objects used by the helper functions.
"""

import logging
import os
from typing import Optional, Tuple
from uuid import uuid4

from dotenv import load_dotenv
from langchain_community.vectorstores.azuresearch import (
    AzureSearch,
    FIELDS_CONTENT,
    FIELDS_CONTENT_VECTOR,
    FIELDS_ID,
    FIELDS_METADATA,
)
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

try:
    from azure.search.documents.indexes.models import (
        SearchableField,
        SearchField,
        SearchFieldDataType,
        SimpleField,
    )
except ImportError as e:
    raise ImportError(
        "Unable to import azure.search.documents. Please install with "
        "`pip install -U azure-search-documents`."
    ) from e

logger: logging.Logger = logging.getLogger("azure_search")

load_dotenv()  # take environment variables

required_env_vars = [
    "AZURE_DEPLOYMENT",
    "AZURE_OPENAI_API_VERSION",
    "AZURE_ENDPOINT",
    "AZURE_OPENAI_API_KEY",
    "VECTOR_STORE_ADDRESS",
    "VECTOR_STORE_PASSWORD",
    "INDEX_NAME",
    "RETRY_TOTAL",
]

# Fail fast with a single message listing every unset (or empty) variable.
missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
if missing_vars:
    raise ValueError(
        f"Missing required environment variables: {', '.join(missing_vars)}"
    )


def _read_retry_total() -> int:
    """Return ``RETRY_TOTAL`` parsed as an integer.

    The value is handed to the Azure client as ``retry_total``; environment
    variables are always strings, so it is converted here.

    Raises:
        ValueError: If ``RETRY_TOTAL`` is not a valid integer.
    """
    raw_value = os.environ["RETRY_TOTAL"]
    try:
        return int(raw_value)
    except ValueError as exc:
        raise ValueError(
            f"RETRY_TOTAL must be an integer, got {raw_value!r}"
        ) from exc


# Use AzureOpenAIEmbeddings with an Azure account
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.getenv("AZURE_DEPLOYMENT"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

# Index schema: the standard id/content/vector/metadata fields plus a
# filterable "company" field.
fields = [
    SimpleField(
        name=FIELDS_ID,
        type=SearchFieldDataType.String,
        key=True,
        filterable=True,
    ),
    SimpleField(
        name="company",
        type=SearchFieldDataType.String,
        key=False,
        filterable=True,
    ),
    SearchableField(
        name=FIELDS_CONTENT,
        type=SearchFieldDataType.String,
    ),
    SearchField(
        name=FIELDS_CONTENT_VECTOR,
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        # Embed a probe string to learn the embedding model's vector size.
        # The previous expression, `None or len("Text")`, always evaluated to
        # 4 (the length of the string), not the embedding dimensionality.
        vector_search_dimensions=len(embeddings.embed_query("Text")),
        vector_search_profile_name="myHnswProfile",
    ),
    SearchableField(
        name=FIELDS_METADATA,
        type=SearchFieldDataType.String,
    ),
]

# Specify additional properties for the Azure client such as the following
# https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md#configurations
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=os.getenv("VECTOR_STORE_ADDRESS"),
    azure_search_key=os.getenv("VECTOR_STORE_PASSWORD"),
    index_name=os.getenv("INDEX_NAME"),
    embedding_function=embeddings.embed_query,
    # Configure max retries for the Azure client (must be an int, not the raw
    # environment string).
    additional_search_client_options={"retry_total": _read_retry_total()},
    fields=fields,
)


def get_document_id(document):
    """Return the ID of *document*.

    ``document.metadata["id"]`` is preferred when present; otherwise the
    ``document.id`` attribute is used.

    Raises:
        ValueError: If neither location provides an ID (a ``document.id`` of
            ``None`` is treated as missing).
    """
    metadata = getattr(document, "metadata", None)
    # A missing or None metadata falls through instead of raising TypeError.
    if metadata and "id" in metadata:
        return metadata["id"]

    document_id = getattr(document, "id", None)
    if document_id is not None:
        return document_id

    raise ValueError("Document does not have a valid ID.")


def delete_documents(filters: Optional[str] = None):
    """Delete documents from the AzureSearch vector store in batches of 20.

    Repeatedly searches the store and deletes whatever is returned until a
    search comes back empty. Errors are logged, not raised.

    Args:
        filters: Forwarded unchanged to ``vector_store.vector_search`` as its
            ``filters`` argument; ``None`` applies no filter.
            NOTE(review): presumably an OData filter expression - confirm.
    """
    try:
        while True:
            docs_to_delete = vector_store.vector_search("*", 20, filters=filters)
            if not docs_to_delete:
                # Nothing left that matches; avoid a pointless empty delete.
                break
            logger.debug("Deleting documents: %s", docs_to_delete)
            # NOTE(review): if a delete silently fails, the same documents are
            # returned again and this loop will not terminate - confirm the
            # return contract of vector_store.delete before adding a guard.
            vector_store.delete([get_document_id(doc) for doc in docs_to_delete])
        logger.debug("All documents deleted successfully.")
    except Exception as e:
        # Best-effort operation: report through the module logger (as
        # add_documents does) rather than printing to stdout.
        logger.exception("Error deleting documents: %s", e)


def add_documents(documents):
    """Add *documents* to the vector store.

    Errors are logged, not raised, so callers are not interrupted by a failed
    upload.
    """
    try:
        vector_store.add_documents(documents)
    except Exception as e:
        logger.exception("Error adding document to vector store: %s", e)


def retrieve(query_text, n_results=1):
    """Return the ``n_results`` documents most similar to *query_text*.

    Args:
        query_text: The query passed to the vector store's similarity search.
        n_results: Number of documents to request (``k``).

    Returns:
        Whatever ``vector_store.similarity_search`` returns for the query.
    """
    return vector_store.similarity_search(
        query=query_text,
        k=n_results,
        search_type="similarity",
    )