75 lines
2.2 KiB
JavaScript

// Embeddings client for Azure OpenAI. Every connection parameter is read from
// CQA_RetrieverSettings.azure_openai_api.
const azureOpenAiSettings = CQA_RetrieverSettings.azure_openai_api;
const embedding = new langchain.openai.AzureOpenAIEmbeddings({
  azureOpenAIApiInstanceName: azureOpenAiSettings.instance_name,
  azureOpenAIApiDeploymentName: azureOpenAiSettings.deployment_name,
  azureOpenAIApiVersion: azureOpenAiSettings.version,
  azureOpenAIApiKey: azureOpenAiSettings.key,
});
// Azure AI Search vector store that uses `embedding` and is configured from
// CQA_RetrieverSettings.azure_aisearch. Queries use the SimilarityHybrid
// query type.
const azureAiSearch = langchain.community.vectorstores.azure_aisearch;
const azureAiSearchSettings = CQA_RetrieverSettings.azure_aisearch;
const store = new azureAiSearch.AzureAISearchVectorStore(embedding, {
  endpoint: azureAiSearchSettings.endpoint,
  key: azureAiSearchSettings.key,
  indexName: azureAiSearchSettings.index_name,
  search: {
    type: azureAiSearch.AzureAISearchQueryType.SimilarityHybrid,
  },
});
/**
 * Picks a source identifier out of a retrieved document's metadata.
 *
 * The metadata is first treated as serialized JSON: Object.values() on a
 * string yields its characters (and on an index-keyed object, such as a string
 * that was spread into an object, yields them in index order), so joining the
 * values rebuilds the JSON text before parsing. If that does not produce an
 * object, the metadata itself is inspected when it is already an object.
 * NOTE(review): which of these shapes the search index actually returns is not
 * visible from here; confirm against the index schema.
 *
 * @param {object} document - Retrieved document; only `metadata` is read.
 * @returns {*} The value stored under the first key present among `sourceURL`,
 *   `source`, `source_id`, `sourceName` (checked in that order), or the string
 *   "no source found" when the metadata is missing, unusable, or has none of
 *   those keys.
 */
function getSourceId(document) {
  const NO_SOURCE_FOUND = "no source found";
  // Priority order: the first key present in the metadata wins.
  const sourceKeys = ["sourceURL", "source", "source_id", "sourceName"];

  const rawMetadata = document.metadata;
  if (!rawMetadata) {
    return NO_SOURCE_FOUND;
  }

  let metadata = rawMetadata;
  try {
    const parsed = JSON.parse(Object.values(rawMetadata).join(""));
    if (parsed !== null && typeof parsed === "object") {
      metadata = parsed;
    }
  } catch {
    // Not serialized JSON. One unparseable document must not abort the whole
    // retrieval, so fall through and inspect the raw metadata instead.
  }

  // The `in` operator throws on primitives (e.g. a non-JSON string).
  if (typeof metadata !== "object") {
    return NO_SOURCE_FOUND;
  }

  const matchedKey = sourceKeys.find((key) => key in metadata);
  return matchedKey === undefined ? NO_SOURCE_FOUND : metadata[matchedKey];
}
return {
async retrieve(query, filterExpression) {
console.log({ query: query, filterExpression:filterExpression})
const filter = {
filterExpression: filterExpression?? CQA_RetrieverSettings.filterExpression,
};
const resultDocuments = await store.similaritySearch(query, 20, filter);
const sources = resultDocuments.map((doc) => ({
source_id: getSourceId(doc),
text: doc.pageContent,
}));
const cqaSources = {
instances: [
{
sources: sources,
question: query,
generate_question: true,
knowledgebase_description: "iva-vector-demo",
extra_guidance: "",
language_code: "en-GB",
},
],
};
if (CQA_RetrieverSettings.debug)
console.log(JSON.stringify(cqaSources, null, 2));
return cqaSources;
},
};