from llm.ollama import load_llm
from vectordb.vector_store import retrieve
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
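
# Note: load_llm() (from llm/ollama.py) is assumed to return a LangChain-compatible
# LLM object so it can be composed with `|` below. A minimal sketch of such a loader,
# assuming the langchain-ollama package and a locally pulled model (the model name
# "llama3.1" is only an example):
#
#     from langchain_ollama import OllamaLLM
#
#     def load_llm():
#         return OllamaLLM(model="llama3.1")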

# Define the prompt template for the LLM. The input variables must match the
# placeholders used in the template ({question} and {context}).
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
Use the following context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise:
Question: {question}
Context: {context}
Answer:
""",
    input_variables=["question", "context"],
)

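# retrieve(query, top_k) is assumed to follow a batched search API (as in
# Chroma/Milvus-style vector stores): it returns (documents, metadata), where
# documents[0] holds the matching text chunks for this single query and
# metadata[0] holds the corresponding metadata entries.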
def get_rag_response(query):
    print("⌄⌄⌄⌄ Retrieving ⌄⌄⌄⌄")
    retrieved_docs, metadata = retrieve(query, 10)
    print("Query found %d documents." % len(retrieved_docs[0]))
    for meta in metadata[0]:
        print("Metadata: ", meta)
    print("⌃⌃⌃⌃ Retrieving ⌃⌃⌃⌃")
print("⌄⌄⌄⌄ Augmented Prompt ⌄⌄⌄⌄")
|
|
llm = load_llm()
|
|
# Create a chain combining the prompt template and LLM
|
|
rag_chain = prompt | llm | StrOutputParser()
|
|
context = " ".join(retrieved_docs[0]) if retrieved_docs else "No relevant documents found."
|
|
print("⌃⌃⌃⌃ Augmented Prompt ⌃⌃⌃⌃")
|
|
|
|
print("⌄⌄⌄⌄ Generation ⌄⌄⌄⌄")
|
|
response = rag_chain.invoke({"question": query, "context": context});
|
|
print(response)
|
|
print("⌃⌃⌃⌃ Generation ⌃⌃⌃⌃")
|
|
|
|
return response
|
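
# Example usage: a minimal sketch, assuming the Ollama server is running and the
# vector store has already been populated with documents. The question string is
# only an illustration.
if __name__ == "__main__":
    # get_rag_response prints the retrieved metadata and the generated answer itself.
    get_rag_response("What topics are covered in the indexed documents?")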