rag-system/rag_system/app/rag_chain.py

from llm.ollama import load_llm
from vectordb.azure_search import retrieve
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Define the prompt template for the LLM
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Context: {context}
    Answer:
    """,
    input_variables=["question", "documents"],
)


def get_rag_response(query):
    print("⌄⌄⌄⌄ Retrieving ⌄⌄⌄⌄")
    retrieved_docs = retrieve(query, 10)
    print("Query Found %d documents." % len(retrieved_docs))

    print("⌃⌃⌃⌃ Retrieving ⌃⌃⌃⌃ ")

    print("⌄⌄⌄⌄ Augmented Prompt ⌄⌄⌄⌄")
    llm = load_llm()
    # Create a chain combining the prompt template and LLM
    rag_chain = prompt | llm | StrOutputParser()
    context = (
        (" ".join(doc.page_content) for doc in retrieved_docs)
        if retrieved_docs
        else "No relevant documents found."
    )

    print("⌃⌃⌃⌃ Augmented Prompt ⌃⌃⌃⌃")

    print("⌄⌄⌄⌄ Generation ⌄⌄⌄⌄")
    response = rag_chain.invoke({"question": query, "context": context})
    print(response)
    print("⌃⌃⌃⌃ Generation ⌃⌃⌃⌃")

    return response