from llm.ollama import load_llm
from vectordb.azure_search import retrieve
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Define the prompt template for the LLM
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Context: {context}
    Answer:
    """,
    # Must match the placeholders in the template: {question} and {context}
    input_variables=["question", "context"],
)


def get_rag_response(query):
    print("⌄⌄⌄⌄ Retrieving ⌄⌄⌄⌄")
    retrieved_docs = retrieve(query, 10)
    print("Query found %d documents." % len(retrieved_docs))
    print("⌃⌃⌃⌃ Retrieving ⌃⌃⌃⌃")

    print("⌄⌄⌄⌄ Augmented Prompt ⌄⌄⌄⌄")
    llm = load_llm()

    # Create a chain combining the prompt template, LLM, and output parser
    rag_chain = prompt | llm | StrOutputParser()

    # Concatenate the page content of every retrieved document into a
    # single context string for the prompt
    context = (
        " ".join(doc.page_content for doc in retrieved_docs)
        if retrieved_docs
        else "No relevant documents found."
    )
    print("⌃⌃⌃⌃ Augmented Prompt ⌃⌃⌃⌃")

    print("⌄⌄⌄⌄ Generation ⌄⌄⌄⌄")
    response = rag_chain.invoke({"question": query, "context": context})
    print(response)
    print("⌃⌃⌃⌃ Generation ⌃⌃⌃⌃")
    return response
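

# A minimal usage sketch. It assumes the local `llm.ollama` and
# `vectordb.azure_search` helper modules imported above are available on the
# import path and already configured; the example question is illustrative
# only, not from the original source.
if __name__ == "__main__":
    answer = get_rag_response("What is retrieval-augmented generation?")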