from llm.ollama import load_llm
from vectordb.vector_store import retrieve
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Define the prompt template for the LLM
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Context: {context}
    Answer:
    """,
    input_variables=["question", "context"],
)


def get_rag_response(query):
    print("⌄⌄⌄⌄ Retrieving ⌄⌄⌄⌄")
    # Fetch the top 10 matching chunks and their metadata from the vector store
    retrieved_docs, metadata = retrieve(query, 10)
    print("Query found %d documents." % len(retrieved_docs[0]))
    for meta in metadata[0]:
        print("Metadata: ", meta)
    print("⌃⌃⌃⌃ Retrieving ⌃⌃⌃⌃")

    print("⌄⌄⌄⌄ Augmented Prompt ⌄⌄⌄⌄")
    llm = load_llm()
    # Create a chain combining the prompt template, LLM, and output parser
    rag_chain = prompt | llm | StrOutputParser()
    # Join the retrieved chunks into a single context string for the prompt
    context = " ".join(retrieved_docs[0]) if retrieved_docs else "No relevant documents found."
    print("⌃⌃⌃⌃ Augmented Prompt ⌃⌃⌃⌃")

    print("⌄⌄⌄⌄ Generation ⌄⌄⌄⌄")
    # Invoke the chain with the question and retrieved context
    response = rag_chain.invoke({"question": query, "context": context})
    print(response)
    print("⌃⌃⌃⌃ Generation ⌃⌃⌃⌃")
    return response
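
# Minimal usage sketch (an assumption, not part of the original module): this
# presumes the vector store behind vectordb.vector_store is already populated
# and that load_llm() can reach a running Ollama model. The query string below
# is a placeholder.
if __name__ == "__main__":
    answer = get_rag_response("What topics are covered in the indexed documents?")
    print("Final answer:", answer)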