import os

from huggingface_hub import InferenceClient
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

from data import Data


class Model:
    """Wraps two Hugging Face backends: a raw InferenceClient for streamed chat
    completions and a LangChain chat model driving a RetrievalQA chain."""

    def __init__(self, model_id="meta-llama/Llama-3.2-1B-Instruct"):
        # Raw client used by `respond` for streaming chat completions.
        self.client = InferenceClient(model_id, token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
        # LangChain endpoint used by the RetrievalQA chain in `predict`.
        self.llm = HuggingFaceEndpoint(
            repo_id="HuggingFaceH4/zephyr-7b-beta",
            task="text-generation",
            max_new_tokens=512,
            do_sample=False,
            repetition_penalty=1.03,
        )
        self.chat_model = ChatHuggingFace(llm=self.llm, token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

    def build_prompt(self, question, context_urls):
        """Retrieve the most relevant chunk for `question` from `context_urls`
        and inline it into the shared RAG prompt template."""
        data = Data(context_urls)
        docs = data.retriever.invoke(question)
        context = docs[0].page_content if docs else ""  # guard against empty retrieval
        return self._build_prompt_rag().format(context=context, question=question)

    def _build_prompt_rag(self):
        """Prompt template shared by `build_prompt` and the RetrievalQA chain."""
        prompt_template = """
        Use the following piece of context to answer the question asked.
        Answer only based on this context.

        {context}

        Question: {question}

        Helpful Answer:
        """
        return PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    def _retrieval_qa(self, url):
        """Build a 'stuff' RetrievalQA chain over the documents scraped from `url`."""
        data = Data([url])
        return RetrievalQA.from_chain_type(
            llm=self.chat_model,
            chain_type="stuff",
            retriever=data.retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self._build_prompt_rag()},
        )

    def predict(self, message, history, url, max_tokens, temperature, top_p):
        """Answer `message` with the RetrievalQA chain built over `url`.

        `history` is a list of OpenAI-style dicts ({"role": ..., "content": ...}).
        `max_tokens`, `temperature`, and `top_p` are accepted for UI-callback
        compatibility but are not consumed by the chain.
        """
        # Rebuild the conversation as LangChain messages. Note: the RetrievalQA
        # path below does not use this history; it serves the plain-chat
        # alternative commented out underneath.
        history_langchain_format = [SystemMessage(content="You're a helpful Python developer assistant.")]
        for msg in history:
            if msg["role"] == "user":
                history_langchain_format.append(HumanMessage(content=msg["content"]))
            elif msg["role"] == "assistant":
                history_langchain_format.append(AIMessage(content=msg["content"]))
        history_langchain_format.append(HumanMessage(content=message))

        # Plain chat alternative (no retrieval):
        # ai_msg = self.chat_model.invoke(history_langchain_format)
        # return ai_msg.content

        qa_chain = self._retrieval_qa(url)
        return qa_chain.invoke({"query": message})["result"]

    def respond(
        self,
        message,
        history: list[tuple[str, str]],
        url,
        max_tokens,
        temperature,
        top_p,
    ):
        """Stream an answer via the raw InferenceClient chat API, yielding the
        cumulative response after each chunk."""
        # The source URL doubles as the system message.
        messages = [{"role": "system", "content": url}]
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        # Wrap the latest question in the retrieval-augmented prompt.
        messages.append({"role": "user", "content": self.build_prompt(message, [url])})

        response = ""
        # Use `chunk`, not `message`, so the loop does not shadow the argument.
        for chunk in self.client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # the final chunk may carry no content
                response += token
                yield response


model = Model()
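
# --- Usage sketch. Assumptions: HUGGINGFACEHUB_API_TOKEN is set in the
# environment, data.py provides the Data class imported above, and the URL
# below is a hypothetical placeholder for any page Data can scrape. ---
if __name__ == "__main__":
    demo_url = "https://docs.python.org/3/tutorial/datastructures.html"  # hypothetical

    # One-shot RAG answer through the RetrievalQA chain.
    answer = model.predict(
        message="What is a list comprehension?",
        history=[],
        url=demo_url,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    print(answer)

    # Streamed answer; each yielded value is the cumulative response so far.
    final = ""
    for partial in model.respond(
        "What is a list comprehension?", [], demo_url,
        max_tokens=512, temperature=0.7, top_p=0.95,
    ):
        final = partial
    print(final)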