import os

from huggingface_hub import InferenceClient

from data import Data


class Model:
    """Thin wrapper around a Hugging Face Inference API chat model."""

    def __init__(self, model_id="meta-llama/Llama-3.2-1B-Instruct"):
        self.client = InferenceClient(
            model_id, token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )

    def respond(
        self,
        message,
        history: list[tuple[str, str]],
        url,
        max_tokens,
        temperature,
        top_p,
    ):
        data = Data(url)

        # Seed the conversation with the source URL as the system message,
        # then replay the (user, assistant) turns from the chat history.
        messages = [{"role": "system", "content": url}]
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

        # Wrap the new user message in a prompt built from the URL's data.
        messages.append({"role": "user", "content": data.build_prompt(message)})

        # Stream the completion, yielding the accumulated text after each chunk.
        response = ""
        for chunk in self.client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # the final chunk's delta content can be None
                response += token
            yield response


model = Model()
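

# Usage sketch, for illustration only: "https://example.com" is a placeholder
# URL and the prompt text is invented; running this for real needs a valid
# HUGGINGFACEHUB_API_TOKEN and a URL that data.Data can ingest. Because
# respond() yields the cumulative response after each streamed token, the
# last yielded value is the complete reply.
if __name__ == "__main__":
    final = ""
    for partial in model.respond(
        "Summarize this page in one sentence.",
        history=[],
        url="https://example.com",  # hypothetical URL, replace with a real one
        max_tokens=256,
        temperature=0.7,
        top_p=0.95,
    ):
        final = partial  # keep only the latest (longest) snapshot
    print(final)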