import os

from huggingface_hub import InferenceClient

from data import Data
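
# `Data` is defined elsewhere in this Space; judging from how it is used
# below, its interface is presumably: Data(url) fetches and parses the page
# at `url`, and data.build_prompt(message) folds that page text into the
# user prompt so answers are grounded in the page.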


class Model:
    """Thin wrapper around a chat model served via the HF Inference API."""

    def __init__(self, model_id="meta-llama/Llama-3.2-1B-Instruct"):
        # Read the API token from the environment rather than hard-coding it.
        self.client = InferenceClient(
            model_id, token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
    def respond(
        self,
        message,
        history: list[tuple[str, str]],
        url,
        max_tokens,
        temperature,
        top_p,
    ):
        """Stream a reply to `message`, grounded in the page at `url`."""
        data = Data(url)
        # The page URL doubles as the system message here.
        messages = [{"role": "system", "content": url}]
        # Replay prior turns so the model sees the full conversation.
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        # Fold the page contents into the latest user message.
        messages.append({"role": "user", "content": data.build_prompt(message)})
        response = ""
        # Stream tokens and yield the accumulated reply after each chunk.
        for chunk in self.client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some stream chunks carry no content (e.g. the final one), so
            # guard against None before concatenating.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response


model = Model()
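
# --- Usage sketch (hypothetical, not part of the original file) ---
# `respond` is a generator that yields the accumulated reply after each
# streamed chunk, the shape Gradio's ChatInterface expects for streaming.
# The URL and question below are placeholders; a valid
# HUGGINGFACEHUB_API_TOKEN must be set in the environment.
if __name__ == "__main__":
    final = ""
    for partial in model.respond(
        "What is this page about?",  # latest user message
        history=[],                  # no earlier turns
        url="https://example.com",   # page handed to Data for grounding
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    ):
        final = partial              # each yield is the reply so far
    print(final)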