# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.

# %% auto 0
__all__ = ['HF_TOKEN', 'ENDPOINT_URL', 'title', 'description', 'get_model_endpoint_params', 'query_chat_api',
           'inference_chat']

# %% app.ipynb 0
import gradio as gr
import requests
import json
import os
from pathlib import Path
from dotenv import load_dotenv

# %% app.ipynb 1
if Path(".env").is_file():
    load_dotenv(".env")
HF_TOKEN = os.getenv("HF_TOKEN")
ENDPOINT_URL = os.getenv("ENDPOINT_URL")

# %% app.ipynb 2
def get_model_endpoint_params(model_id):
    """Return the endpoint URL, request headers, and whether the endpoint
    supports the `max_new_tokens` parameter for the given model ID."""
    if "joi" in model_id:
        # Dedicated inference endpoint: no extra headers needed.
        headers = None
        max_new_tokens_supported = True
        return ENDPOINT_URL, headers, max_new_tokens_supported
    else:
        # Hosted Inference API: authenticate and wait for the model to load.
        max_new_tokens_supported = False
        headers = {"Authorization": f"Bearer {HF_TOKEN}", "x-wait-for-model": "1"}
        return (
            f"https://api-inference.huggingface.co/models/{model_id}",
            headers,
            max_new_tokens_supported,
        )

# %% app.ipynb 3
def query_chat_api(
    model_id,
    inputs,
    temperature,
    top_p
):
    """Send a generation request to the model endpoint and return the parsed
    JSON response, or an error string on failure."""
    endpoint, headers, max_new_tokens_supported = get_model_endpoint_params(model_id)

    payload = {
        "inputs": inputs,
        "parameters": {
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
        },
    }

    if max_new_tokens_supported:
        payload["parameters"]["max_new_tokens"] = 100
        payload["parameters"]["repetition_penalty"] = 1.03
        payload["parameters"]["stop"] = ["Human:"]
    else:
        payload["parameters"]["max_length"] = 512

    response = requests.post(endpoint, json=payload, headers=headers)

    if response.status_code == 200:
        return response.json()
    else:
        return "Error: " + response.text

# %% app.ipynb 5
def inference_chat(
    model_id,
    text_input,
    temperature,
    top_p,
    history=None,
):
    """Run one chat turn: build the prompt from the template and conversation
    history, query the model, and return the updated chat display and state."""
    # Avoid a mutable default argument so history doesn't persist across calls.
    if history is None:
        history = []

    if "joi" in model_id:
        prompt_filename = "langchain_default.json"
    else:
        prompt_filename = "anthropic_hhh_single.json"
    print(prompt_filename)

    with open(f"prompt_templates/{prompt_filename}", "r") as f:
        prompt_template = json.load(f)

    # History alternates between human and assistant turns.
    history_input = ""
    for idx, text in enumerate(history):
        if idx % 2 == 0:
            history_input += f"Human: {text}\n"
        else:
            history_input += f"Assistant: {text}\n"
    history_input = history_input.rstrip("\n")

    inputs = prompt_template["prompt"].format(human_input=text_input, history=history_input)
    history.append(text_input)

    print(f"History: {history}")
    print(f"Inputs: {inputs}")

    output = query_chat_api(model_id, inputs, temperature, top_p)

    if isinstance(output, list):
        output = output[0]

    # Remove the trailing "Human:" stop sequence, if present.
    output = output["generated_text"].rstrip()
    if output.endswith("Human:"):
        output = output[: -len("Human:")].rstrip()
    history.append(" " + output)

    # Convert the flat history into a list of (human, assistant) tuples.
    chat = [
        (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
    ]

    # `chatbot` and `state` are gradio components defined elsewhere in app.ipynb.
    return {chatbot: chat, state: history}

# %% app.ipynb 21
title = """