from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn

app = FastAPI()

# Zero-shot classification model served through the Hugging Face Inference API.
client = InferenceClient("FacebookAI/roberta-large-mnli")


class Item(BaseModel):
    prompt: str


def generate(item: Item):
    text = item.prompt
    print(text)

    # Candidate labels for zero-shot classification.
    labels = ["Requirement", "Information"]
    print(labels)

    # zero_shot_classification classifies the request text (not a hardcoded
    # sample) and returns a list of elements with .label and .score
    # attributes, highest-scoring label first.
    result = client.zero_shot_classification(text, labels)

    print("Predicted labels:")
    for element in result:
        print(element.label, element.score)

    # Return the label/score pairs as a plain dict so the endpoint can
    # serialize them as JSON.
    return {element.label: element.score for element in result}


@app.post("/generate/")
async def generate_text(item: Item):
    return {"response": generate(item)}


if __name__ == "__main__":
    # Run the API locally; host and port are assumptions for local development.
    uvicorn.run(app, host="0.0.0.0", port=8000)
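# --- Usage sketch (not part of the service itself) ---
# A minimal example of calling the endpoint above, assuming the file is saved
# as main.py and served locally with `uvicorn main:app` on port 8000 (both
# the filename and the port are assumptions, not fixed by the code):
#
#   curl -X POST http://127.0.0.1:8000/generate/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "The car shall be slow."}'
#
# The response wraps the label/score mapping returned by generate(), e.g.
# (scores illustrative only, not real model output):
#
#   {"response": {"Requirement": 0.98, "Information": 0.02}}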