Chat_Flash / app.py
KingNish's picture
Update app.py
41838e1 verified
raw
history blame
780 Bytes
import gradio as gr
from huggingface_hub import InferenceClient
client = InferenceClient("google/gemma-1.1-2b-it")
def models(Query):
messages = []
messages.append({"role": "user", "content": f"[SYSTEM] You are ASSISTANT who answer question asked by user in short and concise manner. [USER] {Query}"})
Response = ""
for message in client.chat_completion(
messages,
max_tokens=2048,
stream=True
):
token = message.choices[0].delta.content
Response += token
yield Response
description="# Chat GO\n### Enter your query and Press enter and get lightning fast response"
demo = gr.Interface(description=description,fn=models, inputs=["text"], outputs="text")
demo.queue(max_size=300000)
demo.launch()