# Scraped page header (not part of the program): "Spaces: Runtime error"
import gradio as gr | |
import os | |
import torch | |
import spaces | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
from threading import Thread | |
# Hub access token read from the "secrets_sindhi_token" environment variable.
# It is None when the variable is unset, in which case the loads below are anonymous.
HF_TOKEN = os.environ.get("secrets_sindhi_token", None)

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_ID = "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0"

# Load the tokenizer and model. The token is forwarded to both calls; previously it
# was read from the environment but never used.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    token=HF_TOKEN,
)

# Create the text generation pipeline shared by every chat request.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Function to generate text
def gemma(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
    """Generate a reply to ``message`` using the module-level ``generator`` pipeline.

    Args:
        message: The latest user message; it is placed in the ``### Input:``
            section of the prompt template.
        history: Earlier conversation turns passed in by the chat UI. Accepted
            to keep the callback signature, but not used: the prompt is built
            from ``message`` only.
        temperature: Sampling temperature. A value of 0 (or below) switches to
            greedy decoding, because sampling requires a strictly positive
            temperature.
        max_new_tokens: Upper bound on the number of newly generated tokens
            (the prompt length does not count against it).

    Returns:
        The generated continuation only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    # Instruction / Input / Response template; the text is sent to the model verbatim.
    input_prompt = f"""
### Instruction:
You are an AI assistant. Engage in a conversation with the user and provide helpful responses.
### Input:
{message}
### Response:
"""

    generation_kwargs = {
        # `max_length` would also count the prompt tokens; the UI exposes a
        # "Max new tokens" control, so bound only the continuation.
        "max_new_tokens": int(max_new_tokens),
        # Return just the continuation instead of prompt + continuation.
        "return_full_text": False,
    }
    if temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
    else:
        # The temperature slider starts at 0, which is invalid for sampling.
        generation_kwargs["do_sample"] = False

    result = generator(input_prompt, **generation_kwargs)
    return result[0]["generated_text"].strip()
# Gradio interface: the chat widget is created up front and handed to ChatInterface.
chatbot = gr.Chatbot(placeholder="پنهنجي مقامي ٻولي ۾ ترغيب ڏيو", height=500)

with gr.Blocks(fill_height=True) as demo:
    # Header: title and welcome line
    gr.Markdown("<h1 style='text-align: center; color: #4CAF50;'>سنڌي ٻولي اسسٽنٽ</h1>")
    gr.Markdown("<p style='text-align: center;'>توهان جي ذاتي AI اسسٽنٽ ۾ ڀليڪار! سنڌي ۾ ڪجهه به پڇو..</p>")

    # Generation controls, created with render=False and passed to ChatInterface below.
    # Their order matches the (temperature, max_new_tokens) parameters of `gemma`.
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
    temperature_slider = gr.Slider(
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.95,
        label="Temperature",
        render=False,
    )
    max_tokens_slider = gr.Slider(
        minimum=128,
        maximum=4096,
        step=1,
        value=512,
        label="Max new tokens",
        render=False,
    )

    # Chat panel wired to the `gemma` callback
    gr.ChatInterface(
        fn=gemma,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=[temperature_slider, max_tokens_slider],
        examples=[
            ["مون کي ڪانءَ جي ڪهاڻي ٻڌاءِ"],
        ],
        cache_examples=False,
    )

    # Footer
    footer_html = """
<p style='text-align: center; color: #888;'>Made with ❤️ for "سنڌي ٻولي". Powered by Danyal, Yousif & Hajan.</p>
"""
    gr.Markdown(footer_html)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()