Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer | |
| import torch | |
| from threading import Thread | |
# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_id = "TheDrummer/Tiger-Gemma-9B-v3"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load options: bfloat16 weights, with device placement delegated to
# device_map="auto".
_model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_options)
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Accepts both history shapes: pair-style entries
    ``(user_message, assistant_reply)`` and "messages"-style entries that are
    already dicts carrying ``"role"`` and ``"content"`` keys.
    """
    messages = []
    for entry in history:
        if isinstance(entry, dict):
            # Keep only the two keys the chat template needs; any extra
            # keys on the entry are dropped.
            messages.append({"role": entry["role"], "content": entry["content"]})
        else:
            user_msg, bot_msg = entry
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    return messages


def respond(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The new user message (text).
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            role/content dicts.

    Yields:
        The reply text accumulated so far, once per chunk produced by the
        streamer, so the caller can re-render the growing answer.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    # Build the conversation (no system prompt is added).
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # return_dict=True yields input_ids together with the attention mask, so
    # both can be forwarded to generate() as keyword arguments.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    failures = []

    def _generate():
        try:
            model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                streamer=streamer,
            )
        except Exception as exc:  # thread boundary: hand the error to the consumer
            failures.append(exc)
            # Without an end-of-stream signal the consumer loop below would
            # wait on the streamer's queue forever.
            streamer.end()

    worker = Thread(target=_generate)
    worker.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial

    worker.join()
    if failures:
        raise failures[0]
# Build the chat UI around `respond`, then start the server with a share link
# requested.
demo = gr.ChatInterface(
    fn=respond,
    title="Tiger-Gemma 9B Chat",
    description="Powered by TheDrummer/Tiger-Gemma-9B-v3",
)
demo.launch(share=True)