Spaces:
Runtime error
Runtime error
import os
from collections.abc import Iterator
from threading import Thread

import gradio as gr
import spaces
from transformers import GemmaTokenizer, AutoModelForCausalLM, TextIteratorStreamer
# Read the Hugging Face access token from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Single source of truth for the checkpoint used by both tokenizer and model.
MODEL_ID = "google/codegemma-7b-it"

# Load the tokenizer and model. The token is forwarded so the download can
# authenticate if the repository requires it; token=None keeps the library's
# default credential lookup.
tokenizer = GemmaTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", token=HF_TOKEN)
def codegemma(message: str, history: list, temperature: float, max_new_tokens: int) -> Iterator[str]:
    """Stream a CodeGemma reply to ``message`` given the prior conversation.

    Args:
        message: The new user message.
        history: The conversation history supplied by ChatInterface. Each item
            is indexed as ``item[0]`` (user text) and ``item[1]`` (assistant
            reply; skipped when it is ``None``).
        temperature: Sampling temperature. Values above 0 enable sampling;
            0 selects greedy decoding.
        max_new_tokens: The maximum number of new tokens to generate.

    Yields:
        The reply accumulated so far, growing as new text is streamed.
    """
    chat = []
    for item in history:
        chat.append({"role": "user", "content": item[0]})
        if item[1] is not None:
            chat.append({"role": "assistant", "content": item[1]})
    chat.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # The chat template already rendered any special tokens into the prompt, so
    # the tokenizer must not add them a second time. Inputs are moved to the
    # device the model was placed on (the original referenced an undefined
    # `device` name here).
    model_inputs = tokenizer(
        [prompt], return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
    )
    # Temperature only takes effect when sampling is enabled; a temperature of
    # 0 is not a valid sampling temperature, so fall back to greedy decoding.
    if temperature > 0:
        generate_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generate_kwargs["do_sample"] = False

    # Run generation in a background thread so this generator can consume the
    # streamer while tokens are still being produced.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Yield the full text so far; each yield replaces the displayed reply.
        yield partial_text
| placeholder = """ | |
| <div style="opacity: 0.65;"> | |
| <img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/7dd7659cff2eab51f0f5336f378edfca01dd16fa/gemma_lockup_vertical_full-color_rgb.png" style="width:30%;"> | |
| <br><b>CodeGemma-7B-IT Chatbot</b> | |
| </div> | |
| """ | |
# Gradio UI: the chat display is created up front and handed to ChatInterface.
chatbot = gr.Chatbot(placeholder=placeholder)

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# CODEGEMMA-7b-IT")

    # Generation settings. These are created with render=False and passed to
    # ChatInterface, which receives them as its additional inputs.
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
    temperature_slider = gr.Slider(
        minimum=0, maximum=1, value=0.95, label="Temperature", render=False
    )
    max_new_tokens_slider = gr.Slider(
        minimum=128, maximum=4096, value=512, label="Max new tokens", render=False
    )

    gr.ChatInterface(
        fn=codegemma,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=[temperature_slider, max_new_tokens_slider],
        examples=[["Write a Python function to calculate the nth fibonacci number."]],
        cache_examples=False,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=False)