# NOTE: "Spaces: / Runtime error" below is page-header residue captured when
# this file was copied from the Hugging Face Space listing; kept as a comment
# so the module stays importable.
# Earlier one-liner version using gr.load (kept for reference):
# import gradio as gr
# demo = gr.load("models/NSTiwari/fine_tuned_science_gemma2b-it")
# demo.launch()
"""Gradio demo: science Q&A with a fine-tuned Gemma 2b-it model."""

import time

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

# Fine-tuned model ID on the Hugging Face Hub.
model_id = "NSTiwari/fine_tuned_science_gemma2b-it"

# Load tokenizer and model once at startup. bfloat16 halves memory vs fp32;
# device_map="auto" places weights on the available accelerator (CPU fallback).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
def inference(input_text):
    """Answer a science question and measure generation latency.

    Args:
        input_text: The question to send to the model.

    Returns:
        dict with keys:
          "answer":  generated text with the prompt tokens stripped.
          "latency": wall-clock generation time, e.g. "1.23 seconds".
    """
    start_time = time.time()
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = inputs["input_ids"].shape[1]
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # explicit mask avoids pad/eos ambiguity warnings
        # max_new_tokens caps only the *generated* continuation; the original
        # max_length=512 counted prompt + completion, so long prompts silently
        # truncated the answer.
        max_new_tokens=512,
        do_sample=False,  # greedy decoding for reproducible answers
    )
    # Slice off the prompt and decode only the new tokens; skip_special_tokens
    # keeps markers like <eos> out of the user-visible answer.
    generated_sequence = outputs[:, input_length:]
    response = tokenizer.decode(generated_sequence[0], skip_special_tokens=True)
    end_time = time.time()
    return {"answer": response, "latency": f"{end_time - start_time:.2f} seconds"}
def gradio_interface(question):
    """Adapter between the Gradio UI and inference().

    Args:
        question: Science question typed into the textbox.

    Returns:
        Tuple of (answer text, latency string), matching the interface's
        two output components in order.
    """
    result = inference(question)
    return result["answer"], result["latency"]
# Gradio interface definition: one question textbox in, answer + latency out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Science Question", lines=4),
    outputs=[gr.Textbox(label="Answer"), gr.Textbox(label="Latency")],
    title="SciGemma",
    description="Ask a science question and get an answer from the fine-tuned Gemma 2b-it model.",
    examples=[
        ["What does air consist of?"],
        ["What is an atom?"],
    ],
)
if __name__ == "__main__":
    # Start the Gradio server (blocking call).
    iface.launch()