# Hugging Face Spaces app. (The Space's status page showed "Runtime error"
# when this snippet was captured.)
import time

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

# Fine-tuned model ID on the Hugging Face Hub.
model_id = "NSTiwari/fine_tuned_science_gemma2b-it"

# Load tokenizer and model once at startup (downloads weights on first run).
# bfloat16 halves memory vs. fp32; device_map="auto" places weights on the
# available accelerator (or CPU) automatically.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
def inference(input_text):
    """Generate an answer to a science question and measure latency.

    Args:
        input_text: The question to send to the fine-tuned model.

    Returns:
        dict with:
            "answer":  the decoded model response (prompt tokens stripped),
            "latency": wall-clock generation time, e.g. "1.23 seconds".
    """
    start_time = time.time()
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = inputs["input_ids"].shape[1]
    outputs = model.generate(
        **inputs,  # forwards attention_mask too, not just input_ids
        max_new_tokens=512,  # bound generated length; max_length would count the prompt
        do_sample=False,  # greedy decoding for reproducible answers
    )
    # Drop the echoed prompt tokens and strip special tokens (<bos>/<eos>)
    # so they don't leak into the user-visible answer.
    response = tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
    end_time = time.time()
    return {"answer": response, "latency": f"{end_time - start_time:.2f} seconds"}
def gradio_interface(question):
    """Adapter between the Gradio UI and :func:`inference`.

    Unpacks the result dict into the two output fields the interface
    expects: the answer text and the latency string, in that order.
    """
    outcome = inference(question)
    return outcome["answer"], outcome["latency"]
# Assemble the web UI: one question box in, answer + latency boxes out.
question_box = gr.Textbox(label="Science Question", lines=4)
answer_box = gr.Textbox(label="Answer")
latency_box = gr.Textbox(label="Latency")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=question_box,
    outputs=[answer_box, latency_box],
    title="Science Q&A with Fine-tuned Model",
    description="Ask a science question and get an answer from the fine-tuned model.",
)

# Launch the server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()