|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
|
|
# Hub repository that supplies both the tokenizer and the model weights.
MODEL_ID = "TuringsSolutions/Gemma2LegalEdition"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): trust_remote_code=True permits code shipped in the model
# repository to run locally -- confirm this checkpoint actually requires it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Move the weights once at start-up so every request reuses the same device.
model.to(device)
|
|
|
def predict(prompt: str, temperature: float, max_tokens: int) -> str:
    """Generate text for *prompt* with the module-level model.

    Args:
        prompt: Text fed to the tokenizer as the generation prefix.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_tokens: Upper bound on newly generated tokens; coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        The first generated sequence decoded with special tokens removed.
        NOTE(review): for decoder-only models this sequence normally starts
        with the prompt itself -- confirm that echoing the prompt is wanted.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),
        # Temperature only influences sampling-based decoding; without
        # do_sample=True generation stays greedy and the slider is ignored.
        do_sample=True,
        temperature=temperature,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
# Input widgets, listed in the order predict() receives them:
# prompt, temperature, max_tokens.
prompt_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.7,
    label="Temperature",
)
token_count_slider = gr.Slider(
    minimum=10,
    maximum=200,
    value=50,
    step=10,
    label="Number of Output Tokens",
)

# NOTE(review): the title mentions "Phi3" while the checkpoint loaded in this
# file is "TuringsSolutions/Gemma2LegalEdition" -- confirm the intended name.
iface = gr.Interface(
    fn=predict,
    inputs=[prompt_box, temperature_slider, token_count_slider],
    outputs="text",
    title="Phi3 Law Case Management Model",
    description="A model to assist with law case management. Adjust the temperature and number of output tokens as needed.",
)

# Start the Gradio app with its default launch settings.
iface.launch()