from fastT5 import get_onnx_model
import gradio as gr
from transformers import AutoTokenizer

# Load the tokenizer from the Hugging Face Hub and the quantized ONNX
# version of Macaw from the local "quantized_model" directory.
tokenizer = AutoTokenizer.from_pretrained('allenai/macaw-large')
model = get_onnx_model('allenai/macaw-large', "quantized_model")


def infer(context, question, options=None):
    # Build Macaw's slot-based input: request the $answer$ slot given the
    # $context$ and $question$ slots. (options is accepted for API parity
    # with Macaw's multiple-choice format but is unused here.)
    input_string = "$answer$ ; $context$ = " + context + " ; $question$ = " + question
    input_ids = tokenizer.encode(input_string, return_tensors="pt")
    output = model.generate(input_ids, max_length=200)
    responses = tokenizer.batch_decode(output, skip_special_tokens=True)
    # The model replies in the form "$answer$ = <text>"; keep only <text>.
    return responses[0].split(";")[0].split("=")[1].strip()


def greet(context, question):
    return infer(context, question)


examples = [['', 'What is the color of a cloudy sky?']]

iface = gr.Interface(fn=greet, inputs=["text", "text"], outputs="text", examples=examples)
iface.launch()
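# One-time setup (a sketch, not part of the original script): get_onnx_model()
# above loads ONNX files that must already exist under "quantized_model".
# Assuming fastT5's export_and_get_onnx_model and its custom_output_path
# parameter, they can be exported and quantized once beforehand with:
#
#     from fastT5 import export_and_get_onnx_model
#     export_and_get_onnx_model('allenai/macaw-large',
#                               custom_output_path='quantized_model')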