kpatlolla committed on
Commit
d77dc87
1 Parent(s): ffb9fea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import gradio as gr
import torch
import numpy as np
from transformers import pipeline

# Startup diagnostic: report whether a CUDA GPU is visible to torch.
# (The original imported torch a second time here and queried the device
# name unconditionally, which raises a RuntimeError on CPU-only hosts.)
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only safe to ask for a device name when at least one GPU exists.
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
11
# The two text2text-generation pipelines being compared side by side.
# Flan-T5-XXL: sharded fp16 checkpoint, loaded in 8-bit and spread across
# available devices by accelerate's "auto" device map.
_flan_kwargs = {"load_in_8bit": True, "device_map": "auto"}
pipe_flan = pipeline(
    "text2text-generation",
    model="philschmid/flan-t5-xxl-sharded-fp16",
    model_kwargs=_flan_kwargs,
)

# Vanilla T5-large: bfloat16 weights pinned to the first GPU.
_vanilla_kwargs = {"torch_dtype": torch.bfloat16}
pipe_vanilla = pipeline(
    "text2text-generation",
    model="t5-large",
    device="cuda:0",
    model_kwargs=_vanilla_kwargs,
)
14
# UI copy for the Gradio demo page.
title = "Flan T5 and Vanilla T5"
# Rendered under the title; links to the two model cards being compared.
description = "This demo compares [T5-large](https://huggingface.co/t5-large) and [Flan-T5-XX-large](https://huggingface.co/google/flan-t5-xxl). Note that T5 expects a very specific format of the prompts, so the examples below are not necessarily the best prompts to compare."
17
def inference(text):
    """Run *text* through both pipelines and return their generations.

    Returns a two-element list: [Flan-T5 output, vanilla T5 output],
    matching the order of the two output Textboxes in the UI.
    """
    generations = []
    for pipe in (pipe_flan, pipe_vanilla):
        generated = pipe(text, max_length=100)[0]["generated_text"]
        generations.append(generated)
    return generations
21
+
22
# Example prompts for the demo UI. Defined here because the original code
# passed `examples=examples` without ever defining the variable, which
# raised a NameError at startup.
examples = [
    ["Please answer the following question. What is the boiling point of Nitrogen?"],
    ["Translate to German: My name is Arthur"],
    ["A step by step recipe to make bolognese pasta:"],
]

# Single-input, dual-output comparison UI: one prompt box, one output box
# per model, in the same order as the list returned by `inference`.
io = gr.Interface(
    inference,
    gr.Textbox(lines=3),
    outputs=[
        gr.Textbox(lines=3, label="Flan T5"),
        gr.Textbox(lines=3, label="T5"),
    ],
    title=title,
    description=description,
    examples=examples,
)
io.launch()