wjbmattingly committed on
Commit
8335937
1 Parent(s): 83f93f3
Files changed (2) hide show
  1. app.py +59 -4
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,7 +1,62 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import spaces
5
 
6
# Hub identifier shared by the weights and the tokenizer
model_name = "Qwen/Qwen2-72B-Instruct"

# Weights are loaded in half precision and are not moved to the GPU here.
# NOTE(review): 72B parameters at 2 bytes each is roughly 144 GB of weights;
# confirm the target hardware can actually hold this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
16
+
17
@spaces.GPU
def generate_text(prompt):
    """Generate a chat-style reply to ``prompt`` with the module-level model.

    The prompt is wrapped in a two-message conversation (a fixed system
    message plus the user's text), rendered through the tokenizer's chat
    template, and completed by sampling at most 500 new tokens.

    Args:
        prompt: User text to answer.

    Returns:
        The decoded completion only: prompt tokens are sliced off and
        special tokens are skipped.
    """
    try:
        # Move model to GPU when function is called
        model.to('cuda')

        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = tokenizer([text], return_tensors="pt").to('cuda')
        # Single-sequence batch, so one prompt length covers every row.
        prompt_length = model_inputs.input_ids.shape[1]

        with torch.no_grad():
            # Pass the whole encoding so the attention mask produced by the
            # tokenizer reaches generate() alongside the input ids.
            generated_ids = model.generate(
                **model_inputs,
                temperature=0.7,
                max_new_tokens=500,
                do_sample=True,
                top_p=0.95,
            )

        # Keep only the newly generated tokens.
        completion_ids = generated_ids[:, prompt_length:]
        return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
    finally:
        # Move model back to CPU to free up GPU resources, even when
        # generation or decoding raised.
        model.to('cpu')
51
+
52
# Build the Gradio UI: a five-line prompt box in, a single text box out
prompt_box = gr.Textbox(lines=5, label="Input Prompt")
output_box = gr.Textbox(label="Generated Text")

iface = gr.Interface(
    fn=generate_text,
    inputs=prompt_box,
    outputs=output_box,
    title="Qwen Text Generator (Spaces GPU)",
    description="Enter a prompt to generate text using the Qwen model. This Space uses Spaces GPU for efficient GPU usage.",
)

# Start serving the interface
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ spaces
3
+ transformers