Spaces:
Runtime error
wjbmattingly committed
Commit 8335937 • 1 Parent(s): 83f93f3
init
Browse files
- app.py +59 -4
- requirements.txt +3 -0
app.py
CHANGED
@@ -1,7 +1,62 @@
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import spaces

+# Load the model and tokenizer
+model_name = "Qwen/Qwen2-72B-Instruct"

+# Load model (without moving to GPU yet)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    trust_remote_code=True
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+@spaces.GPU
+def generate_text(prompt):
+    # Move model to GPU when function is called
+    model.to('cuda')
+
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to('cuda')
+
+    with torch.no_grad():
+        generated_ids = model.generate(
+            model_inputs.input_ids,
+            temperature=0.7,
+            max_new_tokens=500,
+            do_sample=True,
+            top_p=0.95
+        )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    # Move model back to CPU to free up GPU resources
+    model.to('cpu')
+
+    return response
+
+# Create Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=gr.Textbox(lines=5, label="Input Prompt"),
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Qwen Text Generator (Spaces GPU)",
+    description="Enter a prompt to generate text using the Qwen model. This Space uses Spaces GPU for efficient GPU usage."
+)
+
+# Launch the app
+iface.launch()
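For reference, a minimal, runnable sketch of the same chat-template -> generate -> decode flow used in generate_text above, scaled down so it can run on CPU. The stand-in model id Qwen/Qwen2-0.5B-Instruct and the example prompt are assumptions for illustration and are not part of this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Assumed small stand-in model so the sketch fits on CPU; the Space itself loads Qwen2-72B-Instruct.
name = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one short sentence."},  # illustrative prompt
]

# Same steps as generate_text(): render the chat template, generate, then decode only the new tokens.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([text], return_tensors="pt")

with torch.no_grad():
    output_ids = model.generate(
        inputs.input_ids,
        temperature=0.7,
        max_new_tokens=50,
        do_sample=True,
        top_p=0.95,
    )

new_tokens = output_ids[0, inputs.input_ids.shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))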
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+torch
+spaces
+transformers
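Once the Space builds and runs (it currently shows a runtime error), one way to call it programmatically is through gradio_client. A sketch, assuming a hypothetical Space id wjbmattingly/qwen2-spaces-gpu (the real repo id is not shown in this commit) and the default /predict endpoint that gr.Interface exposes:

from gradio_client import Client

# Hypothetical Space id; substitute the actual wjbmattingly Space name.
client = Client("wjbmattingly/qwen2-spaces-gpu")

# gr.Interface apps expose a single default endpoint named /predict.
result = client.predict("Write a two-line poem about GPUs.", api_name="/predict")
print(result)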