yilunzhao commited on
Commit
e3dce0b
·
verified ·
1 Parent(s): 8542d23

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +46 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import torch
4
+ import spaces
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
6
+
7
+ # Load model and tokenizer if a GPU is available
8
+ if torch.cuda.is_available():
9
+ model_id = "allenai/OLMo-7B-Instruct"
10
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
11
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
12
+ else:
13
+ raise EnvironmentError("CUDA device not available. Please run on a GPU-enabled environment.")
14
+
15
+ # Basic function to generate response based on passage and question
16
+ @spaces.GPU
17
+ def generate_response(passage: str, question: str) -> str:
18
+ # Prepare the input text by combining the passage and question
19
+ user_message = f"Passage: {passage}\nQuestion: {question}"
20
+ inputs = tokenizer(user_message, return_tensors="pt").to(model.device)
21
+
22
+ # Generate text, focusing only on the new tokens added by the model
23
+ outputs = model.generate(inputs.input_ids, max_new_tokens=150)
24
+
25
+ # Decode only the generated part, skipping the prompt input
26
+ generated_tokens = outputs[0][inputs.input_ids.shape[-1]:] # Ignore input tokens in the output
27
+ response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
28
+
29
+ return response
30
+
31
+
32
+ # Gradio Interface
33
+ with gr.Blocks() as demo:
34
+ gr.Markdown("# Passage and Question Response Generator")
35
+
36
+ passage_input = gr.Textbox(label="Passage", placeholder="Enter the passage here", lines=5)
37
+ question_input = gr.Textbox(label="Question", placeholder="Enter the question here", lines=2)
38
+
39
+ output_box = gr.Textbox(label="Response", placeholder="Model's response will appear here")
40
+
41
+ submit_button = gr.Button("Generate Response")
42
+ submit_button.click(fn=generate_response, inputs=[passage_input, question_input], outputs=output_box)
43
+
44
+ # Run the app
45
+ if __name__ == "__main__":
46
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ai2-olmo
2
+ accelerate