zubairsamo committed on
Commit c2b2585
1 Parent(s): cc5a14b

Reversing Code With Alpaca Implementation

Files changed (1)
  1. app.py +25 -15
app.py CHANGED
@@ -1,27 +1,30 @@
-from threading import Thread
+from threading import Thread  # Import the Thread class from the threading module
 
-import torch
-import gradio as gr
-from transformers import AutoTokenizer, AutoModel,TextIteratorStreamer
+import torch  # Import the PyTorch library
+import gradio as gr  # Import Gradio for creating a UI
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer  # Import Hugging Face Transformers
 
-model_id = "microsoft/codebert-base"
+# Define the Hugging Face model ID and check for available GPU (cuda)
+model_id = "declare-lab/flan-alpaca-large"
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
 
+# Load the pre-trained model based on the device
 if torch_device == "cuda":
-    model = AutoModel.from_pretrained(model_id, device_map="auto")
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
 else:
-    model = AutoModel.from_pretrained(model_id)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
 
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
+# Define a function to run model text generation
 def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
     # Get the model and tokenizer, and tokenize the user text.
     model_inputs = tokenizer([user_text], return_tensors="pt").to(torch_device)
 
-    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
-    # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
+    # Start generation on a separate thread, so that we don't block the UI.
+    # Adds timeout to the streamer to handle exceptions in the generation thread.
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
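
Note on this hunk: besides pointing at a new checkpoint, the commit swaps AutoModel for AutoModelForSeq2SeqLM. microsoft/codebert-base is an encoder-only model and AutoModel returns the bare encoder, so the old app had no usable generation path; declare-lab/flan-alpaca-large is a seq2seq model whose generation head the new class loads. The tokenizer load also moves below the model setup. Below is a minimal sketch of the new loading path, assuming bitsandbytes and accelerate are installed (load_in_8bit=True needs both plus a CUDA GPU; recent transformers releases prefer a BitsAndBytesConfig passed as quantization_config):

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "declare-lab/flan-alpaca-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)

if torch.cuda.is_available():
    # 8-bit weights cut memory use roughly 4x vs fp32; needs bitsandbytes + accelerate.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
else:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Smoke test: seq2seq models expose a usable generate(); bare encoders do not.
inputs = tokenizer(["What is a hash map?"], return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=40)[0], skip_special_tokens=True))
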
@@ -32,37 +35,42 @@ def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
         temperature=float(temperature),
         top_k=top_k
     )
+
+    # Create a new thread for model generation
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 
-    # Pull the generated text from the streamer, and update the model output.
     model_output = ""
     for new_text in streamer:
         model_output += new_text
         yield model_output
     return model_output
 
-
+# Define a function to reset the user input textbox
 def reset_textbox():
     return gr.update(value='')
 
-
+# Create a Gradio UI interface
 with gr.Blocks() as demo:
-    # duplicate_link = "http://huggingface.co/spaces/zubairsamo/transformers_streaming?duplicate=true"
+    # Display a title
     gr.Markdown(
         "# Testing ALPACA Model \n"
     )
 
     with gr.Row():
         with gr.Column(scale=4):
+            # Create a textbox for user input
             user_text = gr.Textbox(
                 placeholder="Ask Me Anything ... ",
                 label="User input"
             )
+            # Create a textbox for model output
             model_output = gr.Textbox(label="Model output", lines=10, interactive=False)
+            # Create a submit button
             button_submit = gr.Button(value="Submit")
 
         with gr.Column(scale=1):
+            # Create sliders for adjusting generation parameters
             max_new_tokens = gr.Slider(
                 minimum=1, maximum=1000, value=250, step=1, interactive=True, label="Max New Tokens",
             )
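
Note on this hunk: generation runs on a worker thread while the Gradio callback iterates the TextIteratorStreamer in the main thread, yielding the accumulated text so the output textbox updates as tokens arrive; timeout=10. makes the iterator raise instead of hanging forever if the generation thread dies. A self-contained sketch of the same pattern, reusing model and tokenizer from the previous snippet (the prompt string is just an example):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt, max_new_tokens=100):
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # skip_prompt drops the echoed input; the timeout surfaces worker-thread errors here.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens, do_sample=True)
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    text = ""
    for chunk in streamer:  # blocks until the worker thread pushes new tokens
        text += chunk
        yield text  # hand partial text to the caller (Gradio, a CLI, ...)

for partial in stream_reply("Explain beam search in one paragraph."):
    print(partial)

The dict(model_inputs, ...) idiom in the app works the same way: the tokenizer output is a mapping, so the input tensors and the sampling options merge into a single kwargs dict for generate.
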
@@ -76,7 +84,9 @@ with gr.Blocks() as demo:
                 minimum=0.1, maximum=5.0, value=0.8, step=0.1, interactive=True, label="Temperature",
             )
 
+    # Set up the submission of user input
     user_text.submit(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
     button_submit.click(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
 
-demo.queue(max_size=32).launch(enable_queue=True)
+    # Launch the Gradio interface
+    demo.queue(max_size=32).launch(enable_queue=True)
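
Note: demo.queue(max_size=32) already turns the queue on, so enable_queue=True is redundant on Gradio 3.x, and the kwarg was removed from launch() in Gradio 4.x, where this line raises a TypeError. reset_textbox() is also defined but never attached to any event. A sketch of the ending for recent Gradio releases, with the reset wired to the submit button as one hypothetical use (not in the commit):

# Hypothetical wiring: clear the input box after each submit.
button_submit.click(reset_textbox, [], user_text)

# On Gradio 4.x, queue() alone enables queuing; launch() no longer accepts enable_queue.
demo.queue(max_size=32).launch()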