lightmate committed
Commit 9b61493 · verified · 1 Parent(s): 8f9fe18

Update app.py

Files changed (1)
  app.py +145 -151
app.py CHANGED
@@ -17,159 +17,153 @@ from llm_config import SUPPORTED_LLM_MODELS
 # Initialize model language options
 model_languages = list(SUPPORTED_LLM_MODELS)
 
-# Gradio components for selecting model language and model ID
-model_language = gr.Dropdown(
-    choices=model_languages,
-    value=model_languages[0],
-    label="Model Language"
-)
-
-# Gradio dropdown for selecting model ID based on language
-def update_model_id(model_language_value):
-    model_ids = list(SUPPORTED_LLM_MODELS[model_language_value])
-    return model_ids[0], gr.update(choices=model_ids)
-
-model_id = gr.Dropdown(
-    choices=[],  # will be dynamically populated
-    label="Model",
-    value=None
-)
-
-model_language.change(update_model_id, inputs=model_language, outputs=[model_id])
-
-# Gradio checkbox for preparing INT4 model
-prepare_int4_model = gr.Checkbox(
-    value=True,
-    label="Prepare INT4 Model"
-)
-
-# Gradio checkbox for enabling AWQ (depends on INT4 checkbox)
-enable_awq = gr.Checkbox(
-    value=False,
-    label="Enable AWQ",
-    visible=False
-)
-
-# Gradio dropdown for device selection (replaces device_widget)
-device = gr.Dropdown(
-    choices=["CPU", "GPU"],
-    value="CPU",
-    label="Device"
-)
-
-# Model directory and setup based on selections
-def get_model_path(model_language_value, model_id_value):
-    model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
-    pt_model_id = model_configuration["model_id"]
-    pt_model_name = model_id_value.split("-")[0]
-    int4_model_dir = Path(model_id_value) / "INT4_compressed_weights"
-    return model_configuration, int4_model_dir, pt_model_name
-
-# Function to download the model if not already present
-def download_model_if_needed(model_language_value, model_id_value):
-    model_configuration, int4_model_dir, pt_model_name = get_model_path(model_language_value, model_id_value)
-
-    int4_weights = int4_model_dir / "openvino_model.bin"
-
-    if not int4_weights.exists():
-        print(f"Downloading model {model_id_value}...")
-        # Add your download logic here (e.g., from a URL)
-        # Example:
-        # r = requests.get(model_configuration["model_url"])
-        # with open(int4_weights, "wb") as f:
-        #     f.write(r.content)
-
-    return int4_model_dir
-
-# Load the model
-def load_model(model_language_value, model_id_value):
-    int4_model_dir = download_model_if_needed(model_language_value, model_id_value)
-
-    # Load the OpenVINO model
-    ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
-    core = ov.Core()
-
-    model_dir = int4_model_dir
-    model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
-
-    tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
-    ov_model = OVModelForCausalLM.from_pretrained(
-        model_dir,
-        device=device.value,  # Use Gradio dropdown value for device
-        ov_config=ov_config,
-        config=AutoConfig.from_pretrained(model_dir, trust_remote_code=True),
-        trust_remote_code=True
-    )
-
-    return tok, ov_model, model_configuration
-
-# Gradio interface function for generating text responses
-def generate_response(history, temperature, top_p, top_k, repetition_penalty, model_language_value, model_id_value):
-    tok, ov_model, model_configuration = load_model(model_language_value, model_id_value)
-
-    # Convert history to tokens
-    def convert_history_to_token(history):
-        # (Your history conversion logic here)
-        # Use model_configuration to determine the exact format
-        input_tokens = tok(" ".join([msg[0] for msg in history]), return_tensors="pt").input_ids
-        return input_tokens
-
-    input_ids = convert_history_to_token(history)
-    streamer = gr.Textbox.update()
-
-    # Adjust generation kwargs
-    generate_kwargs = dict(
-        input_ids=input_ids,
-        max_new_tokens=256,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        streamer=streamer
-    )
-
-    # Start streaming response
-    event = Event()
-
-    def generate_and_signal_complete():
-        ov_model.generate(**generate_kwargs)
-        event.set()
-
-    t1 = Thread(target=generate_and_signal_complete)
-    t1.start()
-
-    # Collect generated text
-    partial_text = ""
-    for new_text in streamer:
-        partial_text += new_text
-        history[-1][1] = partial_text
-        yield history
-
-# Gradio UI components
-temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
-top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
-top_k = gr.Slider(minimum=0, maximum=50, value=50, label="Top K")
-repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, label="Repetition Penalty")
-
-# Conversation history input/output
-history = gr.State([])  # store the conversation history
-
-# Gradio Interface
-iface = gr.Interface(
-    fn=generate_response,
-    inputs=[
-        history,
-        temperature,
-        top_p,
-        top_k,
-        repetition_penalty,
-        model_language,
-        model_id
-    ],
-    outputs=[gr.Textbox(label="Conversation History")],
-    live=True,
-    title="OpenVINO Chatbot"
-)
+# Gradio Interface inside Blocks
+with gr.Blocks() as iface:
+    model_language = gr.Dropdown(
+        choices=model_languages,
+        value=model_languages[0],
+        label="Model Language"
+    )
+
+    model_id = gr.Dropdown(
+        choices=[],  # will be dynamically populated
+        label="Model",
+        value=None
+    )
+
+    # Function to update model_id dropdown choices based on model_language
+    def update_model_id(model_language_value):
+        model_ids = list(SUPPORTED_LLM_MODELS[model_language_value])
+        return gr.update(value=model_ids[0], choices=model_ids)
+
+    model_language.change(update_model_id, inputs=model_language, outputs=model_id)
+
+    # Gradio checkbox for preparing INT4 model
+    prepare_int4_model = gr.Checkbox(
+        value=True,
+        label="Prepare INT4 Model"
+    )
+
+    # Gradio checkbox for enabling AWQ (depends on INT4 checkbox)
+    enable_awq = gr.Checkbox(
+        value=False,
+        label="Enable AWQ",
+        visible=False
+    )
+
+    # Gradio dropdown for device selection
+    device = gr.Dropdown(
+        choices=["CPU", "GPU"],
+        value="CPU",
+        label="Device"
+    )
+
+    # Model directory and setup based on selections
+    def get_model_path(model_language_value, model_id_value):
+        model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
+        pt_model_id = model_configuration["model_id"]
+        pt_model_name = model_id_value.split("-")[0]
+        int4_model_dir = Path(model_id_value) / "INT4_compressed_weights"
+        return model_configuration, int4_model_dir, pt_model_name
+
+    # Function to download the model if not already present
+    def download_model_if_needed(model_language_value, model_id_value):
+        model_configuration, int4_model_dir, pt_model_name = get_model_path(model_language_value, model_id_value)
+
+        int4_weights = int4_model_dir / "openvino_model.bin"
+
+        if not int4_weights.exists():
+            print(f"Downloading model {model_id_value}...")
+            # Add your download logic here (e.g., from a URL)
+            # Example:
+            # r = requests.get(model_configuration["model_url"])
+            # with open(int4_weights, "wb") as f:
+            #     f.write(r.content)
+
+        return int4_model_dir
+
+    # Load the model
+    def load_model(model_language_value, model_id_value):
+        int4_model_dir = download_model_if_needed(model_language_value, model_id_value)
+
+        # Load the OpenVINO model
+        ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
+        core = ov.Core()
+
+        model_dir = int4_model_dir
+        model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
+
+        tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+        ov_model = OVModelForCausalLM.from_pretrained(
+            model_dir,
+            device=device.value,  # Use Gradio dropdown value for device
+            ov_config=ov_config,
+            config=AutoConfig.from_pretrained(model_dir, trust_remote_code=True),
+            trust_remote_code=True
+        )
+
+        return tok, ov_model, model_configuration
+
+    # Gradio UI for temperature and other model parameters
+    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
+    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
+    top_k = gr.Slider(minimum=0, maximum=50, value=50, label="Top K")
+    repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, label="Repetition Penalty")
+
+    # Conversation history input/output
+    history = gr.State([])  # store the conversation history
+
+    # Gradio function for generating responses
+    def generate_response(history, temperature, top_p, top_k, repetition_penalty, model_language_value, model_id_value):
+        tok, ov_model, model_configuration = load_model(model_language_value, model_id_value)
+
+        def convert_history_to_token(history):
+            input_tokens = tok(" ".join([msg[0] for msg in history]), return_tensors="pt").input_ids
+            return input_tokens
+
+        input_ids = convert_history_to_token(history)
+        streamer = gr.Textbox.update()
+
+        generate_kwargs = dict(
+            input_ids=input_ids,
+            max_new_tokens=256,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty,
+            streamer=streamer
+        )
+
+        event = Event()
+        def generate_and_signal_complete():
+            ov_model.generate(**generate_kwargs)
+            event.set()
+
+        t1 = Thread(target=generate_and_signal_complete)
+        t1.start()
+
+        partial_text = ""
+        for new_text in streamer:
+            partial_text += new_text
+            history[-1][1] = partial_text
+            yield history
+
+    # Interface setup
+    iface = gr.Interface(
+        fn=generate_response,
+        inputs=[
+            history,
+            temperature,
+            top_p,
+            top_k,
+            repetition_penalty,
+            model_language,
+            model_id
+        ],
+        outputs=[gr.Textbox(label="Conversation History")],
+        live=True,
+        title="OpenVINO Chatbot"
+    )
 
 # Launch Gradio app
 if __name__ == "__main__":
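
A note on the `model_language.change` wiring above: when an event has a single output component, Gradio expects the callback to return a single value, and `gr.update(value=..., choices=...)` both repopulates and re-selects a dropdown in one object. A minimal, self-contained sketch of that pattern; the language names and model IDs here are made-up placeholders, not entries from SUPPORTED_LLM_MODELS:

```python
import gradio as gr

# Hypothetical catalogue standing in for SUPPORTED_LLM_MODELS.
MODELS = {"English": ["model-a", "model-b"], "Chinese": ["model-c"]}

with gr.Blocks() as demo:
    language = gr.Dropdown(choices=list(MODELS), value="English", label="Model Language")
    model = gr.Dropdown(choices=[], label="Model")

    def on_language(lang):
        ids = MODELS[lang]
        # One output component, so return exactly one update object.
        return gr.update(value=ids[0], choices=ids)

    language.change(on_language, inputs=language, outputs=model)

if __name__ == "__main__":
    demo.launch()
```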
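On the generation path, `streamer = gr.Textbox.update()` (present on both sides of the diff) produces a plain component-update dict: `generate()` expects a streamer object with `put()`/`end()` callbacks, and a dict is not iterable as a token stream, so the `for new_text in streamer:` loop will not yield generated text. The usual pairing is `transformers.TextIteratorStreamer`; a minimal sketch, assuming `tok` and `ov_model` are already loaded as in `load_model()` above:

```python
from threading import Thread

from transformers import TextIteratorStreamer

def stream_reply(history, tok, ov_model, max_new_tokens=256):
    """Sketch only: names mirror the app above but are assumptions."""
    # Tokenize the flattened history, as convert_history_to_token() does above.
    input_ids = tok(" ".join(msg[0] for msg in history), return_tensors="pt").input_ids

    # TextIteratorStreamer is iterable: generate() pushes decoded text into it.
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,  # sampling, so temperature/top_p/top_k take effect
        streamer=streamer,
    )

    # Run generation in a background thread and consume text as it arrives.
    Thread(target=ov_model.generate, kwargs=generate_kwargs).start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        history[-1][1] = partial_text
        yield history
```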
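Last, constructing `gr.Interface(...)` inside `with gr.Blocks() as iface:` rebinds the name `iface` and mixes the two APIs; the idiomatic Blocks route is to wire a submit event straight onto the components. A minimal sketch with a hypothetical `bot()` generator standing in for `generate_response()`, streaming into the last history entry exactly as the loop above does:

```python
import gradio as gr

with gr.Blocks(title="OpenVINO Chatbot") as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")

    def user_turn(user_message, history):
        # Append the user turn with an empty slot for the streamed reply.
        return "", history + [[user_message, ""]]

    def bot(history):
        # Stand-in for generate_response(): stream chunks into the last turn.
        for chunk in ["Hello", ", ", "world"]:
            history[-1][1] += chunk
            yield history

    # Chain the two steps: record the user turn, then stream the reply.
    msg.submit(user_turn, [msg, chatbot], [msg, chatbot]).then(bot, chatbot, chatbot)

if __name__ == "__main__":
    demo.launch()
```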