Ganesh Karbhari committed on
Commit 6c79cf1
1 Parent(s): b8f046a

Update app.py

Files changed (1)
  1. app.py +113 -40
app.py CHANGED
@@ -168,54 +168,127 @@
 
 
 
- from huggingface_hub import InferenceClient
  import gradio as gr
- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
-
- def format_prompt(message, history):
-     prompt = "<s>"
-     for user_prompt, bot_response in history:
-         prompt += f"[INST] {user_prompt} [/INST]"
-         prompt += f" {bot_response}</s> "
-     prompt += f"[INST] {message} [/INST]"
-     return prompt
-
- def generate(
-     prompt, history, temperature=0.2, max_new_tokens=3000, top_p=0.95, repetition_penalty=1.0,
- ):
-     temperature = float(temperature)
-     if temperature < 1e-2:
-         temperature = 1e-2
-     top_p = float(top_p)
-
-     generate_kwargs = dict(
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_p=top_p,
-         repetition_penalty=repetition_penalty,
-         do_sample=True,
-         seed=42,
-     )
-
-     formatted_prompt = format_prompt(prompt, history)
-
-     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-     output = ""
-
-     for response in stream:
-         output += response.token.text
-         yield output
-     return output
 
 
  mychatbot = gr.Chatbot(
      avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
 
- demo = gr.ChatInterface(fn=generate,
      chatbot=mychatbot,
-     title="Mistral-Chat",
      retry_btn=None,
      undo_btn=None
  )
 
- demo.queue().launch(show_api=False)
 
 
+ # from huggingface_hub import InferenceClient
+ # import gradio as gr
+ # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
+
+ # def format_prompt(message, history):
+ #     prompt = "<s>"
+ #     for user_prompt, bot_response in history:
+ #         prompt += f"[INST] {user_prompt} [/INST]"
+ #         prompt += f" {bot_response}</s> "
+ #     prompt += f"[INST] {message} [/INST]"
+ #     return prompt
+
+ # def generate(
+ #     prompt, history, temperature=0.2, max_new_tokens=3000, top_p=0.95, repetition_penalty=1.0,
+ # ):
+ #     temperature = float(temperature)
+ #     if temperature < 1e-2:
+ #         temperature = 1e-2
+ #     top_p = float(top_p)
+
+ #     generate_kwargs = dict(
+ #         temperature=temperature,
+ #         max_new_tokens=max_new_tokens,
+ #         top_p=top_p,
+ #         repetition_penalty=repetition_penalty,
+ #         do_sample=True,
+ #         seed=42,
+ #     )
+
+ #     formatted_prompt = format_prompt(prompt, history)
+
+ #     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+ #     output = ""
+
+ #     for response in stream:
+ #         output += response.token.text
+ #         yield output
+ #     return output
+
+ # mychatbot = gr.Chatbot(
+ #     avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
+
+ # demo = gr.ChatInterface(fn=generate,
+ #     chatbot=mychatbot,
+ #     title="Mistral-Chat",
+ #     retry_btn=None,
+ #     undo_btn=None
+ # )
+
+ # demo.queue().launch(show_api=False)
+
+ import boto3
+ import json
+ from botocore.exceptions import ClientError
+ import os
+
+ access_key_id = os.environ['aws_access_key_id']
+ secret_access_key = os.environ['aws_secret_access_key']
  import gradio as gr
+
+ bedrock = boto3.client(service_name='bedrock-runtime', region_name='us-east-1', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)
+
+ def invoke_llama3_8b(user_message, history):
+     # history is supplied by gr.ChatInterface but is not used here.
+     try:
+         # Set the model ID, e.g., Llama 3 8B Instruct.
+         model_id = "meta.llama3-8b-instruct-v1:0"
+
+         # Embed the message in Llama 3's prompt format.
+         prompt = f"""
+ <|begin_of_text|>
+ <|start_header_id|>user<|end_header_id|>
+ {user_message}
+ <|eot_id|>
+ <|start_header_id|>assistant<|end_header_id|>
+ """
+
+         # Format the request payload using the model's native structure.
+         request = {
+             "prompt": prompt,
+             # Optional inference parameters:
+             "max_gen_len": 1024,
+             "temperature": 0.6,
+             "top_p": 0.9,
+         }
+
+         # Encode and send the request.
+         response = bedrock.invoke_model(body=json.dumps(request), modelId=model_id)
+
+         # Decode the native response body.
+         model_response = json.loads(response["body"].read())
+
+         # Extract and return the generated text.
+         response_text = model_response["generation"]
+         return response_text
+
+     except ClientError:
+         print("Couldn't invoke Llama 3 8B")
+         raise
 
 
  mychatbot = gr.Chatbot(
      avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
 
+ demo = gr.ChatInterface(fn=invoke_llama3_8b,
      chatbot=mychatbot,
+     title="llama3-Chat",
      retry_btn=None,
      undo_btn=None
  )
 
+ demo.queue().launch(show_api=False)
+
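Note: the removed generate() streamed tokens into the chat UI, while invoke_llama3_8b returns the whole completion in one shot. A minimal sketch of a streaming equivalent using Bedrock's invoke_model_with_response_stream, not part of this commit; it assumes the bedrock client, json import, and model ID defined in app.py above, and that gr.ChatInterface accepts a generator fn:

def invoke_llama3_8b_stream(user_message, history):
    # history is supplied by gr.ChatInterface but is not used here.
    prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"{user_message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    )
    request = {"prompt": prompt, "max_gen_len": 1024, "temperature": 0.6, "top_p": 0.9}
    # Stream chunks instead of waiting for the full generation.
    response = bedrock.invoke_model_with_response_stream(
        body=json.dumps(request), modelId="meta.llama3-8b-instruct-v1:0")
    output = ""
    for event in response["body"]:
        chunk = json.loads(event["chunk"]["bytes"])
        if "generation" in chunk:
            output += chunk["generation"]
            yield output  # each partial string re-renders the current reply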