Ganesh Karbhari committed
Commit 6c79cf1 • 1 Parent(s): b8f046a
Update app.py
app.py CHANGED
@@ -168,54 +168,127 @@
168   
169   
170   
171 - from huggingface_hub import InferenceClient
172   import gradio as gr
173 - client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
174 -
175 - def format_prompt(message, history):
176 -     prompt = "<s>"
177 -     for user_prompt, bot_response in history:
178 -         prompt += f"[INST] {user_prompt} [/INST]"
179 -         prompt += f" {bot_response}</s> "
180 -     prompt += f"[INST] {message} [/INST]"
181 -     return prompt
182 -
183 - def generate(
184 -     prompt, history, temperature=0.2, max_new_tokens=3000, top_p=0.95, repetition_penalty=1.0,
185 - ):
186 -     temperature = float(temperature)
187 -     if temperature < 1e-2:
188 -         temperature = 1e-2
189 -     top_p = float(top_p)
190 -
191 -     generate_kwargs = dict(
192 -         temperature=temperature,
193 -         max_new_tokens=max_new_tokens,
194 -         top_p=top_p,
195 -         repetition_penalty=repetition_penalty,
196 -         do_sample=True,
197 -         seed=42,
198 -     )
199 -
200 -     formatted_prompt = format_prompt(prompt, history)
201 -
202 -     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
203 -     output = ""
204 -
205 -     for response in stream:
206 -         output += response.token.text
207 -         yield output
208 -     return output
209   
210   
211   mychatbot = gr.Chatbot(
212       avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
213   
214 - demo = gr.ChatInterface(fn=generate,
215       chatbot=mychatbot,
216 -     title="Mistral-Chat",
217       retry_btn=None,
218       undo_btn=None
219   )
220   
221 - demo.queue().launch(show_api=False)
168   
169   
170   
171 + # from huggingface_hub import InferenceClient
172 + # import gradio as gr
173 + # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
174 +
175 + # def format_prompt(message, history):
176 + #     prompt = "<s>"
177 + #     for user_prompt, bot_response in history:
178 + #         prompt += f"[INST] {user_prompt} [/INST]"
179 + #         prompt += f" {bot_response}</s> "
180 + #     prompt += f"[INST] {message} [/INST]"
181 + #     return prompt
182 +
183 + # def generate(
184 + #     prompt, history, temperature=0.2, max_new_tokens=3000, top_p=0.95, repetition_penalty=1.0,
185 + # ):
186 + #     temperature = float(temperature)
187 + #     if temperature < 1e-2:
188 + #         temperature = 1e-2
189 + #     top_p = float(top_p)
190 +
191 + #     generate_kwargs = dict(
192 + #         temperature=temperature,
193 + #         max_new_tokens=max_new_tokens,
194 + #         top_p=top_p,
195 + #         repetition_penalty=repetition_penalty,
196 + #         do_sample=True,
197 + #         seed=42,
198 + #     )
199 +
200 + #     formatted_prompt = format_prompt(prompt, history)
201 +
202 + #     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
203 + #     output = ""
204 +
205 + #     for response in stream:
206 + #         output += response.token.text
207 + #         yield output
208 + #     return output
209 +
210 +
211 + # mychatbot = gr.Chatbot(
212 + #     avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
213 +
214 + # demo = gr.ChatInterface(fn=generate,
215 + #     chatbot=mychatbot,
216 + #     title="Mistral-Chat",
217 + #     retry_btn=None,
218 + #     undo_btn=None
219 + # )
220 +
221 + # demo.queue().launch(show_api=False)
222 +
223 +
224 +
225 +
226 +
227 +
228 +
229 +
230 + import boto3
231 + import json
232 + from botocore.exceptions import ClientError
233 + import os
234 +
235 + access_key_id = os.environ['aws_access_key_id']
236 + secret_access_key = os.environ['aws_secret_access_key']
237   import gradio as gr
238 +
239 + bedrock = boto3.client(service_name='bedrock-runtime', region_name='us-east-1', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)
240 +
241 +
242 + def invoke_llama3_8b(user_message):
243 +     try:
244 +         # Set the model ID, e.g., Llama 3 8B Instruct.
245 +         model_id = "meta.llama3-8b-instruct-v1:0"
246 +
247 +
248 +         # Embed the message in Llama 3's prompt format.
249 +         prompt = f"""
250 +         <|begin_of_text|>
251 +         <|start_header_id|>user<|end_header_id|>
252 +         {user_message}
253 +         <|eot_id|>
254 +         <|start_header_id|>assistant<|end_header_id|>
255 +         """
256 +
257 +         # Format the request payload using the model's native structure.
258 +         request = {
259 +             "prompt": prompt,
260 +             # Optional inference parameters:
261 +             "max_gen_len": 1024,
262 +             "temperature": 0.6,
263 +             "top_p": 0.9,
264 +         }
265 +
266 +         # Encode and send the request.
267 +         response = bedrock.invoke_model(body=json.dumps(request), modelId=model_id)
268 +
269 +         # Decode the native response body.
270 +         model_response = json.loads(response["body"].read())
271 +
272 +         # Extract and return the generated text.
273 +         response_text = model_response["generation"]
274 +
275 +         return response_text
276 +
277 +     except ClientError:
278 +         print("Couldn't invoke llama3 8B")
279 +         raise
280 +
281   
282   
283   mychatbot = gr.Chatbot(
284       avatar_images=["./user.png", "./bot.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
285   
286 + demo = gr.ChatInterface(fn=invoke_llama3_8b,
287       chatbot=mychatbot,
288 +     title="llama3-Chat",
289       retry_btn=None,
290       undo_btn=None
291   )
292   
293 + demo.queue().launch(show_api=False)
294 +
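
One caveat on the new wiring: gr.ChatInterface calls its fn with two arguments, the new user message and the accumulated chat history, while invoke_llama3_8b accepts only user_message, so the call will typically raise a TypeError and the conversation history is never forwarded to the model. Below is a minimal sketch of a two-argument wrapper that folds the history into Llama 3's chat template before calling Bedrock. The helper names format_llama3_prompt and chat_fn are illustrative assumptions, not part of this commit; the sketch also assumes Gradio's tuple-style history of (user, assistant) pairs and reuses the bedrock client, json import, and model ID defined above.

    # Sketch only: assumed helpers, not part of this commit.
    # Reuses `bedrock` and `json` from the module above.

    def format_llama3_prompt(message, history):
        # Rebuild the multi-turn conversation in Llama 3's chat template,
        # assuming history is a list of (user, assistant) pairs.
        prompt = "<|begin_of_text|>"
        for user_turn, assistant_turn in history:
            prompt += f"<|start_header_id|>user<|end_header_id|>\n{user_turn}<|eot_id|>"
            prompt += f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_turn}<|eot_id|>"
        prompt += f"<|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>"
        prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
        return prompt

    def chat_fn(message, history):
        # Same request shape and client as invoke_llama3_8b, but sends the
        # full formatted conversation instead of the bare message.
        request = {
            "prompt": format_llama3_prompt(message, history),
            "max_gen_len": 1024,
            "temperature": 0.6,
            "top_p": 0.9,
        }
        response = bedrock.invoke_model(body=json.dumps(request), modelId="meta.llama3-8b-instruct-v1:0")
        return json.loads(response["body"].read())["generation"]

    # Then wire it in place of the one-argument function:
    # demo = gr.ChatInterface(fn=chat_fn, chatbot=mychatbot, ...)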