chatham84 committed
Commit f581acd
1 Parent(s): 8515885

Update app.py

Files changed (1)
  1. app.py +273 -109
app.py CHANGED
@@ -1,111 +1,275 @@
-import json
+from typing import Iterator
+
 import gradio as gr
-import os
-import requests
-
-hf_token = os.getenv('HF_TOKEN')
-api_url = os.getenv('API_URL')
-api_url_nostream = os.getenv('API_URL_NOSTREAM')
-headers = {
-    'Content-Type': 'application/json',
-}
-
-system_message = "\nYou are a helpful assistant who has a very narrow scope of knowledge: Medical Claims data. You have access to a medical claims database for Northern California. Do not answer questions you do not know. Respond exactly with '''I'm not trained in that area''' for any questions not related to claims data."
-title = "Vern SLM Bot"
-description = """
-Ask Vern Questions about Claims data..."""
-css = """.toast-wrap { display: none !important } """
-examples=[]
-
-
-def predict(message, chatbot):
-
-    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
-    for interaction in chatbot:
-        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
-
-    input_prompt = input_prompt + str(message) + " [/INST] "
-
-    data = {
-        "inputs": input_prompt,
-        "parameters": {"max_new_tokens":256,
-                       "do_sample":True,
-                       "top_p":0.6,
-                       "temperature":0.9,}
-    }
-
-    response = requests.post(api_url, headers=headers, data=json.dumps(data), auth=('hf', hf_token), stream=True)
-
-    partial_message = ""
-    for line in response.iter_lines():
-        if line:  # filter out keep-alive new lines
-            # Decode from bytes to string
-            decoded_line = line.decode('utf-8')
-
-            # Remove 'data:' prefix
-            if decoded_line.startswith('data:'):
-                json_line = decoded_line[5:]  # Exclude the first 5 characters ('data:')
-            else:
-                gr.Warning(f"This line does not start with 'data:': {decoded_line}")
-                continue
-
-            # Load as JSON
-            try:
-                json_obj = json.loads(json_line)
-                if 'token' in json_obj:
-                    partial_message = partial_message + json_obj['token']['text']
-                    yield partial_message
-                elif 'error' in json_obj:
-                    yield json_obj['error'] + '. Please refresh and try again with an appropriate smaller input prompt.'
-                else:
-                    gr.Warning(f"The key 'token' does not exist in this JSON object: {json_obj}")
-
-            except json.JSONDecodeError:
-                gr.Warning(f"This line is not valid JSON: {json_line}")
-                continue
-            except KeyError as e:
-                gr.Warning(f"KeyError: {e} occurred for JSON object: {json_obj}")
-                continue
-
-
-def predict_batch(message, chatbot):
-
-    input_prompt = f"[INST]<<SYS>>\n{system_message}\n<</SYS>>\n\n "
-    for interaction in chatbot:
-        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
-
-    input_prompt = input_prompt + str(message) + " [/INST] "
-
-    data = {
-        "inputs": input_prompt,
-        "parameters": {"max_new_tokens":256}
-    }
-
-    response = requests.post(api_url_nostream, headers=headers, data=json.dumps(data), auth=('hf', hf_token))
-
-    if response.status_code == 200:  # check if the request was successful
-        try:
-            json_obj = response.json()
-            if 'generated_text' in json_obj and len(json_obj['generated_text']) > 0:
-                return json_obj['generated_text']
-            elif 'error' in json_obj:
-                return json_obj['error'] + ' Please refresh and try again with smaller input prompt'
-            else:
-                print(f"Unexpected response: {json_obj}")
-        except json.JSONDecodeError:
-            print(f"Failed to decode response as JSON: {response.text}")
-    else:
-        print(f"Request failed with status code {response.status_code}")
-
-
-# Gradio Demo
-with gr.Blocks() as demo:
-
-    with gr.Tab("Streaming"):
-        gr.ChatInterface(predict, title=title, description=description, css=css, examples=examples, cache_examples=True)
-
-    with gr.Tab("Batch"):
-        gr.ChatInterface(predict_batch, title=title, description=description, css=css, examples=examples, cache_examples=True)
-
-demo.queue(concurrency_count=75, max_size=100).launch(debug=True)
+import torch
+
+from model import get_input_token_length, run
+
+DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful assistant who has a very narrow scope of knowledge: Medical Claims data. You have access to a medical claims database for Northern California. Do not answer questions you do not know. Respond exactly with '''I'm not trained in that area''' for any questions not related to claims data.\
+"""
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = 4000
+
+DESCRIPTION = """
+# Vern Bot
+
+Testing Vern Bot below
+
+"""
+
+LICENSE = """
+<p/>
+
+---
+As a derivative work of [Llama-2-13b-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat) by Meta,
+this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/USE_POLICY.md).
+"""
+
+if not torch.cuda.is_available():
+    DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'
+
+
+def clear_and_save_textbox(message: str) -> tuple[str, str]:
+    return '', message
+
+
+def display_input(message: str,
+                  history: list[tuple[str, str]]) -> list[tuple[str, str]]:
+    history.append((message, ''))
+    return history
+
+
+def delete_prev_fn(
+        history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
+    try:
+        message, _ = history.pop()
+    except IndexError:
+        message = ''
+    return history, message or ''
+
+
+def generate(
+    message: str,
+    history_with_input: list[tuple[str, str]],
+    system_prompt: str,
+    max_new_tokens: int,
+    temperature: float,
+    top_p: float,
+    top_k: int,
+) -> Iterator[list[tuple[str, str]]]:
+    if max_new_tokens > MAX_MAX_NEW_TOKENS:
+        raise ValueError
+
+    history = history_with_input[:-1]
+    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
+    try:
+        first_response = next(generator)
+        yield history + [(message, first_response)]
+    except StopIteration:
+        yield history + [(message, '')]
+    for response in generator:
+        yield history + [(message, response)]
+
+
+def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
+    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 1, 0.95, 50)
+    for x in generator:
+        pass
+    return '', x
+
+
+def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
+    input_token_length = get_input_token_length(message, chat_history, system_prompt)
+    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
+        raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
+
+
+with gr.Blocks(css='style.css') as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value='',
+                       elem_id='')
+
+    with gr.Group():
+        chatbot = gr.Chatbot(label='Chatbot')
+        with gr.Row():
+            textbox = gr.Textbox(
+                container=False,
+                show_label=False,
+                placeholder='Type a message...',
+                scale=10,
+            )
+            submit_button = gr.Button('Submit',
+                                      variant='primary',
+                                      scale=1,
+                                      min_width=0)
+    with gr.Row():
+        retry_button = gr.Button('🔄 Retry', variant='secondary')
+        undo_button = gr.Button('↩️ Undo', variant='secondary')
+        clear_button = gr.Button('🗑️ Clear', variant='secondary')
+
+    saved_input = gr.State()
+
+    with gr.Accordion(label='Advanced options', open=False):
+        system_prompt = gr.Textbox(label='System prompt',
+                                   value=DEFAULT_SYSTEM_PROMPT,
+                                   lines=6)
+        max_new_tokens = gr.Slider(
+            label='Max new tokens',
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        )
+        temperature = gr.Slider(
+            label='Temperature',
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=1.0,
+        )
+        top_p = gr.Slider(
+            label='Top-p (nucleus sampling)',
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.95,
+        )
+        top_k = gr.Slider(
+            label='Top-k',
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        )
+
+    gr.Examples(
+        examples=[
+            'Hello there! How are you doing?',
+            'Can you explain briefly to me what is the Python programming language?',
+            'Explain the plot of Cinderella in a sentence.',
+            'How many hours does it take a man to eat a Helicopter?',
+            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
+        ],
+        inputs=textbox,
+        outputs=[textbox, chatbot],
+        fn=process_example,
+        cache_examples=True,
+    )
+
+    gr.Markdown(LICENSE)
+
+    textbox.submit(
+        fn=clear_and_save_textbox,
+        inputs=textbox,
+        outputs=[textbox, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=check_input_token_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+
+    button_event_preprocess = submit_button.click(
+        fn=clear_and_save_textbox,
+        inputs=textbox,
+        outputs=[textbox, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=check_input_token_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+
+    retry_button.click(
+        fn=delete_prev_fn,
+        inputs=chatbot,
+        outputs=[chatbot, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+
+    undo_button.click(
+        fn=delete_prev_fn,
+        inputs=chatbot,
+        outputs=[chatbot, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=lambda x: x,
+        inputs=[saved_input],
+        outputs=textbox,
+        api_name=False,
+        queue=False,
+    )
+
+    clear_button.click(
+        fn=lambda: ([], ''),
+        outputs=[chatbot, saved_input],
+        queue=False,
+        api_name=False,
+    )
 
+demo.queue(max_size=20).launch()
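
For reference, the removed predict() spoke a text-generation-inference-style server-sent-events protocol: each streamed line arrives as data:{...}, and the JSON carries the next token under token.text. A minimal sketch of that parsing step, run against hardcoded sample lines instead of a live endpoint (the exact payload shape depends on the serving backend):

import json

# Representative lines as predict() received them from response.iter_lines();
# a real stream also interleaves blank keep-alive lines, which are skipped.
sample_lines = [
    b'data:{"token": {"text": "Hello"}}',
    b'data:{"token": {"text": " world"}}',
]

partial_message = ""
for raw in sample_lines:
    decoded = raw.decode('utf-8')
    if not decoded.startswith('data:'):
        continue  # skip keep-alives and anything that is not an SSE data line
    payload = json.loads(decoded[5:])  # drop the 'data:' prefix
    if 'token' in payload:
        partial_message += payload['token']['text']

print(partial_message)  # -> Hello world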
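
The new version imports get_input_token_length and run from a local model module that is not part of this commit. From the way generate() consumes it, run must be a generator that yields the accumulated response text after each decoding step. A hypothetical stub honoring that contract, useful for exercising the UI without a GPU (the names come from the import; everything else here is an assumption, not the actual module):

from typing import Iterator


def run(message: str, chat_history: list[tuple[str, str]], system_prompt: str,
        max_new_tokens: int, temperature: float, top_p: float, top_k: int) -> Iterator[str]:
    # Assumed contract: yield progressively longer partial responses,
    # matching how generate() calls next(generator) and then iterates
    # over the remaining partial outputs.
    response = ''
    for word in ('This ', 'is ', 'a ', 'stub ', 'response.'):
        response += word
        yield response


def get_input_token_length(message: str, chat_history: list[tuple[str, str]],
                           system_prompt: str) -> int:
    # Assumed contract: return the prompt length in tokens. A real
    # implementation would run the model's tokenizer over the full prompt;
    # this stand-in just counts whitespace-separated words.
    full_prompt = system_prompt + message + ''.join(u + a for u, a in chat_history)
    return len(full_prompt.split())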
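
Both submit paths chain check_input_token_length with .success(fn=generate, ...): raising gr.Error in the check surfaces a toast to the user and marks the event as failed, so the chained generation step never runs. A minimal sketch of that gating pattern in isolation (simplified wiring, not the app's exact layout):

import gradio as gr


def validate(message: str) -> None:
    # gr.Error aborts the event chain, so the .success(...) step below is skipped.
    if len(message) > 100:
        raise gr.Error('Input too long. Shorten your message and try again.')


def respond(message: str) -> str:
    return f'You said: {message}'


with gr.Blocks() as demo:
    textbox = gr.Textbox(label='Message')
    output = gr.Textbox(label='Reply')
    textbox.submit(fn=validate, inputs=textbox).success(
        fn=respond, inputs=textbox, outputs=output)

demo.queue().launch()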