alpindale committed on
Commit 7b361da
1 Parent(s): 5952ead

Upload folder using huggingface_hub
ChatApp/app.py ADDED
@@ -0,0 +1,253 @@
+ # -*- coding:utf-8 -*-
+ import os
+ import logging
+ import gradio as gr
+ import gc
+ from ChatApp.interface.hddr_llama_onnx_interface import LlamaOnnxInterface
+ from ChatApp.interface.empty_stub_interface import EmptyStubInterface
+ from ChatApp.app_modules.utils import (
+     reset_textbox,
+     transfer_input,
+     reset_state,
+     delete_last_conversation,
+     cancel_outputing,
+ )
+ from ChatApp.app_modules.presets import (
+     small_and_beautiful_theme,
+     title,
+     description_top,
+     description,
+ )
+ from ChatApp.app_modules.overwrites import postprocess
+
+ logging.basicConfig(
+     level=logging.DEBUG,
+     format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s",
+ )
+
+ # this dictionary can be filtered at startup to match the model files actually on disk
+ empty_stub_model_name = "_Empty Stub_"
+
+ top_directory = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+ tokenizer_path = os.path.join(top_directory, "tokenizer.model")
+
+ available_models = {
+     "Llama-2 13B Float16": {
+         "onnx_file": os.path.join(top_directory, "FP16", "LlamaV2_13B_float16.onnx"),
+         "tokenizer_path": tokenizer_path,
+         "embedding_file": os.path.join(top_directory, "embeddings.pth"),
+     },
+     "Llama-2 13B FP32": {
+         # note: this entry reuses the float16 filename; adjust it if the FP32 export is named differently
+         "onnx_file": os.path.join(top_directory, "FP32", "LlamaV2_13B_float16.onnx"),
+         "tokenizer_path": tokenizer_path,
+         "embedding_file": os.path.join(top_directory, "embeddings.pth"),
+     },
+ }
+
+
+ interface = EmptyStubInterface()
+ interface.initialize()
+
+ gr.Chatbot.postprocess = postprocess
+
+ with open("ChatApp/assets/custom.css", "r", encoding="utf-8") as f:
+     custom_css = f.read()
+
+
+ def change_model_listener(new_model_name):
+     if new_model_name is None:
+         new_model_name = empty_stub_model_name
+
+     global interface
+
+     # if a model exists, shut it down before creating the new one
+     if interface is not None:
+         interface.shutdown()
+         del interface
+         gc.collect()
+
+     logging.info(f"Creating a new model [{new_model_name}]")
+     if new_model_name == empty_stub_model_name:
+         interface = EmptyStubInterface()
+         interface.initialize()
+     else:
+         d = available_models[new_model_name]
+         interface = LlamaOnnxInterface(
+             onnx_file=d["onnx_file"],
+             tokenizer_path=d["tokenizer_path"],
+             embedding_file=d["embedding_file"],
+         )
+         interface.initialize()
+
+     return new_model_name
+
+
+ def interface_predict(*args):
+     global interface
+     res = interface.predict(*args)
+
+     for x in res:
+         yield x
+
+
+ def interface_retry(*args):
+     global interface
+     res = interface.retry(*args)
+
+     for x in res:
+         yield x
+
+
+ with gr.Blocks(css=custom_css, theme=small_and_beautiful_theme) as demo:
+     history = gr.State([])
+     user_question = gr.State("")
+     with gr.Row():
+         gr.HTML(title)
+         status_display = gr.Markdown("Success", elem_id="status_display")
+     gr.Markdown(description_top)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             with gr.Row():
+                 chatbot = gr.Chatbot(elem_id="chuanhu_chatbot", height=900)
+             with gr.Row():
+                 with gr.Column(scale=12):
+                     user_input = gr.Textbox(show_label=False, placeholder="Enter text")
+                 with gr.Column(min_width=70, scale=1):
+                     submit_button = gr.Button("Send")
+                 with gr.Column(min_width=70, scale=1):
+                     cancel_button = gr.Button("Stop")
+             with gr.Row():
+                 empty_button = gr.Button("🧹 New Conversation")
+                 retry_button = gr.Button("🔄 Regenerate")
+                 delete_last_button = gr.Button("🗑️ Remove Last Turn")
+         with gr.Column():
+             with gr.Column(min_width=50, scale=1):
+                 with gr.Tab(label="Parameter Setting"):
+                     gr.Markdown("# Model")
+                     model_name = gr.Dropdown(
+                         choices=[empty_stub_model_name] + list(available_models.keys()),
+                         label="Model",
+                         show_label=False,
+                     )
+                     model_name.change(
+                         change_model_listener, inputs=[model_name], outputs=[model_name]
+                     )
+
+                     gr.Markdown("# Parameters")
+                     top_p = gr.Slider(
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=0.9,
+                         step=0.05,
+                         interactive=True,
+                         label="Top-p",
+                     )
+                     temperature = gr.Slider(
+                         minimum=0.1,
+                         maximum=2.0,
+                         value=0.75,
+                         step=0.1,
+                         interactive=True,
+                         label="Temperature",
+                     )
+                     max_length_tokens = gr.Slider(
+                         minimum=0,
+                         maximum=512,
+                         value=256,
+                         step=8,
+                         interactive=True,
+                         label="Max Generation Tokens",
+                     )
+                     max_context_length_tokens = gr.Slider(
+                         minimum=0,
+                         maximum=4096,
+                         value=2048,
+                         step=128,
+                         interactive=True,
+                         label="Max History Tokens",
+                     )
+     gr.Markdown(description)
+
+     predict_args = dict(
+         fn=interface_predict,
+         inputs=[
+             user_question,
+             chatbot,
+             history,
+             top_p,
+             temperature,
+             max_length_tokens,
+             max_context_length_tokens,
+         ],
+         outputs=[chatbot, history, status_display],
+         show_progress=True,
+     )
+     retry_args = dict(
+         fn=interface_retry,
+         inputs=[
+             user_input,
+             chatbot,
+             history,
+             top_p,
+             temperature,
+             max_length_tokens,
+             max_context_length_tokens,
+         ],
+         outputs=[chatbot, history, status_display],
+         show_progress=True,
+     )
+
+     reset_args = dict(fn=reset_textbox, inputs=[], outputs=[user_input, status_display])
+
+     # Chatbot
+     transfer_input_args = dict(
+         fn=transfer_input,
+         inputs=[user_input],
+         outputs=[user_question, user_input, submit_button],
+         show_progress=True,
+     )
+
+     predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
+
+     predict_event2 = submit_button.click(**transfer_input_args).then(**predict_args)
+
+     empty_button.click(
+         reset_state,
+         outputs=[chatbot, history, status_display],
+         show_progress=True,
+     )
+     empty_button.click(**reset_args)
+
+     predict_event3 = retry_button.click(**retry_args)
+
+     delete_last_button.click(
+         delete_last_conversation,
+         [chatbot, history],
+         [chatbot, history, status_display],
+         show_progress=True,
+     )
+     cancel_button.click(
+         cancel_outputing,
+         [],
+         [status_display],
+         cancels=[predict_event1, predict_event2, predict_event3],
+     )
+
+     demo.load(change_model_listener, inputs=None, outputs=model_name)
+
+ demo.title = "Llama-2 Chat UI"
+
+ demo.queue(concurrency_count=1).launch()
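
Note: the comment near the top of app.py says available_models can be filtered at startup to match the model files actually on disk. A minimal sketch of that filter, assuming the dictionary layout above (the helper name is illustrative, not part of this commit):

import os

def filter_available_models(models: dict) -> dict:
    # hypothetical helper: keep only entries whose ONNX file exists on disk
    return {
        name: cfg
        for name, cfg in models.items()
        if os.path.isfile(cfg["onnx_file"])
    }

# available_models = filter_available_models(available_models)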
ChatApp/app_modules/__pycache__/overwrites.cpython-39.pyc ADDED
Binary file (1.15 kB)
 
ChatApp/app_modules/__pycache__/presets.cpython-39.pyc ADDED
Binary file (1.92 kB)
 
ChatApp/app_modules/__pycache__/utils.cpython-39.pyc ADDED
Binary file (6.25 kB)
 
ChatApp/app_modules/overwrites.py ADDED
@@ -0,0 +1,33 @@
+ from __future__ import annotations
+ from typing import List, Tuple
+
+ from ChatApp.app_modules.presets import gr
+ from ChatApp.app_modules.utils import detect_converted_mark, convert_asis, convert_mdtext
+
+
+ def postprocess(
+     self, y: List[Tuple[str | None, str | None]]
+ ) -> List[Tuple[str | None, str | None]]:
+     """
+     Parameters:
+         y: List of tuples representing the message and response pairs.
+             Each message and response should be a string,
+             which may be in Markdown format.
+     Returns:
+         List of tuples representing the message and response.
+             Each message and response will be a string of HTML.
+     """
+     if y is None or y == []:
+         return []
+     temp = []
+     for x in y:
+         user, bot = x
+         if not detect_converted_mark(user):
+             user = convert_asis(user)
+         if not detect_converted_mark(bot):
+             bot = convert_mdtext(bot)
+         temp.append((user, bot))
+     return temp
+
+
+ GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
ChatApp/app_modules/presets.py ADDED
@@ -0,0 +1,81 @@
+ # -*- coding:utf-8 -*-
+ import gradio as gr
+
+
+ title = """<h1 align="left" style="min-width:200px; margin-top:0;">Llama-2 Chat UI</h1>"""
+ description_top = """\
+ <div align="left">
+ <p>Use at your own risk...</p>
+ </div>
+ """
+ description = """\
+ <div align="center" style="margin:16px 0">
+ This is a chat demo using the ONNX versions of the Llama 2 model
+ </div>
+ """
+ CONCURRENT_COUNT = 100
+
+
+ ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
+
+ small_and_beautiful_theme = gr.themes.Soft(
+     primary_hue=gr.themes.Color(
+         c50="#02C160",
+         c100="rgba(2, 193, 96, 0.2)",
+         c200="#02C160",
+         c300="rgba(2, 193, 96, 0.32)",
+         c400="rgba(2, 193, 96, 0.32)",
+         c500="rgba(2, 193, 96, 1.0)",
+         c600="rgba(2, 193, 96, 1.0)",
+         c700="rgba(2, 193, 96, 0.32)",
+         c800="rgba(2, 193, 96, 0.32)",
+         c900="#02C160",
+         c950="#02C160",
+     ),
+     secondary_hue=gr.themes.Color(
+         c50="#576b95",
+         c100="#576b95",
+         c200="#576b95",
+         c300="#576b95",
+         c400="#576b95",
+         c500="#576b95",
+         c600="#576b95",
+         c700="#576b95",
+         c800="#576b95",
+         c900="#576b95",
+         c950="#576b95",
+     ),
+     neutral_hue=gr.themes.Color(
+         name="gray",
+         c50="#f9fafb",
+         c100="#f3f4f6",
+         c200="#e5e7eb",
+         c300="#d1d5db",
+         c400="#B2B2B2",
+         c500="#808080",
+         c600="#636363",
+         c700="#515151",
+         c800="#393939",
+         c900="#272727",
+         c950="#171717",
+     ),
+     radius_size=gr.themes.sizes.radius_sm,
+ ).set(
+     button_primary_background_fill="#06AE56",
+     button_primary_background_fill_dark="#06AE56",
+     button_primary_background_fill_hover="#07C863",
+     button_primary_border_color="#06AE56",
+     button_primary_border_color_dark="#06AE56",
+     button_primary_text_color="#FFFFFF",
+     button_primary_text_color_dark="#FFFFFF",
+     button_secondary_background_fill="#F2F2F2",
+     button_secondary_background_fill_dark="#2B2B2B",
+     button_secondary_text_color="#393939",
+     button_secondary_text_color_dark="#FFFFFF",
+     background_fill_primary="#F7F7F7",
+     background_fill_primary_dark="#1F1F1F",
+     block_title_text_color="*primary_500",
+     block_title_background_fill="*primary_100",
+     input_background_fill="#F6F6F6",
+ )
ChatApp/app_modules/utils.py ADDED
@@ -0,0 +1,235 @@
+ # -*- coding:utf-8 -*-
+ from __future__ import annotations
+ import logging
+ import re
+ import html
+
+ import gradio as gr
+ import mdtex2html
+ from markdown import markdown
+ from pygments import highlight
+ from pygments.lexers import guess_lexer, get_lexer_by_name, ClassNotFound
+ from pygments.formatters import HtmlFormatter
+
+ from ChatApp.app_modules.presets import ALREADY_CONVERTED_MARK
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s",
+ )
+
+
+ def markdown_to_html_with_syntax_highlight(md_str):
+     def replacer(match):
+         lang = match.group(1) or "text"
+         code = match.group(2)
+         lang = lang.strip()
+         if lang == "text":
+             # no language tag: let Pygments guess one from the code itself
+             lexer = guess_lexer(code)
+             lang = lexer.name
+         try:
+             lexer = get_lexer_by_name(lang, stripall=True)
+         except ValueError:
+             lexer = get_lexer_by_name("python", stripall=True)
+         formatter = HtmlFormatter()
+         highlighted_code = highlight(code, lexer, formatter)
+
+         return f'<pre><code class="{lang}">{highlighted_code}</code></pre>'
+
+     code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
+     md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
+
+     html_str = markdown(md_str)
+     return html_str
+
+
+ def normalize_markdown(md_text: str) -> str:
+     lines = md_text.split("\n")
+     normalized_lines = []
+     inside_list = False
+
+     for i, line in enumerate(lines):
+         if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
+             if not inside_list and i > 0 and lines[i - 1].strip() != "":
+                 normalized_lines.append("")
+             inside_list = True
+             normalized_lines.append(line)
+         elif inside_list and line.strip() == "":
+             if i < len(lines) - 1 and not re.match(
+                 r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()
+             ):
+                 normalized_lines.append(line)
+             continue
+         else:
+             inside_list = False
+             normalized_lines.append(line)
+
+     return "\n".join(normalized_lines)
+
+
+ def convert_mdtext(md_text):
+     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
+     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
+     code_blocks = code_block_pattern.findall(md_text)
+     non_code_parts = code_block_pattern.split(md_text)[::2]
+
+     result = []
+     for non_code, code in zip(non_code_parts, code_blocks + [""]):
+         if non_code.strip():
+             non_code = normalize_markdown(non_code)
+             if inline_code_pattern.search(non_code):
+                 result.append(markdown(non_code, extensions=["tables"]))
+             else:
+                 result.append(mdtex2html.convert(non_code, extensions=["tables"]))
+         if code.strip():
+             code = f"\n```{code}\n\n```"
+             code = markdown_to_html_with_syntax_highlight(code)
+             result.append(code)
+     result = "".join(result)
+     result += ALREADY_CONVERTED_MARK
+     return result
+
+
+ def convert_asis(userinput):
+     return (
+         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
+         + ALREADY_CONVERTED_MARK
+     )
+
+
+ def detect_converted_mark(userinput):
+     return userinput.endswith(ALREADY_CONVERTED_MARK)
+
+
+ def detect_language(code):
+     if code.startswith("\n"):
+         first_line = ""
+     else:
+         first_line = code.strip().split("\n", 1)[0]
+     language = first_line.lower() if first_line else ""
+     code_without_language = code[len(first_line) :].lstrip() if first_line else code
+     return language, code_without_language
+
+
+ def convert_to_markdown(text):
+     text = text.replace("$", "&#36;")
+
+     def replace_leading_tabs_and_spaces(line):
+         new_line = []
+
+         for char in line:
+             if char == "\t":
+                 new_line.append("&#9;")
+             elif char == " ":
+                 new_line.append("&nbsp;")
+             else:
+                 break
+         return "".join(new_line) + line[len(new_line) :]
+
+     markdown_text = ""
+     lines = text.split("\n")
+     in_code_block = False
+
+     for line in lines:
+         if in_code_block is False and line.startswith("```"):
+             in_code_block = True
+             markdown_text += f"{line}\n"
+         elif in_code_block is True and line.startswith("```"):
+             in_code_block = False
+             markdown_text += f"{line}\n"
+         elif in_code_block:
+             markdown_text += f"{line}\n"
+         else:
+             line = replace_leading_tabs_and_spaces(line)
+             line = re.sub(r"^(#)", r"\\\1", line)
+             markdown_text += f"{line}  \n"
+
+     return markdown_text
+
+
+ def add_language_tag(text):
+     def detect_language(code_block):
+         try:
+             lexer = guess_lexer(code_block)
+             return lexer.name.lower()
+         except ClassNotFound:
+             return ""
+
+     code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE)
+
+     def replacement(match):
+         code_block = match.group(2)
+         if match.group(2).startswith("\n"):
+             language = detect_language(code_block)
+             if language:
+                 return f"```{language}{code_block}```"
+             else:
+                 return f"```\n{code_block}```"
+         else:
+             return match.group(1) + code_block + "```"
+
+     text2 = code_block_pattern.sub(replacement, text)
+     return text2
+
+
+ def delete_last_conversation(chatbot, history):
+     if len(chatbot) > 0:
+         chatbot.pop()
+
+     if len(history) > 0:
+         history.pop()
+
+     return (
+         chatbot,
+         history,
+         "Delete Done",
+     )
+
+
+ def reset_state():
+     return [], [], "Reset Done"
+
+
+ def reset_textbox():
+     return gr.update(value=""), ""
+
+
+ def cancel_outputing():
+     return "Stop Done"
+
+
+ def transfer_input(inputs):
+     return (
+         inputs,
+         gr.update(value=""),
+         gr.Button.update(visible=True),
+     )
+
+
+ class State:
+     interrupted = False
+
+     def interrupt(self):
+         self.interrupted = True
+
+     def recover(self):
+         self.interrupted = False
+
+
+ shared_state = State()
+
+
+ def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
+     for stop_word in stop_words:
+         if s.endswith(stop_word):
+             return True
+         for i in range(1, len(stop_word)):
+             if s.endswith(stop_word[:i]):
+                 return True
+     return False
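
Note: is_stop_word_or_prefix returns True when the text ends with a full stop word or with any prefix of one; the streaming loop in the ONNX interface uses this to hold back partially generated stop markers. A quick illustration, assuming the module above is importable:

from ChatApp.app_modules.utils import is_stop_word_or_prefix

stop_words = ["[|Human|]", "[|AI|]"]
assert is_stop_word_or_prefix("answer [|Human|]", stop_words) is True  # full stop word
assert is_stop_word_or_prefix("answer [|Hu", stop_words) is True       # prefix of a stop word
assert is_stop_word_or_prefix("plain answer", stop_words) is False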
ChatApp/assets/custom.css ADDED
@@ -0,0 +1,488 @@
+ :root {
+     --chatbot-color-light: #F3F3F3;
+     --chatbot-color-dark: #121111;
+ }
+
+ /* status_display */
+ #status_display {
+     display: flex;
+     min-height: 2.5em;
+     align-items: flex-end;
+     justify-content: flex-end;
+ }
+
+ #status_display p {
+     font-size: .85em;
+     font-family: monospace;
+     color: var(--body-text-color-subdued);
+ }
+
+ /* usage_display */
+ #usage_display {
+     height: 1em;
+ }
+
+ #usage_display p {
+     padding: 0 1em;
+     font-size: .85em;
+     font-family: monospace;
+     color: var(--body-text-color-subdued);
+ }
+
+ /* list */
+ ol:not(.options),
+ ul:not(.options) {
+     padding-inline-start: 2em !important;
+ }
+
+ /* Thanks @Keldos-Li for fixing it */
+ /* Light mode (default) */
+ #chuanhu_chatbot {
+     background-color: var(--chatbot-color-light) !important;
+     color: #000000 !important;
+ }
+
+ [data-testid="bot"] {
+     background-color: #FFFFFF !important;
+ }
+
+ [data-testid="user"] {
+     background-color: #95EC69 !important;
+ }
+
+ /* Dark mode */
+ .dark #chuanhu_chatbot {
+     background-color: var(--chatbot-color-dark) !important;
+     color: #FFFFFF !important;
+ }
+
+ .dark [data-testid="bot"] {
+     background-color: #2C2C2C !important;
+ }
+
+ .dark [data-testid="user"] {
+     background-color: #26B561 !important;
+ }
+
+ #chuanhu_chatbot {
+     height: 100%;
+     min-height: 400px;
+ }
+
+ [class*="message"] {
+     border-radius: var(--radius-xl) !important;
+     border: none;
+     padding: var(--spacing-xl) !important;
+     font-size: var(--text-md) !important;
+     line-height: var(--line-md) !important;
+     min-height: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl));
+     min-width: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl));
+ }
+
+ [data-testid="bot"] {
+     max-width: 85%;
+     border-bottom-left-radius: 0 !important;
+ }
+
+ [data-testid="user"] {
+     max-width: 85%;
+     width: auto !important;
+     border-bottom-right-radius: 0 !important;
+ }
+
+ /* Table */
+ table {
+     margin: 1em 0;
+     border-collapse: collapse;
+     empty-cells: show;
+ }
+
+ td,
+ th {
+     border: 1.2px solid var(--border-color-primary) !important;
+     padding: 0.2em;
+ }
+
+ thead {
+     background-color: rgba(175, 184, 193, 0.2);
+ }
+
+ thead th {
+     padding: .5em .2em;
+ }
+
+ /* Inline code */
+ #chuanhu_chatbot code {
+     display: inline;
+     white-space: break-spaces;
+     border-radius: 6px;
+     margin: 0 2px 0 2px;
+     padding: .2em .4em .1em .4em;
+     background-color: rgba(175, 184, 193, 0.2);
+ }
+
+ /* Code block */
+ #chuanhu_chatbot pre code {
+     display: block;
+     overflow: auto;
+     white-space: pre;
+     background-color: hsla(0, 0%, 0%, 80%) !important;
+     border-radius: 10px;
+     padding: 1.4em 1.2em 0em 1.4em;
+     margin: 1.2em 2em 1.2em 0.5em;
+     color: #FFFFFF;
+     box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
+ }
+
+ /* Highlight (Pygments token classes) */
+ #chuanhu_chatbot .highlight { background-color: transparent; }
+ #chuanhu_chatbot .highlight .hll { background-color: #49483e; }
+ #chuanhu_chatbot .highlight .c { color: #75715e; }  /* Comment */
+ #chuanhu_chatbot .highlight .err { color: #960050; background-color: #1e0010; }  /* Error */
+ #chuanhu_chatbot .highlight .k { color: #66d9ef; }  /* Keyword */
+ #chuanhu_chatbot .highlight .l { color: #ae81ff; }  /* Literal */
+ #chuanhu_chatbot .highlight .n { color: #8828f2; }  /* Name */
+ #chuanhu_chatbot .highlight .o { color: #f92672; }  /* Operator */
+ #chuanhu_chatbot .highlight .p { color: #482822; }  /* Punctuation */
+ #chuanhu_chatbot .highlight .ch { color: #75715e; }  /* Comment.Hashbang */
+ #chuanhu_chatbot .highlight .cm { color: #75715e; }  /* Comment.Multiline */
+ #chuanhu_chatbot .highlight .cp { color: #75715e; }  /* Comment.Preproc */
+ #chuanhu_chatbot .highlight .cpf { color: #75715e; }  /* Comment.PreprocFile */
+ #chuanhu_chatbot .highlight .c1 { color: #75715e; }  /* Comment.Single */
+ #chuanhu_chatbot .highlight .cs { color: #75715e; }  /* Comment.Special */
+ #chuanhu_chatbot .highlight .gd { color: #f92672; }  /* Generic.Deleted */
+ #chuanhu_chatbot .highlight .ge { font-style: italic; }  /* Generic.Emph */
+ #chuanhu_chatbot .highlight .gi { color: #a6e22e; }  /* Generic.Inserted */
+ #chuanhu_chatbot .highlight .gs { font-weight: bold; }  /* Generic.Strong */
+ #chuanhu_chatbot .highlight .gu { color: #75715e; }  /* Generic.Subheading */
+ #chuanhu_chatbot .highlight .kc { color: #66d9ef; }  /* Keyword.Constant */
+ #chuanhu_chatbot .highlight .kd { color: #66d9ef; }  /* Keyword.Declaration */
+ #chuanhu_chatbot .highlight .kn { color: #f92672; }  /* Keyword.Namespace */
+ #chuanhu_chatbot .highlight .kp { color: #66d9ef; }  /* Keyword.Pseudo */
+ #chuanhu_chatbot .highlight .kr { color: #66d9ef; }  /* Keyword.Reserved */
+ #chuanhu_chatbot .highlight .kt { color: #66d9ef; }  /* Keyword.Type */
+ #chuanhu_chatbot .highlight .ld { color: #162b74; }  /* Literal.Date */
+ #chuanhu_chatbot .highlight .m { color: #ae81ff; }  /* Literal.Number */
+ #chuanhu_chatbot .highlight .s { color: #062b84; }  /* Literal.String */
+ #chuanhu_chatbot .highlight .na { color: #a6e22e; }  /* Name.Attribute */
+ #chuanhu_chatbot .highlight .nb { color: #482822; }  /* Name.Builtin */
+ #chuanhu_chatbot .highlight .nc { color: #a6e22e; }  /* Name.Class */
+ #chuanhu_chatbot .highlight .no { color: #66d9ef; }  /* Name.Constant */
+ #chuanhu_chatbot .highlight .nd { color: #a6e22e; }  /* Name.Decorator */
+ #chuanhu_chatbot .highlight .ni { color: #482822; }  /* Name.Entity */
+ #chuanhu_chatbot .highlight .ne { color: #a6e22e; }  /* Name.Exception */
+ #chuanhu_chatbot .highlight .nf { color: #a6e22e; }  /* Name.Function */
+ #chuanhu_chatbot .highlight .nl { color: #1818f2; }  /* Name.Label */
+ #chuanhu_chatbot .highlight .nn { color: #482822; }  /* Name.Namespace */
+ #chuanhu_chatbot .highlight .nx { color: #a6e22e; }  /* Name.Other */
+ #chuanhu_chatbot .highlight .py { color: #482822; }  /* Name.Property */
+ #chuanhu_chatbot .highlight .nt { color: #f92672; }  /* Name.Tag */
+ #chuanhu_chatbot .highlight .nv { color: #482822; }  /* Name.Variable */
+ #chuanhu_chatbot .highlight .ow { color: #f92672; }  /* Operator.Word */
+ #chuanhu_chatbot .highlight .w { color: #482822; }  /* Text.Whitespace */
+ #chuanhu_chatbot .highlight .mb { color: #ae81ff; }  /* Literal.Number.Bin */
+ #chuanhu_chatbot .highlight .mf { color: #ae81ff; }  /* Literal.Number.Float */
+ #chuanhu_chatbot .highlight .mh { color: #ae81ff; }  /* Literal.Number.Hex */
+ #chuanhu_chatbot .highlight .mi { color: #ae81ff; }  /* Literal.Number.Integer */
+ #chuanhu_chatbot .highlight .mo { color: #ae81ff; }  /* Literal.Number.Oct */
+ #chuanhu_chatbot .highlight .sa { color: #162b74; }  /* Literal.String.Affix */
+ #chuanhu_chatbot .highlight .sb { color: #161b74; }  /* Literal.String.Backtick */
+ #chuanhu_chatbot .highlight .sc { color: #162b74; }  /* Literal.String.Char */
+ #chuanhu_chatbot .highlight .dl { color: #162b74; }  /* Literal.String.Delimiter */
+ #chuanhu_chatbot .highlight .sd { color: #162b74; }  /* Literal.String.Doc */
+ #chuanhu_chatbot .highlight .s2 { color: #162b74; }  /* Literal.String.Double */
+ #chuanhu_chatbot .highlight .se { color: #ae81ff; }  /* Literal.String.Escape */
+ #chuanhu_chatbot .highlight .sh { color: #162b74; }  /* Literal.String.Heredoc */
+ #chuanhu_chatbot .highlight .si { color: #162b74; }  /* Literal.String.Interpol */
+ #chuanhu_chatbot .highlight .sx { color: #162b74; }  /* Literal.String.Other */
+ #chuanhu_chatbot .highlight .sr { color: #162b74; }  /* Literal.String.Regex */
+ #chuanhu_chatbot .highlight .s1 { color: #162b74; }  /* Literal.String.Single */
+ #chuanhu_chatbot .highlight .ss { color: #162b74; }  /* Literal.String.Symbol */
+ #chuanhu_chatbot .highlight .bp { color: #482822; }  /* Name.Builtin.Pseudo */
+ #chuanhu_chatbot .highlight .fm { color: #a6e22e; }  /* Name.Function.Magic */
+ #chuanhu_chatbot .highlight .vc { color: #482822; }  /* Name.Variable.Class */
+ #chuanhu_chatbot .highlight .vg { color: #482822; }  /* Name.Variable.Global */
+ #chuanhu_chatbot .highlight .vi { color: #482822; }  /* Name.Variable.Instance */
+ #chuanhu_chatbot .highlight .vm { color: #482822; }  /* Name.Variable.Magic */
+ #chuanhu_chatbot .highlight .il { color: #ae81ff; }  /* Literal.Number.Integer.Long */
ChatApp/assets/custom.js ADDED
@@ -0,0 +1 @@
+ // custom javascript here
ChatApp/interface/__pycache__/base_interface.cpython-39.pyc ADDED
Binary file (574 Bytes)
 
ChatApp/interface/__pycache__/empty_stub_interface.cpython-39.pyc ADDED
Binary file (1.33 kB)
 
ChatApp/interface/__pycache__/hddr_llama_onnx_interface.cpython-39.pyc ADDED
Binary file (8.94 kB)
 
ChatApp/interface/base_interface.py ADDED
@@ -0,0 +1,6 @@
+ class BaseLLMInterface:
+     def __init__(self):
+         pass
+
+     def foo(self):
+         pass
ChatApp/interface/empty_stub_interface.py ADDED
@@ -0,0 +1,39 @@
+ import logging
+
+
+ class EmptyStubInterface:
+     def __init__(self):
+         pass
+
+     def initialize(self):
+         pass
+
+     def shutdown(self):
+         pass
+
+     def predict(
+         self,
+         text,
+         chatbot,
+         history,
+         top_p,
+         temperature,
+         max_length_tokens,
+         max_context_length_tokens,
+     ):
+         logging.info("EmptyStubInterface.predict called")
+         yield [[text, "No Model Found"]], [], "No Model Found"
+
+     def retry(
+         self,
+         text,
+         chatbot,
+         history,
+         top_p,
+         temperature,
+         max_length_tokens,
+         max_context_length_tokens,
+     ):
+         yield chatbot, history, "Empty context"
ChatApp/interface/hddr_llama_onnx_interface.py ADDED
@@ -0,0 +1,395 @@
+ import torch
+ import onnxruntime
+ import numpy as np
+ from sentencepiece import SentencePieceProcessor
+ from typing import List
+ import os
+ import logging
+ import gc
+
+ from .base_interface import BaseLLMInterface
+
+ from ChatApp.app_modules.utils import (
+     is_stop_word_or_prefix,
+     convert_to_markdown,
+     shared_state,
+ )
+
+
+ class Tokenizer:
+     def __init__(self, model_path: str):
+         # reload tokenizer
+         assert os.path.isfile(model_path), model_path
+         self.sp_model = SentencePieceProcessor(model_file=model_path)
+
+         # BOS / EOS token IDs
+         self.n_words: int = self.sp_model.vocab_size()
+         self.bos_id: int = self.sp_model.bos_id()
+         self.eos_id: int = self.sp_model.eos_id()
+         self.pad_id: int = self.sp_model.pad_id()
+
+         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
+
+     def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
+         assert type(s) is str
+         t = self.sp_model.encode(s)
+         if bos:
+             t = [self.bos_id] + t
+         if eos:
+             t = t + [self.eos_id]
+         return t
+
+     def decode(self, t: List[int]) -> str:
+         return self.sp_model.decode(t)
+
+
+ class LlamaOnnxInterface(BaseLLMInterface):
+     def __init__(self, onnx_file="", embedding_file="", tokenizer_path=""):
+         super().__init__()
+
+         self.onnx_file = onnx_file
+         self.embedding_file = embedding_file
+         self.tokenizer_path = tokenizer_path
+
+         self.total_count = 0
+
+     def initialize(self):
+         # Create the ONNX session
+         logging.info(f"Creating ONNX session for [{self.onnx_file}]")
+         options = onnxruntime.SessionOptions()
+         self.llm_session = onnxruntime.InferenceSession(
+             self.onnx_file,
+             sess_options=options,
+             providers=[
+                 "DmlExecutionProvider",
+                 "CUDAExecutionProvider",
+                 "CPUExecutionProvider",
+             ],
+         )
+
+         # get the data type used by the model
+         data_type_str = self.llm_session.get_inputs()[0].type
+         if data_type_str == "tensor(float16)":
+             self.data_type = np.float16
+         elif data_type_str == "tensor(float32)":
+             self.data_type = np.float32
+         else:
+             raise Exception(f"Unknown data type {data_type_str}")
+
+         logging.info(f"Detected Data Type [{self.data_type}]")
+
+         # Get the relevant shapes so we can create the inputs
+         for inputs_meta in self.llm_session._inputs_meta:
+             if inputs_meta.name == "x":
+                 x_shape = inputs_meta.shape
+             elif inputs_meta.name == "attn_mask":
+                 attn_mask_shape = inputs_meta.shape
+             elif inputs_meta.name == "k_cache":
+                 k_cache_shape = inputs_meta.shape
+
+         self.hidden_size = x_shape[2]
+         self.max_seq_len = attn_mask_shape[1]
+         self.n_layers = k_cache_shape[1]
+         self.n_heads = k_cache_shape[3]
+
+         # Initialize the tokenizer and produce the initial tokens.
+         self.tokenizer = Tokenizer(model_path=self.tokenizer_path)
+
+         # create the embedding layer.
+         logging.info(
+             f"Creating the Embedding Layer. Size [{self.tokenizer.n_words}, {self.hidden_size}]"
+         )
+         self.embeddingLayer = torch.nn.Embedding(
+             self.tokenizer.n_words, self.hidden_size
+         )
+
+         # the embedding weights ship separately (embeddings.pth), exported from the original Llama model
+         d = torch.load(self.embedding_file)
+         self.embeddingLayer.load_state_dict(d)
+         self.embeddingLayer.eval()
+
+         # Create the attention mask.
+         self.attn_mask = -10000.0 * torch.triu(
+             torch.ones(attn_mask_shape), diagonal=1
+         ).cpu().detach().numpy().astype(self.data_type)
+
+         # Create the K and V caches.
+         self.head_dim = int(self.hidden_size / self.n_heads)
+         self.k_cache = np.zeros(
+             [1, self.n_layers, self.max_seq_len, self.n_heads, self.head_dim],
+             dtype=self.data_type,
+         )
+         self.v_cache = np.zeros(
+             [1, self.n_layers, self.max_seq_len, self.n_heads, self.head_dim],
+             dtype=self.data_type,
+         )
+
+     def shutdown(self):
+         pass
+
+     def generate_prompt_with_history(self, text, history, tokenizer, max_length=2048):
+         prompt = (
+             "[|Human|]Hey there I am a human that would like to have "
+             "a conversation with you.\n"
+             "[|AI|]Sure, I am happy to answer most questions\n"
+             "[|Human|]Great, I insist that we take turns.\n"
+             "[|AI|]I agree, we should take turns.\n"
+             "[|Human|]Great, can we also keep answers short\n"
+             "[|AI|]Yes, short answers are usually best"
+         )
+
+         history = ["\n[|Human|]{}\n[|AI|]{}".format(x[0], x[1]) for x in history]
+         history.append("\n[|Human|]{}\n[|AI|]".format(text))
+         history_text = ""
+         flag = False
+         # walk backwards through the history, keeping as many turns as fit in max_length tokens
+         for x in history[::-1]:
+             if (
+                 len(self.tokenizer.encode(prompt + history_text + x, bos=True, eos=False))
+                 <= max_length
+             ):
+                 history_text = x + history_text
+                 flag = True
+             else:
+                 break
+         if flag:
+             return prompt + history_text, torch.tensor(
+                 self.tokenizer.encode(prompt + history_text, bos=True, eos=False)
+             ).unsqueeze(0)
+         else:
+             return None
+
+     def sample_logits(
+         self,
+         logits: np.ndarray,
+         sampling_method: str = "greedy",
+         sampling_value: float = None,
+         temperature: float = 1.0,
+     ) -> np.ndarray:
+         if temperature == 0 or sampling_method == "greedy":
+             next_token = np.argmax(logits, axis=-1).astype(np.int64)
+
+         elif sampling_method == "top_k" or sampling_method == "top_p":
+             assert sampling_value is not None
+
+             # temperature scaling, conversion to probabilities and sorting are common to both top-k and top-p
+             # convert logits to 32-bit float to avoid numerical issues with np.exp
+             logits = logits.astype(np.float32)
+             # Scale the logits by the temperature
+             logits /= temperature
+             # Convert logits to probabilities
+             probs = np.exp(logits) / np.sum(np.exp(logits))
+             # Sort the probabilities and indices
+             sorted_probs = np.sort(probs)[:, ::-1]
+             sorted_indices = np.argsort(probs)[:, ::-1]
+
+             # find the index of interest for each of the methods.
+             if sampling_method == "top_k":
+                 index_of_interest = int(sampling_value)
+             elif sampling_method == "top_p":
+                 p = sampling_value
+                 cumulative_probs = np.cumsum(sorted_probs, axis=-1)
+                 # find the index of the first cumulative probability that exceeds p
+                 for index_of_interest, cumulative_prob in enumerate(cumulative_probs[0]):
+                     if cumulative_prob > p:
+                         break
+
+             probs_of_interest = sorted_probs[:, : index_of_interest + 1]
+             indices_of_interest = sorted_indices[:, : index_of_interest + 1]
+             # Normalize the probabilities and select the next token
+             probs_of_interest /= np.sum(probs_of_interest)
+             next_token = np.array(
+                 [np.random.choice(indices_of_interest[0], p=probs_of_interest[0])]
+             )
+         else:
+             raise Exception(f"Unknown sampling method {sampling_method}")
+
+         return next_token
+
+     def greedy_search(
+         self,
+         input_ids,
+         model,
+         tokenizer,
+         stop_words: list,
+         max_length: int,
+         temperature: float = 1.0,
+         top_p: float = 1.0,
+         top_k: int = 25,
+     ):
+         # despite the name, this streams tokens using top-p sampling via sample_logits
+         generated_tokens = []
+         pos = np.array(0)
+
+         x = (
+             self.embeddingLayer(torch.tensor(input_ids))
+             .detach()
+             .cpu()
+             .numpy()
+             .astype(self.data_type)
+         )
+
+         for _ in range(max_length):
+             results = self.llm_session.run(
+                 None,
+                 {
+                     "x": x,
+                     "attn_mask": self.attn_mask,
+                     "k_cache": self.k_cache[:, :, :pos],
+                     "v_cache": self.v_cache[:, :, :pos],
+                     "pos": pos.astype(np.int64),
+                 },
+             )
+             logits, k_out, v_out = results[:3]
+
+             next_token = self.sample_logits(logits, "top_p", top_p, temperature)
+             next_token = next_token.reshape(1, -1)
+
+             # Stop if/when we get an end-of-text token before reaching the maximum sequence length
+             if next_token[0] == tokenizer.eos_id:
+                 del logits
+                 gc.collect()
+                 return
+
+             input_ids = torch.cat((input_ids, torch.tensor(next_token)), dim=-1)
+
+             generated_tokens.append(next_token[0].item())
+             text = tokenizer.decode(generated_tokens)
+
+             # append the new K/V entries to the cache and advance the position
+             seq_len = x.shape[1]
+             self.k_cache[:, :, pos : pos + seq_len] = k_out
+             self.v_cache[:, :, pos : pos + seq_len] = v_out
+             pos = np.array(int(pos) + seq_len)
+
+             x = (
+                 self.embeddingLayer(torch.tensor(next_token))
+                 .unsqueeze(0)
+                 .reshape([1, 1, self.hidden_size])
+                 .cpu()
+                 .detach()
+                 .numpy()
+                 .astype(self.data_type)
+             )
+
+             yield text
+
+             if any(sw in text for sw in stop_words):
+                 del logits
+                 gc.collect()
+                 return
+
+     def predict(
+         self,
+         text,
+         chatbot,
+         history,
+         top_p,
+         temperature,
+         max_length_tokens,
+         max_context_length_tokens,
+     ):
+         if text == "":
+             yield chatbot, history, "Empty context."
+             return
+
+         if not hasattr(self, "llm_session"):
+             yield [[text, "No Model Found"]], [], "No Model Found"
+             return
+
+         inputs = self.generate_prompt_with_history(
+             text, history, self.tokenizer, max_length=max_context_length_tokens
+         )
+
+         if inputs is None:
+             yield chatbot, history, "Input too long."
+             return
+         else:
+             prompt, inputs = inputs
+
+         input_ids = inputs[:, -max_context_length_tokens:]
+
+         self.total_count += 1
+         logging.info(f"Request count: {self.total_count}")
+
+         # reset the K and V caches for the new request
+         self.head_dim = int(self.hidden_size / self.n_heads)
+         self.k_cache = np.zeros(
+             [1, self.n_layers, self.max_seq_len, self.n_heads, self.head_dim],
+             dtype=self.data_type,
+         )
+         self.v_cache = np.zeros(
+             [1, self.n_layers, self.max_seq_len, self.n_heads, self.head_dim],
+             dtype=self.data_type,
+         )
+
+         # fall back to the incoming state if generation yields nothing
+         a, b = chatbot, history
+         for x in self.greedy_search(
+             input_ids,
+             self.llm_session,
+             self.tokenizer,
+             stop_words=["[|Human|]", "[|AI|]"],
+             max_length=max_length_tokens,
+             temperature=temperature,
+             top_p=top_p,
+         ):
+             if is_stop_word_or_prefix(x, ["[|Human|]", "[|AI|]"]) is False:
+                 if "[|Human|]" in x:
+                     x = x[: x.index("[|Human|]")].strip()
+                 if "[|AI|]" in x:
+                     x = x[: x.index("[|AI|]")].strip()
+                 x = x.strip()
+                 a, b = (
+                     [[y[0], convert_to_markdown(y[1])] for y in history]
+                     + [[text, convert_to_markdown(x)]],
+                     history + [[text, x]],
+                 )
+                 yield a, b, "Generating..."
+             if shared_state.interrupted:
+                 shared_state.recover()
+                 try:
+                     yield a, b, "Stop: Success"
+                     return
+                 except Exception as e:
+                     print(type(e).__name__, e)
+
+         del input_ids
+         gc.collect()
+         torch.cuda.empty_cache()
+
+         try:
+             yield a, b, "Generate: Success"
+         except Exception as e:
+             print(type(e).__name__, e)
+
+         return
+
+     def retry(
+         self,
+         text,
+         chatbot,
+         history,
+         top_p,
+         temperature,
+         max_length_tokens,
+         max_context_length_tokens,
+     ):
+         logging.info("Retry...")
+         if len(history) == 0:
+             yield chatbot, history, "Empty context"
+             return
+         chatbot.pop()
+         inputs = history.pop()[0]
+         for x in self.predict(
+             inputs,
+             chatbot,
+             history,
+             top_p,
+             temperature,
+             max_length_tokens,
+             max_context_length_tokens,
+         ):
+             yield x
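
Note: a minimal sketch of driving LlamaOnnxInterface directly, outside the Gradio app; the file paths are illustrative and must match your local layout:

from ChatApp.interface.hddr_llama_onnx_interface import LlamaOnnxInterface

iface = LlamaOnnxInterface(
    onnx_file="FP16/LlamaV2_13B_float16.onnx",  # illustrative paths
    embedding_file="embeddings.pth",
    tokenizer_path="tokenizer.model",
)
iface.initialize()

# predict is a generator: each iteration streams the partial response
# args: text, chatbot pairs, history, top_p, temperature, max generation tokens, max context tokens
for chatbot_pairs, raw_history, status in iface.predict(
    "Hello!", [], [], 0.9, 0.75, 256, 2048
):
    pass
print(status, chatbot_pairs[-1][1])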
ChatApp/requirements.txt ADDED
@@ -0,0 +1,18 @@
+ gradio
+ mdtex2html
+ pypinyin
+ tiktoken
+ socksio
+ tqdm
+ colorama
+ duckduckgo_search
+ Pygments
+ llama_index
+ langchain
+ markdown
+ markdown2
+ torch
+ git+https://github.com/huggingface/peft.git
+ git+https://github.com/huggingface/transformers.git
+ SentencePiece
+ onnxruntime-gpu