Spaces:

yentinglin
/

Taiwan-LLaMa2

Running

App Files Community

yentinglin committed on Jun 8

Commit

7a42d65

•

1 Parent(s): d219c1f

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -35

app.py CHANGED Viewed

@@ -1,10 +1,8 @@
 import os
 import gradio as gr
-from text_generation import Client
-from conversation import get_conv_template
 from transformers import AutoTokenizer
 from pymongo import MongoClient
 DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
 USER = os.getenv("MONGO_USER")
@@ -60,7 +58,7 @@ LICENSE = """
 ## Licenses
 - Code is licensed under Apache 2.0 License.
-- Models are licensed under the LLAMA 2 Community License.
 - By using this model, you agree to the terms and conditions specified in the license.
 - By using this demo, you agree to share your input utterances with us to improve the model.
@@ -72,14 +70,12 @@ Taiwan-LLaMa project acknowledges the efforts of the [Meta LLaMa team](https://g
 DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
 endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
-client = Client(endpoint_url, timeout=120)
-eos_token = "</s>"
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1536
 max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
-model_name = "yentinglin/Taiwan-LLM-7B-v2.0-chat"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 with gr.Blocks() as demo:
@@ -142,34 +138,30 @@ with gr.Blocks() as demo:
         return "", history + [[user_message, None]]
-    def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
-        conv = get_conv_template("twllm_v2").copy()
-        roles = {"human": conv.roles[0], "gpt": conv.roles[1]}  # map human to USER and gpt to ASSISTANT
-        conv.system = system_prompt
         for user, bot in history:
-            conv.append_message(roles['human'], user)
-            conv.append_message(roles["gpt"], bot)
-        msg = conv.get_prompt()
-        prompt_tokens = tokenizer.encode(msg)
-        length_of_prompt = len(prompt_tokens)
-        if length_of_prompt > max_prompt_length:
-            msg = tokenizer.decode(prompt_tokens[-max_prompt_length + 1:])
         history[-1][1] = ""
-        for response in client.generate_stream(
-                msg,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                top_k=top_k,
-                repetition_penalty=1.1,
-        ):
-            if not response.token.special:
-                character = response.token.text
-                history[-1][1] += character
-                yield history
-        # After generating the response, store the conversation history in MongoDB
         conversation_document = {
             "model_name": model_name,
             "history": history,
@@ -177,7 +169,6 @@ with gr.Blocks() as demo:
             "max_new_tokens": max_new_tokens,
             "temperature": temperature,
             "top_p": top_p,
-            "top_k": top_k,
         }
         conversations_collection.insert_one(conversation_document)
@@ -266,5 +257,5 @@ with gr.Blocks() as demo:
     gr.Markdown(LICENSE)
-demo.queue(concurrency_count=4, max_size=128)
-demo.launch()

 import os
 import gradio as gr
 from transformers import AutoTokenizer
 from pymongo import MongoClient
+import openai
 DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
 USER = os.getenv("MONGO_USER")
 ## Licenses
 - Code is licensed under Apache 2.0 License.
+- Models are licensed under the LLAMA Community License.
 - By using this model, you agree to the terms and conditions specified in the license.
 - By using this demo, you agree to share your input utterances with us to improve the model.
 DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
 endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1536
 max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
+model_name = "yentinglin/Llama-3-Taiwan-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 with gr.Blocks() as demo:
         return "", history + [[user_message, None]]
+    def bot(history, max_new_tokens, temperature, top_p, system_prompt):
+        messages = [{"role": "system", "content": system_prompt}]
         for user, bot in history:
+            messages.append({"role": "user", "content": user})
+            messages.append({"role": "assistant", "content": bot})
         history[-1][1] = ""
+        response = openai.ChatCompletion.create(
+            model=model_name,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            n=1,
+            stream=True,
+        )
+        for chunk in response:
+            if 'choices' in chunk:
+                delta = chunk['choices'][0]['delta']
+                if 'content' in delta:
+                    history[-1][1] += delta['content']
+                    yield history
         conversation_document = {
             "model_name": model_name,
             "history": history,
             "max_new_tokens": max_new_tokens,
             "temperature": temperature,
             "top_p": top_p,
         }
         conversations_collection.insert_one(conversation_document)
     gr.Markdown(LICENSE)
+demo.queue(max_size=128)
+demo.launch(max_threads=10)