masanorihirano committed
Commit c040907
1 Parent(s): 737268c

update script

Files changed (2):
  1. .gitignore +2 -0
  2. app.py +80 -59
.gitignore CHANGED
@@ -1,3 +1,5 @@
+secret.txt
+slack_url.txt
 .idea
 .env
 poetry.lock
app.py CHANGED
@@ -6,6 +6,7 @@ from typing import Optional
 from typing import Tuple
 
 import gradio as gr
+import requests
 import torch
 from fastchat.serve.inference import compress_module
 from fastchat.serve.inference import raise_warning_for_old_weights
@@ -31,6 +32,7 @@ BASE_MODEL = "decapoda-research/llama-13b-hf"
 LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 DATASET_REPOSITORY = os.environ.get("DATASET_REPOSITORY", None)
+SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK", None)
 
 repo = None
 LOCAL_DIR = "/home/user/data/"
@@ -161,12 +163,65 @@ def evaluate(
     max_tokens=384,
     repetition_penalty=1.0,
 ):
-    num_beams: int = 1
-    top_p: float = 0.75
-    top_k: int = 40
-    prompt = generate_prompt(instruction, input)
-    inputs = tokenizer(prompt, return_tensors="pt")
-    if len(inputs["input_ids"][0]) > max_tokens + 10:
+    try:
+        num_beams: int = 1
+        top_p: float = 0.75
+        top_k: int = 40
+        prompt = generate_prompt(instruction, input)
+        inputs = tokenizer(prompt, return_tensors="pt")
+        if len(inputs["input_ids"][0]) > max_tokens + 10:
+            if HF_TOKEN and DATASET_REPOSITORY:
+                try:
+                    now = datetime.datetime.now()
+                    current_time = now.strftime("%Y-%m-%d %H:%M:%S")
+                    print(f"[{current_time}] Pushing prompt and completion to the Hub")
+                    save_inputs_and_outputs(
+                        now,
+                        prompt,
+                        "",
+                        {
+                            "temperature": temperature,
+                            "top_p": top_p,
+                            "top_k": top_k,
+                            "num_beams": num_beams,
+                            "max_tokens": max_tokens,
+                            "repetition_penalty": repetition_penalty,
+                        },
+                    )
+                except Exception as e:
+                    print(e)
+            return (
+                f"please reduce the input length. Currently, {len(inputs['input_ids'][0])} tokens are used.",
+                gr.update(interactive=True),
+                gr.update(interactive=True),
+            )
+        input_ids = inputs["input_ids"].to(device)
+        generation_config = GenerationConfig(
+            do_sample=False,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty,
+            num_beams=num_beams,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token=tokenizer.eos_token_id,
+        )
+        with torch.no_grad():
+            generation_output = model.generate(
+                input_ids=input_ids,
+                generation_config=generation_config,
+                return_dict_in_generate=True,
+                output_scores=True,
+                max_new_tokens=max_tokens - len(input_ids),
+            )
+        s = generation_output.sequences[0]
+        output = tokenizer.decode(s, skip_special_tokens=True)
+        if prompt.endswith("Response:"):
+            output = output.split("### Response:")[1].strip()
+        elif prompt.endswith("返答:"):
+            output = output.split("### 返答:")[1].strip()
+        else:
+            raise ValueError(f"No valid prompt ends. {prompt}")
         if HF_TOKEN and DATASET_REPOSITORY:
             try:
                 now = datetime.datetime.now()
@@ -175,7 +230,7 @@ def evaluate(
                 save_inputs_and_outputs(
                     now,
                     prompt,
-                    "",
+                    output,
                     {
                         "temperature": temperature,
                         "top_p": top_p,
@@ -187,59 +242,27 @@ def evaluate(
                 )
             except Exception as e:
                 print(e)
+        return output, gr.update(interactive=True), gr.update(interactive=True)
+    except Exception as e:
+        print(e)
+        import traceback
+
+        if SLACK_WEBHOOK:
+            payload_dic = {
+                "text": f"BASE_MODEL: {BASE_MODEL}\n LORA_WEIGHTS: {LORA_WEIGHTS}\n"
+                + f"instruction: {instruction}\ninput: {input}\ntemperature: {temperature}\n"
+                + f"max_tokens: {max_tokens}\nrepetition_penalty: {repetition_penalty}\n\n"
+                + str(traceback.format_exc()),
+                "username": "Hugging Face Space",
+                "channel": "#monitor",
+            }
+
+            requests.post(SLACK_WEBHOOK, data=json.dumps(payload_dic))
         return (
-            f"please reduce the input length. Currently, {len(inputs['input_ids'][0])} tokens are used.",
+            "Error happend. Please return later.",
             gr.update(interactive=True),
             gr.update(interactive=True),
         )
-    input_ids = inputs["input_ids"].to(device)
-    generation_config = GenerationConfig(
-        do_sample=False,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        num_beams=num_beams,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token=tokenizer.eos_token_id,
-    )
-    with torch.no_grad():
-        generation_output = model.generate(
-            input_ids=input_ids,
-            generation_config=generation_config,
-            return_dict_in_generate=True,
-            output_scores=True,
-            max_new_tokens=max_tokens - len(input_ids),
-        )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s, skip_special_tokens=True)
-    if prompt.endswith("Response:"):
-        output = output.split("### Response:")[1].strip()
-    elif prompt.endswith("返答:"):
-        output = output.split("### 返答:")[1].strip()
-    else:
-        raise ValueError(f"No valid prompt ends. {prompt}")
-    if HF_TOKEN and DATASET_REPOSITORY:
-        try:
-            now = datetime.datetime.now()
-            current_time = now.strftime("%Y-%m-%d %H:%M:%S")
-            print(f"[{current_time}] Pushing prompt and completion to the Hub")
-            save_inputs_and_outputs(
-                now,
-                prompt,
-                output,
-                {
-                    "temperature": temperature,
-                    "top_p": top_p,
-                    "top_k": top_k,
-                    "num_beams": num_beams,
-                    "max_tokens": max_tokens,
-                    "repetition_penalty": repetition_penalty,
-                },
-            )
-        except Exception as e:
-            print(e)
-    return output, gr.update(interactive=True), gr.update(interactive=True)
 
 
 def reset_textbox():
@@ -324,8 +347,6 @@ with gr.Blocks(
 
 Please note that this space utilizes [decapoda-research/llama-13b-hf](https://huggingface.co/decapoda-research/llama-13b-hf) and its special license is applied.
 
-
-
 ## データ収集、利用、共有に関するユーザーの同意:
 本アプリを使用することにより、提供するデータに関する以下の条件に同意するものとします:
 
@@ -367,5 +388,5 @@ with gr.Blocks(
     clear_button.click(reset_textbox, [], [instruction, inputs, outputs], queue=False)
 
 demo.queue(max_size=20, concurrency_count=NUM_THREADS, api_open=False).launch(
-    share=True, server_name="0.0.0.0", server_port=7860
+    server_name="0.0.0.0", server_port=7860
 )
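
For reference, the new except branch added to evaluate() reports failures to a Slack incoming webhook. Below is a minimal, self-contained sketch of that reporting pattern, assuming only that the SLACK_WEBHOOK environment variable holds an incoming-webhook URL; the helper name report_error_to_slack and the demo message are illustrative and not part of app.py.

# Minimal sketch of the Slack incoming-webhook reporting used by the new
# except branch in evaluate(). Assumes SLACK_WEBHOOK holds a webhook URL;
# the helper name and message text below are illustrative only.
import json
import os
import traceback

import requests

SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK", None)


def report_error_to_slack(context: str) -> None:
    # Post the current traceback plus caller-supplied context to Slack, if configured.
    if not SLACK_WEBHOOK:
        return
    payload = {
        "text": context + "\n\n" + traceback.format_exc(),
        "username": "Hugging Face Space",  # display name shown in Slack
        "channel": "#monitor",  # channel override; honored only by some webhook types
    }
    # Incoming webhooks accept a JSON body; app.py sends it as data=json.dumps(...)
    requests.post(SLACK_WEBHOOK, data=json.dumps(payload))


if __name__ == "__main__":
    try:
        raise RuntimeError("demo failure")
    except Exception:
        report_error_to_slack("BASE_MODEL: <model>\ninstruction: <instruction>")

As in the committed code, the post is fire-and-forget: the payload is serialized with json.dumps and sent with requests.post, and a failure of the webhook call itself would raise a new exception, so a hardened variant might wrap the post in its own try/except.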