love2poppy committed

Commit 6ebf55c · 1 Parent(s): 3a634f5

add application file
README.md CHANGED
@@ -1,13 +1,13 @@
 ---
-title: Alpaca Cot
-emoji: 😻
-colorFrom: red
-colorTo: purple
+title: Alpaca CoT
+emoji: 👍
+colorFrom: indigo
+colorTo: yellow
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
 pinned: false
-license: gpl-2.0
+license: gpl
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,160 @@
+import argparse
+from collections import namedtuple
+
+import gradio as gr
+import torch
+from peft import PeftModel
+from transformers import (
+    LlamaForCausalLM, LlamaTokenizer,
+    AutoModel, AutoTokenizer,
+    BloomForCausalLM, BloomTokenizerFast, GenerationConfig)
+
+tokenizer = None
+model = None
+LOAD_8BIT = False
+
+# Map each supported model type to its (tokenizer, model) classes.
+ModelClass = namedtuple("ModelClass", ('tokenizer', 'model'))
+
+_MODEL_CLASSES = {
+    "llama": ModelClass(**{
+        "tokenizer": LlamaTokenizer,
+        "model": LlamaForCausalLM,
+    }),
+    "bloom": ModelClass(**{
+        "tokenizer": AutoTokenizer,
+        "model": BloomForCausalLM,
+    })
+}
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def get_model_class(model_type, model_name_or_path, lora_model_path):
+    """Load the base model and tokenizer, then attach the LoRA adapter."""
+    global model, tokenizer
+
+    model_class = _MODEL_CLASSES[model_type]  # tokenizer, model
+
+    model_base = model_class.model.from_pretrained(model_name_or_path,
+                                                   load_in_8bit=LOAD_8BIT,
+                                                   torch_dtype=torch.float16,
+                                                   device_map="auto")
+    tokenizer = model_class.tokenizer.from_pretrained(model_name_or_path)  # default add_eos_token=False
+
+    model = PeftModel.from_pretrained(
+        model_base,
+        lora_model_path,
+        torch_dtype=torch.float16,
+    )
+    if not LOAD_8BIT:
+        model.half()
+
+
+def predict(
+    instruction,
+    top_p=0.75,
+    temperature=0.1,
+    history=None,
+    top_k=40,
+    num_beams=4,
+    max_new_tokens=512,
+    **kwargs,
+):
+    history = history or []
+
+    # Standard Alpaca-style prompt template.
+    prompt = (
+        "Below is an instruction that describes a task. "
+        "Write a response that appropriately completes the request.\n\n"
+        "### Instruction:\n{0}\n\n### Response:"
+    ).format(instruction)
+
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to(device)
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+        **kwargs,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=max_new_tokens,
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s)
+
+    print('Model response:', output)
+
+    # Keep only the text after the "### Response:" marker.
+    bot_response = output.split("### Response:")[1].strip()
+
+    history.append((instruction, bot_response))
+
+    # Return the history twice: once for the Chatbot display, once for the State.
+    return history, history
+
+
+def predict_test(message, top_p, temperature, history):
+    history = history or []
+
+    user_message = f"{message} {top_p}, {temperature}"
+    print(user_message)
+
+    history.append((message, user_message))
+    return history, history
+
+
+def clear_session():
+    return '', None
+
+
+parser = argparse.ArgumentParser(description='Alpaca-CoT Gradio demo')
+parser.add_argument('--size', default=7, type=int, help='the size of the llama model')
+parser.add_argument('--data', default="", type=str, help='the data used for instruction tuning')
+parser.add_argument('--local_rank', default=-1, type=int, help='node rank for distributed training')
+# Note: only 'llama' and 'bloom' are mapped in _MODEL_CLASSES above.
+parser.add_argument('--model_type', default="llama", choices=['llama', 'chatglm', 'bloom'])
+parser.add_argument('--model_name_or_path', default="decapoda-research/llama-7b-hf", type=str)
+parser.add_argument('--lora_name_or_path', default="./saved-alpaca-belle-cot7b", type=str)
+
+args = parser.parse_args()
+
+get_model_class(args.model_type, args.model_name_or_path, args.lora_name_or_path)
+
+block = gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""")
+
+with block as demo:
+
+    # top_p, temperature
+    with gr.Accordion("Parameters", open=False):
+        top_p = gr.Slider(minimum=0, maximum=1.0, value=0.75, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
+        temperature = gr.Slider(minimum=0, maximum=5.0, value=0.1, step=0.1, interactive=True, label="Temperature")
+
+    chatbot = gr.Chatbot(label="Alpaca-CoT")
+    message = gr.Textbox()
+    state = gr.State()
+
+    message.submit(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state], queue=False)
+
+    with gr.Row():
+        clear_history = gr.Button("🗑 清除历史对话 | Clear History")
+        clear = gr.Button('🧹 清除输入 | Clear Input')
+        send = gr.Button("🚀 发送 | Send")
+        regenerate = gr.Button("🚗 重新生成 | Regenerate")
+
+    # regenerate.click(regenerate, inputs=[message], outputs=[chatbot])
+    regenerate.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)
+    send.click(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state])
+    clear.click(lambda: None, None, message, queue=False)
+    clear_history.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)
+
+demo.queue(max_size=20, concurrency_count=20).launch()
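
For reference, the load-and-generate path that app.py wires into Gradio can also be exercised standalone. The sketch below mirrors the defaults above (the decapoda-research/llama-7b-hf base plus the ./saved-alpaca-belle-cot7b adapter); the example instruction is only an illustration and is not part of the commit.

import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig

# Load the base model and attach the LoRA adapter, as get_model_class() does above.
base = LlamaForCausalLM.from_pretrained("decapoda-research/llama-7b-hf",
                                        torch_dtype=torch.float16, device_map="auto")
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = PeftModel.from_pretrained(base, "./saved-alpaca-belle-cot7b", torch_dtype=torch.float16)

# Build the same Alpaca-style prompt used by predict(); the instruction is a placeholder.
prompt = ("Below is an instruction that describes a task. "
          "Write a response that appropriately completes the request.\n\n"
          "### Instruction:\nGive three tips for staying healthy.\n\n### Response:")
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(base.device)

with torch.no_grad():
    output = model.generate(input_ids=input_ids,
                            generation_config=GenerationConfig(temperature=0.1, top_p=0.75, num_beams=4),
                            max_new_tokens=256)
print(tokenizer.decode(output[0]).split("### Response:")[1].strip())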
requirements.txt ADDED
@@ -0,0 +1,14 @@
+# chatglm
+icetk
+cpm_kernels==1.0.11
+torch>=1.13
+
+datasets
+loralib
+sentencepiece
+git+https://github.com/huggingface/transformers.git
+accelerate
+bitsandbytes
+git+https://github.com/huggingface/peft.git
+gradio
+appdirs
saved-alpaca-belle-cot7b/adapter_config.json ADDED
@@ -0,0 +1,18 @@
+{
+  "base_model_name_or_path": "/mnt/bn/qingyi-bn-lq/llama/llama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
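
The adapter settings above correspond to a standard peft LoRA setup. A minimal sketch of the equivalent LoraConfig, reconstructed from the JSON above (not taken from the original training script):

from peft import LoraConfig

# Reconstructed from adapter_config.json above; a sketch, not the committed training code.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)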
saved-alpaca-belle-cot7b/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:330494735335477a234baf27e22b489c89e7e7a34d26a212ccec73a8434164e3
+size 16822989