Hanze Dong committed
Commit 5516bfe · 1 Parent(s): 15adf4e
app.py CHANGED
@@ -1,8 +1,224 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.
+"""A simple shell chatbot implemented with lmflow APIs.
+"""
+import logging
+import json
+import sys
+import warnings
 import gradio as gr
+from dataclasses import dataclass, field
+from transformers import HfArgumentParser
+from typing import Optional
 
-def greet(name):
-    return "Hello " + name + "!!"
+from lmflow.datasets.dataset import Dataset
+from lmflow.pipeline.auto_pipeline import AutoPipeline
+from lmflow.models.auto_model import AutoModel
+from lmflow.args import ModelArguments, DatasetArguments, AutoArguments
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+# Maximum number of alternating question/answer boxes rendered in the UI.
+MAX_BOXES = 20
 
+logging.disable(logging.ERROR)
+warnings.filterwarnings("ignore")
+
+title = """
+<h1 align="center">LMFlow-CHAT</h1>
+<link rel="stylesheet" href="/path/to/styles/default.min.css">
+<script src="/path/to/highlight.min.js"></script>
+<script>hljs.highlightAll();</script>
+
+<img src="https://optimalscale.github.io/LMFlow/_static/logo.png" alt="LMFlow" style="width: 30%; min-width: 60px; display: block; margin: auto; background-color: transparent;">
+
+<p>LMFlow is an extensible, convenient, and efficient toolbox for finetuning large machine learning models, designed to be user-friendly, speedy and reliable, and accessible to the entire community.</p>
+
+<p>We have thoroughly tested this toolkit and are pleased to make it available on <a class="reference external" href="https://github.com/OptimalScale/LMFlow">GitHub</a>.</p>
+"""
+css = """
+#user {
+    float: right;
+    position: relative;
+    right: 5px;
+    width: auto;
+    min-height: 32px;
+    max-width: 60%;
+    line-height: 32px;
+    padding: 2px 8px;
+    font-size: 14px;
+    background: #9DC284;
+    border-radius: 5px;
+    margin: 10px 0px;
+}
+
+#chatbot {
+    float: left;
+    position: relative;
+    right: 5px;
+    width: auto;
+    min-height: 32px;
+    max-width: 60%;
+    line-height: 32px;
+    padding: 2px 8px;
+    font-size: 14px;
+    background: #7BA7D7;
+    border-radius: 5px;
+    margin: 10px 0px;
+}
+"""
+
+
+@dataclass
+class ChatbotArguments:
+    prompt_structure: Optional[str] = field(
+        default="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:",
+        metadata={
+            "help": "prompt structure given user's input text"
+        },
+    )
+    end_string: Optional[str] = field(
+        default="#",
+        metadata={
+            "help": "end string mark of the chatbot's output"
+        },
+    )
+    max_new_tokens: Optional[int] = field(
+        default=1000,
+        metadata={
+            "help": "maximum number of generated tokens"
+        },
+    )
+    temperature: Optional[float] = field(
+        default=0.7,
+        metadata={
+            "help": "the higher this value, the more random the model output"
+        },
+    )
+
+
+def main():
+    pipeline_name = "inferencer"
+    PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name)
+
+    parser = HfArgumentParser((
+        ModelArguments,
+        PipelineArguments,
+        ChatbotArguments,
+    ))
+    model_args, pipeline_args, chatbot_args = (
+        parser.parse_args_into_dataclasses()
+    )
+    model_args.model_name_or_path = "pinkmanlove/llama-7b-hf"
+    model_args.lora_model_path = "./robin-7b"
+
+    with open("configs/ds_config_chatbot.json", "r") as f:
+        ds_config = json.load(f)
+
+    model = AutoModel.get_model(
+        model_args,
+        tune_strategy='none',
+        ds_config=ds_config,
+        device=pipeline_args.device,
+    )
+
+    # We don't need input data; we will read interactively from stdin.
+    data_args = DatasetArguments(dataset_path=None)
+    dataset = Dataset(data_args)
+
+    inferencer = AutoPipeline.get_pipeline(
+        pipeline_name=pipeline_name,
+        model_args=model_args,
+        data_args=data_args,
+        pipeline_args=pipeline_args,
+    )
+
+    # Chats
+    model_name = model_args.model_name_or_path
+    if model_args.lora_model_path is not None:
+        model_name += f" + {model_args.lora_model_path}"
+
+    # context = (
+    #     "You are a helpful assistant who follows the given instructions"
+    #     " unconditionally."
+    # )
+
+    end_string = chatbot_args.end_string
+    prompt_structure = chatbot_args.prompt_structure
+
+    # Number of tokens generated per streaming step.
+    token_per_step = 4
+
+    def chat_stream(context, query: str, history=None, **kwargs):
+        if history is None:
+            history = []
+
+        print_index = 0
+        context += prompt_structure.format(input_text=query)
+        # Keep only the most recent characters that fit the model's window.
+        context = context[-model.get_max_length():]
+        input_dataset = dataset.from_dict({
+            "type": "text_only",
+            "instances": [{"text": context}]
+        })
+        for response, flag_break in inferencer.stream_inference(
+            context=context,
+            model=model,
+            max_new_tokens=chatbot_args.max_new_tokens,
+            token_per_step=token_per_step,
+            temperature=chatbot_args.temperature,
+            end_string=end_string,
+            input_dataset=input_dataset,
+        ):
+            # Yield only the newly generated suffix at each step.
+            delta = response[print_index:]
+            seq = response
+            print_index = len(response)
+
+            yield delta, history + [(query, seq)]
+            if flag_break:
+                context += response + "\n"
+                break
+
+    def predict(input, history=None):
+        # Reset the shared conversation context for each new prediction.
+        global context
+        context = ""
+
+        if history is None:
+            history = []
+        for response, history in chat_stream(context, input, history):
+            updates = []
+            for query, response in history:
+                updates.append(gr.update(visible=True, value="" + query))
+                updates.append(gr.update(visible=True, value="" + response))
+            if len(updates) < MAX_BOXES:
+                updates = updates + [gr.Textbox.update(visible=False)] * (MAX_BOXES - len(updates))
+            yield [history] + updates
+
+    with gr.Blocks(css=css) as demo:
+        gr.HTML(title)
+        state = gr.State([])
+        text_boxes = []
+        for i in range(MAX_BOXES):
+            if i % 2 == 0:
+                text_boxes.append(gr.Markdown(visible=False, label="Q:", elem_id="user"))
+            else:
+                text_boxes.append(gr.Markdown(visible=False, label="A:", elem_id="chatbot"))
+
+        txt = gr.Textbox(
+            show_label=False,
+            placeholder="Enter text and press send.",
+        )
+        button = gr.Button("Send")
+
+        button.click(predict, [txt, state], [state] + text_boxes)
+        demo.queue().launch()
+
+
+if __name__ == "__main__":
+    main()
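Two details of the new app.py are worth a note. Since HfArgumentParser builds the CLI from the dataclasses, each ChatbotArguments field becomes a flag when launching the script (e.g. --temperature 0.5, --max_new_tokens 512), though model_name_or_path and lora_model_path are overwritten after parsing. The delta bookkeeping in chat_stream() is the core of the streaming UI; below is a minimal, self-contained sketch of that logic (not part of the commit, with fake_stream_inference as a stand-in for inferencer.stream_inference):

def fake_stream_inference():
    # Stand-in for inferencer.stream_inference(): yields the full text
    # generated so far plus a "done" flag, a few characters at a time.
    text = "Hello! I am an LMFlow chatbot."
    for end in range(4, len(text) + 4, 4):
        partial = text[:min(end, len(text))]
        yield partial, len(partial) == len(text)

print_index = 0
for response, flag_break in fake_stream_inference():
    delta = response[print_index:]  # only the newly generated suffix
    print_index = len(response)
    print(delta, end="", flush=True)
    if flag_break:
        break
print()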
configs/ds_config_chatbot.json ADDED
@@ -0,0 +1,17 @@
+{
+    "fp16": {
+        "enabled": false
+    },
+    "bf16": {
+        "enabled": true
+    },
+    "comms_logger": {
+        "enabled": false,
+        "verbose": false,
+        "prof_all": false,
+        "debug": false
+    },
+    "steps_per_print": 20000000000000000,
+    "train_micro_batch_size_per_gpu": 1,
+    "wall_clock_breakdown": false
+}
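This config runs the chat model in bf16 with fp16 off, disables DeepSpeed's communication logging, and sets steps_per_print to an astronomically large value so periodic step logging effectively never fires. It is plain JSON; app.py loads it with the standard library and hands it to AutoModel.get_model() as ds_config, as this minimal sketch (not part of the commit) shows:

import json

with open("configs/ds_config_chatbot.json") as f:
    ds_config = json.load(f)

# bf16 inference is on and fp16 is off, matching the file above.
assert ds_config["bf16"]["enabled"] is True
assert ds_config["fp16"]["enabled"] is False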
configs/ds_config_zero2.json ADDED
@@ -0,0 +1,45 @@
+{
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 16,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+
+    "bf16": {
+        "enabled": "auto"
+    },
+
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "betas": "auto",
+            "eps": "auto",
+            "weight_decay": "auto"
+        }
+    },
+
+    "zero_optimization": {
+        "stage": 2,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "allgather_partitions": true,
+        "allgather_bucket_size": 2e8,
+        "overlap_comm": true,
+        "reduce_scatter": true,
+        "reduce_bucket_size": 2e8,
+        "contiguous_gradients": true
+    },
+
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 2000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
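The "auto" placeholders are not valid raw DeepSpeed values; they are resolved by the Hugging Face Trainer integration, which fills in the learning rate, batch sizes, and precision flags from TrainingArguments when the config is passed via its deepspeed argument. A hedged sketch of that assumed usage (the output_dir and hyperparameters below are placeholders, not from this commit):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="out",               # placeholder
    per_device_train_batch_size=1,  # fills "train_micro_batch_size_per_gpu"
    learning_rate=2e-5,             # fills the optimizer "lr"
    bf16=True,                      # resolves the bf16 "enabled": "auto"
    deepspeed="configs/ds_config_zero2.json",
)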
configs/ds_config_zero3.json ADDED
@@ -0,0 +1,52 @@
+{
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 16,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+
+    "bf16": {
+        "enabled": "auto"
+    },
+
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "betas": "auto",
+            "eps": "auto",
+            "weight_decay": "auto"
+        }
+    },
+
+    "zero_optimization": {
+        "stage": 3,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "offload_param": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "overlap_comm": true,
+        "contiguous_gradients": true,
+        "sub_group_size": 1e9,
+        "reduce_bucket_size": "auto",
+        "stage3_prefetch_bucket_size": "auto",
+        "stage3_param_persistence_threshold": "auto",
+        "stage3_max_live_parameters": 1e9,
+        "stage3_max_reuse_distance": 1e9,
+        "stage3_gather_16bit_weights_on_model_save": true
+    },
+
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 2000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
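Compared with the ZeRO-2 config, this one additionally shards and CPU-offloads the parameters themselves (offload_param and the stage3_* knobs). Because parameters then live scattered across ranks, "stage3_gather_16bit_weights_on_model_save": true matters: it tells DeepSpeed to reassemble a consolidated 16-bit state_dict at save time, so the saved checkpoint loads without DeepSpeed. A hedged continuation of the Trainer sketch above (model and train_dataset are placeholders, hence commented out):

from transformers import Trainer, TrainingArguments

args = TrainingArguments(output_dir="out", deepspeed="configs/ds_config_zero3.json")
# trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
# trainer.train()
# trainer.save_model("out/final")  # gathers the sharded 16-bit weights on rank 0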
configs/ds_config_zero3_for_eval.json ADDED
@@ -0,0 +1,29 @@
+{
+    "bf16": {
+        "enabled": true
+    },
+    "zero_optimization": {
+        "stage": 3,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "offload_param": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "overlap_comm": true,
+        "contiguous_gradients": true,
+        "sub_group_size": 1e9,
+        "reduce_bucket_size": "auto",
+        "stage3_prefetch_bucket_size": "auto",
+        "stage3_param_persistence_threshold": "auto",
+        "stage3_max_live_parameters": 1e9,
+        "stage3_max_reuse_distance": 1e9,
+        "stage3_gather_16bit_weights_on_model_save": true
+    },
+
+    "steps_per_print": 2000,
+    "train_micro_batch_size_per_gpu": 1,
+    "wall_clock_breakdown": false
+}
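This trimmed ZeRO-3 config drops the optimizer and scheduler sections entirely; it only describes how to shard and offload parameters for forward passes, which is all evaluation needs. train_micro_batch_size_per_gpu is presumably kept just to satisfy DeepSpeed's config validation. A hedged sketch of eval-only initialization (the model is a placeholder module; this call pattern is standard deepspeed.initialize usage, not something shown in this commit):

import json

import deepspeed
import torch

with open("configs/ds_config_zero3_for_eval.json") as f:
    ds_config = json.load(f)

model = torch.nn.Linear(8, 8)  # placeholder for a real HF model
engine, _, _, _ = deepspeed.initialize(model=model, config=ds_config)
engine.eval()
# outputs = engine(inputs)  # forward passes stream offloaded params to the GPU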