arnocandel committed on
Commit 24b4b28
1 Parent(s): b43c18e

Update with h2oGPT hash e35e6ce906c57495ee80b1e3b8507ad374f6a50d

Files changed (4)
  1. finetune.py +20 -5
  2. generate.py +51 -6
  3. gradio_runner.py +3 -2
  4. requirements.txt +3 -3
finetune.py CHANGED
@@ -30,6 +30,7 @@ class PromptType(Enum):
     human_bot_orig = 9
     prompt_answer = 10
     open_assistant = 11
+    wizard_lm = 12


 prompt_type_to_model_name = {
@@ -56,6 +57,8 @@ prompt_type_to_model_name = {
         'h2oai/h2ogpt-gm-oasst1-en-1024-20b',
         'h2oai/h2ogpt-gm-oasst1-en-1024-12b',
         'h2oai/h2ogpt-gm-oasst1-multilang-1024-20b',
+        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt',
+        'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2',
     ],
     'instruct': [],
     'instruct_with_end': ['databricks/dolly-v2-12b'],
@@ -63,15 +66,18 @@ prompt_type_to_model_name = {
     'human_bot': [
         'h2oai/h2ogpt-oasst1-512-12b',
         'h2oai/h2ogpt-oasst1-512-20b',
+        'h2oai/h2ogpt-oig-oasst1-512-20b',
+        'h2oai/h2ogpt-oig-oasst1-512-12b',
         'h2oai/h2ogpt-oig-oasst1-512-6.9b',
         'h2oai/h2ogpt-research-oasst1-512-30b',  # private
     ],
     'dai_faq': [],
     'summarize': [],
     'simple_instruct': ['t5-small', 't5-large', 'google/flan-t5', 'google/flan-t5-xxl', 'google/flan-ul2'],
-    'instruct_vicuna': ['AlekseyKorshuk/vicuna-7b'],
+    'instruct_vicuna': ['AlekseyKorshuk/vicuna-7b', 'TheBloke/stable-vicuna-13B-HF', 'junelee/wizard-vicuna-13b'],
     'human_bot_orig': ['togethercomputer/GPT-NeoXT-Chat-Base-20B'],
     "open_assistant": ['OpenAssistant/oasst-sft-7-llama-30b-xor', 'oasst-sft-7-llama-30b'],
+    "wizard_lm": ['ehartford/WizardLM-7B-Uncensored', 'ehartford/WizardLM-13B-Uncensored'],
 }

 inv_prompt_type_to_model_name = {v.strip(): k for k, l in prompt_type_to_model_name.items() for v in l}
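Note: the registry above is inverted so that a base model name resolves directly to its prompt type. A minimal standalone sketch of that lookup, trimmed to two entries (illustration only, not code from this commit):

    # Trimmed copy of the mapping, including the new wizard_lm entry
    prompt_type_to_model_name = {
        'human_bot': ['h2oai/h2ogpt-oasst1-512-12b', 'h2oai/h2ogpt-oig-oasst1-512-20b'],
        'wizard_lm': ['ehartford/WizardLM-7B-Uncensored', 'ehartford/WizardLM-13B-Uncensored'],
    }

    # Same inversion as in finetune.py: model name -> prompt type
    inv_prompt_type_to_model_name = {v.strip(): k
                                     for k, l in prompt_type_to_model_name.items()
                                     for v in l}

    assert inv_prompt_type_to_model_name['ehartford/WizardLM-7B-Uncensored'] == 'wizard_lm'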
@@ -222,8 +228,6 @@ def train(
     NOTE: for current pytorch 2.0, flash attention requires installing cuda 11.7 via https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=runfile_local and then when running, to avoid installing driver, docs, samples, just install toolkit. Then when pip installing flash attention do:

     CUDA_HOME=/usr/local/cuda-11.7 pip install flash-attn""")
-        from llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn
-        replace_llama_attn_with_flash_attn()
     assert (
         base_model
     ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
@@ -590,8 +594,8 @@ def train(
         tokenizer=tokenizer,
         train_dataset=train_data,
         eval_dataset=valid_data,
-        # NOTE: CausalLM is not supporting Seq2SeqTrainingArguments arguments, but not incompatible
-        args=transformers.Seq2SeqTrainingArguments(
+        # FIXME: might need Seq2SeqTrainingArguments for some models
+        args=transformers.TrainingArguments(
             per_device_train_batch_size=micro_batch_size,
             per_device_eval_batch_size=1,
             eval_accumulation_steps=10,
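Note: this hunk swaps Seq2SeqTrainingArguments for plain TrainingArguments. In transformers, Seq2SeqTrainingArguments subclasses TrainingArguments and only adds generation-oriented fields such as predict_with_generate, so the base class suffices for causal-LM fine-tuning. A minimal sketch, assuming transformers 4.x is installed (output_dir and the batch size here are placeholders):

    import transformers

    # The swap only drops seq2seq-specific options; everything used here
    # exists on the base class
    assert issubclass(transformers.Seq2SeqTrainingArguments,
                      transformers.TrainingArguments)

    args = transformers.TrainingArguments(
        output_dir='./lora-out',        # placeholder output directory
        per_device_train_batch_size=4,  # micro_batch_size in finetune.py
        per_device_eval_batch_size=1,
        eval_accumulation_steps=10,
    )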
@@ -901,6 +905,17 @@ Current Time: {}
         eos = "</s>"
         terminate_response = [start, PreResponse, pend, eos]
         chat_sep = eos
+    elif prompt_type in [12, "12", "wizard_lm"]:
+        # https://github.com/ehartford/WizardLM/blob/main/src/train_freeform.py
+        preprompt = ''
+        start = ''
+        promptB = promptA = '%s%s' % (preprompt, start)
+        PreInstruct = ""
+        PreInput = None
+        PreResponse = "\n\n### Response"
+        eos = "</s>"
+        terminate_response = [PreResponse, eos]
+        chat_sep = eos
     else:
         raise RuntimeError("No such prompt_type=%s" % prompt_type)

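Note: to make the new wizard_lm branch concrete, here is an illustrative sketch (render_wizard_lm is hypothetical, not a function in this commit) of the prompt it produces: no preamble, the bare instruction, then the "### Response" tag that also serves as a termination marker:

    preprompt = ''
    start = ''
    promptA = '%s%s' % (preprompt, start)
    PreResponse = "\n\n### Response"

    def render_wizard_lm(instruction):
        # WizardLM free-form format: bare instruction + response tag
        return promptA + instruction + PreResponse

    print(render_wizard_lm("Write a haiku about GPUs."))
    # Write a haiku about GPUs.
    #
    # ### Response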
 
generate.py CHANGED
@@ -84,6 +84,7 @@ def main(
         api_open: bool = False,
         allow_api: bool = True,
         input_lines: int = 1,
+        auth: typing.List[typing.Tuple[str, str]] = None,

         sanitize_user_prompt: bool = True,
         sanitize_bot_response: bool = True,
@@ -145,6 +146,8 @@ def main(
     :param api_open: If False, don't let API calls skip gradio queue
     :param allow_api: whether to allow API calls at all to gradio server
     :param input_lines: how many input lines to show for chat box (>1 forces shift-enter for submit, else enter is submit)
+    :param auth: gradio auth for launcher in form [(user1, pass1), (user2, pass2), ...]
+           e.g. --auth=[('jon','password')] with no spaces
     :param sanitize_user_prompt: whether to remove profanity from user input
     :param sanitize_bot_response: whether to remove profanity and repeat lines from bot output
     :param extra_model_options: extra models to show in list in gradio
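Note: per the new docstring, auth takes a list of (user, password) tuples. A usage sketch calling the main() entrypoint directly (equivalent to the documented CLI form --auth=[('jon','password')] with no spaces):

    from generate import main

    # Launch the gradio server with basic auth enabled
    main(base_model='h2oai/h2ogpt-oig-oasst1-512-6.9b',
         auth=[('jon', 'password')])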
@@ -211,7 +214,7 @@ def main(
     if psutil.virtual_memory().available < 94*1024**3:
         # 12B uses ~94GB
         # 6.9B uses ~47GB
-        base_model = 'h2oai/h2ogpt-oig-oasst1-512-6.9b'
+        base_model = 'h2oai/h2ogpt-oig-oasst1-512-6.9b' if not base_model else base_model

     # get defaults
     model_lower = base_model.lower()
@@ -881,13 +884,17 @@ def evaluate(
     else:
         gen_kwargs.update(dict(pad_token_id=tokenizer.eos_token_id))

+    decoder_kwargs = dict(skip_special_tokens=True,
+                          clean_up_tokenization_spaces=True)
+
     decoder = functools.partial(tokenizer.decode,
-                                skip_special_tokens=True,
-                                clean_up_tokenization_spaces=True,
+                                **decoder_kwargs
                                 )
+    decoder_raw_kwargs = dict(skip_special_tokens=False,
+                              clean_up_tokenization_spaces=True)
+
     decoder_raw = functools.partial(tokenizer.decode,
-                                    skip_special_tokens=False,
-                                    clean_up_tokenization_spaces=True,
+                                    **decoder_raw_kwargs
                                     )

     with torch.no_grad():
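Note: the point of this refactor is that the decode settings now live in plain dicts, so the same kwargs can feed both the functools.partial decoders here and the streamer below. A self-contained sketch of the pattern, with a stand-in for tokenizer.decode:

    import functools

    def decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=True):
        # stand-in for tokenizer.decode
        return ' '.join(str(i) for i in ids)

    decoder_kwargs = dict(skip_special_tokens=True,
                          clean_up_tokenization_spaces=True)
    decoder = functools.partial(decode, **decoder_kwargs)  # kwargs pre-bound
    print(decoder([101, 2023, 102]))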
@@ -915,14 +922,16 @@ def evaluate(
         # some models specify special tokens that are part of normal prompt, so can't skip them
         inputs_decoded = prompt = inputs_decoded_raw
         decoder = decoder_raw
+        decoder_kwargs = decoder_raw_kwargs
     elif inputs_decoded_raw.replace("<unk> ", "").replace("<unk>", "").replace('\n', ' ').replace(' ', '') == prompt.replace('\n', ' ').replace(' ', ''):
         inputs_decoded = prompt = inputs_decoded_raw
         decoder = decoder_raw
+        decoder_kwargs = decoder_raw_kwargs
     else:
         print("WARNING: Special characters in prompt", flush=True)
     if stream_output:
         skip_prompt = False
-        streamer = H2OTextIteratorStreamer(tokenizer, skip_prompt=skip_prompt, block=False)
+        streamer = H2OTextIteratorStreamer(tokenizer, skip_prompt=skip_prompt, block=False, **decoder_kwargs)
         gen_kwargs.update(dict(streamer=streamer))
         target_func = generate_with_exceptions
         target = wrapped_partial(generate_with_exceptions, model.generate, prompt, inputs_decoded,
@@ -1312,3 +1321,39 @@ if __name__ == "__main__":
     python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6.9b
     """
     fire.Fire(main)
+
+
+import pytest
+
+@pytest.mark.parametrize(
+    "base_model",
+    [
+        "h2oai/h2ogpt-oig-oasst1-512-6.9b",
+        "h2oai/h2ogpt-oig-oasst1-512-12b",
+        "h2oai/h2ogpt-oig-oasst1-512-20b",
+        "h2oai/h2ogpt-oasst1-512-12b",
+        "h2oai/h2ogpt-oasst1-512-20b",
+        "h2oai/h2ogpt-gm-oasst1-en-1024-20b",
+        "databricks/dolly-v2-12b",
+        "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
+        "ehartford/WizardLM-7B-Uncensored",
+        "ehartford/WizardLM-13B-Uncensored",
+        "AlekseyKorshuk/vicuna-7b",
+        "TheBloke/stable-vicuna-13B-HF",
+        "decapoda-research/llama-7b-hf",
+        "decapoda-research/llama-13b-hf",
+        "decapoda-research/llama-30b-hf",
+        "junelee/wizard-vicuna-13b",
+    ]
+)
+def test_score_eval(base_model):
+    main(
+        base_model=base_model,
+        chat=False,
+        stream_output=False,
+        gradio=False,
+        eval_sharegpt_prompts_only=500,
+        eval_sharegpt_as_output=False,
+        num_beams=2,
+        infer_devices=False,
+    )
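Note: the new test is parametrized over the supported base models, and each case downloads a model and scores 500 ShareGPT prompts, so it is heavyweight. One hypothetical way to run it, assuming pytest is installed and the hardware fits the models:

    import pytest

    # Collect and run only the new scoring test from generate.py
    pytest.main(['-v', '-k', 'test_score_eval', 'generate.py'])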
gradio_runner.py CHANGED
@@ -50,7 +50,7 @@ def go_gradio(**kwargs):
     """
     else:
         description = "For more information, visit our GitHub pages: [h2oGPT](https://github.com/h2oai/h2ogpt) and [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio)<br>"
-        description += "If this host is busy, try [gpt.h2o.ai 20B](https://gpt.h2o.ai) and [30B](http://gpu.hopto.org) and [HF Spaces1 12B](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot) and [HF Spaces2 12B](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)<br>"
+        description += "If this host is busy, try [12B](https://gpt.h2o.ai), [30B](http://gpt2.h2o.ai), [HF Spaces1 12B](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot) or [HF Spaces2 12B](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)<br>"
     description += """<p>By using h2oGPT, you accept our [Terms of Service](https://github.com/h2oai/h2ogpt/blob/main/tos.md)</p>"""

     if kwargs['verbose']:
@@ -921,7 +921,8 @@ def go_gradio(**kwargs):
     scheduler.start()

     demo.launch(share=kwargs['share'], server_name="0.0.0.0", show_error=True,
-                favicon_path=favicon_path, prevent_thread_lock=True)  # , enable_queue=True)
+                favicon_path=favicon_path, prevent_thread_lock=True,
+                auth=kwargs['auth'])
     print("Started GUI", flush=True)
     if kwargs['block_gradio_exit']:
         demo.block_thread()
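Note: gradio's Blocks.launch() accepts auth as a list of (username, password) tuples (or a callable), which is the form kwargs['auth'] carries over from generate.py. A minimal standalone sketch, not code from this commit:

    import gradio as gr

    with gr.Blocks() as demo:
        gr.Markdown("restricted demo")

    # Same auth form as kwargs['auth'] above
    demo.launch(auth=[('jon', 'password')], prevent_thread_lock=True)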
 
requirements.txt CHANGED
@@ -1,13 +1,13 @@
 # for generate (gradio server) and finetune
-datasets==2.11.0
+datasets==2.12.0
 sentencepiece==0.1.97
 accelerate==0.18.0
 gradio==3.27.0
-huggingface_hub==0.13.4
+huggingface_hub==0.14.1
 appdirs==1.4.4
 fire==0.5.0
 docutils==0.19
-torch==2.0.0
+torch==2.0.1
 evaluate==0.4.0
 rouge_score==0.1.2
 sacrebleu==2.3.1
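Note: these are straight version bumps (datasets, huggingface_hub, torch); after pulling this commit, refresh the pins with pip install -r requirements.txt.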
 