Spaces:

joaogante
/

medusa-maker

Paused

App Files Files Community

joaogante HF staff commited on Jan 3

Commit

9471796

•

1 Parent(s): 4edd5e3

without elastic

Browse files

Files changed (1) hide show

app.py +115 -119

app.py CHANGED Viewed

@@ -5,122 +5,118 @@ import torch.distributed.run as distributed_run
 from git import Repo
 from huggingface_hub import HfApi
-if __name__ == "__main__":
-    # Clone the medusa repo locally
-    print("Cloning the medusa repo locally...")
-    Repo.clone_from("https://github.com/FasterDecoding/Medusa.git", "medusa")
-    print("Done")
-    def create_medusa_heads(model_id: str):
-        parser = distributed_run.get_args_parser()
-        args = parser.parse_args([
-            "--nproc_per_node", "4",
-            "training_script", "medusa/medusa/train/train.py",
-            "training_script_args",
-            "--model_name_or_path", model_id,
-            "--data_path", "ShareGPT_Vicuna_unfiltered/ShareGPT_V4.3_unfiltered_cleaned_split.json",
-            "--bf16", "True",
-            "--output_dir", "medusa_heads",
-            "--num_train_epochs", "1",
-            "--per_device_train_batch_size", "8",
-            "--per_device_eval_batch_size", "8",
-            "--gradient_accumulation_steps", "4",
-            "--evaluation_strategy", "no",
-            "--save_strategy", "no",
-            "--learning_rate", "1e-3",
-            "--weight_decay", "0.0",
-            "--warmup_ratio", "0.1",
-            "--lr_scheduler_type", "cosine",
-            "--logging_steps", "1",
-            "--tf32", "True",
-            "--model_max_length", "2048",
-            "--lazy_preprocess", "True",
-            "--medusa_num_heads", "3",
-            "--medusa_num_layers", "1",
-        ])
-        distributed_run.run(args)
-        # Upload the medusa heads to the Hub
-        repo_id = f"medusa-{model_id}"
-        api = HfApi()
-        api.create_repo(
-            repo_id=repo_id,
-            exist_ok=True,
-        )
-        api.upload_folder(
-            folder_path="medusa_heads",
-            repo_id=repo_id,
-        )
-        return repo_id
-    def run(model_id: str) -> str:
-        print(f"\n\n\nNEW RUN: {model_id}")
-        # Input validation
-        if model_id == "":
-            return """
-            ### Invalid input 🐞
-            Please fill a model_id.
-            """
-        print(f"Valid inputs ✅\nValidating model_id: {model_id}")
-        # Attempt to load the base model
-        try:
-            config = AutoConfig.from_pretrained(model_id)
-            tokenizer = AutoTokenizer.from_pretrained(model_id)
-            model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-            del config, tokenizer, model
-        except Exception as e:
-            return f"""
-            ### {model_id} can't be loaded with AutoClasses 🐞
-            {e}
-            """
-        print(f"{model_id} can be loaded ✅\nCreating medusa heads (will take a few hours)")
-        # Run the medusa heads creation
-        try:
-            repo_id = create_medusa_heads(model_id=model_id)
-            print("Success ✅\nMedusa heads uploaded to: ", repo_id)
-            return f"""
-            ### Success 🔥
-            Yay! Medusa heads were successfully created and uploaded to, {repo_id}
-            """
-        except Exception as e:
-            return f"""
-            ### Error 😢😢😢
-            {e}
-            """
-    DESCRIPTION = """
-    The steps to create [medusa](https://sites.google.com/view/medusa-llm) heads are the following:
-    1. Input a public model id from the Hub
-    2. Click "Submit"
-    3. That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the new repo 🔥
-    """
-    title="Create LLM medusa heads in a new repo 🐍"
-    with gr.Blocks(title=title) as demo:
-        description = gr.Markdown(f"""# {title}""")
-        description = gr.Markdown(DESCRIPTION)
-        with gr.Row() as r:
-            with gr.Column() as c:
-                model_id = gr.Text(max_lines=1, label="model_id")
-                with gr.Row() as c:
-                    clean = gr.ClearButton()
-                    submit = gr.Button("Submit", variant="primary")
-            with gr.Column() as d:
-                status_box = gr.Markdown()
-        submit.click(run, inputs=[model_id], outputs=status_box, concurrency_limit=1)
-    demo.queue(max_size=10).launch(show_api=True)

 from git import Repo
 from huggingface_hub import HfApi
+# Clone the medusa repo locally
+print("Cloning the medusa repo locally...")
+Repo.clone_from("https://github.com/FasterDecoding/Medusa.git", "medusa")
+print("Done")
+def create_medusa_heads(model_id: str):
+    training_args = [
+        "--model_name_or_path", model_id,
+        "--data_path", "ShareGPT_Vicuna_unfiltered/ShareGPT_V4.3_unfiltered_cleaned_split.json",
+        "--bf16", "True",
+        "--output_dir", "medusa_heads",
+        "--num_train_epochs", "1",
+        "--per_device_train_batch_size", "8",
+        "--per_device_eval_batch_size", "8",
+        "--gradient_accumulation_steps", "4",
+        "--evaluation_strategy", "no",
+        "--save_strategy", "no",
+        "--learning_rate", "1e-3",
+        "--weight_decay", "0.0",
+        "--warmup_ratio", "0.1",
+        "--lr_scheduler_type", "cosine",
+        "--logging_steps", "1",
+        "--tf32", "True",
+        "--model_max_length", "2048",
+        "--lazy_preprocess", "True",
+        "--medusa_num_heads", "3",
+        "--medusa_num_layers", "1",
+    ]
+    distributed_run.run_script_path("medusa/medusa/train/train.py", *training_args)
+    # Upload the medusa heads to the Hub
+    repo_id = f"medusa-{model_id}"
+    api = HfApi()
+    api.create_repo(
+        repo_id=repo_id,
+        exist_ok=True,
+    )
+    api.upload_folder(
+        folder_path="medusa_heads",
+        repo_id=repo_id,
+    )
+    return repo_id
+def run(model_id: str) -> str:
+    print(f"\n\n\nNEW RUN: {model_id}")
+    # Input validation
+    if model_id == "":
+        return """
+        ### Invalid input 🐞
+        Please fill a model_id.
+        """
+    print(f"Valid inputs ✅\nValidating model_id: {model_id}")
+    # Attempt to load the base model
+    try:
+        config = AutoConfig.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+        del config, tokenizer, model
+    except Exception as e:
+        return f"""
+        ### {model_id} can't be loaded with AutoClasses 🐞
+        {e}
+        """
+    print(f"{model_id} can be loaded ✅\nCreating medusa heads (will take a few hours)")
+    # Run the medusa heads creation
+    try:
+        repo_id = create_medusa_heads(model_id=model_id)
+        print("Success ✅\nMedusa heads uploaded to: ", repo_id)
+        return f"""
+        ### Success 🔥
+        Yay! Medusa heads were successfully created and uploaded to, {repo_id}
+        """
+    except Exception as e:
+        return f"""
+        ### Error 😢😢😢
+        {e}
+        """
+DESCRIPTION = """
+The steps to create [medusa](https://sites.google.com/view/medusa-llm) heads are the following:
+1. Input a public model id from the Hub
+2. Click "Submit"
+3. That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the new repo 🔥
+"""
+title="Create LLM medusa heads in a new repo 🐍"
+with gr.Blocks(title=title) as demo:
+    description = gr.Markdown(f"""# {title}""")
+    description = gr.Markdown(DESCRIPTION)
+    with gr.Row() as r:
+        with gr.Column() as c:
+            model_id = gr.Text(max_lines=1, label="model_id")
+            with gr.Row() as c:
+                clean = gr.ClearButton()
+                submit = gr.Button("Submit", variant="primary")
+        with gr.Column() as d:
+            status_box = gr.Markdown()
+    submit.click(run, inputs=[model_id], outputs=status_box, concurrency_limit=1)
+demo.queue(max_size=10).launch(show_api=True)