joaogante (HF staff) committed
Commit 4edd5e3 • 1 Parent(s): 2fda365
Files changed (1):
  1. app.py +119 -117
app.py CHANGED
@@ -5,120 +5,122 @@ import torch.distributed.run as distributed_run
 from git import Repo
 from huggingface_hub import HfApi
 
-
-# Clone the medusa repo locally
-print("Cloning the medusa repo locally...")
-Repo.clone_from("https://github.com/FasterDecoding/Medusa.git", "medusa")
-print("Done")
-
-
-def create_medusa_heads(model_id: str):
-    parser = distributed_run.get_args_parser()
-    args = parser.parse_args([
-        "--nproc_per_node", "4",
-        "training_script", "medusa/medusa/train/train.py",
-        "training_script_args",
-        "--model_name_or_path", model_id,
-        "--data_path", "ShareGPT_Vicuna_unfiltered/ShareGPT_V4.3_unfiltered_cleaned_split.json",
-        "--bf16", "True",
-        "--output_dir", "medusa_heads",
-        "--num_train_epochs", "1",
-        "--per_device_train_batch_size", "8",
-        "--per_device_eval_batch_size", "8",
-        "--gradient_accumulation_steps", "4",
-        "--evaluation_strategy", "no",
-        "--save_strategy", "no",
-        "--learning_rate", "1e-3",
-        "--weight_decay", "0.0",
-        "--warmup_ratio", "0.1",
-        "--lr_scheduler_type", "cosine",
-        "--logging_steps", "1",
-        "--tf32", "True",
-        "--model_max_length", "2048",
-        "--lazy_preprocess", "True",
-        "--medusa_num_heads", "3",
-        "--medusa_num_layers", "1",
-    ])
-    distributed_run.run(args)
-
-    # Upload the medusa heads to the Hub
-    repo_id = f"medusa-{model_id}"
-    api = HfApi()
-    api.create_repo(
-        repo_id=repo_id,
-        exist_ok=True,
-    )
-    api.upload_folder(
-        folder_path="medusa_heads",
-        repo_id=repo_id,
-    )
-    return repo_id
-
-def run(model_id: str) -> str:
-    # Input validation
-    if model_id == "":
-        return """
-        ### Invalid input 🐞
-
-        Please fill a model_id.
-        """
-    print(f"Valid inputs ✅\nValidating model_id: {model_id}")
-
-    # Attempt to load the base model
-    try:
-        config = AutoConfig.from_pretrained(model_id)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        del config, tokenizer, model
-    except Exception as e:
-        return f"""
-        ### {model_id} can't be loaded with AutoClasses 🐞
-
-        {e}
-        """
-    print(f"{model_id} can be loaded ✅\nCreating medusa heads (will take a few hours)")
-
-    # Run the medusa heads creation
-    try:
-        repo_id = create_medusa_heads(model_id=model_id)
-        print("Success ✅\nMedusa heads uploaded to: ", repo_id)
-        return f"""
-        ### Success 🔥
-
-        Yay! Medusa heads were successfully created and uploaded to, {repo_id}
-        """
-    except Exception as e:
-        return f"""
-        ### Error 😒😒😒
-
-        {e}
-        """
-
-
-DESCRIPTION = """
-The steps to create [medusa](https://sites.google.com/view/medusa-llm) heads are the following:
-
-1. Input a public model id from the Hub
-2. Click "Submit"
-3. That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the new repo 🔥
-"""
-
-title="Create LLM medusa heads in a new repo 🐍"
-
-with gr.Blocks(title=title) as demo:
-    description = gr.Markdown(f"""# {title}""")
-    description = gr.Markdown(DESCRIPTION)
-
-    with gr.Row() as r:
-        with gr.Column() as c:
-            model_id = gr.Text(max_lines=1, label="model_id")
-            with gr.Row() as c:
-                clean = gr.ClearButton()
-                submit = gr.Button("Submit", variant="primary")
-
-        with gr.Column() as d:
-            status_box = gr.Markdown()
-
-    submit.click(run, inputs=[model_id], outputs=status_box, concurrency_limit=1)
-
-demo.queue(max_size=10).launch(show_api=True)
+if __name__ == "__main__":
+    # Clone the medusa repo locally
+    print("Cloning the medusa repo locally...")
+    Repo.clone_from("https://github.com/FasterDecoding/Medusa.git", "medusa")
+    print("Done")
+
+
+    def create_medusa_heads(model_id: str):
+        parser = distributed_run.get_args_parser()
+        args = parser.parse_args([
+            "--nproc_per_node", "4",
+            "training_script", "medusa/medusa/train/train.py",
+            "training_script_args",
+            "--model_name_or_path", model_id,
+            "--data_path", "ShareGPT_Vicuna_unfiltered/ShareGPT_V4.3_unfiltered_cleaned_split.json",
+            "--bf16", "True",
+            "--output_dir", "medusa_heads",
+            "--num_train_epochs", "1",
+            "--per_device_train_batch_size", "8",
+            "--per_device_eval_batch_size", "8",
+            "--gradient_accumulation_steps", "4",
+            "--evaluation_strategy", "no",
+            "--save_strategy", "no",
+            "--learning_rate", "1e-3",
+            "--weight_decay", "0.0",
+            "--warmup_ratio", "0.1",
+            "--lr_scheduler_type", "cosine",
+            "--logging_steps", "1",
+            "--tf32", "True",
+            "--model_max_length", "2048",
+            "--lazy_preprocess", "True",
+            "--medusa_num_heads", "3",
+            "--medusa_num_layers", "1",
+        ])
+        distributed_run.run(args)
+
+        # Upload the medusa heads to the Hub
+        repo_id = f"medusa-{model_id}"
+        api = HfApi()
+        api.create_repo(
+            repo_id=repo_id,
+            exist_ok=True,
+        )
+        api.upload_folder(
+            folder_path="medusa_heads",
+            repo_id=repo_id,
+        )
+        return repo_id
+
+    def run(model_id: str) -> str:
+        print(f"\n\n\nNEW RUN: {model_id}")
+
+        # Input validation
+        if model_id == "":
+            return """
+            ### Invalid input 🐞
+
+            Please fill a model_id.
+            """
+        print(f"Valid inputs ✅\nValidating model_id: {model_id}")
+
+        # Attempt to load the base model
+        try:
+            config = AutoConfig.from_pretrained(model_id)
+            tokenizer = AutoTokenizer.from_pretrained(model_id)
+            model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+            del config, tokenizer, model
+        except Exception as e:
+            return f"""
+            ### {model_id} can't be loaded with AutoClasses 🐞
+
+            {e}
+            """
+        print(f"{model_id} can be loaded ✅\nCreating medusa heads (will take a few hours)")
+
+        # Run the medusa heads creation
+        try:
+            repo_id = create_medusa_heads(model_id=model_id)
+            print("Success ✅\nMedusa heads uploaded to: ", repo_id)
+            return f"""
+            ### Success 🔥
+
+            Yay! Medusa heads were successfully created and uploaded to, {repo_id}
+            """
+        except Exception as e:
+            return f"""
+            ### Error 😒😒😒
+
+            {e}
+            """
+
+
+    DESCRIPTION = """
+    The steps to create [medusa](https://sites.google.com/view/medusa-llm) heads are the following:
+
+    1. Input a public model id from the Hub
+    2. Click "Submit"
+    3. That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the new repo 🔥
+    """
+
+    title="Create LLM medusa heads in a new repo 🐍"
+
+    with gr.Blocks(title=title) as demo:
+        description = gr.Markdown(f"""# {title}""")
+        description = gr.Markdown(DESCRIPTION)
+
+        with gr.Row() as r:
+            with gr.Column() as c:
+                model_id = gr.Text(max_lines=1, label="model_id")
+                with gr.Row() as c:
+                    clean = gr.ClearButton()
+                    submit = gr.Button("Submit", variant="primary")
+
+            with gr.Column() as d:
+                status_box = gr.Markdown()
+
+        submit.click(run, inputs=[model_id], outputs=status_box, concurrency_limit=1)
+
+    demo.queue(max_size=10).launch(show_api=True)
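
The net change in this commit: the whole script body (repo clone, helper functions, and the Gradio app) moves inside an `if __name__ == "__main__":` guard, and `run` gains a `NEW RUN` log line. Below is a minimal sketch, not from the repo, of why the guard matters when a script launches distributed workers via `torch.distributed.run`; `worker.py` and `--some-flag` are hypothetical placeholders.

```python
import torch.distributed.run as distributed_run


def launch_workers():
    # Build the same argparse namespace the `torchrun` CLI would produce:
    # the first positional argument is the training script, and everything
    # after it is forwarded to that script untouched.
    parser = distributed_run.get_args_parser()
    args = parser.parse_args([
        "--nproc_per_node", "2",
        "worker.py",         # hypothetical training script
        "--some-flag", "1",  # forwarded to worker.py as-is
    ])
    distributed_run.run(args)  # spawns and monitors the worker processes


if __name__ == "__main__":
    # Side effects (cloning a repo, starting a Gradio server) run only when
    # the script is executed directly, never when the module is imported.
    launch_workers()
```

Without the guard, anything that imports the module (tooling, or a spawn-based launcher re-importing it) would re-execute the module-level clone and UI setup; with it, the script behaves identically when run directly but is safe to import.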