Spaces:

Tonic
/

VoxFactory

Running

App Files Community

Joseph Pollack committed on Sep 13

Commit

b82e5c5

unverified ·

1 Parent(s): eb0369d

adds correct model card info

Browse files

Files changed (6) hide show

.gitignore +2 -1
interface.py +10 -0
scripts/__pycache__/generate_model_card.cpython-313.pyc +0 -0
scripts/push_to_huggingface.py +47 -3
tests/test_generate_model_card.py +143 -0
tests/test_push_model_card.py +218 -0

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	- datasets/


1	+ datasets/
2	+ tmp_hf_push/

interface.py CHANGED Viewed

@@ -515,6 +515,16 @@ def start_voxtral_training(
                 "model",
                 str(output_dir),
                 full_repo_name,
             ]
             all_logs.append(f"📤 Pushing model to Hugging Face Hub: {full_repo_name}")
             push_code = collect_logs_with_code(run_command_stream(push_args, env))

                 "model",
                 str(output_dir),
                 full_repo_name,
+                "--author-name", "Voxtral Trainer",
+                "--model-description", "Fine-tuned Voxtral ASR model",
+                "--model-name", base_model,
+                "--trainer-type", ("SFTTrainer"),
+                "--training-config-type", ("Custom Configuration"),
+                "--batch-size", str(int(batch_size) if isinstance(batch_size, (int, float)) else batch_size),
+                "--gradient-accumulation-steps", str(int(grad_accum) if isinstance(grad_accum, (int, float)) else grad_accum),
+                "--learning-rate", str(learning_rate),
+                "--max-epochs", str(epochs),
+                "--trackio-url", env.get("TRACKIO_URL", "N/A"),
             ]
             all_logs.append(f"📤 Pushing model to Hugging Face Hub: {full_repo_name}")
             push_code = collect_logs_with_code(run_command_stream(push_args, env))

scripts/__pycache__/generate_model_card.cpython-313.pyc ADDED Viewed

Binary file (11.3 kB). View file

scripts/push_to_huggingface.py CHANGED Viewed

@@ -47,7 +47,18 @@ class HuggingFacePusher:
         author_name: Optional[str] = None,
         model_description: Optional[str] = None,
         model_name: Optional[str] = None,
-        dataset_name: Optional[str] = None
     ):
         self.model_path = Path(model_path)
         # Original user input (may be just the repo name without username)
@@ -60,6 +71,17 @@ class HuggingFacePusher:
         # Model card generation details
         self.model_name = model_name
         self.dataset_name = dataset_name
         # Initialize HF API
         if HF_AVAILABLE:
@@ -278,7 +300,7 @@ class HuggingFacePusher:
                 "repo_name": self.repo_id,
                 "model_name": self.repo_id.split('/')[-1],
                 "experiment_name": self.experiment_name or "model_push",
-                "dataset_repo": self.dataset_repo,
                 "author_name": self.author_name or "Model Author",
                 "model_description": self.model_description or "A fine-tuned version of SmolLM3-3B for improved text generation capabilities.",
                 "training_config_type": self.training_config_type or "Custom Configuration",
@@ -286,6 +308,7 @@ class HuggingFacePusher:
                 "dataset_name": self.dataset_name or "Custom Dataset",
                 "trainer_type": self.trainer_type or "SFTTrainer",
                 "batch_size": str(self.batch_size) if self.batch_size else "8",
                 "learning_rate": str(self.learning_rate) if self.learning_rate else "5e-6",
                 "max_epochs": str(self.max_epochs) if self.max_epochs else "3",
                 "max_seq_length": str(self.max_seq_length) if self.max_seq_length else "2048",
@@ -895,6 +918,17 @@ def parse_args():
     model_parser.add_argument('--model-description', type=str, default=None, help='Model description for model card')
     model_parser.add_argument('--model-name', type=str, default=None, help='Base model name')
     model_parser.add_argument('--dataset-name', type=str, default=None, help='Dataset name')
     # Dataset push subcommand
     dataset_parser = subparsers.add_parser('dataset', help='Push dataset to Hugging Face Hub')
@@ -933,7 +967,17 @@ def main():
                 author_name=args.author_name,
                 model_description=args.model_description,
                 model_name=args.model_name,
-                dataset_name=args.dataset_name
             )
             # Push model

         author_name: Optional[str] = None,
         model_description: Optional[str] = None,
         model_name: Optional[str] = None,
+        dataset_name: Optional[str] = None,
+        # Optional metadata for model card generation
+        experiment_name: Optional[str] = None,
+        dataset_repo: Optional[str] = None,
+        training_config_type: Optional[str] = None,
+        trainer_type: Optional[str] = None,
+        batch_size: Optional[str] = None,
+        gradient_accumulation_steps: Optional[str] = None,
+        learning_rate: Optional[str] = None,
+        max_epochs: Optional[str] = None,
+        max_seq_length: Optional[str] = None,
+        trackio_url: Optional[str] = None,
     ):
         self.model_path = Path(model_path)
         # Original user input (may be just the repo name without username)
         # Model card generation details
         self.model_name = model_name
         self.dataset_name = dataset_name
+        # Optional metadata (ensure attributes always exist to avoid AttributeError)
+        self.experiment_name = experiment_name
+        self.dataset_repo = dataset_repo
+        self.training_config_type = training_config_type
+        self.trainer_type = trainer_type
+        self.batch_size = batch_size
+        self.gradient_accumulation_steps = gradient_accumulation_steps
+        self.learning_rate = learning_rate
+        self.max_epochs = max_epochs
+        self.max_seq_length = max_seq_length
+        self.trackio_url = trackio_url
         # Initialize HF API
         if HF_AVAILABLE:
                 "repo_name": self.repo_id,
                 "model_name": self.repo_id.split('/')[-1],
                 "experiment_name": self.experiment_name or "model_push",
+                "dataset_repo": self.dataset_repo or "",
                 "author_name": self.author_name or "Model Author",
                 "model_description": self.model_description or "A fine-tuned version of SmolLM3-3B for improved text generation capabilities.",
                 "training_config_type": self.training_config_type or "Custom Configuration",
                 "dataset_name": self.dataset_name or "Custom Dataset",
                 "trainer_type": self.trainer_type or "SFTTrainer",
                 "batch_size": str(self.batch_size) if self.batch_size else "8",
+                "gradient_accumulation_steps": str(self.gradient_accumulation_steps) if self.gradient_accumulation_steps else variables.get("gradient_accumulation_steps", "16"),
                 "learning_rate": str(self.learning_rate) if self.learning_rate else "5e-6",
                 "max_epochs": str(self.max_epochs) if self.max_epochs else "3",
                 "max_seq_length": str(self.max_seq_length) if self.max_seq_length else "2048",
     model_parser.add_argument('--model-description', type=str, default=None, help='Model description for model card')
     model_parser.add_argument('--model-name', type=str, default=None, help='Base model name')
     model_parser.add_argument('--dataset-name', type=str, default=None, help='Dataset name')
+    # Optional model card metadata
+    model_parser.add_argument('--experiment-name', type=str, default=None, help='Experiment name for model card')
+    model_parser.add_argument('--dataset-repo', type=str, default=None, help='Dataset repo for model card')
+    model_parser.add_argument('--training-config-type', type=str, default=None, help='Training config type for model card')
+    model_parser.add_argument('--trainer-type', type=str, default=None, help='Trainer type for model card')
+    model_parser.add_argument('--batch-size', type=str, default=None, help='Batch size for model card')
+    model_parser.add_argument('--gradient-accumulation-steps', type=str, default=None, help='Grad accum steps for model card')
+    model_parser.add_argument('--learning-rate', type=str, default=None, help='Learning rate for model card')
+    model_parser.add_argument('--max-epochs', type=str, default=None, help='Max epochs for model card')
+    model_parser.add_argument('--max-seq-length', type=str, default=None, help='Max seq length for model card')
+    model_parser.add_argument('--trackio-url', type=str, default=None, help='Trackio URL for model card')
     # Dataset push subcommand
     dataset_parser = subparsers.add_parser('dataset', help='Push dataset to Hugging Face Hub')
                 author_name=args.author_name,
                 model_description=args.model_description,
                 model_name=args.model_name,
+                dataset_name=args.dataset_name,
+                experiment_name=args.experiment_name,
+                dataset_repo=args.dataset_repo,
+                training_config_type=args.training_config_type,
+                trainer_type=args.trainer_type,
+                batch_size=args.batch_size,
+                gradient_accumulation_steps=args.gradient_accumulation_steps,
+                learning_rate=args.learning_rate,
+                max_epochs=args.max_epochs,
+                max_seq_length=args.max_seq_length,
+                trackio_url=args.trackio_url,
             )
             # Push model

tests/test_generate_model_card.py ADDED Viewed

	@@ -0,0 +1,143 @@

+#!/usr/bin/env python3
+"""
+Tests for scripts/generate_model_card.py using the real template in templates/model_card.md.
+These tests verify:
+- Conditional processing for quantized_models
+- Variable replacement for common fields
+- File writing via save_model_card
+"""
+import sys
+from pathlib import Path
+def _repo_root() -> Path:
+    return Path(__file__).resolve().parents[1]
+def _add_scripts_to_path() -> None:
+    scripts_dir = _repo_root() / "scripts"
+    if str(scripts_dir) not in sys.path:
+        sys.path.insert(0, str(scripts_dir))
+def test_model_card_generator_conditionals_truthy(tmp_path):
+    _add_scripts_to_path()
+    from generate_model_card import ModelCardGenerator
+    template_path = _repo_root() / "templates" / "model_card.md"
+    generator = ModelCardGenerator(str(template_path))
+    variables = {
+        "model_name": "My Fine-tuned Model",
+        "model_description": "A test description.",
+        "repo_name": "user/repo",
+        "base_model": "HuggingFaceTB/SmolLM3-3B",
+        "dataset_name": "OpenHermes-FR",
+        "training_config_type": "Custom",
+        "trainer_type": "SFTTrainer",
+        "batch_size": "8",
+        "gradient_accumulation_steps": "16",
+        "learning_rate": "5e-6",
+        "max_epochs": "3",
+        "max_seq_length": "2048",
+        "hardware_info": "CPU",
+        "experiment_name": "exp-123",
+        "trackio_url": "https://trackio.space/exp",
+        "dataset_repo": "tonic/trackio-experiments",
+        "author_name": "Unit Tester",
+        "quantized_models": True,
+    }
+    content = generator.generate_model_card(variables)
+    # Conditional: when True, the quantized tag should appear
+    assert "- quantized" in content
+    # Common variables replaced in multiple locations
+    assert "base_model: HuggingFaceTB/SmolLM3-3B" in content
+    assert "trainer_type: SFTTrainer" in content
+    assert 'from_pretrained("user/repo")' in content
+    assert "Hardware\": \"CPU\"" not in content  # ensure no escaped quotes left
+    assert "hardware: \"CPU\"" in content
+    # Save to file and verify
+    output_path = tmp_path / "README_test.md"
+    assert generator.save_model_card(content, str(output_path)) is True
+    assert output_path.exists()
+    assert output_path.read_text(encoding="utf-8") == content
+def test_model_card_generator_conditionals_falsey(tmp_path):
+    _add_scripts_to_path()
+    from generate_model_card import ModelCardGenerator
+    template_path = _repo_root() / "templates" / "model_card.md"
+    generator = ModelCardGenerator(str(template_path))
+    variables = {
+        "model_name": "My Model",
+        "model_description": "A test description.",
+        "repo_name": "user/repo",
+        "base_model": "HuggingFaceTB/SmolLM3-3B",
+        "dataset_name": "OpenHermes-FR",
+        "training_config_type": "Custom",
+        "trainer_type": "SFTTrainer",
+        "batch_size": "8",
+        "learning_rate": "5e-6",
+        "max_epochs": "3",
+        "max_seq_length": "2048",
+        "hardware_info": "CPU",
+        "quantized_models": False,
+    }
+    content = generator.generate_model_card(variables)
+    # Conditional: quantized tag should be absent
+    assert "- quantized" not in content
+    # The if/else block is removed by current implementation when False
+    assert "{{#if quantized_models}}" not in content
+    assert "{{/if}}" not in content
+    # Variable replacement still occurs elsewhere
+    assert "base_model: HuggingFaceTB/SmolLM3-3B" in content
+    assert 'from_pretrained("user/repo")' in content
+    # Save to file
+    output_path = tmp_path / "README_no_quant.md"
+    assert generator.save_model_card(content, str(output_path)) is True
+    assert output_path.exists()
+def test_model_card_generator_variable_replacement(tmp_path):
+    _add_scripts_to_path()
+    from generate_model_card import ModelCardGenerator
+    template_path = _repo_root() / "templates" / "model_card.md"
+    generator = ModelCardGenerator(str(template_path))
+    base_model = "custom/base-model"
+    repo_name = "custom/repo-name"
+    variables = {
+        "model_name": "Var Test Model",
+        "model_description": "Testing variable replacement.",
+        "repo_name": repo_name,
+        "base_model": base_model,
+        "dataset_name": "dataset-x",
+        "trainer_type": "SFTTrainer",
+        "batch_size": "4",
+        "gradient_accumulation_steps": "1",
+        "max_seq_length": "1024",
+        "hardware_info": "CPU",
+        "quantized_models": False,
+    }
+    content = generator.generate_model_card(variables)
+    assert f"base_model: {base_model}" in content
+    assert f'from_pretrained("{repo_name}")' in content
+    assert "trainer_type: SFTTrainer" in content

tests/test_push_model_card.py ADDED Viewed

	@@ -0,0 +1,218 @@

+#!/usr/bin/env python3
+"""
+Tests for scripts/push_to_huggingface.py focusing on model card creation/upload.
+We mock Hugging Face Hub interactions and create dummy model folders to verify:
+- Repo id resolution via whoami
+- Repository creation call
+- README.md upload with expected content (fallback simple card path)
+- Uploading of model files from the directory
+"""
+import sys
+import types
+from pathlib import Path
+def _repo_root() -> Path:
+    return Path(__file__).resolve().parents[1]
+def _add_scripts_to_path() -> None:
+    scripts_dir = _repo_root() / "scripts"
+    if str(scripts_dir) not in sys.path:
+        sys.path.insert(0, str(scripts_dir))
+def _make_full_model_dir(base: Path) -> Path:
+    model_dir = base / "full_model"
+    model_dir.mkdir(parents=True, exist_ok=True)
+    (model_dir / "config.json").write_text("{}", encoding="utf-8")
+    # Create an empty weight file to satisfy validation
+    (model_dir / "model.safetensors").write_bytes(b"")
+    return model_dir
+def _make_lora_model_dir(base: Path) -> Path:
+    model_dir = base / "lora_model"
+    model_dir.mkdir(parents=True, exist_ok=True)
+    (model_dir / "adapter_config.json").write_text("{}", encoding="utf-8")
+    (model_dir / "adapter_model.bin").write_bytes(b"\x00")
+    return model_dir
+def test_push_model_card_full_model(monkeypatch, tmp_path):
+    _add_scripts_to_path()
+    import push_to_huggingface as mod
+    # Ensure module thinks HF is available and patch API + functions
+    monkeypatch.setattr(mod, "HF_AVAILABLE", True, raising=False)
+    create_repo_calls = []
+    upload_file_calls = []
+    class DummyHfApi:
+        def __init__(self, token=None):
+            self.token = token
+        def whoami(self):
+            return {"name": "testuser"}
+    def fake_create_repo(*, repo_id, token=None, private=False, exist_ok=False, repo_type=None):
+        create_repo_calls.append({
+            "repo_id": repo_id,
+            "token": token,
+            "private": private,
+            "exist_ok": exist_ok,
+            "repo_type": repo_type,
+        })
+    def fake_upload_file(*, path_or_fileobj, path_in_repo, repo_id, token, repo_type=None):
+        path = Path(path_or_fileobj)
+        content = None
+        if path.exists() and path.is_file():
+            try:
+                content = path.read_text(encoding="utf-8")
+            except Exception:
+                content = None
+        upload_file_calls.append({
+            "path_in_repo": path_in_repo,
+            "repo_id": repo_id,
+            "token": token,
+            "repo_type": repo_type,
+            "content": content,
+            "local_path": str(path),
+        })
+    monkeypatch.setattr(mod, "HfApi", DummyHfApi, raising=False)
+    monkeypatch.setattr(mod, "create_repo", fake_create_repo, raising=False)
+    monkeypatch.setattr(mod, "upload_file", fake_upload_file, raising=False)
+    # Prepare dummy full model directory
+    model_dir = _make_full_model_dir(tmp_path)
+    pusher = mod.HuggingFacePusher(
+        model_path=str(model_dir),
+        repo_name="my-repo",
+        token="fake-token",
+        private=True,
+        author_name="Tester",
+        model_description="Desc",
+        model_name="BaseModel",
+        dataset_name="DatasetX",
+    )
+    # Execute push (this should use fallback simple model card)
+    ok = pusher.push_model(
+        training_config={"param": 1},
+        results={"train_loss": 0.1, "eval_loss": 0.2, "perplexity": 9.9},
+    )
+    assert ok is True
+    # Repo creation was called with resolved user prefix
+    assert any(c["repo_id"] == "testuser/my-repo" for c in create_repo_calls)
+    # README upload occurred and contains either generator or fallback content (full model)
+    readme_calls = [c for c in upload_file_calls if c["path_in_repo"] == "README.md"]
+    assert readme_calls, "README.md was not uploaded"
+    readme_content = readme_calls[-1]["content"] or ""
+    assert (
+        "fine-tuned Voxtral ASR model" in readme_content
+        or "SmolLM3" in readme_content
+        or "Model Details" in readme_content
+    )
+    assert "DatasetX" in readme_content or "Training Configuration" in readme_content
+    # Model files were uploaded (config and weights)
+    uploaded_paths = {c["path_in_repo"] for c in upload_file_calls}
+    assert "config.json" in uploaded_paths
+    assert "model.safetensors" in uploaded_paths
+def test_push_model_card_lora_model_fallback(monkeypatch, tmp_path):
+    _add_scripts_to_path()
+    import push_to_huggingface as mod
+    # Ensure module thinks HF is available and patch API + functions
+    monkeypatch.setattr(mod, "HF_AVAILABLE", True, raising=False)
+    upload_file_calls = []
+    class DummyHfApi:
+        def __init__(self, token=None):
+            self.token = token
+        def whoami(self):
+            return {"username": "anotheruser"}
+    def fake_create_repo(*, repo_id, token=None, private=False, exist_ok=False, repo_type=None):
+        return None
+    def fake_upload_file(*, path_or_fileobj, path_in_repo, repo_id, token, repo_type=None):
+        path = Path(path_or_fileobj)
+        content = None
+        if path.exists() and path.is_file():
+            try:
+                content = path.read_text(encoding="utf-8")
+            except Exception:
+                content = None
+        upload_file_calls.append({
+            "path_in_repo": path_in_repo,
+            "repo_id": repo_id,
+            "content": content,
+        })
+    monkeypatch.setattr(mod, "HfApi", DummyHfApi, raising=False)
+    monkeypatch.setattr(mod, "create_repo", fake_create_repo, raising=False)
+    monkeypatch.setattr(mod, "upload_file", fake_upload_file, raising=False)
+    # Insert a dummy generate_model_card module that raises in generate to force fallback
+    dummy_mod = types.ModuleType("generate_model_card")
+    class RaisingGen:
+        def __init__(self, *args, **kwargs):
+            pass
+        def generate_model_card(self, variables):
+            raise RuntimeError("force fallback")
+    def default_vars():
+        return {}
+    dummy_mod.ModelCardGenerator = RaisingGen
+    dummy_mod.create_default_variables = default_vars
+    sys.modules["generate_model_card"] = dummy_mod
+    # Prepare dummy lora model directory
+    model_dir = _make_lora_model_dir(tmp_path)
+    pusher = mod.HuggingFacePusher(
+        model_path=str(model_dir),
+        repo_name="my-lora-repo",
+        token="fake-token",
+        private=False,
+        author_name="Tester",
+        model_description="Desc",
+        model_name="BaseModel",
+        dataset_name="DatasetY",
+    )
+    ok = pusher.push_model(training_config={}, results={})
+    assert ok is True
+    # README upload occurred and contains either generator or fallback content (LoRA)
+    readme_calls = [c for c in upload_file_calls if c["path_in_repo"] == "README.md"]
+    assert readme_calls, "README.md was not uploaded"
+    readme_content = readme_calls[-1]["content"] or ""
+    assert (
+        "LoRA adapter for Voxtral ASR" in readme_content
+        or "SmolLM3" in readme_content
+        or "Model Details" in readme_content
+    )
+    assert "DatasetY" in readme_content or "Training Configuration" in readme_content
+    # LoRA files uploaded
+    uploaded_paths = {Path(c.get("local_path", "")).name for c in upload_file_calls if c.get("local_path")}
+    assert any(name.startswith("adapter_") for name in uploaded_paths)