bpietrzak committed on
Commit 91a9e54
1 Parent(s): 710c57b

Training fix

Files changed (8)
  1. .gitignore +1 -159
  2. dl/make_dataset.py +0 -42
  3. dl/push_model.py +0 -36
  4. dl/testing.ipynb +0 -394
  5. dl/train.py +0 -113
  6. main.py +0 -29
  7. requirements.txt +9 -7
  8. train.py +134 -0
.gitignore CHANGED
@@ -1,160 +1,2 @@
- # Byte-compiled / optimized / DLL files
- __pycache__/
- *.py[cod]
- *$py.class
-
- # C extensions
- *.so
-
- # Distribution / packaging
- .Python
- build/
- develop-eggs/
- dist/
- downloads/
- eggs/
- .eggs/
- lib/
- lib64/
- parts/
- sdist/
- var/
- wheels/
- share/python-wheels/
- *.egg-info/
- .installed.cfg
- *.egg
- MANIFEST
-
- # PyInstaller
- # Usually these files are written by a python script from a template
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
- *.manifest
- *.spec
-
- # Installer logs
- pip-log.txt
- pip-delete-this-directory.txt
-
- # Unit test / coverage reports
- htmlcov/
- .tox/
- .nox/
- .coverage
- .coverage.*
- .cache
- nosetests.xml
- coverage.xml
- *.cover
- *.py,cover
- .hypothesis/
- .pytest_cache/
- cover/
-
- # Translations
- *.mo
- *.pot
-
- # Django stuff:
- *.log
- local_settings.py
- db.sqlite3
- db.sqlite3-journal
-
- # Flask stuff:
- instance/
- .webassets-cache
-
- # Scrapy stuff:
- .scrapy
-
- # Sphinx documentation
- docs/_build/
-
- # PyBuilder
- .pybuilder/
- target/
-
- # Jupyter Notebook
- .ipynb_checkpoints
-
- # IPython
- profile_default/
- ipython_config.py
-
- # pyenv
- # For a library or package, you might want to ignore these files since the code is
- # intended to run in multiple environments; otherwise, check them in:
- # .python-version
-
- # pipenv
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
- # install all needed dependencies.
- #Pipfile.lock
-
- # poetry
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
- # This is especially recommended for binary packages to ensure reproducibility, and is more
- # commonly ignored for libraries.
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
- #poetry.lock
-
- # pdm
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
- #pdm.lock
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
- # in version control.
- # https://pdm.fming.dev/#use-with-ide
- .pdm.toml
-
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
- __pypackages__/
-
- # Celery stuff
- celerybeat-schedule
- celerybeat.pid
-
- # SageMath parsed files
- *.sage.py
-
- # Environments
- .env
- .venv
- env/
  venv/
- ENV/
- env.bak/
- venv.bak/
-
- # Spyder project settings
- .spyderproject
- .spyproject
-
- # Rope project settings
- .ropeproject
-
- # mkdocs documentation
- /site
-
- # mypy
- .mypy_cache/
- .dmypy.json
- dmypy.json
-
- # Pyre type checker
- .pyre/
-
- # pytype static type analyzer
- .pytype/
-
- # Cython debug symbols
- cython_debug/
-
- # PyCharm
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
- # and can be added to the global gitignore or merged into this file. For a more nuclear
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
- #.idea/
+ Data/
dl/make_dataset.py DELETED
@@ -1,42 +0,0 @@
- import os
- import json
- import argparse
- import librosa
- import pandas as pd
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--dir", type=str, help="Directory containing OGG audio files.")
-     parser.add_argument("--file", type=str, help="JSON file mapping filenames to classes.")
-     parser.add_argument('-o', '--output', type=str, default="output_dataset.csv", help="Output CSV file.")
-     return vars(parser.parse_args())
-
- def load_audio_files(audio_dir, file_class_mapping):
-     data = []
-     for filename, class_label in file_class_mapping.items():
-         file_path = os.path.join(audio_dir, filename)
-         if os.path.exists(file_path):
-             audio, sr = librosa.load(file_path, sr=None)
-             data.append({
-                 'filename': filename,
-                 'audio': audio,
-                 'sampling_rate': sr,
-                 'label': class_label
-             })
-     return data
-
- def main(args):
-     audio_dir = args['dir']
-     json_file = args['file']
-
-     with open(json_file, 'r') as f:
-         file_class_mapping = json.load(f)
-
-     dataset = load_audio_files(audio_dir, file_class_mapping)
-
-     df = pd.DataFrame(dataset)
-
-     df.to_csv(args['output'], index=False)
-
- if __name__ == "__main__":
-     main(parse_args())
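For orientation, the deleted script expected --file to point at a JSON object mapping OGG filenames to class labels. A hypothetical mapping file and invocation (the filenames, labels, and directory below are illustrative, not taken from the repository; output_dataset.csv is the script's own default):

    {
        "blues.00001.ogg": "blues",
        "jazz.00017.ogg": "jazz"
    }

    python dl/make_dataset.py --dir ./audio --file mapping.json -o output_dataset.csv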
dl/push_model.py DELETED
@@ -1,36 +0,0 @@
- import argparse
- from transformers import AutoModel, AutoTokenizer
- from huggingface_hub import HfApi, HfFolder
-
- def parse_args():
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--username", type=str, required=True, help="Hugging Face username.")
-     parser.add_argument("--model_dir", type=str, required=True, help="Path to the saved model.")
-     parser.add_argument("--repo_name", type=str, required=True, help="Name of the Hugging Face Hub repository.")
-     parser.add_argument("--private", type=bool, default=False, help="Flag indicating whether the repository should be private.")
-     return parser.parse_args()
-
- def main():
-     args = parse_args()
-     token = HfFolder.get_token()
-     if token is None:
-         raise ValueError("Authentication token not found. Log in with the Hugging Face CLI.")
-
-     model = AutoModel.from_pretrained(args.model_dir)
-     tokenizer = AutoTokenizer.from_pretrained(args.model_dir)
-
-     repo_url = HfApi().create_repo(
-         token=token,
-         name=args.repo_name,
-         organization=args.username,
-         private=args.private,
-         exist_ok=True
-     )
-
-     model.push_to_hub(args.repo_name, use_auth_token=token)
-     tokenizer.push_to_hub(args.repo_name, use_auth_token=token)
-
-     print(f"Model and tokenizer have been pushed to {repo_url}")
-
- if __name__ == "__main__":
-     main()
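For orientation, the deleted script assumed a token stored by the Hugging Face CLI and was invoked along these lines (the username, path, and repository name are placeholders):

    huggingface-cli login
    python dl/push_model.py --username <hf-username> --model_dir ./saved_model --repo_name my-genre-classifier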
dl/testing.ipynb DELETED
@@ -1,394 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from datasets import load_dataset, Audio\n",
- "from transformers import AutoFeatureExtractor\n",
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/potato/.virtualenvs/studia/lib/python3.10/site-packages/datasets/load.py:1486: FutureWarning: The repository for marsyas/gtzan contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/marsyas/gtzan\n",
- "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
- "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
- "  warnings.warn(\n"
- ]
- }
- ],
- "source": [
- "data = load_dataset(\"marsyas/gtzan\", \"all\")\n",
- "data = data['train'].train_test_split(seed=42, shuffle=True, test_size=.1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "map_class = data['train'].features['genre'].int2str"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Models to train:\n",
- "\n",
- "- ntu-spml/distilhubert\n",
- "- dima806/music_genres_classification"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "distilhubert = AutoFeatureExtractor.from_pretrained(\n",
- "    'ntu-spml/distilhubert', do_normalize=True, return_attention_mask=True\n",
- ")\n",
- "# music_genres_classification = AutoFeatureExtractor.from_pretrained(\n",
- "#     'dima806/music_genres_classification', do_normalize=True, return_attention_mask=True\n",
- "# )\n",
- "\n",
- "# models = {'distilhubert': distilhubert,\n",
- "#           'music_genres_classification': music_genres_classification}\n",
- "\n",
- "# def get_sampling_rate(model):\n",
- "#     return model.sampling_rate\n",
- "\n",
- "# if np.all([ get_sampling_rate(model) == 16000 for model in models.values()]):\n",
- "#     sampling_rate = 16000\n",
- "# else:\n",
- "#     raise ValueError('You need to setup different values than 16000 for a sampling rate')\n",
- "\n",
- "data = data.cast_column(\"audio\", Audio(sampling_rate=16000))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "class Preprocess:\n",
- "    def __init__(self, model):\n",
- "        self.model = model\n",
- "\n",
- "    def __call__(self, examples):\n",
- "        audio_arrays = [x[\"array\"] for x in examples[\"audio\"]]\n",
- "        inputs = self.model(\n",
- "            audio_arrays,\n",
- "            sampling_rate=self.model.sampling_rate,\n",
- "            max_length=int(self.model.sampling_rate * 30.0),\n",
- "            truncation=True,\n",
- "            return_attention_mask=True)\n",
- "        return inputs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "distilhubert_preprocess = Preprocess(distilhubert)\n",
- "# music_genres_classification_preprocess = Preprocess(music_genres_classification)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def process_data(preprocess):\n",
- "    data_preprocessed = data.map(\n",
- "        preprocess,\n",
- "        remove_columns=[\"audio\", \"file\"],\n",
- "        batched=True,\n",
- "        batch_size=100,\n",
- "        num_proc=1)\n",
- "    return data_preprocessed\n",
- "\n",
- "distilhubert_data = process_data(distilhubert_preprocess)\n",
- "# music_genres_classification_data = process_data(music_genres_classification_preprocess)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "distilhubert_data = distilhubert_data.rename_column(\"genre\", \"label\")\n",
- "# music_genres_classification_data = music_genres_classification_data.rename_column(\"genre\", \"label\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "id2label = {\n",
- "    str(i): map_class(i)\n",
- "    for i in range(len(distilhubert_data[\"train\"].features[\"label\"].names))\n",
- "}\n",
- "label2id = {v: k for k, v in id2label.items()}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "from transformers import AutoModelForAudioClassification\n",
- "from transformers import TrainingArguments\n",
- "import numpy as np\n",
- "from transformers import Trainer\n",
- "\n",
- "\n",
- "class Eval:\n",
- "    def __init__(self, metric) -> None:\n",
- "        self.metric = metric\n",
- "\n",
- "    def __call__(self, eval_pred):\n",
- "        predictions = np.argmax(eval_pred.predictions, axis=1)\n",
- "        return self.metric.compute(predictions=predictions, references=eval_pred.label_ids)\n",
- "\n",
- "def train(model_name, class_nb, label2id, id2label, batch_size, epochs, eval_metric, data, feature_extractor):\n",
- "    model = AutoModelForAudioClassification.from_pretrained(\n",
- "        model_name,\n",
- "        num_labels=class_nb,\n",
- "        label2id=label2id,\n",
- "        id2label=id2label)\n",
- "\n",
- "    training_args = TrainingArguments(\n",
- "        f\"{model_name.split('/')[-1]}-ft-gtzan-{batch_size}-{epochs}\",\n",
- "        evaluation_strategy=\"epoch\",\n",
- "        save_strategy=\"epoch\",\n",
- "        learning_rate=5e-5,\n",
- "        per_device_train_batch_size=batch_size,\n",
- "        gradient_accumulation_steps=2,\n",
- "        per_device_eval_batch_size=batch_size,\n",
- "        num_train_epochs=epochs,\n",
- "        warmup_ratio=0.1,\n",
- "        logging_steps=5,\n",
- "        load_best_model_at_end=True,\n",
- "        metric_for_best_model=\"accuracy\",\n",
- "        fp16=True,\n",
- "        push_to_hub=True)\n",
- "\n",
- "    trainer = Trainer(\n",
- "        model,\n",
- "        training_args,\n",
- "        train_dataset=data[\"train\"],\n",
- "        eval_dataset=data[\"test\"],\n",
- "        tokenizer=feature_extractor,\n",
- "        compute_metrics=eval_metric)\n",
- "\n",
- "    trainer.train()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0282b77db7d1478f8e96988688e4b049",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "from huggingface_hub import notebook_login\n",
- "\n",
- "notebook_login()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'Eval' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[14], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mevaluate\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m acc \u001b[38;5;241m=\u001b[39m \u001b[43mEval\u001b[49m(evaluate\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m 6\u001b[0m models \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 7\u001b[0m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_name\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mntu-spml/distilhubert\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclass_nb\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mlen\u001b[39m(id2label), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlabel2id\u001b[39m\u001b[38;5;124m'\u001b[39m: label2id, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid2label\u001b[39m\u001b[38;5;124m'\u001b[39m: id2label, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbatch_size\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m4\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepochs\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m8\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124meval_metric\u001b[39m\u001b[38;5;124m'\u001b[39m: acc, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m: distilhubert_data, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfeature_extractor\u001b[39m\u001b[38;5;124m'\u001b[39m: distilhubert},\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# {'model_name': 'dima806/music_genres_classification', 'class_nb': len(id2label), 'label2id': label2id, 'id2label': id2label, 'batch_size': 25, 'epochs': 8, 'eval_metric': acc, 'data': music_genres_classification_data, 'feature_extractor': music_genres_classification}]\u001b[39;00m\n\u001b[1;32m 9\u001b[0m ]\n",
- "\u001b[0;31mNameError\u001b[0m: name 'Eval' is not defined"
- ]
- }
- ],
- "source": [
- "import evaluate\n",
- "\n",
- "acc = Eval(evaluate.load('accuracy'))\n",
- "\n",
- "\n",
- "models = [\n",
- "    {'model_name': 'ntu-spml/distilhubert', 'class_nb': len(id2label), 'label2id': label2id, 'id2label': id2label, 'batch_size': 4, 'epochs': 8, 'eval_metric': acc, 'data': distilhubert_data, 'feature_extractor': distilhubert},\n",
- "    # {'model_name': 'dima806/music_genres_classification', 'class_nb': len(id2label), 'label2id': label2id, 'id2label': id2label, 'batch_size': 25, 'epochs': 8, 'eval_metric': acc, 'data': music_genres_classification_data, 'feature_extractor': music_genres_classification}]\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at ntu-spml/distilhubert and are newly initialized: ['classifier.bias', 'classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "/home/potato/.virtualenvs/studia/lib/python3.10/site-packages/transformers/training_args.py:1474: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
- "  warnings.warn(\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "abba0ecb6fb242fe8538bcdeec44ef9b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "  0%|          | 0/896 [00:00<?, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/potato/.virtualenvs/studia/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n",
- "  return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'loss': 2.2907, 'grad_norm': 1.133973240852356, 'learning_rate': 2.777777777777778e-06, 'epoch': 0.04}\n",
- "{'loss': 2.2975, 'grad_norm': 1.526039719581604, 'learning_rate': 5.555555555555556e-06, 'epoch': 0.09}\n",
- "{'loss': 2.2871, 'grad_norm': 1.1069303750991821, 'learning_rate': 8.333333333333334e-06, 'epoch': 0.13}\n",
- "{'loss': 2.3107, 'grad_norm': 1.4785107374191284, 'learning_rate': 1.1111111111111112e-05, 'epoch': 0.18}\n",
- "{'loss': 2.2712, 'grad_norm': 1.5087419748306274, 'learning_rate': 1.388888888888889e-05, 'epoch': 0.22}\n",
- "{'loss': 2.3081, 'grad_norm': 1.904876708984375, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.27}\n",
- "{'loss': 2.2583, 'grad_norm': 1.2942432165145874, 'learning_rate': 1.9444444444444445e-05, 'epoch': 0.31}\n",
- "{'loss': 2.2572, 'grad_norm': 1.8840770721435547, 'learning_rate': 2.2222222222222223e-05, 'epoch': 0.36}\n",
- "{'loss': 2.2733, 'grad_norm': 1.25327730178833, 'learning_rate': 2.5e-05, 'epoch': 0.4}\n",
- "{'loss': 2.273, 'grad_norm': 1.499450922012329, 'learning_rate': 2.777777777777778e-05, 'epoch': 0.44}\n",
- "{'loss': 2.2221, 'grad_norm': 1.6644848585128784, 'learning_rate': 3.055555555555556e-05, 'epoch': 0.49}\n",
- "{'loss': 2.2279, 'grad_norm': 1.5860854387283325, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.53}\n",
- "{'loss': 2.2253, 'grad_norm': 1.8796266317367554, 'learning_rate': 3.611111111111111e-05, 'epoch': 0.58}\n",
- "{'loss': 2.1556, 'grad_norm': 2.5186994075775146, 'learning_rate': 3.888888888888889e-05, 'epoch': 0.62}\n",
- "{'loss': 2.1329, 'grad_norm': 2.4733965396881104, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.67}\n",
- "{'loss': 2.1214, 'grad_norm': 1.7492904663085938, 'learning_rate': 4.4444444444444447e-05, 'epoch': 0.71}\n",
- "{'loss': 2.0805, 'grad_norm': 3.5089523792266846, 'learning_rate': 4.722222222222222e-05, 'epoch': 0.76}\n",
- "{'loss': 1.9769, 'grad_norm': 1.759109377861023, 'learning_rate': 5e-05, 'epoch': 0.8}\n",
- "{'loss': 2.0086, 'grad_norm': 3.3685412406921387, 'learning_rate': 4.968982630272953e-05, 'epoch': 0.84}\n",
- "{'loss': 1.9664, 'grad_norm': 4.404444694519043, 'learning_rate': 4.937965260545906e-05, 'epoch': 0.89}\n",
- "{'loss': 1.9937, 'grad_norm': 9.0780611038208, 'learning_rate': 4.9069478908188585e-05, 'epoch': 0.93}\n",
- "{'loss': 1.876, 'grad_norm': 4.4436798095703125, 'learning_rate': 4.8759305210918115e-05, 'epoch': 0.98}\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "7c5670c3679841ab80f531ecb12310a1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "  0%|          | 0/25 [00:00<?, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "ename": "TypeError",
- "evalue": "'Accuracy' object is not callable",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m m \u001b[38;5;129;01min\u001b[39;00m models:\n\u001b[0;32m----> 2\u001b[0m \u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mm\u001b[49m\u001b[43m)\u001b[49m\n",
- "Cell \u001b[0;32mIn[10], line 42\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(model_name, class_nb, label2id, id2label, batch_size, epochs, eval_metric, data, feature_extractor)\u001b[0m\n\u001b[1;32m 18\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 19\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_name\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-ft-gtzan-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbatch_size\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mepochs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 20\u001b[0m evaluation_strategy\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mepoch\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m fp16\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 32\u001b[0m push_to_hub\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 34\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 35\u001b[0m model,\n\u001b[1;32m 36\u001b[0m training_args,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 39\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mfeature_extractor,\n\u001b[1;32m 40\u001b[0m compute_metrics\u001b[38;5;241m=\u001b[39meval_metric)\n\u001b[0;32m---> 42\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/.virtualenvs/studia/lib/python3.10/site-packages/transformers/trainer.py:1876\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1873\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1874\u001b[0m \u001b[38;5;66;03m# Disable progress bars when uploading models during checkpoints to avoid polluting stdout\u001b[39;00m\n\u001b[1;32m 1875\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39mdisable_progress_bars()\n\u001b[0;32m-> 1876\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1877\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1878\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1879\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1880\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1883\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n",
- "File \u001b[0;32m~/.virtualenvs/studia/lib/python3.10/site-packages/transformers/trainer.py:2311\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2308\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_training_stop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 2310\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_epoch_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 2311\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_norm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m DebugOption\u001b[38;5;241m.\u001b[39mTPU_METRICS_DEBUG \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdebug:\n\u001b[1;32m 2314\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_torch_xla_available():\n\u001b[1;32m 2315\u001b[0m \u001b[38;5;66;03m# tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)\u001b[39;00m\n",
- "File \u001b[0;32m~/.virtualenvs/studia/lib/python3.10/site-packages/transformers/trainer.py:2721\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2719\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 2720\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_evaluate:\n\u001b[0;32m-> 2721\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2722\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2724\u001b[0m \u001b[38;5;66;03m# Run delayed LR scheduler now that metrics are populated\u001b[39;00m\n",
- "File \u001b[0;32m~/.virtualenvs/studia/lib/python3.10/site-packages/transformers/trainer.py:3572\u001b[0m, in \u001b[0;36mTrainer.evaluate\u001b[0;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3569\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 3571\u001b[0m eval_loop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprediction_loop \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_legacy_prediction_loop \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation_loop\n\u001b[0;32m-> 3572\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43meval_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3573\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_dataloader\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3574\u001b[0m \u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mEvaluation\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3575\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# No point gathering the predictions if there are no metrics, otherwise we defer to\u001b[39;49;00m\n\u001b[1;32m 3576\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# self.args.prediction_loss_only\u001b[39;49;00m\n\u001b[1;32m 3577\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_metrics\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 3578\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3579\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3580\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3582\u001b[0m total_batch_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39meval_batch_size \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mworld_size\n\u001b[1;32m 3583\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetric_key_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_jit_compilation_time\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mmetrics:\n",
- "File \u001b[0;32m~/.virtualenvs/studia/lib/python3.10/site-packages/transformers/trainer.py:3854\u001b[0m, in \u001b[0;36mTrainer.evaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3850\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_metrics(\n\u001b[1;32m 3851\u001b[0m EvalPrediction(predictions\u001b[38;5;241m=\u001b[39mall_preds, label_ids\u001b[38;5;241m=\u001b[39mall_labels, inputs\u001b[38;5;241m=\u001b[39mall_inputs)\n\u001b[1;32m 3852\u001b[0m )\n\u001b[1;32m 3853\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 3854\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_metrics\u001b[49m\u001b[43m(\u001b[49m\u001b[43mEvalPrediction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredictions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mall_preds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mall_labels\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3855\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m metrics \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3856\u001b[0m metrics \u001b[38;5;241m=\u001b[39m {}\n",
- "\u001b[0;31mTypeError\u001b[0m: 'Accuracy' object is not callable"
- ]
- }
- ],
- "source": [
- "for m in models:\n",
- "    train(**m)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "studia",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
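Note on the failure above: the final TypeError ('Accuracy' object is not callable) arises when the raw object returned by evaluate.load('accuracy') is handed to Trainer as compute_metrics. Trainer invokes compute_metrics like a function, so the metric must be wrapped in a callable such as the Eval class defined earlier; in this session, after the NameError, acc evidently held a bare Accuracy object instead. An equivalent plain-function wrapper, mirroring compute_metrics in dl/train.py below:

    import numpy as np
    import evaluate

    accuracy_metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        # Trainer passes an EvalPrediction; reduce logits to class ids first.
        predictions = np.argmax(eval_pred.predictions, axis=1)
        return accuracy_metric.compute(predictions=predictions,
                                       references=eval_pred.label_ids)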
dl/train.py DELETED
@@ -1,113 +0,0 @@
- import argparse
- import numpy as np
- from datasets import load_dataset, Audio
- from transformers import (AutoFeatureExtractor,
-                           AutoModelForAudioClassification, TrainingArguments,
-                           Trainer)
- import os
- import evaluate
- import random
-
-
- accuracy_metric = evaluate.load("accuracy")
-
- def parse_args() -> dict:
-     parser = argparse.ArgumentParser(description="Script for training an audio classification model.")
-     parser.add_argument("--learning_rate", type=float, default=5e-5,
-                         help="Learning rate used during model training.")
-     parser.add_argument("--train_eval_split", type=float, default=0.9,
-                         help="Ratio of training data to the whole dataset; the rest is used for validation.")
-     parser.add_argument("--model_id", type=str, required=True,
-                         help="Hugging Face model identifier or path to a local model.")
-     parser.add_argument("--num_epochs", type=int, default=20,
-                         help="Number of training epochs.")
-     parser.add_argument("--seed", type=int, default=42,
-                         help="Random seed.")
-     parser.add_argument("--save_dir", type=str, default=".",
-                         help="Directory in which to save the trained model's weights.")
-     parser.add_argument("--dataset", type=str, default="marsyas/gtzan",
-                         help="Name/location of the dataset.")
-     return vars(parser.parse_args())
-
-
- def compute_metrics(eval_pred):
-     predictions = np.argmax(eval_pred.predictions, axis=1)
-     return accuracy_metric.compute(predictions=predictions,
-                                    references=eval_pred.label_ids)
-
- def main(args: dict) -> None:
-     random.seed(args["seed"])
-     max_duration = 30.0
-
-     gtzan = load_dataset(args["dataset"], "all")
-     gtzan = gtzan["train"].train_test_split(seed=42, shuffle=True,
-                                             test_size=1 - args["train_eval_split"])
-
-     feature_extractor = AutoFeatureExtractor.from_pretrained(
-         args["model_id"], do_normalize=True, return_attention_mask=True)
-     sampling_rate = feature_extractor.sampling_rate
-
-
-     def preprocess_function(examples):
-         audio_arrays = [x["array"] for x in examples["audio"]]
-         inputs = feature_extractor(
-             audio_arrays,
-             sampling_rate=sampling_rate,
-             max_length=int(sampling_rate * max_duration),
-             truncation=True,
-             return_attention_mask=True,
-         )
-         return inputs
-
-     gtzan = gtzan.cast_column("audio", Audio(sampling_rate=sampling_rate))
-     gtzan_encoded = gtzan.map(
-         preprocess_function,
-         remove_columns=["audio", "file"],
-         batched=True,
-         batch_size=100,
-         num_proc=1)
-
-     gtzan_encoded = gtzan_encoded.rename_column("genre", "label")
-
-     id2label = {str(i): gtzan["train"].features["genre"].int2str(i)
-                 for i in range(len(gtzan_encoded["train"].features["label"].names))}
-     label2id = {v: k for k, v in id2label.items()}
-     num_labels = len(id2label)
-
-     model = AutoModelForAudioClassification.from_pretrained(
-         args["model_id"],
-         num_labels=num_labels,
-         label2id=label2id,
-         id2label=id2label)
-
-     dir_name = f"{args['model_id']}-{args['seed']}-{args['dataset']}-{args['learning_rate']}".replace("/", "-")
-
-     training_args = TrainingArguments(
-         output_dir=os.path.join(args["save_dir"], dir_name),
-         evaluation_strategy="epoch",
-         save_strategy="epoch",
-         learning_rate=args["learning_rate"],
-         per_device_train_batch_size=5,
-         gradient_accumulation_steps=2,
-         per_device_eval_batch_size=5,
-         num_train_epochs=args["num_epochs"],
-         warmup_ratio=0.1,
-         logging_dir="./logs",
-         logging_steps=5,
-         load_best_model_at_end=True,
-         metric_for_best_model="accuracy",
-         fp16=True)
-
-     trainer = Trainer(
-         model=model,
-         args=training_args,
-         train_dataset=gtzan_encoded["train"],
-         eval_dataset=gtzan_encoded["test"],
-         tokenizer=feature_extractor,
-         compute_metrics=compute_metrics)
-
-     trainer.train()
-
-
- if __name__ == "__main__":
-     main(parse_args())
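For orientation, the deleted trainer was configured entirely from its CLI; a typical invocation (the model id is the one used in dl/testing.ipynb, the save directory is illustrative, and the remaining options fall back to the defaults above) would have been:

    python dl/train.py --model_id ntu-spml/distilhubert --num_epochs 20 --save_dir ./weights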
main.py DELETED
@@ -1,29 +0,0 @@
- from transformers import pipeline
- import librosa
- import json
- import gradio as gr
-
-
- def audio_pipeline(file_path: str, top_k: int = 7) -> dict[str, float]:
-     y, _ = librosa.load(file_path, sr=config['sampling_rate'])
-     out = pipe(y, top_k=top_k)
-     print(out)
-     return {clas['label']: clas['score'] for clas in out}
-
-
- with open('config.json', 'r') as f:
-     config = json.load(f)
-
- pipe = pipeline("audio-classification", model=config['models_path'])
-
- demo = gr.Interface(
-     fn=audio_pipeline,
-     inputs=[gr.Audio(type="filepath"), gr.Slider(1, 10, 1,
-                                                  label="Top K Results")],
-     outputs=gr.Label(num_top_classes=7),
-     title="Music Mind Audio Classification",
-     description="Upload an .mp3 or .ogg audio file "
-                 "to classify the content using a pre-trained model.")
-
- if __name__ == "__main__":
-     demo.launch(debug=True)
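For orientation, the deleted app read config.json for exactly the two keys used above. A hypothetical example (the model path is illustrative; 16000 matches the 16 kHz feature extractors used elsewhere in this repo):

    {
        "models_path": "./weights/model",
        "sampling_rate": 16000
    }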
requirements.txt CHANGED
@@ -1,7 +1,9 @@
- torch
- datasets
- transformers[torch]
- evaluate
- numpy
- librosa
- soundfile
+ torch --index-url https://download.pytorch.org/whl/cu121
+ torchvision --index-url https://download.pytorch.org/whl/cu121
+ transformers==4.41.2
+ gradio==4.36.1
+ numpy==1.26.4
+ evaluate==0.4.2
+ tqdm==4.66.4
+ mlflow==2.13.2
+ librosa==0.10.2.post1
train.py ADDED
@@ -0,0 +1,134 @@
+ from transformers import AutoModelForAudioClassification
+ from torch.utils.data import DataLoader
+ import evaluate
+ import torch
+ from tqdm import tqdm
+ import argparse
+ import json
+ import os
+ import shutil
+ import mlflow
+ import mlflow.pytorch
+
+ from gtzan import GtzanDataset
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ metric = evaluate.load("accuracy")
+
+ def parse_args():
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--label2id", type=str)
+     ap.add_argument("--model_id", type=str)
+     ap.add_argument("--batch_size", type=int, default=32)
+     ap.add_argument("--train_dir", type=str, default="data/train")
+     ap.add_argument("--val_dir", type=str, default="data/val")
+     ap.add_argument("--num_workers", type=int, default=4)
+     ap.add_argument("--lr", type=float, default=1e-4)
+     ap.add_argument("--epochs", type=int, default=10)
+     ap.add_argument("--output_dir", type=str, default="./weights")
+     ap.add_argument("--seed", type=int, default=42)
+     ap.add_argument("--name", type=str, default="model")
+     return vars(ap.parse_args())
+
+ def train(args):
+     torch.manual_seed(args["seed"])
+
+     label2id = json.load(open(args["label2id"]))
+     id2label = {v: k for k, v in label2id.items()}
+     num_labels = len(label2id)
+     if not os.path.exists(args["output_dir"]):
+         os.makedirs(args["output_dir"])
+
+     train_dataset = GtzanDataset(args["train_dir"], label2id)
+     val_dataset = GtzanDataset(args["val_dir"], label2id)
+
+     train_loader = DataLoader(
+         train_dataset,
+         batch_size=args["batch_size"],
+         shuffle=True,
+         num_workers=args["num_workers"])
+
+     val_loader = DataLoader(
+         val_dataset,
+         batch_size=args["batch_size"],
+         shuffle=False,
+         num_workers=args["num_workers"])
+
+     model = AutoModelForAudioClassification.from_pretrained(
+         args['model_id'],
+         num_labels=num_labels,
+         label2id=label2id,
+         id2label=id2label,
+     ).to(device)
+
+     optimizer = torch.optim.AdamW(model.parameters(), lr=args["lr"])
+     lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+         optimizer, T_max=len(train_loader) * args["epochs"]
+     )
+
+     max_val_accuracy = 0
+     best_path = ""
+
+     with mlflow.start_run():
+         mlflow.log_params({
+             "model_id": args["model_id"],
+             "batch_size": args["batch_size"],
+             "lr": args["lr"],
+             "epochs": args["epochs"],
+             "seed": args["seed"]
+         })
+
+         for epoch in tqdm(range(args["epochs"])):
+             model.train()
+             train_progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch + 1}")
+             for batch in train_progress_bar:
+                 input_values, attention_mask, label = [b.to(device) for b in batch]
+                 outputs = model(input_values=input_values,
+                                 attention_mask=attention_mask,
+                                 labels=label)
+                 loss = outputs.loss
+                 loss.backward()
+                 optimizer.step()
+                 lr_scheduler.step()
+                 optimizer.zero_grad()
+
+                 train_progress_bar.set_postfix({"loss": loss.item()})
+                 train_progress_bar.update(1)
+                 mlflow.log_metric("train_loss", loss.item())  # Log training loss
+
+             torch.cuda.empty_cache()
+             model.eval()
+
+             val_progress_bar = tqdm(val_loader, desc="Validation")
+             for batch in val_progress_bar:
+                 input_values, attention_mask, label = [b.to(device) for b in batch]
+                 with torch.no_grad():
+                     outputs = model(input_values=input_values,
+                                     attention_mask=attention_mask,
+                                     labels=label)
+
+                 logits = outputs.logits
+                 predictions = torch.argmax(logits, dim=-1)
+                 metric.add_batch(predictions=predictions, references=label)
+                 val_progress_bar.update(1)
+
+             val_accuracy = metric.compute()
+             mlflow.log_metric("val_accuracy", val_accuracy["accuracy"], step=epoch)  # Log validation accuracy
+             torch.cuda.empty_cache()
+             if val_accuracy["accuracy"] > max_val_accuracy:
+                 if best_path:
+                     shutil.rmtree(best_path)
+                 model_save_dir = os.path.join(
+                     args["output_dir"],
+                     args['name'],
+                     f"{int(round(val_accuracy['accuracy'], 2) * 100)}")
+                 if not os.path.exists(model_save_dir):
+                     os.makedirs(model_save_dir, exist_ok=True)
+                 model.save_pretrained(model_save_dir)
+                 max_val_accuracy = val_accuracy["accuracy"]
+                 best_path = model_save_dir
+
+         mlflow.pytorch.log_model(model, "model")
+
+ if __name__ == "__main__":
+     train(parse_args())
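Note that the new script imports GtzanDataset from a gtzan module that is not part of this commit. As orientation only, a minimal sketch of the interface train.py relies on (a map-style dataset whose items are (input_values, attention_mask, label) tensors of equal length, so the default DataLoader collation can stack them) might look like this; the on-disk layout (one subdirectory of precomputed .pt feature files per genre) and every name inside are assumptions, not code from the repository:

    # gtzan.py -- hypothetical sketch; the real module is not included in this commit
    import os
    import torch
    from torch.utils.data import Dataset

    class GtzanDataset(Dataset):
        """Map-style dataset yielding (input_values, attention_mask, label) tensors."""

        def __init__(self, root_dir, label2id):
            # Assumed layout: root_dir/<genre>/<clip>.pt, each file holding a dict
            # with precomputed "input_values" and "attention_mask" tensors.
            self.items = []
            for genre, idx in label2id.items():
                genre_dir = os.path.join(root_dir, genre)
                for name in sorted(os.listdir(genre_dir)):
                    self.items.append((os.path.join(genre_dir, name), idx))

        def __len__(self):
            return len(self.items)

        def __getitem__(self, i):
            path, label = self.items[i]
            feats = torch.load(path)
            return (feats["input_values"].squeeze(0),
                    feats["attention_mask"].squeeze(0),
                    torch.tensor(label))

With a label2id file such as {"blues": 0, "classical": 1}, training would then be launched with something like: python train.py --label2id label2id.json --model_id ntu-spml/distilhubert --train_dir data/train --val_dir data/val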