boris committed on
Commit bf3640d
Parent: 38705a9

refactor: loop over runs

Files changed (1)
  1. dev/inference/wandb-backend.ipynb +98 -229
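The commit folds the previously top-level notebook cells into per-run helpers so that inference logging can loop over multiple W&B training runs. A condensed sketch of the control flow introduced in the diff below, using the helper names it defines (the per-batch generation and CLIP ranking inside the loop are still left as a TODO in this commit; `api`, `samples`, and `tqdm` come from other cells of the notebook):

def log_run(run_id):
    # model checkpoints are logged as W&B artifacts, one version per save
    artifact_versions = get_artifact_versions(run_id)
    last_inference_version = get_last_inference_version(run_id)
    training_config = get_training_config(run_id)
    run, p_generate = None, None
    for artifact in artifact_versions:
        # lazily resume the matching `inference-<run_id>` W&B run, download the
        # checkpoint into a temporary directory, reload tokenizer and model,
        # re-pmap the generate function, then iterate over the batches in `samples`
        ...

def log_runs(runs):
    for run in tqdm(runs):
        log_run(run)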
dev/inference/wandb-backend.ipynb CHANGED
@@ -13,6 +13,7 @@
     "import random\n",
     "import numpy as np\n",
     "from PIL import Image\n",
+    "from tqdm import tqdm\n",
     "import jax\n",
     "import jax.numpy as jnp\n",
     "from flax.training.common_utils import shard, shard_prng_key\n",
@@ -47,18 +48,10 @@
     "num_images = 128\n",
     "top_k = 8\n",
     "text_normalizer = TextNormalizer() if normalize_text else None\n",
-    "padding_item = 'NONE'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a045827-3461-4499-8959-38d173bc4e5e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "padding_item = 'NONE'\n",
     "seed = random.randint(0, 2**32-1)\n",
-    "key = jax.random.PRNGKey(seed)"
+    "key = jax.random.PRNGKey(seed)\n",
+    "api = wandb.Api()"
    ]
   },
   {
@@ -70,18 +63,26 @@
    "source": [
     "vqgan = VQModel.from_pretrained(VQGAN_REPO, revision=VQGAN_COMMIT_ID)\n",
     "clip = FlaxCLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
-    "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")"
+    "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
+    "clip_params = replicate(clip.params)\n",
+    "vqgan_params = replicate(vqgan.params)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4927529a-8828-4150-bc76-e1b60d8dee62",
+   "id": "a500dd07-dbc3-477d-80d4-2b73a3b83ef3",
    "metadata": {},
    "outputs": [],
    "source": [
-    "clip_params = replicate(clip.params)\n",
-    "vqgan_params = replicate(vqgan.params)"
+    "@partial(jax.pmap, axis_name=\"batch\")\n",
+    "def p_decode(indices, params):\n",
+    "    return vqgan.decode_code(indices, params=params)\n",
+    "\n",
+    "@partial(jax.pmap, axis_name=\"batch\")\n",
+    "def p_clip(inputs):\n",
+    "    logits = clip(**inputs).logits_per_image\n",
+    "    return logits"
    ]
   },
   {
@@ -103,36 +104,6 @@
     " samples = [samples[i:i+batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f75b2869-fc25-4f56-b937-e97bbb712ede",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(samples)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c48525c9-447a-4430-81d7-4b699f545638",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "samples[-1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a2c629e9-1a82-40c6-a260-ca1780c19a2e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "api = wandb.Api()"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -142,7 +113,7 @@
    "source": [
     "# TODO: iterate on runs\n",
     "wandb_run = wandb_runs[0]\n",
-    "functions_pmapped = False"
+    "model_pmapped = False"
    ]
   },
   {
@@ -152,60 +123,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "try:\n",
-    "    versions = api.artifact_versions(type_name='bart_model', name=f'dalle-mini/dalle-mini/model-{wandb_run}', per_page=10000)\n",
-    "except:\n",
-    "    versions = []"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e8026e63-9e73-472c-9440-5e742c614901",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "versions, len(versions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ead44aee-52d5-4ca2-8984-c4d267d9e72a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "versions[0].version"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cfd48de9-6022-444f-8b12-05cba8fad071",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "artifact = versions[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4db848c1-2bb5-432c-a732-1c6d0636e172",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "version = int(artifact.version[1:])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "25fac577-146d-4e62-a3ea-f0baea79ef83",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "version"
+    "def get_artifact_versions(run_id):\n",
+    "    try:\n",
+    "        versions = api.artifact_versions(type_name='bart_model', name=f'dalle-mini/dalle-mini/model-{run_id}', per_page=10000)\n",
+    "    except:\n",
+    "        versions = []\n",
+    "    return versions"
    ]
   },
   {
@@ -215,20 +138,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# retrieve training run\n",
-    "training_run = api.run(f'dalle-mini/dalle-mini/{wandb_run}')\n",
-    "config = training_run.config"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9b9393c6-0a3c-46a8-ba27-ba37982b0009",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# see summary metrics\n",
-    "training_run.summary"
+    "def get_training_config(run_id):\n",
+    "    training_run = api.run(f'dalle-mini/dalle-mini/{run_id}')\n",
+    "    config = training_run.config\n",
+    "    return config"
    ]
   },
   {
@@ -239,7 +152,7 @@
    "outputs": [],
    "source": [
     "# retrieve inference run details\n",
-    "def get_last_version_inference(run_id):\n",
+    "def get_last_inference_version(run_id):\n",
     "    try:\n",
     "        inference_run = api.run(f'dalle-mini/dalle-mini/inference-{run_id}')\n",
     "        return inference_run.summary.get('_step', None)\n",
@@ -250,147 +163,103 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "93b8d869-1658-4fa4-a401-2b91f8ac7a11",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "last_version_inference = get_last_version_inference(wandb_run)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8324835e-fd94-408e-b106-138be308480b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if last_version_inference is None:\n",
-    "    assert version == 0\n",
-    "elif last_version_inference >= version:\n",
-    "    print(f'Version {version} has already been logged')\n",
-    "else:\n",
-    "    assert version == last_version_inference + 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8ce9d2d3-aea3-4d5e-834a-c5caf85dd117",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "run = wandb.init(job_type='inference', config=config, id=f'inference-{wandb_run}', resume='allow')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ffe392c9-36d2-4aaa-a1b3-a827e348c1ef",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tmp_f.cleanup\n",
-    "tmp_f = tempfile.TemporaryDirectory()\n",
-    "tmp = tmp_f.name\n",
-    "#TODO: use context manager"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "562036ed-dc86-48af-90b1-9c18383b3552",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# remove tmp\n",
-    "tmp_f.cleanup()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "299db1bb-fbe6-4d79-a48f-89893f8ed809",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "artifact = run.use_artifact(artifact)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d71481bf-98aa-42cb-b7e2-545d13ae4309",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# only download required files\n",
-    "for f in ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']:\n",
-    "    artifact.get_path(f).download(tmp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6f8ad8dd-da8f-40f9-b438-e43b779d637c",
+   "id": "d1cc9993-1bfc-4ec6-a004-c056189c42ac",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# we verify all the files are present\n",
-    "from pathlib import Path\n",
-    "list(Path(tmp).glob('*'))"
+    "# compile functions - needed only once per run\n",
+    "def pmap_model_function(model):\n",
+    "    \n",
+    "    @partial(jax.pmap, axis_name=\"batch\")\n",
+    "    def _generate(tokenized_prompt, key, params):\n",
+    "        return model.generate(\n",
+    "            **tokenized_prompt,\n",
+    "            do_sample=True,\n",
+    "            num_beams=1,\n",
+    "            prng_key=key,\n",
+    "            params=params\n",
+    "        )\n",
+    "    \n",
+    "    return _generate"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5b715c32-e757-4cb0-9912-ff90238b9f10",
+   "id": "bba70f33-af8b-4eb3-9973-7be672301a0b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tokenizer = BartTokenizer.from_pretrained(tmp)\n",
-    "model = CustomFlaxBartForConditionalGeneration.from_pretrained(tmp)"
+    "def log_run(run_id):\n",
+    "    artifact_versions = get_artifact_versions(run_id)\n",
+    "    last_inference_version = get_last_inference_version(run_id)\n",
+    "    training_config = get_training_config(run_id)\n",
+    "    run = None\n",
+    "    p_generate = None\n",
+    "    model_files = ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']\n",
+    "    for artifact in artifact_versions:\n",
+    "        print(f'Processing artifact: {artifact.name}')\n",
+    "        version = int(artifact.version[1:])\n",
+    "        if last_version_inference is None:\n",
+    "            # we should start from v0\n",
+    "            assert version == 0\n",
+    "        elif version <= last_version_inference:\n",
+    "            print(f'v{version} has already been logged (versions logged up to v{last_version_inference}')\n",
+    "        else:\n",
+    "            # check we are logging the correct version\n",
+    "            assert version == last_version_inference + 1\n",
+    "        \n",
+    "        # start/resume corresponding run\n",
+    "        if run is None:\n",
+    "            run = wandb.init(job_type='inference', config=config, id=f'inference-{wandb_run}', resume='allow')\n",
+    "        \n",
+    "        # work in temporary directory\n",
+    "        with tempfile.TemporaryDirectory() as tmp:\n",
+    "            \n",
+    "            # download model files\n",
+    "            artifact = run.use_artifact(artifact)\n",
+    "            for f in model_files:\n",
+    "                artifact.get_path(f).download(tmp)\n",
+    "            \n",
+    "            # load tokenizer and model\n",
+    "            tokenizer = BartTokenizer.from_pretrained(tmp)\n",
+    "            model = CustomFlaxBartForConditionalGeneration.from_pretrained(tmp)\n",
+    "            model_params = replicate(model.params)\n",
+    "            \n",
+    "            # pmap model function needs to happen only once per model config\n",
+    "            if p_generate is None:\n",
+    "                p_generate = pmap_model_function(model)\n",
+    "            \n",
+    "            for batch in tqdm(samples):\n",
+    "                prompts = [x['Caption'] for x in batch]\n",
+    "                processed_prompts = [text_normalizer(x) for x in prompts] if normalize_text else prompts\n",
+    "                \n",
+    "\n",
+    "        \n",
+    "        \n",
+    "        "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "320823c9-124a-4fc3-a12c-8c015a128285",
+   "id": "4d542342-3232-48a5-a0aa-3cb5c157aa8c",
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_params = replicate(model.params)"
+    "log_run(wandb_run)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d1cc9993-1bfc-4ec6-a004-c056189c42ac",
+   "id": "4e4c7d0c-2848-4f88-b967-82fd571534f1",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# function to generate encoded images\n",
-    "# we should generate this function only once per run\n",
-    "if not functions_pmapped:\n",
-    "    @partial(jax.pmap, axis_name=\"batch\")\n",
-    "    def p_generate(tokenized_prompt, key, params):\n",
-    "        return model.generate(\n",
-    "            **tokenized_prompt,\n",
-    "            do_sample=True,\n",
-    "            num_beams=1,\n",
-    "            prng_key=key,\n",
-    "            params=params\n",
-    "        )\n",
-    "    \n",
-    "    @partial(jax.pmap, axis_name=\"batch\")\n",
-    "    def p_decode(indices, params):\n",
-    "        return vqgan.decode_code(indices, params=params)\n",
-    "    \n",
-    "    @partial(jax.pmap, axis_name=\"batch\")\n",
-    "    def p_clip(inputs):\n",
-    "        logits = clip(**inputs).logits_per_image\n",
-    "        return logits\n",
-    "    \n",
-    "    functions_pmapped = False"
+    "def log_runs(runs):\n",
+    "    for run in tqdm(runs):\n",
+    "        log_run(run)"
    ]
   },
   {
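For reference, a hypothetical driver cell for the new entry points (the run id below is a placeholder, not part of this commit; in the notebook `wandb_run` and `wandb_runs` are set in an earlier cell):

wandb_runs = ['xxxxxxxx']  # placeholder W&B run id
wandb_run = wandb_runs[0]

log_run(wandb_run)    # log images for every new model artifact version of one training run
log_runs(wandb_runs)  # or iterate over several runs with a tqdm progress bar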