Upload folder using huggingface_hub (#4)

- 20017bb5ff648be55f2f447a142d6d1f9f54be10d5864282b10f4dc02f301d9d (d6495b231d669ffc0f4b321e2d3fcdfbee1ff7b7)
- cf85dc8afc610efccec498b1689e6ea89b7e195406e1b10e602fef4d657c2d95 (2d9a1a744e94e36fce0f9e70414db8411990141e)

Files changed (5) hide show

README.md +4 -3
config.json +1 -1
model/optimized_model.pkl +2 -2
model/smash_config.json +1 -1
plots.png +0 -0

README.md CHANGED Viewed

@@ -37,16 +37,17 @@ metrics:
 ![image info](./plots.png)
 **Important remarks:**
-- The quality of the model output might slightly vary compared to the base model. There might be minimal quality loss.
 - These results were obtained on NVIDIA A100-PCIE-40GB with configuration described in config.json and are obtained after a hardware warmup. Efficiency results may vary in other settings (e.g. other hardware, image size, batch size, ...).
 - You can request premium access to more compression methods and tech support for your specific use-cases [here](https://z0halsaff74.typeform.com/pruna-access?typeform-source=www.pruna.ai).
 ## Setup
 You can run the smashed model with these steps:
-0. Check cuda, torch, packaging requirements are installed. For cuda, check with `nvcc --version` and install with `conda install nvidia/label/cuda-12.1.0::cuda`. For packaging and torch, run `pip install packaging torch`.
-1. Install the `pruna-engine` available [here](https://pypi.org/project/pruna-engine/) on Pypi. It might take 15 minutes to install.
     ```bash
    pip install pruna-engine[gpu]==0.6.0 --extra-index-url https://pypi.nvidia.com --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://prunaai.pythonanywhere.com/
     ```

 ![image info](./plots.png)
 **Important remarks:**
+- The quality of the model output might slightly vary compared to the base model.
 - These results were obtained on NVIDIA A100-PCIE-40GB with configuration described in config.json and are obtained after a hardware warmup. Efficiency results may vary in other settings (e.g. other hardware, image size, batch size, ...).
 - You can request premium access to more compression methods and tech support for your specific use-cases [here](https://z0halsaff74.typeform.com/pruna-access?typeform-source=www.pruna.ai).
+- Results mentioning "first" are obtained after the first run of the model. The first run might take more memory or be slower than the subsequent runs due to CUDA overheads.
 ## Setup
 You can run the smashed model with these steps:
+0. Check that the requirements are met: Linux, Python 3.10, and CUDA 12.1.0. For CUDA, check with `nvcc --version` and install with `conda install nvidia/label/cuda-12.1.0::cuda`.
+1. Install the `pruna-engine` package, available [here](https://pypi.org/project/pruna-engine/) on PyPI. It might take up to 15 minutes to install.
     ```bash
    pip install pruna-engine[gpu]==0.6.0 --extra-index-url https://pypi.nvidia.com --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://prunaai.pythonanywhere.com/
     ```

config.json CHANGED Viewed

@@ -1 +1 @@

- {"pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "None", "n_quantization_bits": 32, "output_deviation": 0.005, "compilers": "['diffusers2', 'tiling', 'step_caching']", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "batch_size": 1, "max_batch_size": 1, "image_height": 1024, "image_width": 1024, "version": "xl-1.0", "scheduler": "DDIM", "task": "txt2imgxl", "model_name": "segmind/Segmind-Vega", "weight_name": "None", "save_load_fn": "stable_fast"}

+ {"pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "None", "n_quantization_bits": 32, "output_deviation": 0.01, "compilers": "['step_caching', 'tiling', 'diffusers2']", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "batch_size": 1, "max_batch_size": 1, "image_height": 1024, "image_width": 1024, "version": "xl-1.0", "scheduler": "DDIM", "task": "txt2imgxl", "model_name": "segmind/Segmind-Vega", "weight_name": "None", "save_load_fn": "stable_fast"}

model/optimized_model.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4815f524b0669cbdae5b419f8a9d441d624f59672c593d7a65f77db0943f3b77
-size 3298150061

 version https://git-lfs.github.com/spec/v1
+oid sha256:1db098c6aaea3ca8a96d8ffbc4eed6c868f9487d97e5ffdd3f628cf391fb3288
+size 3298150163

model/smash_config.json CHANGED Viewed

@@ -1 +1 @@

- {"api_key": "pruna_c4c77860c62a2965f6bc281841ee1d7bd3", "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": {"pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "None", "n_quantization_bits": 32, "output_deviation": 0.005, "compilers": "['diffusers2', 'tiling', 'step_caching']", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "cache_dir": ".models/optimized_model", "batch_size": 1, "max_batch_size": 1, "image_height": 1024, "image_width": 1024, "version": "xl-1.0", "scheduler": "DDIM", "task": "txt2imgxl", "model_name": "segmind/Segmind-Vega", "weight_name": "None", "save_load_fn": "stable_fast"}}

+ {"api_key": "pruna_c4c77860c62a2965f6bc281841ee1d7bd3", "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": {"pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "None", "n_quantization_bits": 32, "output_deviation": 0.01, "compilers": "['step_caching', 'tiling', 'diffusers2']", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "cache_dir": ".models/optimized_model", "batch_size": 1, "max_batch_size": 1, "image_height": 1024, "image_width": 1024, "version": "xl-1.0", "scheduler": "DDIM", "task": "txt2imgxl", "model_name": "segmind/Segmind-Vega", "weight_name": "None", "save_load_fn": "stable_fast"}}

plots.png CHANGED Viewed