Commit 2fc2bf3: "fix"
Parent: 49f568d
app.py (CHANGED)
@@ -16,6 +16,8 @@ from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
 import spaces
 from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
 import math
+import time
+from requests.exceptions import ReadTimeout, ConnectionError

 from src.utils.train_util import instantiate_from_config
 from src.utils.camera_util import (
@@ -83,13 +85,29 @@ def load_models():

     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

-    # Load diffusion pipeline
+    # Load diffusion pipeline with retry logic
     print('Loading diffusion pipeline...')
-    [5 lines removed; their content is not rendered in this view]
+    max_retries = 3
+    retry_delay = 5
+
+    for attempt in range(max_retries):
+        try:
+            pipeline = DiffusionPipeline.from_pretrained(
+                "sudo-ai/zero123plus-v1.2",
+                custom_pipeline="zero123plus",
+                torch_dtype=torch.float16,
+                local_files_only=False,
+                resume_download=True,
+                token=True  # Use token-based auth
+            )
+            break
+        except (ReadTimeout, ConnectionError) as e:
+            if attempt == max_retries - 1:
+                raise Exception(f"Failed to download pipeline after {max_retries} attempts: {str(e)}")
+            print(f"Download attempt {attempt + 1} failed, retrying in {retry_delay} seconds...")
+            time.sleep(retry_delay)
+            retry_delay *= 2  # Exponential backoff
+
     pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
         pipeline.scheduler.config, timestep_spacing='trailing'
     )
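The retry loops added in this commit all follow the same pattern: attempt a Hugging Face download, break on success, and on a transient network error (ReadTimeout or ConnectionError) wait retry_delay seconds and double the delay before trying again, giving up after max_retries attempts. The same loop is repeated in the next hunk for the custom UNet and the reconstruction checkpoint. Purely as an illustration (not part of this commit), the pattern could be factored into a small helper; the name download_with_retries and its defaults are hypothetical:

    import time
    from requests.exceptions import ReadTimeout, ConnectionError

    def download_with_retries(download_fn, what, max_retries=3, retry_delay=5):
        # Run download_fn() and retry transient network errors with exponential backoff.
        for attempt in range(max_retries):
            try:
                return download_fn()
            except (ReadTimeout, ConnectionError) as e:
                if attempt == max_retries - 1:
                    # Keep the original exception as the cause for easier debugging.
                    raise Exception(f"Failed to download {what} after {max_retries} attempts: {e}") from e
                print(f"Download attempt {attempt + 1} failed, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff: 5 s, 10 s, 20 s, ...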
@@ -107,19 +125,49 @@ def load_models():
         new_conv_in.weight[:, :4, :, :].copy_(pipeline.unet.conv_in.weight)
     pipeline.unet.conv_in = new_conv_in

-    # Load custom UNet
+    # Load custom UNet with retry logic
     print('Loading custom UNet...')
-    [1 line removed; its content is not rendered in this view]
+    for attempt in range(max_retries):
+        try:
+            pipeline.unet = pipeline.unet.from_pretrained(
+                "YiftachEde/Sharp-It",
+                local_files_only=False,
+                resume_download=True,
+                token=True  # Use token-based auth
+            ).to(torch.float16)
+            break
+        except (ReadTimeout, ConnectionError) as e:
+            if attempt == max_retries - 1:
+                raise Exception(f"Failed to download UNet after {max_retries} attempts: {str(e)}")
+            print(f"Download attempt {attempt + 1} failed, retrying in {retry_delay} seconds...")
+            time.sleep(retry_delay)
+            retry_delay *= 2
+
     pipeline = pipeline.to(device).to(torch_dtype=torch.float16)

-    # Load reconstruction model
+    # Load reconstruction model with retry logic
     print('Loading reconstruction model...')
     model = instantiate_from_config(model_config)
-    [5 lines removed; their content is not rendered in this view]
+
+    for attempt in range(max_retries):
+        try:
+            model_path = hf_hub_download(
+                repo_id="TencentARC/InstantMesh",
+                filename="instant_nerf_large.ckpt",
+                repo_type="model",
+                local_files_only=False,
+                resume_download=True,
+                token=True,  # Use token-based auth
+                cache_dir="model_cache"  # Use a specific cache directory
+            )
+            break
+        except (ReadTimeout, ConnectionError) as e:
+            if attempt == max_retries - 1:
+                raise Exception(f"Failed to download model after {max_retries} attempts: {str(e)}")
+            print(f"Download attempt {attempt + 1} failed, retrying in {retry_delay} seconds...")
+            time.sleep(retry_delay)
+            retry_delay *= 2
+
     state_dict = torch.load(model_path, map_location='cpu')['state_dict']
     state_dict = {k[14:]: v for k, v in state_dict.items()
                   if k.startswith('lrm_generator.') and 'source_camera' not in k}
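With that hypothetical helper, the checkpoint download from the last hunk would reduce to a single wrapped call, for example:

    from huggingface_hub import hf_hub_download

    model_path = download_with_retries(
        lambda: hf_hub_download(
            repo_id="TencentARC/InstantMesh",
            filename="instant_nerf_large.ckpt",
            repo_type="model",
            resume_download=True,
            token=True,
            cache_dir="model_cache",
        ),
        what="InstantMesh checkpoint",
    )

With max_retries = 3 and an initial retry_delay = 5, each loop waits 5 and then 10 seconds between attempts before giving up. Note that the commit initializes retry_delay once, before the pipeline loop, and does not reset it before the UNet and checkpoint loops, so any retry during pipeline loading also lengthens the delays used by the later downloads.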