Update pipeline.py

pipeline.py: +5 -14
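In short: the commit drops the `prediction_type` plumbing (its docstring entry, the `supported_prediction_types` class attribute, the constructor argument and its warning, and its registration in the config), changes the default of `default_processing_resolution` from None to 768, and replaces the fixed affine remapping of the clipped depth prediction with per-tensor min-max normalization before the return.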
@@ -108,8 +108,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
             Text-encoder, for empty text embedding.
         tokenizer (`CLIPTokenizer`):
             CLIP tokenizer.
-        prediction_type (`str`, *optional*):
-            Type of predictions made by the model.
         default_processing_resolution (`int`, *optional*):
             The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
             the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
@@ -118,7 +116,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
     """

     model_cpu_offload_seq = "text_encoder->unet->vae"
-    supported_prediction_types = ("depth", "disparity")

     def __init__(
         self,
@@ -127,17 +124,10 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
         scheduler: Union[DDIMScheduler],
         text_encoder: CLIPTextModel,
         tokenizer: CLIPTokenizer,
-        prediction_type: Optional[str] = None,
-        default_processing_resolution: Optional[int] = None,
+        default_processing_resolution: Optional[int] = 768,
     ):
         super().__init__()

-        if prediction_type not in self.supported_prediction_types:
-            logger.warning(
-                f"Potentially unsupported `prediction_type='{prediction_type}'`; values supported by the pipeline: "
-                f"{self.supported_prediction_types}."
-            )
-
         self.register_modules(
             unet=unet,
             vae=vae,
@@ -146,7 +136,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
             tokenizer=tokenizer,
         )
         self.register_to_config(
-            prediction_type=prediction_type,
             default_processing_resolution=default_processing_resolution,
         )

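The constructor change above means a model config that omits `default_processing_resolution` now yields 768 rather than None. Below is a minimal sketch of the assumed call-time fallback; `resolve_processing_resolution` is a hypothetical illustration, not a function of the pipeline:

from typing import Optional

# Hypothetical helper (not in the pipeline) mirroring the assumed fallback:
# an explicit call-time value wins, otherwise the config default is used.
def resolve_processing_resolution(
    call_value: Optional[int],
    config_default: Optional[int] = 768,  # the new default registered by __init__
) -> int:
    if call_value is not None:
        return call_value
    if config_default is None:
        raise ValueError("Set processing_resolution in the call or the model config.")
    return config_default

assert resolve_processing_resolution(None) == 768   # falls back to the config default
assert resolve_processing_resolution(512) == 512    # explicit value takes precedence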
@@ -473,6 +462,8 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):

         prediction = prediction.mean(dim=1, keepdim=True)  # [B,1,H,W]
         prediction = torch.clip(prediction, -1.0, 1.0)  # [B,1,H,W]
-        prediction = (prediction + 1.0) / 2.0

-        return prediction  # [B,1,H,W]
+        # add
+        prediction = (prediction - prediction.min()) / (prediction.max() - prediction.min())
+
+        return prediction  # [B,1,H,W]
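The last hunk changes how the clipped prediction is mapped to [0, 1]. The removed line applied a fixed affine transform, so a prediction covering only part of [-1, 1] kept its offset and spread; the added line stretches whatever range the tensor actually covers to exactly [0, 1]. A small self-contained sketch of the difference, using a synthetic tensor rather than pipeline output:

import torch

torch.manual_seed(0)
prediction = torch.rand(2, 1, 4, 4) * 1.2 - 0.6   # synthetic values in about [-0.6, 0.6]
prediction = torch.clip(prediction, -1.0, 1.0)    # [B,1,H,W], as in the pipeline

# Old behavior: fixed affine map from [-1, 1] to [0, 1]; offset and spread survive.
affine = (prediction + 1.0) / 2.0                 # stays within about [0.2, 0.8]

# New behavior: min-max normalization; the output always spans the full [0, 1].
minmax = (prediction - prediction.min()) / (prediction.max() - prediction.min())

print(affine.min().item(), affine.max().item())   # roughly 0.2 and 0.8
print(minmax.min().item(), minmax.max().item())   # exactly 0.0 and 1.0

Two properties of the new line worth noting: `prediction.min()` and `prediction.max()` reduce over the whole tensor, so every sample in a batch shares one normalization, and the division is undefined for a constant prediction. Whether either case arises depends on how the pipeline is called, which this diff does not show.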