Ashoka74 committed on
Commit
6cbd596
1 Parent(s): 5420551

Upload 7 files

config.json ADDED
@@ -0,0 +1,81 @@
+ {
+   "_commit_hash": null,
+   "architectures": [
+     "DepthAnythingForDepthEstimation"
+   ],
+   "backbone": null,
+   "backbone_config": {
+     "architectures": [
+       "Dinov2Model"
+     ],
+     "hidden_size": 1024,
+     "image_size": 518,
+     "model_type": "dinov2",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "out_features": [
+       "stage5",
+       "stage12",
+       "stage18",
+       "stage24"
+     ],
+     "out_indices": [
+       5,
+       12,
+       18,
+       24
+     ],
+     "patch_size": 14,
+     "reshape_hidden_states": false,
+     "stage_names": [
+       "stem",
+       "stage1",
+       "stage2",
+       "stage3",
+       "stage4",
+       "stage5",
+       "stage6",
+       "stage7",
+       "stage8",
+       "stage9",
+       "stage10",
+       "stage11",
+       "stage12",
+       "stage13",
+       "stage14",
+       "stage15",
+       "stage16",
+       "stage17",
+       "stage18",
+       "stage19",
+       "stage20",
+       "stage21",
+       "stage22",
+       "stage23",
+       "stage24"
+     ],
+     "torch_dtype": "float32"
+   },
+   "fusion_hidden_size": 256,
+   "head_hidden_size": 32,
+   "head_in_index": -1,
+   "initializer_range": 0.02,
+   "model_type": "depth_anything",
+   "neck_hidden_sizes": [
+     256,
+     512,
+     1024,
+     1024
+   ],
+   "patch_size": 14,
+   "reassemble_factors": [
+     4,
+     2,
+     1,
+     0.5
+   ],
+   "reassemble_hidden_size": 1024,
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "use_pretrained_backbone": false
+ }
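For reference, this config.json alone is enough to instantiate the architecture it describes (a DINOv2-Large backbone with a DPT-style neck and depth head). A minimal sketch with transformers, assuming a recent release with Depth Anything support and a local clone at "./"; the weights are not part of this file, so the model below is randomly initialized:

```python
from transformers import AutoConfig, DepthAnythingForDepthEstimation

# reads the config.json added above; "./" is a placeholder for the local clone
config = AutoConfig.from_pretrained("./")
print(config.model_type)                  # "depth_anything"
print(config.backbone_config.model_type)  # "dinov2"

# builds the backbone/neck/head described by the config; weights stay
# randomly initialized until a converted checkpoint is loaded on top
model = DepthAnythingForDepthEstimation(config)
```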
depth_anything_v2_vits.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:715fade13be8f229f8a70cc02066f656f2423a59effd0579197bbf57860e1378
+ size 99218434
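The three lines above are a Git LFS pointer, not the checkpoint itself: the oid is the SHA-256 of the real ~99 MB file, which Git LFS (or the Hub) resolves on download. A hedged sketch of fetching and verifying it with huggingface_hub; "Ashoka74/<repo-name>" is a placeholder for the actual repo id:

```python
import hashlib
from huggingface_hub import hf_hub_download

# "Ashoka74/<repo-name>" is a placeholder; substitute the real repo id
path = hf_hub_download(repo_id="Ashoka74/<repo-name>",
                       filename="depth_anything_v2_vits.pth")

# recompute the SHA-256 in chunks and compare against the oid in the pointer
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == "715fade13be8f229f8a70cc02066f656f2423a59effd0579197bbf57860e1378"
```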
iclight_sd15_fbc.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb8ccedaa4944b16cfa8356afcbc2c2174cc4c4af57de19124ae0cddd0d96947
+ size 1719171352
iclight_sd15_fc.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a033fbaaa2f3f7859fa6a4477ee63ebbf9c116bf3569d5811856d2807f3468cd
+ size 1719148312
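Both iclight_sd15_* files are safetensors state dicts (the IC-Light fc and fbc variants for Stable Diffusion 1.5). A minimal sketch of inspecting one without instantiating any pipeline, assuming the file has already been downloaded locally:

```python
from safetensors.torch import load_file

# load the tensors of the fc variant onto CPU as a plain dict
state_dict = load_file("iclight_sd15_fc.safetensors")
print(len(state_dict), "tensors")

# peek at the first few entries: name, shape, dtype
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)
```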
preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_valid_processor_keys": [
+     "images",
+     "do_resize",
+     "size",
+     "keep_aspect_ratio",
+     "ensure_multiple_of",
+     "resample",
+     "do_rescale",
+     "rescale_factor",
+     "do_normalize",
+     "image_mean",
+     "image_std",
+     "do_pad",
+     "size_divisor",
+     "return_tensors",
+     "data_format",
+     "input_data_format"
+   ],
+   "do_normalize": true,
+   "do_pad": false,
+   "do_rescale": true,
+   "do_resize": true,
+   "ensure_multiple_of": 14,
+   "image_mean": [
+     0.485,
+     0.456,
+     0.406
+   ],
+   "image_processor_type": "DPTImageProcessor",
+   "image_std": [
+     0.229,
+     0.224,
+     0.225
+   ],
+   "keep_aspect_ratio": true,
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 518,
+     "width": 518
+   },
+   "size_divisor": null
+ }
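These settings reproduce the standard DPT preprocessing for Depth Anything: resize toward 518 px keeping aspect ratio, force both sides to a multiple of 14 (the ViT patch size), rescale by 1/255, then normalize with ImageNet mean/std. A minimal usage sketch, assuming a local clone at "./" and any RGB test image:

```python
from PIL import Image
from transformers import AutoImageProcessor

# reads the preprocessor_config.json added above
processor = AutoImageProcessor.from_pretrained("./")

image = Image.open("demo2.jpg").convert("RGB")  # any RGB image works here
inputs = processor(images=image, return_tensors="pt")

# both spatial dims come out as multiples of 14, e.g. (1, 3, 518, 686)
print(inputs["pixel_values"].shape)
```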
prompt_free_demo.py ADDED
@@ -0,0 +1,104 @@
+ # dds cloudapi for DINO-X
+ from dds_cloudapi_sdk import Config
+ from dds_cloudapi_sdk import Client
+ from dds_cloudapi_sdk.tasks.dinox import DinoxTask
+ from dds_cloudapi_sdk.tasks.detection import DetectionTask
+ from dds_cloudapi_sdk.tasks.types import DetectionTarget
+ from dds_cloudapi_sdk import TextPrompt
+
+ # using supervision for visualization
+ import os
+ import cv2
+ import numpy as np
+ import supervision as sv
+ from pathlib import Path
+
+ import gradio as gr
+
+
+ """
+ Hyperparameters
+ """
+ API_TOKEN = "Your API Token"
+ IMG_PATH = "demo2.jpg"
+ TEXT_PROMPT = "<prompt_free>"
+ OUTPUT_DIR = Path("./outputs/prompt_free_detection_segmentation")
+
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+ """
+ Prompting DINO-X with Text for Box and Mask Generation with Cloud API
+ """
+
+ # Step 1: initialize the config
+ token = API_TOKEN
+ config = Config(token)
+
+ # Step 2: initialize the client
+ client = Client(config)
+
+ # Step 3: run the DINO-X task
+ # if you are processing a local image file, upload it to the DDS server to get an image URL
+ image_url = client.upload_file(IMG_PATH)
+
+ task = DinoxTask(
+     image_url=image_url,
+     prompts=[TextPrompt(text=TEXT_PROMPT)],
+     bbox_threshold=0.25,
+     targets=[DetectionTarget.BBox, DetectionTarget.Mask]
+ )
+ client.run_task(task)
+ predictions = task.result.objects
+
+ """
+ Visualization
+ """
+ # decode the prediction results
+ classes = [pred.category for pred in predictions]
+ classes = list(set(classes))
+ class_name_to_id = {name: id for id, name in enumerate(classes)}
+ class_id_to_name = {id: name for name, id in class_name_to_id.items()}
+
+ boxes = []
+ masks = []
+ confidences = []
+ class_names = []
+ class_ids = []
+
+ for obj in predictions:
+     boxes.append(obj.bbox)
+     masks.append(DetectionTask.rle2mask(DetectionTask.string2rle(obj.mask.counts), obj.mask.size))  # convert the RLE-encoded mask to an np.array using the DDS API
+     confidences.append(obj.score)
+     cls_name = obj.category.lower().strip()
+     class_names.append(cls_name)
+     class_ids.append(class_name_to_id[cls_name])
+
+ boxes = np.array(boxes)
+ masks = np.array(masks)
+ class_ids = np.array(class_ids)
+ labels = [
+     f"{class_name} {confidence:.2f}"
+     for class_name, confidence
+     in zip(class_names, confidences)
+ ]
+
+ img = cv2.imread(IMG_PATH)
+ detections = sv.Detections(
+     xyxy=boxes,
+     mask=masks.astype(bool),
+     class_id=class_ids,
+ )
+
+ box_annotator = sv.BoxAnnotator()
+ annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+
+ label_annotator = sv.LabelAnnotator()
+ annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
+ cv2.imwrite(os.path.join(OUTPUT_DIR, "annotated_demo_image.jpg"), annotated_frame)
+
+
+ mask_annotator = sv.MaskAnnotator()
+ annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
+ cv2.imwrite(os.path.join(OUTPUT_DIR, "annotated_demo_image_with_mask.jpg"), annotated_frame)
+
+ print(f"Annotated images have been saved to {OUTPUT_DIR}")
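One hedged hardening of the script above: the DDS token is hard-coded, so reading it from an environment variable avoids committing a real key. DDS_API_TOKEN is an assumed variable name, not part of the SDK:

```python
import os

# assumed environment variable name; replaces the hard-coded API_TOKEN above
API_TOKEN = os.environ["DDS_API_TOKEN"]
```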
sam2_hiera_large.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7442e4e9b732a508f80e141e7c2913437a3610ee0c77381a66658c3a445df87b
+ size 897952466
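sam2_hiera_large.pt is the SAM 2 Hiera-Large checkpoint published by facebookresearch/sam2. A minimal loading sketch, assuming the sam2 package is installed and that its bundled "sam2_hiera_l.yaml" config matches this checkpoint:

```python
import torch
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# "sam2_hiera_l.yaml" ships with the sam2 package and pairs with this checkpoint
sam2_model = build_sam2("sam2_hiera_l.yaml", "sam2_hiera_large.pt",
                        device="cuda" if torch.cuda.is_available() else "cpu")

# wrap the model in an image predictor for point/box-prompted segmentation
predictor = SAM2ImagePredictor(sam2_model)
```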