Philipp S commited on
Commit
2dd8e33
·
1 Parent(s): 3a0fd02

Add WebGPU demo files

Browse files
Files changed (3) hide show
  1. README.md +9 -7
  2. index.html +38 -18
  3. script.js +138 -0
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: DA 2 WebGPU
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: static
7
  pinned: false
8
- license: apache-2.0
9
- short_description: DA-2-WebGPU
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: DA-2 WebGPU Demo
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
+ # DA-2 WebGPU Demo
11
+
12
+ This is a client-side WebGPU demo for [DA-2: Depth Anything in Any Direction](https://huggingface.co/phiph/DA-2-WebGPU).
13
+
14
+ It runs entirely in your browser using ONNX Runtime Web.
index.html CHANGED
@@ -1,19 +1,39 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>DA-2 WebGPU Demo</title>
    <style>
        body { font-family: sans-serif; padding: 20px; max-width: 1200px; margin: 0 auto; }
        canvas { max-width: 100%; border: 1px solid #ccc; margin-top: 10px; display: block; }
        #controls { margin-bottom: 20px; padding: 10px; background: #f0f0f0; border-radius: 5px; }
        .container { display: flex; flex-wrap: wrap; gap: 20px; }
        .view { flex: 1; min-width: 300px; }
        #status { margin-left: 10px; font-weight: bold; }
    </style>
</head>
<body>
    <h1>DA-2 Depth Estimation (WebGPU)</h1>
    <p>Upload a 360° panorama image to estimate depth.</p>

    <!-- NOTE: the element ids below (imageInput, runBtn, status,
         inputCanvas, outputCanvas) are looked up by script.js —
         keep them in sync if renaming. -->
    <div id="controls">
        <input type="file" id="imageInput" accept="image/*">
        <!-- Disabled until the model has finished loading; enabled by script.js -->
        <button id="runBtn" disabled>Run Inference</button>
        <span id="status">Initializing...</span>
    </div>

    <!-- Side-by-side input/output views; wraps on narrow screens -->
    <div class="container">
        <div class="view">
            <h3>Input Image</h3>
            <canvas id="inputCanvas"></canvas>
        </div>
        <div class="view">
            <h3>Depth Map</h3>
            <canvas id="outputCanvas"></canvas>
        </div>
    </div>

    <!-- type="module" is required: script.js uses ES-module imports -->
    <script type="module" src="script.js"></script>
</body>
</html>
script.js ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Client-side depth-estimation demo: loads the DA-2 model via Transformers.js
// and runs it entirely in the browser (WebGPU, with a WASM fallback in init()).
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';

// Skip local model checks since we are fetching from HF Hub
env.allowLocalModels = false;

const MODEL_ID = 'phiph/DA-2-WebGPU';
// Canvas resolution used for both display and the inference input
// (2:1 aspect, matching an equirectangular panorama).
// NOTE(review): presumably this matches the model's expected input size — confirm.
const INPUT_WIDTH = 1092;
const INPUT_HEIGHT = 546;

// Set by init() once a pipeline loads successfully; null until then.
let depth_estimator = null;

// Cached DOM handles; ids are defined in index.html.
const statusElement = document.getElementById('status');
const runBtn = document.getElementById('runBtn');
const imageInput = document.getElementById('imageInput');
const inputCanvas = document.getElementById('inputCanvas');
const outputCanvas = document.getElementById('outputCanvas');
const inputCtx = inputCanvas.getContext('2d');
const outputCtx = outputCanvas.getContext('2d');
19
// Build the depth-estimation pipeline on the given backend ('webgpu' | 'wasm').
// Extracted so the WebGPU attempt and the WASM fallback share one config.
async function loadPipeline(device) {
    return pipeline('depth-estimation', MODEL_ID, {
        device,
        dtype: 'fp32', // Important: Model is FP32
    });
}

// Initialize the Transformers.js pipeline: try WebGPU first, fall back to
// WASM. On success, enables the Run button and updates the status line; on
// total failure, leaves the button disabled and shows the error.
//
// Fixes vs. original: the WebGPU error message was written to the status line
// and then immediately overwritten by 'WebGPU failed, trying WASM...' (dead
// write, removed), and the WASM failure `e2` was never logged to the console.
async function init() {
    try {
        statusElement.textContent = 'Loading model... (this may take a while)';
        depth_estimator = await loadPipeline('webgpu');
        statusElement.textContent = 'Model loaded. Ready.';
        runBtn.disabled = false;
    } catch (e) {
        console.error(e);
        // Fallback to wasm if webgpu fails
        try {
            statusElement.textContent = 'WebGPU failed, trying WASM...';
            depth_estimator = await loadPipeline('wasm');
            statusElement.textContent = 'Model loaded (WASM). Ready.';
            runBtn.disabled = false;
        } catch (e2) {
            console.error(e2);
            statusElement.textContent = 'Error loading model (WASM): ' + e2.message;
        }
    }
}
49
+
50
// Preview the newly selected file on the input canvas (scaled to the fixed
// demo resolution) and clear any stale depth map from the output canvas.
//
// Fix vs. original: the blob URL from URL.createObjectURL() was never
// revoked, leaking one object URL per file selection; a decode-error
// handler is also added so a bad file doesn't fail silently.
imageInput.addEventListener('change', (e) => {
    const file = e.target.files[0];
    if (!file) return;

    const objectUrl = URL.createObjectURL(file);
    const img = new Image();
    img.onload = () => {
        // The image is decoded; the blob URL is no longer needed.
        URL.revokeObjectURL(objectUrl);

        inputCanvas.width = INPUT_WIDTH;
        inputCanvas.height = INPUT_HEIGHT;
        inputCtx.drawImage(img, 0, 0, INPUT_WIDTH, INPUT_HEIGHT);

        // Clear output
        outputCanvas.width = INPUT_WIDTH;
        outputCanvas.height = INPUT_HEIGHT;
        outputCtx.clearRect(0, 0, INPUT_WIDTH, INPUT_HEIGHT);
    };
    img.onerror = () => {
        URL.revokeObjectURL(objectUrl);
        statusElement.textContent = 'Could not decode the selected image.';
    };
    img.src = objectUrl;
});
67
+
68
// Run depth estimation on whatever is currently drawn on the input canvas
// and render the result. The button is disabled for the duration of the run
// and re-enabled in `finally` regardless of outcome.
//
// Fixes vs. original: the diagnostic status in the fallback branch
// ('Done (Check console...)') was immediately clobbered by an unconditional
// 'Done.'; and the depth buffer was always visualized at the canvas size
// even when the returned depth image carries its own dimensions.
runBtn.addEventListener('click', async () => {
    if (!depth_estimator) return;

    statusElement.textContent = 'Running inference...';
    runBtn.disabled = true;

    try {
        // Get the image source from the canvas (or the file URL directly).
        // Using the canvas data ensures we are passing what the user sees.
        const url = inputCanvas.toDataURL();

        // Run inference; the pipeline handles preprocessing (resize,
        // rescale) automatically.
        const output = await depth_estimator(url);

        if (output.depth) {
            // In stock Transformers.js, `output.depth` is a RawImage with
            // `data`, `width` and `height`; fall back to the fixed canvas
            // size if this custom model returns a bare tensor-like object.
            const width = output.depth.width ?? INPUT_WIDTH;
            const height = output.depth.height ?? INPUT_HEIGHT;
            visualize(output.depth.data, width, height);
            statusElement.textContent = 'Done.';
        } else {
            // Fallback if structure is different.
            console.log("Output structure:", output);
            statusElement.textContent = 'Done (Check console for output structure).';
        }
    } catch (e) {
        console.error(e);
        statusElement.textContent = 'Error running inference: ' + e.message;
    } finally {
        runBtn.disabled = false;
    }
});
105
+
106
/**
 * Render a single-channel depth buffer as a grayscale image on the output
 * canvas. Values are min-max normalized to [0, 1] and then inverted, so the
 * smallest value (closest sample, since the model outputs distance) renders
 * white and the largest renders black.
 *
 * @param {ArrayLike<number>} data - flat depth values, row-major, length width*height
 * @param {number} width - image width in pixels
 * @param {number} height - image height in pixels
 */
function visualize(data, width, height) {
    // Single pass to find the value range used for normalization.
    let lo = Infinity;
    let hi = -Infinity;
    for (let i = 0; i < data.length; i++) {
        const v = data[i];
        if (v < lo) lo = v;
        if (v > hi) hi = v;
    }
    const span = hi - lo;

    const imageData = outputCtx.createImageData(width, height);
    const px = imageData.data;

    for (let i = 0; i < data.length; i++) {
        // Normalize (guarding against a flat buffer where span is 0),
        // then invert: small distance -> bright pixel.
        const t = (data[i] - lo) / (span || 1);
        const gray = Math.floor((1 - t) * 255);

        const o = i * 4;
        px[o] = gray;     // R
        px[o + 1] = gray; // G
        px[o + 2] = gray; // B
        px[o + 3] = 255;  // Alpha
    }

    outputCtx.putImageData(imageData, 0, 0);
}
137
+
138
+ init();