Spaces:
Running
Running
feat: handle images instead of webcam
Browse files- .gitignore +1 -0
- package-lock.json +0 -0
- src/App.tsx +24 -85
- src/components/ImageAnalysisView.tsx +207 -0
- src/components/ImageUpload.tsx +129 -0
- src/components/PromptInput.tsx +10 -6
- src/context/VLMContext.tsx +27 -13
- src/types/index.ts +7 -1
- src/types/vlm.ts +1 -1
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
node_modules/
|
package-lock.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/App.tsx
CHANGED
@@ -1,109 +1,48 @@
|
|
1 |
-
import { useState,
|
2 |
import LoadingScreen from "./components/LoadingScreen";
|
3 |
-
import
|
4 |
-
import
|
5 |
-
import WebcamPermissionDialog from "./components/WebcamPermissionDialog";
|
6 |
import type { AppState } from "./types";
|
7 |
|
8 |
export default function App() {
|
9 |
-
const [appState, setAppState] = useState<AppState>("
|
10 |
-
const [
|
11 |
-
const [isVideoReady, setIsVideoReady] = useState(false);
|
12 |
-
const videoRef = useRef<HTMLVideoElement | null>(null);
|
13 |
|
14 |
-
const
|
15 |
-
|
16 |
-
setAppState("welcome");
|
17 |
-
}, []);
|
18 |
-
|
19 |
-
const handleStart = useCallback(() => {
|
20 |
setAppState("loading");
|
21 |
}, []);
|
22 |
|
23 |
const handleLoadingComplete = useCallback(() => {
|
24 |
-
setAppState("
|
25 |
}, []);
|
26 |
|
27 |
-
const
|
28 |
-
|
29 |
-
|
30 |
-
} catch (error) {
|
31 |
-
console.error("Failed to play video:", error);
|
32 |
-
}
|
33 |
}, []);
|
34 |
|
35 |
-
const setupVideo = useCallback(
|
36 |
-
(video: HTMLVideoElement, stream: MediaStream) => {
|
37 |
-
video.srcObject = stream;
|
38 |
-
|
39 |
-
const handleCanPlay = () => {
|
40 |
-
setIsVideoReady(true);
|
41 |
-
playVideo(video);
|
42 |
-
};
|
43 |
-
|
44 |
-
video.addEventListener("canplay", handleCanPlay, { once: true });
|
45 |
-
|
46 |
-
return () => {
|
47 |
-
video.removeEventListener("canplay", handleCanPlay);
|
48 |
-
};
|
49 |
-
},
|
50 |
-
[playVideo],
|
51 |
-
);
|
52 |
-
|
53 |
-
useEffect(() => {
|
54 |
-
if (webcamStream && videoRef.current) {
|
55 |
-
const video = videoRef.current;
|
56 |
-
|
57 |
-
video.srcObject = null;
|
58 |
-
video.load();
|
59 |
-
|
60 |
-
const cleanup = setupVideo(video, webcamStream);
|
61 |
-
return cleanup;
|
62 |
-
}
|
63 |
-
}, [webcamStream, setupVideo]);
|
64 |
-
|
65 |
-
const videoBlurState = useMemo(() => {
|
66 |
-
switch (appState) {
|
67 |
-
case "requesting-permission":
|
68 |
-
return "blur(20px) brightness(0.2) saturate(0.5)";
|
69 |
-
case "welcome":
|
70 |
-
return "blur(12px) brightness(0.3) saturate(0.7)";
|
71 |
-
case "loading":
|
72 |
-
return "blur(8px) brightness(0.4) saturate(0.8)";
|
73 |
-
case "captioning":
|
74 |
-
return "none";
|
75 |
-
default:
|
76 |
-
return "blur(20px) brightness(0.2) saturate(0.5)";
|
77 |
-
}
|
78 |
-
}, [appState]);
|
79 |
-
|
80 |
return (
|
81 |
<div className="App relative h-screen overflow-hidden">
|
82 |
-
<div className="absolute inset-0 bg-gray-900" />
|
83 |
|
84 |
-
{
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
className="absolute inset-0 w-full h-full object-cover transition-all duration-1000 ease-out"
|
91 |
-
style={{
|
92 |
-
filter: videoBlurState,
|
93 |
-
opacity: isVideoReady ? 1 : 0,
|
94 |
-
}}
|
95 |
/>
|
96 |
)}
|
97 |
|
98 |
-
{appState !== "captioning" && <div className="absolute inset-0 bg-gray-900/80 backdrop-blur-sm" />}
|
99 |
-
|
100 |
-
{appState === "requesting-permission" && <WebcamPermissionDialog onPermissionGranted={handlePermissionGranted} />}
|
101 |
-
|
102 |
-
{appState === "welcome" && <WelcomeScreen onStart={handleStart} />}
|
103 |
-
|
104 |
{appState === "loading" && <LoadingScreen onComplete={handleLoadingComplete} />}
|
105 |
|
106 |
-
{appState === "
|
|
|
|
|
|
|
|
|
|
|
107 |
</div>
|
108 |
);
|
109 |
}
|
|
|
1 |
+
import { useState, useCallback } from "react";
|
2 |
import LoadingScreen from "./components/LoadingScreen";
|
3 |
+
import ImageUpload from "./components/ImageUpload";
|
4 |
+
import ImageAnalysisView from "./components/ImageAnalysisView";
|
|
|
5 |
import type { AppState } from "./types";
|
6 |
|
7 |
/**
 * Root component of the image-captioning app.
 *
 * Drives a three-step flow stored in `appState`:
 * "upload" (pick images) -> "loading" (LoadingScreen runs) -> "analyzing"
 * (ImageAnalysisView shows the picked images). Going back from the analysis
 * view clears the picked images and returns to "upload".
 */
export default function App() {
  const [appState, setAppState] = useState<AppState>("upload");
  const [uploadedImages, setUploadedImages] = useState<File[]>([]);

  // Remember the chosen files, then hand over to the loading screen.
  const handleImagesUploaded = useCallback((selected: File[]) => {
    setUploadedImages(selected);
    setAppState("loading");
  }, []);

  // LoadingScreen reports completion; move on to the analysis view.
  const handleLoadingComplete = useCallback(() => {
    setAppState("analyzing");
  }, []);

  // Drop the current selection and start over at the upload step.
  const handleBackToUpload = useCallback(() => {
    setUploadedImages([]);
    setAppState("upload");
  }, []);

  // The dimming overlay is shown on every screen except the analysis view.
  const showDimOverlay = appState !== "analyzing";

  // Exactly one screen is rendered for each app state.
  const renderScreen = () => {
    switch (appState) {
      case "upload":
        return <ImageUpload onImagesUploaded={handleImagesUploaded} isAnalyzing={false} />;
      case "loading":
        return <LoadingScreen onComplete={handleLoadingComplete} />;
      case "analyzing":
        return <ImageAnalysisView images={uploadedImages} onBackToUpload={handleBackToUpload} />;
      default:
        return null;
    }
  };

  return (
    <div className="App relative h-screen overflow-hidden">
      <div className="absolute inset-0 bg-gradient-to-br from-gray-900 via-blue-900/20 to-purple-900/20" />

      {showDimOverlay && <div className="absolute inset-0 bg-gray-900/80 backdrop-blur-sm" />}

      {renderScreen()}
    </div>
  );
}
|
src/components/ImageAnalysisView.tsx
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { useState, useRef, useEffect, useCallback } from "react";
|
2 |
+
import DraggableContainer from "./DraggableContainer";
|
3 |
+
import PromptInput from "./PromptInput";
|
4 |
+
import GlassButton from "./GlassButton";
|
5 |
+
import GlassContainer from "./GlassContainer";
|
6 |
+
import { useVLMContext } from "../context/useVLMContext";
|
7 |
+
import { PROMPTS, GLASS_EFFECTS } from "../constants";
|
8 |
+
import type { ImageAnalysisResult } from "../types";
|
9 |
+
|
10 |
+
interface ImageAnalysisViewProps {
  /** Image files to preview and caption, in display order. */
  images: File[];
  /** Called when the user clicks "Back to Upload". */
  onBackToUpload: () => void;
}

/**
 * Shows a large preview of the selected image next to a sidebar listing every
 * image with its thumbnail and analysis result, and lets the user run the
 * current prompt against all images sequentially ("Analyze All") or stop a
 * run in progress ("Stop").
 *
 * Notes on behavior:
 * - Each run owns its own AbortController. A stopped run never resumes and
 *   never appends a result that arrives after it was stopped.
 * - The image being processed (`processingIndex`) is tracked separately from
 *   the image the user has selected (`currentImageIndex`), so clicking a
 *   thumbnail during a run does not corrupt the progress indicators.
 * - Thumbnail object URLs are created once per `images` array and revoked on
 *   cleanup instead of being re-created on every render.
 */
export default function ImageAnalysisView({ images, onBackToUpload }: ImageAnalysisViewProps) {
  const [results, setResults] = useState<ImageAnalysisResult[]>([]);
  const [currentPrompt, setCurrentPrompt] = useState<string>(PROMPTS.default);
  const [isAnalyzing, setIsAnalyzing] = useState<boolean>(false);
  const [currentImageIndex, setCurrentImageIndex] = useState<number>(0);
  const [processingIndex, setProcessingIndex] = useState<number | null>(null);
  const [selectedImageUrl, setSelectedImageUrl] = useState<string>("");
  const [thumbnailUrls, setThumbnailUrls] = useState<string[]>([]);

  const { isLoaded, runInference } = useVLMContext();
  const abortControllerRef = useRef<AbortController | null>(null);

  // Create (and later revoke) the preview URL for the selected image.
  useEffect(() => {
    const file = images[currentImageIndex];
    if (!file) {
      // Nothing selected: make sure no stale (already revoked) URL is shown.
      setSelectedImageUrl("");
      return undefined;
    }
    const url = URL.createObjectURL(file);
    setSelectedImageUrl(url);
    return () => {
      URL.revokeObjectURL(url);
    };
  }, [images, currentImageIndex]);

  // Create one object URL per image for the sidebar thumbnails and revoke
  // them all when the image list changes or the component unmounts.
  useEffect(() => {
    const urls = images.map((file) => URL.createObjectURL(file));
    setThumbnailUrls(urls);
    return () => {
      urls.forEach((url) => URL.revokeObjectURL(url));
    };
  }, [images]);

  const analyzeAllImages = useCallback(async () => {
    if (!isLoaded || isAnalyzing) return;

    // Each run gets its own controller; the loop below only consults the
    // signal it captured, so a later run can never "un-abort" this one.
    abortControllerRef.current?.abort();
    const controller = new AbortController();
    abortControllerRef.current = controller;
    const { signal } = controller;

    setIsAnalyzing(true);
    setResults([]);

    const analysisResults: ImageAnalysisResult[] = [];

    try {
      for (const [index, file] of images.entries()) {
        if (signal.aborted) break;

        // Follow the run in the preview and mark which image is in flight.
        setCurrentImageIndex(index);
        setProcessingIndex(index);

        let entry: ImageAnalysisResult;
        try {
          const caption = await runInference(file, currentPrompt);
          // runInference resolves to "" when it skipped the request because
          // another inference was still running; surface that instead of
          // showing a blank "successful" caption.
          entry =
            caption === ""
              ? {
                  file,
                  caption: "",
                  error: "No caption returned (inference was skipped or produced no text)",
                }
              : { file, caption };
        } catch (error) {
          const errorMsg = error instanceof Error ? error.message : String(error);
          entry = { file, caption: "", error: errorMsg };
        }

        // The run may have been stopped while awaiting; discard late results.
        if (signal.aborted) break;

        analysisResults.push(entry);
        setResults([...analysisResults]);
      }
    } catch (error) {
      console.error("Analysis interrupted:", error);
    } finally {
      // A stopped run already had its UI state reset by stopAnalysis (or the
      // component unmounted); only a run that finished on its own resets it.
      if (!signal.aborted) {
        setIsAnalyzing(false);
        setProcessingIndex(null);
      }
    }
  }, [images, currentPrompt, isLoaded, runInference, isAnalyzing]);

  const handlePromptChange = useCallback((prompt: string) => {
    setCurrentPrompt(prompt);
  }, []);

  const handleImageSelect = useCallback((index: number) => {
    setCurrentImageIndex(index);
  }, []);

  const stopAnalysis = useCallback(() => {
    abortControllerRef.current?.abort();
    setIsAnalyzing(false);
    setProcessingIndex(null);
  }, []);

  // Abort any run still in progress when the view unmounts.
  useEffect(() => {
    return () => {
      abortControllerRef.current?.abort();
    };
  }, []);

  return (
    <div className="absolute inset-0 text-white">
      {/* Main image display */}
      <div className="relative w-full h-full flex">
        {/* Image preview */}
        <div className="flex-1 flex items-center justify-center p-8">
          {selectedImageUrl && (
            <img
              src={selectedImageUrl}
              alt={`Preview of ${images[currentImageIndex]?.name}`}
              className="max-w-full max-h-full object-contain rounded-lg shadow-2xl"
            />
          )}
        </div>

        {/* Sidebar with image thumbnails and results */}
        <div className="w-80 bg-black/20 backdrop-blur-sm border-l border-white/20 overflow-y-auto">
          {/* Controls */}
          <div className="p-4 border-b border-white/20">
            <div className="flex gap-2 mb-4">
              <GlassButton onClick={onBackToUpload} className="flex-1">
                Back to Upload
              </GlassButton>
              {!isAnalyzing ? (
                <GlassButton
                  onClick={analyzeAllImages}
                  disabled={!isLoaded}
                  className="flex-1"
                >
                  Analyze All
                </GlassButton>
              ) : (
                <GlassButton onClick={stopAnalysis} className="flex-1 bg-red-500/20">
                  Stop
                </GlassButton>
              )}
            </div>

            {isAnalyzing && processingIndex !== null && (
              <div className="text-sm text-white/70 text-center">
                Analyzing image {processingIndex + 1} of {images.length}...
              </div>
            )}
          </div>

          {/* Image list with results */}
          <div className="p-4 space-y-4">
            {images.map((file, index) => {
              const result = results.find((r) => r.file === file);
              const isSelected = index === currentImageIndex;
              const isProcessing = isAnalyzing && index === processingIndex;
              const thumbnailUrl = thumbnailUrls[index];

              return (
                <div
                  key={`${file.name}-${index}`}
                  className={`cursor-pointer transition-all duration-200 ${
                    isSelected ? 'ring-2 ring-blue-400' : ''
                  }`}
                  onClick={() => handleImageSelect(index)}
                >
                  <GlassContainer
                    bgColor={isSelected ? GLASS_EFFECTS.COLORS.BUTTON_BG : GLASS_EFFECTS.COLORS.DEFAULT_BG}
                    className="p-3 rounded-lg"
                  >
                    <div className="flex items-start gap-3">
                      {/* Thumbnail (grey placeholder box until its URL exists) */}
                      <div className="w-16 h-16 bg-gray-700 rounded flex items-center justify-center text-xs flex-shrink-0">
                        {thumbnailUrl && (
                          <img
                            src={thumbnailUrl}
                            alt={file.name}
                            className="w-full h-full object-cover rounded"
                          />
                        )}
                      </div>

                      {/* Content */}
                      <div className="flex-1 min-w-0">
                        <div className="text-sm font-medium truncate mb-1">
                          {file.name}
                        </div>

                        {isProcessing && (
                          <div className="text-xs text-blue-400">
                            Processing...
                          </div>
                        )}

                        {result && (
                          <div className="text-xs">
                            {result.error ? (
                              <div className="text-red-400">
                                Error: {result.error}
                              </div>
                            ) : (
                              <div className="text-white/80">
                                {result.caption}
                              </div>
                            )}
                          </div>
                        )}
                      </div>
                    </div>
                  </GlassContainer>
                </div>
              );
            })}
          </div>
        </div>
      </div>

      {/* Draggable Prompt Input - Bottom Left */}
      <DraggableContainer initialPosition="bottom-left">
        <PromptInput
          onPromptChange={handlePromptChange}
          disabled={isAnalyzing}
        />
      </DraggableContainer>
    </div>
  );
}
|
src/components/ImageUpload.tsx
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { useState, useCallback, useRef } from "react";
|
2 |
+
import GlassButton from "./GlassButton";
|
3 |
+
import GlassContainer from "./GlassContainer";
|
4 |
+
import { GLASS_EFFECTS } from "../constants";
|
5 |
+
|
6 |
+
interface ImageUploadProps {
  /** Receives the accepted image files; only called with a non-empty array. */
  onImagesUploaded: (files: File[]) => void;
  /** When true the drop zone is dimmed and ignores clicks and drops. */
  isAnalyzing: boolean;
}

/**
 * Full-screen drop zone for choosing images, either by drag-and-drop or via
 * the native file picker. Files whose MIME type does not start with "image/"
 * are filtered out; if nothing remains, `onImagesUploaded` is not called.
 */
export default function ImageUpload({ onImagesUploaded, isAnalyzing }: ImageUploadProps) {
  const [dragActive, setDragActive] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

  // Keep only image files and forward them if any are left.
  const handleFiles = useCallback(
    (files: FileList | null) => {
      if (!files) return;

      const imageFiles = Array.from(files).filter((file) => file.type.startsWith("image/"));

      if (imageFiles.length > 0) {
        onImagesUploaded(imageFiles);
      }
    },
    [onImagesUploaded],
  );

  // dragover must be cancelled for the element to count as a drop target.
  const handleDrag = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
  }, []);

  const handleDragIn = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    if (e.dataTransfer?.items && e.dataTransfer.items.length > 0) {
      setDragActive(true);
    }
  }, []);

  const handleDragOut = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();

    // dragleave also fires when the pointer moves onto a child of the drop
    // zone; in that case the drag is still inside, so keep the highlight.
    const enteredElement = e.relatedTarget;
    if (enteredElement instanceof Node && e.currentTarget.contains(enteredElement)) {
      return;
    }

    setDragActive(false);
  }, []);

  const handleDrop = useCallback(
    (e: React.DragEvent) => {
      e.preventDefault();
      e.stopPropagation();
      setDragActive(false);

      // Do not rely on CSS alone to make the zone inert while analyzing.
      if (isAnalyzing) return;

      if (e.dataTransfer?.files && e.dataTransfer.files.length > 0) {
        handleFiles(e.dataTransfer.files);
      }
    },
    [handleFiles, isAnalyzing],
  );

  const handleFileInputChange = useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      handleFiles(e.target.files);
      // Reset the input so picking the same file(s) again still fires
      // "change"; handleFiles has already copied the FileList into an array.
      e.target.value = "";
    },
    [handleFiles],
  );

  // Open the native file picker unless an analysis is running.
  const handleClick = useCallback(() => {
    if (!isAnalyzing) {
      fileInputRef.current?.click();
    }
  }, [isAnalyzing]);

  return (
    <div className="absolute inset-0 flex items-center justify-center">
      <GlassContainer
        bgColor={dragActive ? GLASS_EFFECTS.COLORS.BUTTON_BG : GLASS_EFFECTS.COLORS.DEFAULT_BG}
        className={`p-8 rounded-2xl border-2 border-dashed transition-all duration-300 cursor-pointer max-w-md mx-4 ${
          dragActive ? "border-blue-400 scale-105" : "border-white/30"
        } ${isAnalyzing ? "opacity-50 pointer-events-none" : "hover:border-white/50"}`}
        onDragEnter={handleDragIn}
        onDragLeave={handleDragOut}
        onDragOver={handleDrag}
        onDrop={handleDrop}
        onClick={handleClick}
      >
        <div className="text-center text-white">
          <div className="mb-4">
            <svg
              className="mx-auto w-16 h-16 text-white/60"
              fill="none"
              stroke="currentColor"
              viewBox="0 0 24 24"
            >
              <path
                strokeLinecap="round"
                strokeLinejoin="round"
                strokeWidth={1.5}
                d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"
              />
            </svg>
          </div>

          <h3 className="text-xl font-semibold mb-2">Upload Images</h3>
          <p className="text-white/80 mb-4">
            Drag and drop images here, or click to select files
          </p>
          <p className="text-sm text-white/60 mb-6">
            Supports JPG, PNG, GIF, WebP formats. Multiple files allowed.
          </p>

          <GlassButton disabled={isAnalyzing}>
            {isAnalyzing ? "Analyzing..." : "Choose Files"}
          </GlassButton>
        </div>

        <input
          ref={fileInputRef}
          type="file"
          multiple
          accept="image/*"
          onChange={handleFileInputChange}
          className="hidden"
        />
      </GlassContainer>
    </div>
  );
}
|
src/components/PromptInput.tsx
CHANGED
@@ -5,9 +5,10 @@ import GlassContainer from "./GlassContainer";
|
|
5 |
interface PromptInputProps {
|
6 |
onPromptChange: (prompt: string) => void;
|
7 |
defaultPrompt?: string;
|
|
|
8 |
}
|
9 |
|
10 |
-
export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.default }: PromptInputProps) {
|
11 |
const [prompt, setPrompt] = useState(defaultPrompt);
|
12 |
const [showSuggestions, setShowSuggestions] = useState(false);
|
13 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
@@ -116,10 +117,13 @@ export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.de
|
|
116 |
ref={inputRef}
|
117 |
value={prompt}
|
118 |
onChange={handleInputChange}
|
119 |
-
onFocus={handleInputFocus}
|
120 |
-
onBlur={handleInputBlur}
|
121 |
-
onClick={handleInputClick}
|
122 |
-
|
|
|
|
|
|
|
123 |
style={{
|
124 |
background: "var(--input-bg)",
|
125 |
borderColor: "var(--input-border)",
|
@@ -132,7 +136,7 @@ export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.de
|
|
132 |
placeholder={PROMPTS.placeholder}
|
133 |
rows={1}
|
134 |
/>
|
135 |
-
{prompt && (
|
136 |
<button
|
137 |
type="button"
|
138 |
onClick={clearInput}
|
|
|
5 |
interface PromptInputProps {
|
6 |
onPromptChange: (prompt: string) => void;
|
7 |
defaultPrompt?: string;
|
8 |
+
disabled?: boolean;
|
9 |
}
|
10 |
|
11 |
+
export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.default, disabled = false }: PromptInputProps) {
|
12 |
const [prompt, setPrompt] = useState(defaultPrompt);
|
13 |
const [showSuggestions, setShowSuggestions] = useState(false);
|
14 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
|
|
117 |
ref={inputRef}
|
118 |
value={prompt}
|
119 |
onChange={handleInputChange}
|
120 |
+
onFocus={disabled ? undefined : handleInputFocus}
|
121 |
+
onBlur={disabled ? undefined : handleInputBlur}
|
122 |
+
onClick={disabled ? undefined : handleInputClick}
|
123 |
+
disabled={disabled}
|
124 |
+
className={`search-input w-full py-3 pl-4 pr-8 rounded-xl text-white text-base transition-all duration-400 border resize-none focus:outline-none focus:-translate-y-0.5 focus:shadow-lg ${
|
125 |
+
disabled ? 'opacity-50 cursor-not-allowed' : ''
|
126 |
+
}`}
|
127 |
style={{
|
128 |
background: "var(--input-bg)",
|
129 |
borderColor: "var(--input-border)",
|
|
|
136 |
placeholder={PROMPTS.placeholder}
|
137 |
rows={1}
|
138 |
/>
|
139 |
+
{prompt && !disabled && (
|
140 |
<button
|
141 |
type="button"
|
142 |
onClick={clearInput}
|
src/context/VLMContext.tsx
CHANGED
@@ -67,9 +67,9 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
67 |
);
|
68 |
|
69 |
const runInference = useCallback(
|
70 |
-
async (
|
71 |
if (inferenceLock.current) {
|
72 |
-
console.log("Inference already running, skipping
|
73 |
return ""; // Return empty string to signal a skip
|
74 |
}
|
75 |
inferenceLock.current = true;
|
@@ -78,21 +78,35 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
78 |
throw new Error("Model/processor not loaded");
|
79 |
}
|
80 |
|
81 |
-
|
82 |
-
canvasRef.current = document.createElement("canvas");
|
83 |
-
}
|
84 |
-
const canvas = canvasRef.current;
|
85 |
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
91 |
|
92 |
-
|
93 |
|
94 |
-
|
95 |
-
|
|
|
96 |
const messages = [
|
97 |
{
|
98 |
role: "system",
|
|
|
67 |
);
|
68 |
|
69 |
const runInference = useCallback(
|
70 |
+
async (imageSource: HTMLVideoElement | File, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
|
71 |
if (inferenceLock.current) {
|
72 |
+
console.log("Inference already running, skipping");
|
73 |
return ""; // Return empty string to signal a skip
|
74 |
}
|
75 |
inferenceLock.current = true;
|
|
|
78 |
throw new Error("Model/processor not loaded");
|
79 |
}
|
80 |
|
81 |
+
let rawImg: RawImage;
|
|
|
|
|
|
|
82 |
|
83 |
+
if (imageSource instanceof File) {
|
84 |
+
// Handle uploaded image file
|
85 |
+
const url = URL.createObjectURL(imageSource);
|
86 |
+
try {
|
87 |
+
rawImg = await RawImage.fromURL(url);
|
88 |
+
} finally {
|
89 |
+
URL.revokeObjectURL(url);
|
90 |
+
}
|
91 |
+
} else {
|
92 |
+
// Handle video frame (original logic)
|
93 |
+
if (!canvasRef.current) {
|
94 |
+
canvasRef.current = document.createElement("canvas");
|
95 |
+
}
|
96 |
+
const canvas = canvasRef.current;
|
97 |
+
const video = imageSource;
|
98 |
+
|
99 |
+
canvas.width = video.videoWidth;
|
100 |
+
canvas.height = video.videoHeight;
|
101 |
|
102 |
+
const ctx = canvas.getContext("2d", { willReadFrequently: true });
|
103 |
+
if (!ctx) throw new Error("Could not get canvas context");
|
104 |
|
105 |
+
ctx.drawImage(video, 0, 0);
|
106 |
|
107 |
+
const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
108 |
+
rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
|
109 |
+
}
|
110 |
const messages = [
|
111 |
{
|
112 |
role: "system",
|
src/types/index.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
export type AppState = "
|
2 |
|
3 |
export interface GlassEffectProps {
|
4 |
baseFrequency?: number;
|
@@ -25,3 +25,9 @@ export interface Dimensions {
|
|
25 |
}
|
26 |
|
27 |
export type InitialPosition = "bottom-left" | "bottom-right" | Position;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
export type AppState = "upload" | "loading" | "analyzing";
|
2 |
|
3 |
export interface GlassEffectProps {
|
4 |
baseFrequency?: number;
|
|
|
25 |
}
|
26 |
|
27 |
export type InitialPosition = "bottom-left" | "bottom-right" | Position;
|
28 |
+
|
29 |
+
export interface ImageAnalysisResult {
|
30 |
+
file: File;
|
31 |
+
caption: string;
|
32 |
+
error?: string;
|
33 |
+
}
|
src/types/vlm.ts
CHANGED
@@ -4,7 +4,7 @@ export type VLMContextValue = {
|
|
4 |
error: string | null;
|
5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
6 |
runInference: (
|
7 |
-
|
8 |
instruction: string,
|
9 |
onTextUpdate?: (text: string) => void,
|
10 |
) => Promise<string>;
|
|
|
4 |
error: string | null;
|
5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
6 |
runInference: (
|
7 |
+
imageSource: HTMLVideoElement | File,
|
8 |
instruction: string,
|
9 |
onTextUpdate?: (text: string) => void,
|
10 |
) => Promise<string>;
|