Spaces:
Running
Running
| import { useState, useEffect, useRef, useCallback } from "react"; | |
| import { useModel } from "./hooks/useModel.js"; | |
| import InputBar from "./components/InputBar.jsx"; | |
| import MessageList from "./components/MessageList.jsx"; | |
| import LoadingBar from "./components/LoadingBar.jsx"; | |
| import OrbitalHero from "./components/OrbitalHero.jsx"; | |
// System prompt sent as the first message of every conversation.
const SYSTEM_PROMPT = "You are a helpful assistant. When given images, describe and analyze them. When given audio, transcribe or describe it. Be concise and helpful.";

// Demo media for the starter-prompt cards.
const EXAMPLE_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/16/Artemis_II_patch.svg/500px-Artemis_II_patch.svg.png";
const EXAMPLE_AUDIO_URL = "/neil-armstrong.oga";
// FIX: was http:// — an http URL is blocked as mixed content when the app is
// served over https, silently breaking the video starter prompt.
const EXAMPLE_VIDEO_URL = "https://images-assets.nasa.gov/video/One_Small_Step_Comparison_720p/One_Small_Step_Comparison_720p~small.mp4";

// One-click example prompts shown on the empty-chat screen. The trailing "*"
// on the video label pairs with the footnote rendered under the input bar.
const STARTER_PROMPTS = [
  { label: "Describe this patch", text: "What do you see in this image? Describe it in detail.", icon: "π·", imageUrl: EXAMPLE_IMAGE_URL },
  { label: "Transcribe audio", text: "Transcribe this audio recording.", icon: "π€", audioUrl: EXAMPLE_AUDIO_URL },
  { label: "Analyze video*", text: "Describe what is happening in this video.", icon: "π¬", videoUrl: EXAMPLE_VIDEO_URL },
  { label: "Explain a concept", text: "Explain quantum entanglement in simple terms.", icon: "π‘" },
];

// Model card link and the shared brand-gradient utility classes.
const HF_MODEL_URL = "https://huggingface.co/onnx-community/gemma-4-E2B-it-ONNX";
const GEMMA_GRADIENT = "bg-gradient-to-br from-[#3186FF] to-[#4FA0FF]";
/**
 * Rough generation-speed estimate from the text streamed so far.
 *
 * "Tokens" are approximated by whitespace-separated words; real tokenizer
 * counts will differ, but this is only a UI hint.
 *
 * @param {string} text - Text generated so far (may be empty).
 * @param {number} startTime - `performance.now()` timestamp at generation start.
 * @returns {?number} Tokens/sec rounded to one decimal, or `null` while less
 *   than 0.5s has elapsed (too little data for a stable reading).
 */
function calcTokPerSec(text, startTime) {
  // FIX: filter(Boolean) — "".split(/\s+/) yields [""] (length 1), so empty
  // or whitespace-padded text was over-counted by one token.
  const tokens = text.split(/\s+/).filter(Boolean).length;
  const elapsed = (performance.now() - startTime) / 1000;
  return elapsed > 0.5 ? Math.round((tokens / elapsed) * 10) / 10 : null;
}
| function StatusScreen({ children }) { | |
| return ( | |
| <div className="min-h-screen flex items-center justify-center px-4"> | |
| <div className="text-center max-w-md"> | |
| <h1 className="text-2xl font-bold mb-3">Gemma 4 WebGPU</h1> | |
| {children} | |
| </div> | |
| </div> | |
| ); | |
| } | |
| export default function App() { | |
| const { status, loadProgress, error, checkWebGPU, loadModel, generate } = useModel(); | |
| const [messages, setMessages] = useState([]); | |
| const [streamingText, setStreamingText] = useState(""); | |
| const [isStreaming, setIsStreaming] = useState(false); | |
| const [processingStep, setProcessingStep] = useState(null); // null | "extracting frames" | "decoding audio" | "generating" | |
| const [tokPerSec, setTokPerSec] = useState(null); | |
| const [isCached, setIsCached] = useState(() => localStorage.getItem("gemma4-cached") === "true"); | |
| const [enableThinking, setEnableThinking] = useState(false); | |
| const [theme, setTheme] = useState(() => localStorage.getItem("gemma4-theme") || "system"); | |
| useEffect(() => { | |
| const root = document.documentElement; | |
| root.classList.remove("dark", "light"); | |
| if (theme !== "system") root.classList.add(theme); | |
| localStorage.setItem("gemma4-theme", theme); | |
| }, [theme]); | |
| const messagesEndRef = useRef(null); | |
| const genStartRef = useRef(0); | |
| const scrollRafRef = useRef(0); | |
| useEffect(() => { | |
| checkWebGPU(); | |
| }, [checkWebGPU]); | |
| // Throttled scroll-to-bottom via rAF | |
| useEffect(() => { | |
| cancelAnimationFrame(scrollRafRef.current); | |
| scrollRafRef.current = requestAnimationFrame(() => { | |
| messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); | |
| }); | |
| }, [messages, streamingText]); | |
| useEffect(() => { | |
| if (status === "ready") { | |
| setIsCached(true); | |
| localStorage.setItem("gemma4-cached", "true"); | |
| } | |
| }, [status]); | |
| const handleSubmit = useCallback(async ({ imageUrl, audioUrl, videoUrl, text }) => { | |
| const userContent = []; | |
| if (imageUrl) userContent.push({ type: "image" }); | |
| // Each video frame needs its own image token in the template | |
| if (videoUrl) for (let i = 0; i < 4; i++) userContent.push({ type: "image" }); | |
| if (audioUrl) userContent.push({ type: "audio" }); | |
| userContent.push({ type: "text", text: text || "Describe this." }); | |
| const userMsg = { role: "user", content: userContent, imageUrl, audioUrl, videoUrl }; | |
| const newMessages = [...messages, userMsg]; | |
| setMessages(newMessages); | |
| const apiMessages = [ | |
| { role: "system", content: SYSTEM_PROMPT }, | |
| ...newMessages.map((m) => ({ role: m.role, content: m.content })), | |
| ]; | |
| setStreamingText(""); | |
| setIsStreaming(true); | |
| setTokPerSec(null); | |
| setProcessingStep(videoUrl ? "extracting frames" : audioUrl ? "decoding audio" : "generating"); | |
| genStartRef.current = performance.now(); | |
| generate({ | |
| messages: apiMessages, | |
| imageUrl, | |
| videoUrl, | |
| audioUrl, | |
| enableThinking, | |
| onUpdate: (text) => { | |
| setProcessingStep(null); | |
| const tps = calcTokPerSec(text, genStartRef.current); | |
| if (tps !== null) setTokPerSec(tps); | |
| setStreamingText(text); | |
| }, | |
| onComplete: (text, err) => { | |
| setProcessingStep(null); | |
| if (!err && text) { | |
| setTokPerSec(calcTokPerSec(text, genStartRef.current)); | |
| setMessages((prev) => [...prev, { role: "assistant", content: text }]); | |
| } | |
| setStreamingText(""); | |
| setIsStreaming(false); | |
| }, | |
| }); | |
| }, [messages, generate, enableThinking]); | |
| if (status === "webgpu-unavailable") { | |
| return ( | |
| <StatusScreen> | |
| <p className="text-[var(--color-text-secondary)]">WebGPU is required. Use Chrome 113+ or Edge 113+.</p> | |
| </StatusScreen> | |
| ); | |
| } | |
| if (error) { | |
| return ( | |
| <StatusScreen> | |
| <p className="text-[var(--color-red)] text-sm font-mono">Error: {error}</p> | |
| </StatusScreen> | |
| ); | |
| } | |
| const isLoading = status === "idle" || status === "webgpu-available" || status === "loading"; | |
| return ( | |
| <div className="min-h-screen flex flex-col max-w-3xl mx-auto"> | |
| <header className="flex items-center justify-between px-4 py-3 border-b border-[var(--color-outline)]"> | |
| <div className="flex items-center gap-2.5"> | |
| <div className={`w-7 h-7 rounded-lg ${GEMMA_GRADIENT} flex items-center justify-center text-white text-xs font-bold`}>G</div> | |
| <a href={HF_MODEL_URL} target="_blank" rel="noopener" className="text-base font-medium hover:text-[var(--color-blue)] transition-colors">Gemma 4</a> | |
| {messages.length > 0 && !isStreaming && ( | |
| <button | |
| onClick={() => { setMessages([]); setStreamingText(""); setTokPerSec(null); }} | |
| className="ml-2 px-2 py-0.5 text-[10px] text-[var(--color-text-secondary)] hover:text-[var(--color-text)] border border-[var(--color-outline)] rounded-lg transition-colors cursor-pointer" | |
| > | |
| New chat | |
| </button> | |
| )} | |
| </div> | |
| <div className="flex items-center gap-3 text-xs text-[var(--color-text-secondary)]"> | |
| {!isLoading && ( | |
| <button | |
| onClick={() => setEnableThinking((v) => !v)} | |
| className={`flex items-center gap-1.5 px-2 py-0.5 rounded-full border transition-colors cursor-pointer ${ | |
| enableThinking | |
| ? "border-[var(--color-blue)]/50 bg-[var(--color-blue)]/10 text-[var(--color-blue)]" | |
| : "border-[var(--color-outline)] text-[var(--color-text-secondary)] hover:border-[var(--color-blue)]/30" | |
| }`} | |
| title={enableThinking ? "Thinking mode on" : "Thinking mode off"} | |
| > | |
| <span className="text-[10px] font-medium">π Think</span> | |
| </button> | |
| )} | |
| {tokPerSec != null && ( | |
| <span className="font-mono text-[var(--color-green)]">{tokPerSec} tok/s</span> | |
| )} | |
| {isCached && !isLoading && ( | |
| <span className="px-2 py-0.5 rounded-full bg-[var(--color-green)]/10 text-[var(--color-green)] text-[10px] font-medium">Cached</span> | |
| )} | |
| <span className="hidden sm:inline">In-Browser Β· WebGPU</span> | |
| <button | |
| onClick={() => setTheme((t) => t === "dark" ? "light" : t === "light" ? "system" : "dark")} | |
| className="p-1 rounded-full hover:bg-[var(--color-surface)] transition-colors cursor-pointer" | |
| title={`Theme: ${theme}`} | |
| > | |
| {theme === "dark" ? "π" : theme === "light" ? "βοΈ" : "π»"} | |
| </button> | |
| </div> | |
| </header> | |
| {isLoading ? ( | |
| <div className="flex-1 flex flex-col items-center justify-center gap-6 px-4"> | |
| <OrbitalHero /> | |
| <div className="text-center -mt-2"> | |
| <h2 className="text-4xl font-bold mb-2 tracking-tight"><a href={HF_MODEL_URL} target="_blank" rel="noopener" className="hover:text-[var(--color-blue)] transition-colors">Gemma 4 E2B</a></h2> | |
| <p className="text-[var(--color-text-secondary)] text-sm">Multimodal AI running entirely in your browser via WebGPU</p> | |
| </div> | |
| {status === "loading" ? ( | |
| <LoadingBar loadProgress={loadProgress} isCached={isCached} /> | |
| ) : ( | |
| <button | |
| onClick={loadModel} | |
| className="px-8 py-3 bg-[var(--color-blue)] hover:bg-[var(--color-blue)]/90 text-white text-sm font-medium rounded-xl transition-colors cursor-pointer" | |
| > | |
| {isCached ? "Load Model (cached)" : "Load Model"} | |
| </button> | |
| )} | |
| <footer className="mt-4 text-[10px] text-[var(--color-text-secondary)]/50"> | |
| Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener" className="underline hover:text-[var(--color-text-secondary)]">Transformers.js</a> | |
| </footer> | |
| </div> | |
| ) : ( | |
| <> | |
| <div className="flex-1 overflow-y-auto"> | |
| {messages.length === 0 && !isStreaming ? ( | |
| <div className="flex flex-col items-center justify-center h-full gap-6 px-4 py-12"> | |
| <div className="text-center"> | |
| <div className={`w-12 h-12 rounded-2xl ${GEMMA_GRADIENT} flex items-center justify-center text-white text-xl font-bold mx-auto mb-4`}>G</div> | |
| <h2 className="text-xl font-medium mb-1">How can I help?</h2> | |
| <p className="text-sm text-[var(--color-text-secondary)]">Send text, images, audio, or video β all processed locally.</p> | |
| </div> | |
| <div className="grid grid-cols-2 gap-2 max-w-md w-full"> | |
| {STARTER_PROMPTS.map((p) => ( | |
| <button | |
| key={p.label} | |
| onClick={() => handleSubmit({ imageUrl: p.imageUrl || null, audioUrl: p.audioUrl || null, videoUrl: p.videoUrl || null, text: p.text })} | |
| className="text-left p-3 bg-[var(--color-surface)] hover:bg-[var(--color-surface-high)] border border-[var(--color-outline)] rounded-xl text-sm transition-colors cursor-pointer" | |
| > | |
| {p.imageUrl && ( | |
| <img src={p.imageUrl} alt="" className="w-full h-20 object-contain rounded-lg mb-2 bg-black/20" /> | |
| )} | |
| {p.audioUrl && ( | |
| <div className="mb-2 text-[10px] text-[var(--color-text-secondary)]/60 truncate">π€ Neil Armstrong β Apollo 11</div> | |
| )} | |
| {p.videoUrl && ( | |
| <div className="mb-2 text-[10px] text-[var(--color-text-secondary)]/60 truncate">π¬ One Small Step β NASA (0:56)</div> | |
| )} | |
| <span className="mr-1.5">{p.icon}</span> | |
| <span className="text-[var(--color-text-secondary)]">{p.label}</span> | |
| </button> | |
| ))} | |
| </div> | |
| </div> | |
| ) : ( | |
| <MessageList messages={messages} streamingText={streamingText} isStreaming={isStreaming} processingStep={processingStep} /> | |
| )} | |
| <div ref={messagesEndRef} /> | |
| </div> | |
| <InputBar onSubmit={handleSubmit} disabled={isStreaming} /> | |
| <div className="text-center py-2 text-[10px] text-[var(--color-text-secondary)]/40"> | |
| {isCached && <span>Cached Β· </span>} | |
| Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener" className="underline hover:text-[var(--color-text-secondary)]">Transformers.js</a> | |
| <span className="block mt-0.5">*Video analyzes 4 sampled frames β a tradeoff between memory and processing speed</span> | |
| </div> | |
| </> | |
| )} | |
| </div> | |
| ); | |
| } | |