Commit • 09a7c47
Parent(s): 3d4392e

oh yeah
- src/app/api/generators/search/defaultChannel.ts +70 -0
- src/app/api/generators/search/getNewMediaInfo.ts +146 -0
- src/app/api/utils/blobToDataUri.ts +21 -0
- src/app/api/utils/dataUriToBlob.ts +20 -0
- src/app/dream/embed/page.tsx +12 -0
- src/app/dream/page.tsx +15 -3
- src/app/main.tsx +42 -0
- src/app/state/useStore.ts +27 -1
- src/app/views/public-latent-media-embed-view/index.tsx +21 -0
- src/app/views/public-latent-media-view/index.tsx +77 -0
- src/app/views/public-media-view/index.tsx +10 -5
- src/components/interface/latent-engine/components/content-layer/index.tsx +8 -3
- src/components/interface/latent-engine/components/disclaimers/this-is-ai.tsx +2 -2
- src/components/interface/latent-engine/core/drawSegmentation.ts +36 -0
- src/components/interface/latent-engine/core/engine.tsx +69 -19
- src/components/interface/latent-engine/core/types.ts +10 -3
- src/components/interface/latent-engine/resolvers/image/index.tsx +4 -1
- src/components/interface/latent-engine/store/useLatentEngine.ts +86 -5
- src/components/interface/media-player/index.tsx +5 -4
- src/components/interface/media-player/latent.tsx +1 -1
- src/components/interface/stream-tag/index.tsx +6 -0
- src/lib/clap/clapToDataUri.ts +9 -0
- src/lib/clap/{mockClap.ts → getMockClap.ts} +18 -6
- src/lib/clap/newClap.ts +1 -0
- src/lib/clap/parseClap.ts +134 -21
- src/lib/clap/serializeClap.ts +9 -4
- src/lib/on-device-ai/getInteractiveSegmentationCanvas.tsx +37 -0
- src/lib/on-device-ai/getSegmentationCanvas.tsx +1 -0
- src/lib/on-device-ai/identifyFrame.ts +52 -0
- src/lib/on-device-ai/segmentFrameOnClick.ts +58 -0
- src/lib/utils/relativeCoords.ts +0 -0
- src/types/general.ts +3 -1
src/app/api/generators/search/defaultChannel.ts
ADDED
@@ -0,0 +1,70 @@
+import { ChannelInfo } from "@/types/general";
+
+export const defaultChannel: ChannelInfo = {
+  /**
+   * We actually use the dataset ID for the channel ID.
+   *
+   */
+  id: "d25efcc1-3cc2-4b41-9f41-e3a93300ae5f",
+
+  /**
+   * The name used in the URL for the channel
+   *
+   * eg: my-time-travel-journeys
+   */
+  slug: "latent",
+
+  /**
+   * username id of the Hugging Face dataset
+   *
+   * ex: f9a38286ec3436a45edd2cca
+   */
+  // DISABLED FOR NOW
+  // datasetUserId: string
+
+  /**
+   * username slug of the Hugging Face dataset
+   *
+   * eg: jbilcke-hf
+   */
+  datasetUser: "",
+
+  /**
+   * dataset slug of the Hugging Face dataset
+   *
+   * eg: ai-tube-my-time-travel-journeys
+   */
+  datasetName: "",
+
+  label: "Latent",
+
+  description: "Latent",
+
+  thumbnail: "",
+
+  model: "SDXL",
+
+  lora: "",
+
+  style: "",
+
+  voice: "",
+
+  music: "",
+
+  /**
+   * The system prompt
+   */
+  prompt: "",
+
+  likes: 0,
+
+  tags: [],
+
+  updatedAt: new Date().toISOString(),
+
+  /**
+   * Default video orientation
+   */
+  orientation: "landscape"
+}
src/app/api/generators/search/getNewMediaInfo.ts
ADDED
@@ -0,0 +1,146 @@
+import { v4 as uuidv4 } from "uuid"
+
+import {
+  ChannelInfo,
+  MediaInfo,
+} from "@/types/general"
+import { defaultChannel } from "./defaultChannel"
+
+export function getNewMediaInfo(params: Partial<MediaInfo> = {}): MediaInfo {
+
+  const channel = defaultChannel
+
+  const mediaInfo: MediaInfo = {
+    /**
+     * UUID (v4)
+     */
+    id: uuidv4(),
+
+    /**
+     * Status of the media
+     */
+    status: "published",
+
+    /**
+     * Human readable title for the media
+     */
+    label: "",
+
+    /**
+     * Human readable description for the media
+     */
+    description: "",
+
+    /**
+     * Content prompt
+     */
+    prompt: "",
+
+    /**
+     * URL to the media thumbnail
+     */
+    thumbnailUrl: "",
+
+    /**
+     * URL to a clap file
+     */
+    clapUrl: "",
+
+    assetUrl: "",
+
+    /**
+     * This contains the storage URL of the higher-resolution content
+     */
+    assetUrlHd: "",
+
+    /**
+     * Counter for the number of views
+     *
+     * Note: should be managed by the index to prevent cheating
+     */
+    numberOfViews: 0,
+
+    /**
+     * Counter for the number of likes
+     *
+     * Note: should be managed by the index to prevent cheating
+     */
+    numberOfLikes: 0,
+
+    /**
+     * Counter for the number of dislikes
+     *
+     * Note: should be managed by the index to prevent cheating
+     */
+    numberOfDislikes: 0,
+
+    /**
+     * When was the media updated
+     */
+    updatedAt: new Date().toISOString(),
+
+    /**
+     * Arbitrary string tags to label the content
+     */
+    tags: Array.isArray(params.tags) ? [
+      ...params.tags,
+    ] : [],
+
+    /**
+     * Model name
+     */
+    model: "SDXL",
+
+    /**
+     * LoRA name
+     */
+    lora: "",
+
+    /**
+     * style name
+     */
+    style: "",
+
+    /**
+     * Music prompt
+     */
+    music: "",
+
+    /**
+     * Voice prompt
+     */
+    voice: "",
+
+    /**
+     * The channel
+     */
+    channel,
+
+    /**
+     * Media duration (in seconds)
+     */
+    duration: 2,
+
+    /**
+     * Media width (eg. 1024)
+     */
+    width: 1024,
+
+    /**
+     * Media height (eg. 576)
+     */
+    height: 576,
+
+    /**
+     * General media aspect ratio
+     */
+    orientation: "landscape",
+
+    /**
+     * Media projection (cartesian by default)
+     */
+    projection: "latent"
+  }
+
+  return mediaInfo
+}
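Note: as committed, getNewMediaInfo accepts a Partial<MediaInfo> but only merges tags from it; every other field takes the defaults above. A quick usage sketch (the import path assumes the repo's "@/" alias for "src/"):

import { getNewMediaInfo } from "@/app/api/generators/search/getNewMediaInfo"

// only `tags` is read from the partial input in this version;
// id, model, duration, etc. always come from the defaults
const media = getNewMediaInfo({ tags: ["demo", "latent"] })
console.log(media.tags)        // ["demo", "latent"]
console.log(media.projection)  // "latent"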
src/app/api/utils/blobToDataUri.ts
ADDED
@@ -0,0 +1,21 @@
+export async function blobToDataUri(blob: Blob, defaultContentType = ""): Promise<string> {
+  if (typeof window === "undefined") {
+    const arrayBuffer = await blob.arrayBuffer()
+    let buffer = Buffer.from(arrayBuffer)
+    return "data:" + (defaultContentType || blob.type) + ';base64,' + buffer.toString('base64');
+  } else {
+    return new Promise<string>((resolve, reject) => {
+      const reader = new FileReader()
+      reader.onload = _e => {
+        let dataUri = `${reader.result as string || ""}`
+        if (defaultContentType) {
+          dataUri = dataUri.replace("application/octet-stream", defaultContentType)
+        }
+        resolve(dataUri)
+      }
+      reader.onerror = _e => reject(reader.error)
+      reader.onabort = _e => reject(new Error("Read aborted"))
+      reader.readAsDataURL(blob)
+    });
+  }
+}
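Note: blobToDataUri picks its strategy from the environment — Node (no window) goes through Buffer, the browser through FileReader. A minimal usage sketch (run inside an async context):

import { blobToDataUri } from "@/app/api/utils/blobToDataUri"

// base64("hello") === "aGVsbG8="
const blob = new Blob(["hello"], { type: "text/plain" })
const dataUri = await blobToDataUri(blob)
// => "data:text/plain;base64,aGVsbG8="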
src/app/api/utils/dataUriToBlob.ts
ADDED
@@ -0,0 +1,20 @@
+
+export function dataUriToBlob(dataURI = "", defaultContentType = ""): Blob {
+  dataURI = dataURI.replace(/^data:/, '');
+
+  const type = dataURI.match(/(?:image|application|video|audio|text)\/[^;]+/)?.[0] || defaultContentType;
+  const base64 = dataURI.replace(/^[^,]+,/, '');
+  const arrayBuffer = new ArrayBuffer(base64.length);
+  const typedArray = new Uint8Array(arrayBuffer);
+
+  for (let i = 0; i < base64.length; i++) {
+    typedArray[i] = base64.charCodeAt(i);
+  }
+  console.log("dataUriToBlob DEBUG:", {
+    type,
+    base64: base64.slice(0, 80),
+    arrayBuffer
+  })
+
+  return new Blob([arrayBuffer], { type });
+}
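Note: as committed, dataUriToBlob copies the base64 *text* into the byte buffer (charCodeAt over the undecoded string), so the resulting Blob holds base64 characters rather than the original binary — which plausibly explains the "can't unpack the .clap when it's from a data-uri" TODO later in this commit. A hedged sketch of the conventional browser-side decode (the "Fixed" name is hypothetical, not part of this commit):

// sketch: decode the base64 payload before building the Blob
export function dataUriToBlobFixed(dataURI = "", defaultContentType = ""): Blob {
  const type = dataURI.match(/(?:image|application|video|audio|text)\/[^;]+/)?.[0] || defaultContentType
  const base64 = dataURI.replace(/^[^,]+,/, "")
  const binary = atob(base64)            // base64 text -> raw binary string
  const bytes = new Uint8Array(binary.length)
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i)      // charCodeAt over the *decoded* bytes
  }
  return new Blob([bytes], { type })
}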
src/app/dream/embed/page.tsx
ADDED
@@ -0,0 +1,12 @@
+import { cn } from "@/lib/utils/cn"
+
+export default async function Embed() {
+  return (
+    <div className={cn(
+      `w-full`,
+      `flex flex-col`
+    )}>
+      <a href={process.env.NEXT_PUBLIC_DOMAIN || "#"}>Please go to AiTube.at to fully enjoy this experience.</a>
+    </div>
+  )
+}
src/app/dream/page.tsx
CHANGED
@@ -5,17 +5,29 @@ import { LatentQueryProps } from "@/types/general"
 import { Main } from "../main"
 import { searchResultToMediaInfo } from "../api/generators/search/searchResultToMediaInfo"
 import { LatentSearchResult } from "../api/generators/search/types"
+import { serializeClap } from "@/lib/clap/serializeClap"
+import { getMockClap } from "@/lib/clap/getMockClap"
+import { clapToDataUri } from "@/lib/clap/clapToDataUri"
+import { getNewMediaInfo } from "../api/generators/search/getNewMediaInfo"
 
 export default async function DreamPage({ searchParams: {
   l: latentContent,
 } }: LatentQueryProps) {
 
-  const latentSearchResult = JSON.parse(atob(`${latentContent}`)) as LatentSearchResult
+  // const latentSearchResult = JSON.parse(atob(`${latentContent}`)) as LatentSearchResult
 
   // this will hallucinate the thumbnail on the fly - maybe we should cache it
-  const latentMedia = await searchResultToMediaInfo(latentSearchResult)
+  // const latentMedia = await searchResultToMediaInfo(latentSearchResult)
+
+  // TODO: generate the clap from the media info
+  console.log("generating a mock media info and mock clap file")
+  const latentMedia = getNewMediaInfo()
+
+  latentMedia.clapUrl = await clapToDataUri(
+    getMockClap({ showDisclaimer: true })
+  )
 
   return (
-    <Main
+    <Main latentMedia={latentMedia} />
   )
 }
src/app/main.tsx
CHANGED
@@ -17,6 +17,8 @@ import { TubeLayout } from "../components/interface/tube-layout"
 import { PublicMusicVideosView } from "./views/public-music-videos-view"
 import { PublicMediaEmbedView } from "./views/public-media-embed-view"
 import { PublicMediaView } from "./views/public-media-view"
+import { PublicLatentMediaEmbedView } from "./views/public-latent-media-embed-view"
+import { PublicLatentMediaView } from "./views/public-latent-media-view"
 
 // this is where we transition from the server-side space
 // and the client-side space
@@ -30,7 +32,12 @@ export function Main({
   // view,
   publicMedia,
   publicMedias,
+
+  latentMedia,
+  latentMedias,
+
   publicChannelVideos,
+
   publicTracks,
   publicTrack,
   channel,
@@ -39,9 +46,15 @@
   // view?: InterfaceView
   publicMedia?: MediaInfo
   publicMedias?: MediaInfo[]
+
+  latentMedia?: MediaInfo
+  latentMedias?: MediaInfo[]
+
   publicChannelVideos?: MediaInfo[]
+
   publicTracks?: MediaInfo[]
   publicTrack?: MediaInfo
+
   channel?: ChannelInfo
 }) {
   // this could be also a parameter of main, where we pass this manually
@@ -53,6 +66,8 @@
   const setPathname = useStore(s => s.setPathname)
   const setPublicChannel = useStore(s => s.setPublicChannel)
   const setPublicMedias = useStore(s => s.setPublicMedias)
+  const setPublicLatentMedia = useStore(s => s.setPublicLatentMedia)
+  const setPublicLatentMedias = useStore(s => s.setPublicLatentMedias)
   const setPublicChannelVideos = useStore(s => s.setPublicChannelVideos)
   const setPublicTracks = useStore(s => s.setPublicTracks)
   const setPublicTrack = useStore(s => s.setPublicTrack)
@@ -112,6 +127,28 @@
 
   }, [publicMedia?.id])
 
+  useEffect(() => {
+    if (!latentMedias?.length) { return }
+    setPublicLatentMedias(latentMedias)
+  }, [getCollectionKey(latentMedias)])
+
+  useEffect(() => {
+    console.log("latentMedia:", {
+      "id": latentMedia?.id
+    })
+    console.log(latentMedia)
+    setPublicLatentMedia(latentMedia)
+    if (!latentMedia || !latentMedia?.id) { return }
+    if (pathname === "/dream/embed") { return }
+    if (pathname !== "/dream") {
+      // console.log("we are on huggingface apparently!")
+      // router.replace(`/watch?v=${publicMedia.id}`)
+
+      // TODO: add params in the URL to represent the latent result
+      router.replace(`/dream`)
+    }
+  }, [latentMedia?.id])
+
 
   useEffect(() => {
     // note: it is important to ALWAYS set the current video to videoId
@@ -143,6 +180,11 @@
       {view === "home" && <HomeView />}
      {view === "public_media_embed" && <PublicMediaEmbedView />}
       {view === "public_media" && <PublicMediaView />}
+
+      {/* latent content is the content that "doesn't exist" (is generated by the AI) */}
+      {view === "public_latent_media_embed" && <PublicLatentMediaEmbedView />}
+      {view === "public_latent_media" && <PublicLatentMediaView />}
+
       {view === "public_music_videos" && <PublicMusicVideosView />}
       {view === "public_channels" && <PublicChannelsView />}
       {view === "public_channel" && <PublicChannelView />}
src/app/state/useStore.ts
CHANGED
@@ -68,7 +68,13 @@ export const useStore = create<{
   setPublicComments: (publicComment: CommentInfo[]) => void
 
   publicMedias: MediaInfo[]
-  setPublicMedias: (publicMedias
+  setPublicMedias: (publicMedias?: MediaInfo[]) => void
+
+  latentMedia?: MediaInfo
+  setPublicLatentMedia: (latentMedia?: MediaInfo) => void
+
+  latentMedias: MediaInfo[]
+  setPublicLatentMedias: (latentMedias?: MediaInfo[]) => void
 
   publicChannelVideos: MediaInfo[]
   setPublicChannelVideos: (publicChannelVideos: MediaInfo[]) => void
@@ -109,7 +115,15 @@
     "/embed": "public_media_embed",
     "/music": "public_music_videos",
     "/channels": "public_channels",
+    "/dream": "public_latent_media",
+    "/dream/embed": "public_latent_media_embed",
     "/channel": "public_channel",
+
+    // those are reserved for future use
+    "/gaming": "public_music_videos",
+    "/live": "public_music_videos",
+    "/tv": "public_music_videos",
+
     "/account": "user_account",
     "/account/channel": "user_channel",
   }
@@ -219,6 +233,18 @@
   },
 
 
+  latentMedia: undefined,
+  setPublicLatentMedia: (latentMedia?: MediaInfo) => {
+    set({ latentMedia })
+  },
+
+  latentMedias: [],
+  setPublicLatentMedias: (latentMedias: MediaInfo[] = []) => {
+    set({
+      latentMedias: Array.isArray(latentMedias) ? latentMedias : []
+    })
+  },
+
   publicTrack: undefined,
   setPublicTrack: (publicTrack?: MediaInfo) => {
     set({ publicTrack })
src/app/views/public-latent-media-embed-view/index.tsx
ADDED
@@ -0,0 +1,21 @@
+"use client"
+
+import { useStore } from "@/app/state/useStore"
+import { cn } from "@/lib/utils/cn"
+
+export function PublicLatentMediaEmbedView() {
+  const media = useStore(s => s.publicMedia)
+  if (!media) { return null }
+
+  // unfortunately we have to disable this,
+  // as we can't afford a dream to be generated in parallel by many X users,
+  // it would be way too costly
+  return (
+    <div className={cn(
+      `w-full`,
+      `flex flex-col`
+    )}>
+      <a href={process.env.NEXT_PUBLIC_DOMAIN || "#"}>Please go to AiTube.at to fully enjoy this experience.</a>
+    </div>
+  )
+}
src/app/views/public-latent-media-view/index.tsx
ADDED
@@ -0,0 +1,77 @@
+"use client"
+
+import { useStore } from "@/app/state/useStore"
+import { cn } from "@/lib/utils/cn"
+import { MediaPlayer } from "@/components/interface/media-player"
+
+export function PublicLatentMediaView() {
+  // note:
+  const media = useStore(s => s.latentMedia)
+  console.log("PublicLatentMediaView", {
+    "media (latentMedia)": media,
+  })
+  if (!media) { return null }
+
+  return (
+    <div className={cn(
+      `w-full`,
+      `flex flex-col lg:flex-row`
+    )}>
+      <div className={cn(
+        `flex-grow`,
+        `flex flex-col`,
+        `transition-all duration-200 ease-in-out`,
+        `px-2 xl:px-0`
+      )}>
+        {/** AI MEDIA PLAYER - HORIZONTAL */}
+        <MediaPlayer
+          media={media}
+          enableShortcuts={false}
+
+          // that could be, but let's do it the dirty way for now
+          // currentTime={desiredCurrentTime}
+
+          className="rounded-xl overflow-hidden mb-4"
+        />
+
+        {/** AI MEDIA TITLE - HORIZONTAL */}
+        <div className={cn(
+          `flex flex-row space-x-2`,
+          `transition-all duration-200 ease-in-out`,
+          `text-lg lg:text-xl text-zinc-100 font-medium mb-0 line-clamp-2`,
+          `mb-2`,
+        )}>
+          <div className="">{media.label}</div>
+        </div>
+
+        {/** MEDIA TOOLBAR - HORIZONTAL */}
+        <div className={cn(
+          `flex flex-col space-y-3 xl:space-y-0 xl:flex-row`,
+          `transition-all duration-200 ease-in-out`,
+          `items-start xl:items-center`,
+          `justify-between`,
+          `mb-2 lg:mb-3`,
+        )}>
+
+
+        </div>
+
+        {/** MEDIA DESCRIPTION - VERTICAL */}
+        <div className={cn(
+          `flex flex-col p-3`,
+          `transition-all duration-200 ease-in-out`,
+          `rounded-xl`,
+          `bg-neutral-700/50`,
+          `text-sm text-zinc-100`,
+        )}>
+
+          {/* DESCRIPTION BLOCK */}
+          <div className="flex flex-row space-x-2 font-medium mb-1">
+            <div>no data</div>
+          </div>
+          <p>{media.description}</p>
+        </div>
+      </div>
+    </div>
+  )
+}
src/app/views/public-media-view/index.tsx
CHANGED
@@ -116,12 +116,17 @@ export function PublicMediaView() {
       if (!media || !media.id) {
         return
       }
-      const numberOfViews = await countNewMediaView(mediaId)
 
-
-
-
-
+      try {
+        const numberOfViews = await countNewMediaView(mediaId)
+
+        setPublicMedia({
+          ...media,
+          numberOfViews
+        })
+      } catch (err) {
+        console.error(`failed to count the number of view for mediaId ${mediaId}`)
+      }
     })
 
   }, [media?.id])
src/components/interface/latent-engine/components/content-layer/index.tsx
CHANGED
@@ -1,25 +1,30 @@
-import {
+import { cn } from "@/lib/utils/cn"
+import { ForwardedRef, forwardRef, MouseEventHandler, ReactNode } from "react"
 
 export const ContentLayer = forwardRef(function ContentLayer({
   width = 256,
   height = 256,
   className = "",
   children,
+  onClick,
 }: {
   width?: number
   height?: number
   className?: string
   children?: ReactNode
+  onClick?: MouseEventHandler<HTMLDivElement>
 }, ref: ForwardedRef<HTMLDivElement>) {
   return (
-    <div className=
+    <div className={cn(`
       absolute
       mt-0 mb-0 ml-0 mr-0
       flex flex-col
       items-center justify-center
-
+      pointer-events-none
+      `, className)}
      style={{ width, height }}
       ref={ref}
+      onClick={onClick}
     >
       <div className="h-full aspect-video">
         {children}
CHANGED
@@ -4,12 +4,12 @@ import React from "react"
|
|
4 |
import { cn } from "@/lib/utils/cn"
|
5 |
|
6 |
import { arimoBold, arimoNormal } from "@/lib/fonts"
|
7 |
-
import {
|
8 |
|
9 |
export function ThisIsAI({
|
10 |
streamType,
|
11 |
}: {
|
12 |
-
streamType?:
|
13 |
} = {}) {
|
14 |
|
15 |
return (
|
|
|
4 |
import { cn } from "@/lib/utils/cn"
|
5 |
|
6 |
import { arimoBold, arimoNormal } from "@/lib/fonts"
|
7 |
+
import { ClapStreamType } from "@/lib/clap/types"
|
8 |
|
9 |
export function ThisIsAI({
|
10 |
streamType,
|
11 |
}: {
|
12 |
+
streamType?: ClapStreamType
|
13 |
} = {}) {
|
14 |
|
15 |
return (
|
src/components/interface/latent-engine/core/drawSegmentation.ts
ADDED
@@ -0,0 +1,36 @@
+import { MPMask } from "@mediapipe/tasks-vision"
+
+/**
+ * Draw segmentation result
+ */
+export function drawSegmentation(mask?: MPMask, canvas?: HTMLCanvasElement) {
+
+  if (!mask) { throw new Error("drawSegmentation failed: empty mask") }
+
+  if (!canvas) { throw new Error("drawSegmentation failed: cannot access the canvas") }
+
+  const width = mask.width;
+  const height = mask.height;
+  const maskData = mask.getAsFloat32Array();
+
+  canvas.width = width;
+  canvas.height = height;
+
+  console.log("drawSegmentation: drawing..")
+
+  const ctx = canvas.getContext("2d")
+
+  if (!ctx) { throw new Error("drawSegmentation failed: cannot access the 2D context") }
+
+  ctx.fillStyle = "#00000000";
+  ctx.fillRect(0, 0, width, height);
+  ctx.fillStyle = "rgba(18, 181, 203, 0.7)";
+
+  maskData.forEach((category: number, index: number, array: Float32Array) => {
+    if (Math.round(category * 255.0) === 0) {
+      const x = (index + 1) % width;
+      const y = (index + 1 - x) / width;
+      ctx.fillRect(x, y, 1, 1);
+    }
+  })
+}
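Note: for a row-major mask buffer, pixel index conventionally maps to x = index % width, y = Math.floor(index / width); the (index + 1) offset above shifts the drawn overlay by one pixel. A sketch of the usual mapping:

// conventional index -> (x, y) for a row-major width × height buffer
function indexToXY(index: number, width: number): [number, number] {
  return [index % width, Math.floor(index / width)]
}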
src/components/interface/latent-engine/core/engine.tsx
CHANGED
@@ -1,32 +1,38 @@
 "use client"
 
-import React, { useEffect, useRef, useState } from "react"
+import React, { MouseEventHandler, useEffect, useRef, useState } from "react"
 
-import { mockClap } from "@/lib/clap/mockClap"
 import { cn } from "@/lib/utils/cn"
 
 import { useLatentEngine } from "../store/useLatentEngine"
 import { PlayPauseButton } from "../components/play-pause-button"
 import { StreamTag } from "../../stream-tag"
 import { ContentLayer } from "../components/content-layer"
+import { MediaInfo } from "@/types/general"
+import { getMockClap } from "@/lib/clap/getMockClap"
+import { serializeClap } from "@/lib/clap/serializeClap"
+import { blobToDataUri } from "@/app/api/utils/blobToDataUri"
+import { InteractiveSegmentationCanvas } from "@/lib/on-device-ai/getInteractiveSegmentationCanvas"
+import { InteractiveSegmenterResult } from "@mediapipe/tasks-vision"
 
 function LatentEngine({
-
+  media,
   width,
   height,
   className = "" }: {
-
+  media: MediaInfo
   width?: number
   height?: number
   className?: string
 }) {
   const setContainerDimension = useLatentEngine(s => s.setContainerDimension)
   const isLoaded = useLatentEngine(s => s.isLoaded)
-  const
-  const
+  const imagine = useLatentEngine(s => s.imagine)
+  const open = useLatentEngine(s => s.open)
 
   const setImageElement = useLatentEngine(s => s.setImageElement)
   const setVideoElement = useLatentEngine(s => s.setVideoElement)
+  const setSegmentationElement = useLatentEngine(s => s.setSegmentationElement)
 
   const streamType = useLatentEngine(s => s.streamType)
   const isStatic = useLatentEngine(s => s.isStatic)
@@ -39,6 +45,10 @@ function LatentEngine({
   const videoLayer = useLatentEngine(s => s.videoLayer)
   const segmentationLayer = useLatentEngine(s => s.segmentationLayer)
   const interfaceLayer = useLatentEngine(s => s.interfaceLayer)
+  const videoElement = useLatentEngine(s => s.videoElement)
+  const imageElement = useLatentEngine(s => s.imageElement)
+
+  const onClickOnSegmentationLayer = useLatentEngine(s => s.onClickOnSegmentationLayer)
 
   const stateRef = useRef({ isInitialized: false })
 
@@ -47,15 +57,29 @@
   const overlayTimerRef = useRef<NodeJS.Timeout>()
 
   const videoLayerRef = useRef<HTMLDivElement>(null)
+  const segmentationLayerRef = useRef<HTMLDivElement>(null)
+
+  const mediaUrl = media.clapUrl || media.assetUrlHd || media.assetUrl
 
   useEffect(() => {
-    if (!stateRef.current.isInitialized) {
+    if (!stateRef.current.isInitialized && mediaUrl) {
       stateRef.current.isInitialized = true
-
-
-
+
+      const fn = async () => {
+        // TODO julian
+        // there is a bug, we can't unpack the .clap when it's from a data-uri :/
+
+        // open(mediaUrl)
+        const mockClap = getMockClap()
+        const mockArchive = await serializeClap(mockClap)
+        // for some reason conversion to data uri doesn't work
+        // const mockDataUri = await blobToDataUri(mockArchive, "application/x-gzip")
+        // console.log("mockDataUri:", mockDataUri)
+        open(mockArchive)
+      }
+      fn()
     }
-  }, [])
+  }, [mediaUrl])
 
   const isPlayingRef = useRef(isPlaying)
   isPlayingRef.current = isPlaying
@@ -88,18 +112,35 @@
   useEffect(() => {
     if (!videoLayerRef.current) { return }
 
-
+    // note how in both cases we are pulling from the videoLayerRef
+    // that's because one day everything will be a video, but for now we
+    // "fake it until we make it"
+    const videoElements = Array.from(
+      videoLayerRef.current.querySelectorAll('.latent-video')
+    ) as HTMLVideoElement[]
     setVideoElement(videoElements.at(0))
 
     // images are used for simpler or static experiences
-    const imageElements = Array.from(
+    const imageElements = Array.from(
+      videoLayerRef.current.querySelectorAll('.latent-image')
+    ) as HTMLImageElement[]
     setImageElement(imageElements.at(0))
+
+
+    if (!segmentationLayerRef.current) { return }
+
+    const segmentationElements = Array.from(
+      segmentationLayerRef.current.querySelectorAll('.segmentation-canvas')
+    ) as HTMLCanvasElement[]
+    setSegmentationElement(segmentationElements.at(0))
+
   })
 
   useEffect(() => {
     setContainerDimension({ width: width || 256, height: height || 256 })
   }, [width, height])
 
+
   return (
     <div
       style={{ width, height }}
@@ -115,24 +156,31 @@
 
       {/* main content container */}
       <ContentLayer
-        className=""
+        className="pointer-events-auto"
         width={width}
         height={height}
         ref={videoLayerRef}
+        onClick={onClickOnSegmentationLayer}
       >{videoLayer}</ContentLayer>
 
+
       <ContentLayer
-        className=""
+        className="pointer-events-none"
         width={width}
         height={height}
-
+        ref={segmentationLayerRef}
+      ><canvas
+        className="segmentation-canvas"
+        style={{ width, height }}
+      ></canvas></ContentLayer>
 
+      {/*
       <ContentLayer
-        className=""
+        className="pointer-events-auto"
         width={width}
         height={height}
-
-
+      >{interfaceLayer}</ContentLayer>
+      */}
 
       {/* content overlay, with the gradient, buttons etc */}
       <div className={cn(`
@@ -142,6 +190,7 @@
         items-center justify-end
         pt-5 px-3 pb-1
         transition-opacity duration-300 ease-in-out
+        pointer-events-none
         `,
         isOverlayVisible ? "opacity-100" : "opacity-0"
       )}
@@ -185,6 +234,7 @@
         flex flex-row flex-none
         w-full h-14
         items-center justify-between
+        pointer-events-auto
       `)}>
 
       {/* left-side buttons */}
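Note on the layering pattern above: the engine stacks absolutely-positioned ContentLayers and relies on CSS pointer-events, disabling them on the overlay and segmentation layers and re-enabling them only on the video layer (and the toolbar), so clicks fall through to onClickOnSegmentationLayer. A generic sketch of the idea (not code from this repo):

// sketch: stacked layers where only one layer receives clicks
<div style={{ position: "relative" }}>
  <div
    style={{ position: "absolute", inset: 0, pointerEvents: "auto" }}
    onClick={handleClick}
  >{/* clickable video/image content */}</div>
  <div style={{ position: "absolute", inset: 0, pointerEvents: "none" }}>
    {/* segmentation overlay: visible but click-transparent */}
  </div>
</div>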
src/components/interface/latent-engine/core/types.ts
CHANGED
@@ -1,5 +1,6 @@
 import { ClapProject, ClapSegment, ClapStreamType } from "@/lib/clap/types"
-import {
+import { InteractiveSegmenterResult } from "@mediapipe/tasks-vision"
+import { MouseEventHandler, ReactNode } from "react"
 
 export type LatentEngineStatus =
   | "idle"
@@ -30,6 +31,7 @@ export type LatentEngineStore = {
   height: number
 
   clap: ClapProject
+  debug: boolean
 
   streamType: ClapStreamType
 
@@ -61,6 +63,7 @@ export type LatentEngineStore = {
   videoLayerElement?: HTMLDivElement
   imageElement?: HTMLImageElement
   videoElement?: HTMLVideoElement
+  segmentationElement?: HTMLCanvasElement
 
   videoLayer: ReactNode
   videoBuffer: "A" | "B"
@@ -75,13 +78,17 @@ export type LatentEngineStore = {
   interfaceBufferB: ReactNode
 
   setContainerDimension: ({ width, height }: { width: number; height: number }) => void
-
-
+  imagine: (prompt: string) => Promise<void>
+  open: (src?: string | ClapProject | Blob) => Promise<void>
 
   setVideoLayerElement: (videoLayerElement?: HTMLDivElement) => void
   setImageElement: (imageElement?: HTMLImageElement) => void
   setVideoElement: (videoElement?: HTMLVideoElement) => void
+  setSegmentationElement: (segmentationElement?: HTMLCanvasElement) => void
 
+  processClickOnSegment: (data: InteractiveSegmenterResult) => void
+  onClickOnSegmentationLayer: MouseEventHandler<HTMLDivElement>
+
   togglePlayPause: () => boolean
   play: () => boolean
   pause: () => boolean
src/components/interface/latent-engine/resolvers/image/index.tsx
CHANGED
@@ -23,6 +23,9 @@ export async function resolve(segment: ClapSegment, clap: ClapProject): Promise<
   // note: the latent-image class is not used for styling, but to grab the component
   // from JS when we need to segment etc
   return (
-    <img
+    <img
+      className="latent-image object-cover h-full"
+      src={assetUrl}
+    />
   )
 }
src/components/interface/latent-engine/store/useLatentEngine.ts
CHANGED
@@ -4,17 +4,23 @@ import { create } from "zustand"
 import { ClapProject } from "@/lib/clap/types"
 import { newClap } from "@/lib/clap/newClap"
 import { sleep } from "@/lib/utils/sleep"
-import { getSegmentationCanvas } from "@/lib/on-device-ai/getSegmentationCanvas"
+// import { getSegmentationCanvas } from "@/lib/on-device-ai/getSegmentationCanvas"
 
 import { LatentEngineStore } from "../core/types"
 import { resolveSegments } from "../resolvers/resolveSegments"
 import { fetchLatentClap } from "../core/fetchLatentClap"
+import { dataUriToBlob } from "@/app/api/utils/dataUriToBlob"
+import { parseClap } from "@/lib/clap/parseClap"
+import { InteractiveSegmenterResult, MPMask } from "@mediapipe/tasks-vision"
+import { segmentFrame } from "@/lib/on-device-ai/segmentFrameOnClick"
+import { drawSegmentation } from "../core/drawSegmentation"
 
 export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
   width: 1024,
   height: 576,
 
   clap: newClap(),
+  debug: true,
 
   streamType: "static",
   isStatic: false,
@@ -42,7 +48,8 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
   videoLayerElement: undefined,
   imageElement: undefined,
   videoElement: undefined,
-
+  segmentationElement: undefined,
+
   videoLayer: undefined,
   videoBuffer: "A",
   videoBufferA: null,
@@ -62,7 +69,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
     })
   },
 
-
+  imagine: async (prompt: string): Promise<void> => {
     set({
       isLoaded: false,
       isLoading: true,
@@ -81,10 +88,30 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
     if (!clap) { return }
 
-    get().
+    get().open(clap)
   },
 
-
+
+  open: async (src?: string | ClapProject | Blob) => {
+    const { debug } = get()
+    set({
+      isLoaded: false,
+      isLoading: true,
+    })
+
+    let clap: ClapProject | undefined = undefined
+
+    try {
+      clap = await parseClap(src, debug)
+    } catch (err) {
+      console.error(`failed to open the Clap: ${err}`)
+      set({
+        isLoading: false,
+      })
+    }
+
+    if (!clap) { return }
+
     set({
       clap,
       isLoading: false,
@@ -99,7 +126,59 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
   setVideoLayerElement: (videoLayerElement?: HTMLDivElement) => { set({ videoLayerElement }) },
   setImageElement: (imageElement?: HTMLImageElement) => { set({ imageElement }) },
   setVideoElement: (videoElement?: HTMLVideoElement) => { set({ videoElement }) },
+  setSegmentationElement: (segmentationElement?: HTMLCanvasElement) => { set({ segmentationElement }) },
+
+  processClickOnSegment: (result: InteractiveSegmenterResult) => {
+    console.log(`processClickOnSegment: user clicked on something:`, result)
+
+    const { videoElement, imageElement, segmentationElement, debug } = get()
 
+    if (!result?.categoryMask) {
+      if (debug) {
+        console.log(`processClickOnSegment: no categoryMask, so we skip the click`)
+      }
+      return
+    }
+
+    try {
+      if (debug) {
+        console.log(`processClickOnSegment: calling drawSegmentation`)
+      }
+      drawSegmentation(result.categoryMask, segmentationElement)
+
+      if (debug) {
+        console.log("processClickOnSegment: TODO call data.close() to free the memory!")
+      }
+      result.close()
+    } catch (err) {
+      console.error(`processClickOnSegment: something failed ${err}`)
+    }
+  },
+  onClickOnSegmentationLayer: (event) => {
+
+    const { videoElement, imageElement, segmentationLayer, segmentationElement, debug } = get()
+    if (debug) {
+      console.log("onClickOnSegmentationLayer")
+    }
+    // TODO use the videoElement if this is a video!
+    if (!imageElement) { return }
+
+    const box = event.currentTarget.getBoundingClientRect()
+
+    const px = event.clientX
+    const py = event.clientY
+
+    const x = px / box.width
+    const y = py / box.height
+    console.log(`onClickOnSegmentationLayer: user clicked on `, { x, y, px, py, box, imageElement })
+
+    const fn = async () => {
+      const results: InteractiveSegmenterResult = await segmentFrame(imageElement, x, y)
+      get().processClickOnSegment(results)
+    }
+    fn()
+  },
+
   togglePlayPause: (): boolean => {
     const { isLoaded, isPlaying, renderingIntervalId } = get()
     if (!isLoaded) { return false }
@@ -176,6 +255,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
 
     try {
 
+      /*
       // console.log("doing stuff")
       let timestamp = performance.now()
 
@@ -189,6 +269,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
       })
       set({ segmentationLayer })
     }
+      */
 
     await sleep(500)
 
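Note: onClickOnSegmentationLayer divides raw clientX/clientY by the box size without subtracting box.left/box.top, so the normalized coordinates are only correct when the layer sits at the viewport origin; the empty src/lib/utils/relativeCoords.ts added in this commit suggests a helper was planned. The usual normalization, as a sketch:

// sketch: normalize a mouse event to [0, 1] coordinates within an element
function getRelativeCoords(event: MouseEvent, element: HTMLElement) {
  const box = element.getBoundingClientRect()
  return {
    x: (event.clientX - box.left) / box.width,
    y: (event.clientY - box.top) / box.height,
  }
}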
src/components/interface/media-player/index.tsx
CHANGED
@@ -22,12 +22,13 @@ export function MediaPlayer({
   className?: string
   // currentTime?: number
 }) {
-
+  console.log("MediaPlayer called for \"" + media?.label + "\"")
 
-  if (!media
+  if (!media) { return null }
+  if (!media?.assetUrl && !media?.clapUrl) { return null }
 
-  // uncomment one of those to forcefully test the .clap player
-  media.assetUrlHd = "https://huggingface.co/datasets/jbilcke/ai-tube-cinema/tree/main/404.clap"
+  // uncomment one of those to forcefully test the .clap player from an external .clap file
+  // media.assetUrlHd = "https://huggingface.co/datasets/jbilcke/ai-tube-cinema/tree/main/404.clap"
 
   // uncomment one of those to forcefully test the .splatv player!
   // media.assetUrlHd = "https://huggingface.co/datasets/dylanebert/3dgs/resolve/main/4d/flame/flame.splatv"
src/components/interface/media-player/latent.tsx
CHANGED
@@ -20,7 +20,7 @@ export function LatentPlayer({
   // TODO add a play bar which should support fixed, streaming and live modes
   return (
     <LatentEngine
-
+      media={media}
       width={width}
       height={height}
       className={className}
src/components/interface/stream-tag/index.tsx
CHANGED
@@ -15,6 +15,12 @@ export function StreamTag({
   const isInteractive = streamType === "interactive"
   const isLive = streamType === "live"
   const isStatic = !isInteractive && !isLive
+  console.log("debug:", {
+    streamType,
+    isInteractive,
+    isLive,
+    isStatic
+  })
 
   return (
     <div className={cn(`
src/lib/clap/clapToDataUri.ts
ADDED
@@ -0,0 +1,9 @@
+import { blobToDataUri } from "@/app/api/utils/blobToDataUri"
+import { serializeClap } from "./serializeClap"
+import { ClapProject } from "./types"
+
+export async function clapToDataUri(clap: ClapProject): Promise<string> {
+  const archive = await serializeClap(clap)
+  const dataUri = await blobToDataUri(archive, "application/x-gzip")
+  return dataUri
+}
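Taken together with getMockClap and parseClap, this enables the round trip the dream page attempts: serialize a mock project to a gzipped-YAML data uri, then open it again. A sketch (run inside an async context; per the TODOs in engine.tsx, the data-uri path is still broken in this commit):

import { getMockClap } from "@/lib/clap/getMockClap"
import { clapToDataUri } from "@/lib/clap/clapToDataUri"
import { parseClap } from "@/lib/clap/parseClap"

const dataUri = await clapToDataUri(getMockClap({ showDisclaimer: true }))
const clap = await parseClap(dataUri, /* debug */ true)
console.log(clap.meta.streamType) // "interactive"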
src/lib/clap/{mockClap.ts → getMockClap.ts}
RENAMED
@@ -4,12 +4,25 @@ import { ClapProject } from "./types"
 
 let defaultSegmentDurationInMs = 2000
 
-
-
+// const demoPrompt = "closeup of Queen angelfish, bokeh"
+// const demoPrompt = "portrait of a man tv news anchor, pierre-jean-hyves, serious, bokeh"
+const demoPrompt = "dogs and cats, playing in garden, balls, trees"
+
+export function getMockClap({
+  prompt = demoPrompt,
+  showDisclaimer = true,
 }: {
-
+  prompt?: string
+  showDisclaimer?: boolean
+} = {
+  prompt: demoPrompt,
+  showDisclaimer: true,
 }): ClapProject {
-  const clap = newClap(
+  const clap = newClap({
+    meta: {
+      streamType: "interactive"
+    }
+  })
 
   let currentElapsedTimeInMs = 0
   let currentSegmentDurationInMs = defaultSegmentDurationInMs
@@ -57,8 +70,7 @@ export function mockClap({
       startTimeInMs: currentElapsedTimeInMs,
       endTimeInMs: currentSegmentDurationInMs,
       category: "video",
-
-      prompt: "portrait of a man tv news anchor, pierre-jean-hyves, serious, bokeh",
+      prompt,
       label: "demo",
       outputType: "video",
     }))
src/lib/clap/newClap.ts
CHANGED
@@ -16,6 +16,7 @@ export function newClap(clap: {
   id: clap?.meta?.id === "string" ? clap.meta.id : uuidv4(),
   title: clap?.meta?.title === "string" ? clap.meta.title : "",
   description: typeof clap?.meta?.description === "string" ? clap.meta.description : "",
+  synopsis: typeof clap?.meta?.synopsis === "string" ? clap.meta.synopsis : "",
   licence: typeof clap?.meta?.licence === "string" ? clap.meta.licence : "",
   orientation: clap?.meta?.orientation === "portrait" ? "portrait" : clap?.meta?.orientation === "square" ? "square" : "landscape",
   width: getValidNumber(clap?.meta?.width, 256, 8192, 1024),
src/lib/clap/parseClap.ts
CHANGED
@@ -3,48 +3,158 @@ import { v4 as uuidv4 } from "uuid"
 
 import { ClapHeader, ClapMeta, ClapModel, ClapProject, ClapScene, ClapSegment, ClapStreamType } from "./types"
 import { getValidNumber } from "@/lib/utils/getValidNumber"
+import { dataUriToBlob } from "@/app/api/utils/dataUriToBlob"
+
+type StringOrBlob = string | Blob
 
 /**
- *
+ * Import a clap file from various data sources into a ClapProject
+ *
+ * Inputs can be:
+ * - a Clap project (which is an object)
+ * - a URL to a remote .clap file
+ * - a string containing a YAML array
+ * - a data uri containing a gzipped YAML array
+ * - a Blob containing a gzipped YAML array
  *
  * note: it is not really async, because for some reason YAML.parse is a blocking call like for JSON,
- *
+ * there is no async version although we are now in the 20s not 90s
 */
-export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapProject> {
+export async function parseClap(src?: ClapProject | string | Blob, debug = false): Promise<ClapProject> {
 
+  try {
+    if (typeof src === "object" && Array.isArray(src?.scenes) && Array.isArray(src?.models)) {
+      if (debug) {
+        console.log("parseClap: input is already a Clap file, nothing to do:", src)
+      }
+      // we can skip verification
+      return src as ClapProject
+    }
+  } catch (err) {
+    // well, this is not a clap project
+  }
 
-  typeof inputStringOrBlob === "string"
-    ? new Blob([inputStringOrBlob], { type: "application/x-yaml" })
-    : inputStringOrBlob;
+  let stringOrBlob = (src || "") as StringOrBlob
 
+  // both should work
+  const dataUriHeader1 = "data:application/x-gzip;base64,"
+  const dataUriHeader2 = "data:application/octet-stream;base64,"
 
+  const inputIsString = typeof stringOrBlob === "string"
+  const inputIsDataUri = typeof stringOrBlob === "string" ? stringOrBlob.startsWith(dataUriHeader1) || stringOrBlob.startsWith(dataUriHeader2) : false
+  const inputIsRemoteFile = typeof stringOrBlob === "string" ? (stringOrBlob.startsWith("http://") || stringOrBlob.startsWith("https://")) : false
+
+  let inputIsBlob = typeof stringOrBlob !== "string"
+
+  let inputYamlArrayString = ""
+
+  if (debug) {
+    console.log(`parseClap: pre-analysis: ${JSON.stringify({
+      inputIsString,
+      inputIsBlob,
+      inputIsDataUri,
+      inputIsRemoteFile
+    }, null, 2)}`)
+  }
+
+  if (typeof stringOrBlob === "string") {
+    if (debug) {
+      console.log("parseClap: input is a string ", stringOrBlob.slice(0, 120))
+    }
+    if (inputIsDataUri) {
+      if (debug) {
+        console.log(`parseClap: input is a data uri archive`)
+      }
+      stringOrBlob = dataUriToBlob(stringOrBlob, "application/x-gzip")
+      if (debug) {
+        console.log(`parseClap: inputBlob = `, stringOrBlob)
+      }
+      inputIsBlob = true
+    } else if (inputIsRemoteFile) {
+      try {
+        if (debug) {
+          console.log(`parseClap: input is a remote .clap file`)
+        }
+        const res = await fetch(stringOrBlob)
+        stringOrBlob = await res.blob()
+        if (!stringOrBlob) { throw new Error("blob is empty") }
+        inputIsBlob = true
+      } catch (err) {
+        // url seems invalid
+        throw new Error(`failed to download the .clap file (${err})`)
+      }
+    } else {
+      if (debug) {
+        console.log("parseClap: input is a text string containing a YAML array")
+      }
+      inputYamlArrayString = stringOrBlob
+      inputIsBlob = false
+    }
+  }
 
+  if (typeof stringOrBlob !== "string" && stringOrBlob) {
+    if (debug) {
+      console.log("parseClap: decompressing the blob..")
+    }
+    // Decompress the input blob using gzip
+    const decompressedStream = stringOrBlob.stream().pipeThrough(new DecompressionStream('gzip'))
+
+    try {
+      // Convert the stream to text using a Response object
+      const decompressedOutput = new Response(decompressedStream)
+      // decompressedOutput.headers.set("Content-Type", "application/x-gzip")
+      if (debug) {
+        console.log("parseClap: decompressedOutput: ", decompressedOutput)
+      }
+      // const blobAgain = await decompressedOutput.blob()
+      inputYamlArrayString = await decompressedOutput.text()
+
+      if (debug && inputYamlArrayString) {
+        console.log("parseClap: successfully decompressed the blob!")
+      }
+    } catch (err) {
+      const message = `parseClap: failed to decompress (${err})`
+      console.error(message)
+      throw new Error(message)
+    }
+  }
+
+  // we don't need this anymore I think
+  // new Blob([inputStringOrBlob], { type: "application/x-yaml" })
+
+  let maybeArray: any = {}
+  try {
+    if (debug) {
+      console.log("parseClap: parsing the YAML array..")
+    }
+    // Parse YAML string to raw data
+    maybeArray = YAML.parse(inputYamlArrayString)
+  } catch (err) {
+    throw new Error("invalid clap file (input string is not YAML)")
+  }
+
-  if (…
+  if (!Array.isArray(maybeArray) || maybeArray.length < 2) {
     throw new Error("invalid clap file (need a clap format header block and project metadata block)")
   }
 
+  if (debug) {
+    console.log("parseClap: the YAML seems okay, continuing decoding..")
+  }
+
-  const maybeClapHeader = …
+  const maybeClapHeader = maybeArray[0] as ClapHeader
 
   if (maybeClapHeader.format !== "clap-0") {
     throw new Error("invalid clap file (sorry, but you can't make up version numbers like that)")
   }
 
 
-  const maybeClapMeta = …
+  const maybeClapMeta = maybeArray[1] as ClapMeta
 
   const clapMeta: ClapMeta = {
     id: typeof maybeClapMeta.id === "string" ? maybeClapMeta.id : uuidv4(),
     title: typeof maybeClapMeta.title === "string" ? maybeClapMeta.title : "",
     description: typeof maybeClapMeta.description === "string" ? maybeClapMeta.description : "",
+    synopsis: typeof maybeClapMeta.synopsis === "string" ? maybeClapMeta.synopsis : "",
     licence: typeof maybeClapMeta.licence === "string" ? maybeClapMeta.licence : "",
     orientation: maybeClapMeta.orientation === "portrait" ? "portrait" : maybeClapMeta.orientation === "square" ? "square" : "landscape",
     width: getValidNumber(maybeClapMeta.width, 256, 8192, 1024),
@@ -82,12 +192,12 @@ export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapProject>
   const afterTheScenes = afterTheModels + expectedNumberOfScenes
 
   // note: if there are no expected models, maybeModels will be empty
-  const maybeModels = …
+  const maybeModels = maybeArray.slice(afterTheHeaders, afterTheModels) as ClapModel[]
 
   // note: if there are no expected scenes, maybeScenes will be empty
-  const maybeScenes = …
+  const maybeScenes = maybeArray.slice(afterTheModels, afterTheScenes) as ClapScene[]
 
-  const maybeSegments = …
+  const maybeSegments = maybeArray.slice(afterTheScenes) as ClapSegment[]
 
   const clapModels: ClapModel[] = maybeModels.map(({
     id,
@@ -191,6 +301,9 @@ export async function parseClap(inputStringOrBlob: string | Blob): Promise<ClapProject>
     seed,
   }))
 
+  if (debug) {
+    console.log(`parseClap: successfully parsed ${clapModels.length} models, ${clapScenes.length} scenes and ${clapSegments.length} segments`)
+  }
   return {
     meta: clapMeta,
     models: clapModels,
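A short usage sketch of the new polymorphic entry point; the URL is a placeholder, and myClapBlob stands for any gzipped .clap Blob (for example the output of serializeClap below):

import { parseClap } from "@/lib/clap/parseClap"

declare const myClapBlob: Blob // any gzipped .clap archive

// from a remote .clap file (placeholder URL)
const a = await parseClap("https://example.com/demo.clap")

// from a gzipped Blob, with debug logging enabled
const b = await parseClap(myClapBlob, true)

// passing an already-parsed ClapProject is a no-op: it is returned as-is
const c = await parseClap(b)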
src/lib/clap/serializeClap.ts
CHANGED
@@ -125,6 +125,7 @@ export async function serializeClap({
   id: meta.id || uuidv4(),
   title: typeof meta.title === "string" ? meta.title : "Untitled",
   description: typeof meta.description === "string" ? meta.description : "",
+  synopsis: typeof meta.synopsis === "string" ? meta.synopsis : "",
   licence: typeof meta.licence === "string" ? meta.licence : "",
   orientation: meta.orientation === "portrait" ? "portrait" : meta.orientation === "square" ? "square" : "landscape",
   width: getValidNumber(meta.width, 256, 8192, 1024),
@@ -149,14 +150,18 @@ export async function serializeClap({
   const blobResult = new Blob([strigifiedResult], { type: "application/x-yaml" })
 
   // Create a stream for the blob
   const readableStream = blobResult.stream()
 
   // Compress the stream using gzip
   const compressionStream = new CompressionStream('gzip')
   const compressedStream = readableStream.pipeThrough(compressionStream)
 
   // Create a new blob from the compressed stream
-  const …
+  const response = new Response(compressedStream)
+
+  response.headers.set("Content-Type", "application/x-gzip")
+
+  const compressedBlob = await response.blob()
 
   return compressedBlob
 }
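serializeClap and parseClap are now symmetric around the browser's CompressionStream / DecompressionStream APIs. A self-contained sketch of that round-trip, independent of the clap format:

// gzip a string into a Blob, then inflate it back to text,
// using the same streaming APIs as serializeClap/parseClap
async function gzipRoundTrip(text: string): Promise<string> {
  const compressed = await new Response(
    new Blob([text], { type: "application/x-yaml" })
      .stream()
      .pipeThrough(new CompressionStream("gzip"))
  ).blob()

  return new Response(
    compressed.stream().pipeThrough(new DecompressionStream("gzip"))
  ).text()
}

// should log the original string
gzipRoundTrip("- format: clap-0").then(console.log)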
src/lib/on-device-ai/getInteractiveSegmentationCanvas.tsx
ADDED
@@ -0,0 +1,37 @@
+import { useRef } from "react"
+import { segmentFrame } from "./segmentFrameOnClick"
+import { ImageSource, InteractiveSegmenterResult } from "@mediapipe/tasks-vision"
+
+
+export function InteractiveSegmentationCanvas({
+  src,
+  onClick,
+}: {
+  src?: ImageSource
+  onClick?: (results: InteractiveSegmenterResult) => void
+}) {
+  const segmentationClickRef = useRef<HTMLDivElement>(null)
+  return (
+    <div
+      ref={segmentationClickRef}
+      onClick={(event) => {
+        if (!segmentationClickRef.current || !src || !onClick) { return }
+
+        const box = segmentationClickRef.current.getBoundingClientRect()
+
+        // clientX/clientY are viewport-relative: shift by the box origin
+        // before normalizing to the [0, 1] range the segmenter expects
+        const px = event.clientX - box.left
+        const py = event.clientY - box.top
+
+        const x = px / box.width
+        const y = py / box.height
+
+        const fn = async () => {
+          const results: InteractiveSegmenterResult = await segmentFrame(src, x, y)
+          onClick(results)
+        }
+        fn()
+
+      }}>
+    </div>
+  )
+}
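A hypothetical usage sketch for the component above, overlaying the click layer on a video element; note that the wrapper div will still need absolute positioning and a size for the overlay to receive clicks (none of this wiring is in the commit):

import { useRef } from "react"
import { InteractiveSegmentationCanvas } from "@/lib/on-device-ai/getInteractiveSegmentationCanvas"

function SegmentableVideo({ url }: { url: string }) {
  const videoRef = useRef<HTMLVideoElement>(null)
  return (
    <div className="relative">
      <video ref={videoRef} src={url} autoPlay muted playsInline />
      <InteractiveSegmentationCanvas
        src={videoRef.current ?? undefined}
        onClick={(results) => {
          // the category mask marks the pixels of the clicked object
          console.log("segmented:", results.categoryMask)
        }}
      />
    </div>
  )
}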
src/lib/on-device-ai/getSegmentationCanvas.tsx
CHANGED
@@ -26,6 +26,7 @@ export async function getSegmentationCanvas({
     height: `${height}px`,
   };
 
+  console.log("canvas:", canvas)
   const CanvasComponent = () => (
     <canvas
       ref={(node) => {
src/lib/on-device-ai/identifyFrame.ts
ADDED
@@ -0,0 +1,52 @@
+import {
+  FilesetResolver,
+  ObjectDetector,
+  ObjectDetectorResult,
+  ImageSource
+} from "@mediapipe/tasks-vision"
+
+export type VideoObjectDetector = (videoFrame: ImageSource, timestamp: number) => Promise<ObjectDetectorResult>
+
+const getObjectDetector = async (): Promise<VideoObjectDetector> => {
+  const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+  );
+
+  const objectDetector = await ObjectDetector.createFromOptions(vision, {
+    baseOptions: {
+      modelAssetPath: `https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite`
+    },
+    scoreThreshold: 0.5,
+    runningMode: "VIDEO"
+  });
+
+  const detector: VideoObjectDetector = async (videoFrame: ImageSource, timestamp: number): Promise<ObjectDetectorResult> => {
+    const result = objectDetector.detectForVideo(videoFrame, timestamp)
+    return result
+  }
+
+  return detector
+}
+
+
+const globalState: { detector?: VideoObjectDetector } = {};
+
+(async () => {
+  globalState.detector = globalState.detector || (await getObjectDetector())
+})();
+
+export async function identifyFrame(frame: ImageSource, timestamp: number): Promise<ObjectDetectorResult> {
+  console.log("identifyFrame: loading detector..")
+  globalState.detector = globalState.detector || (await getObjectDetector())
+
+  console.log("identifyFrame: detecting..")
+  return globalState.detector(frame, timestamp)
+}
+
+// to run:
+
+// see doc:
+// https://developers.google.com/mediapipe/solutions/vision/image_segmenter/web_js#video
+// imageSegmenter.segmentForVideo(video, startTimeMs, callbackForVideo);
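A hedged sketch of driving the detector from a playing video; requestVideoFrameCallback is used for scheduling, and performance.now() provides the monotonically increasing timestamp MediaPipe's VIDEO mode expects (the element lookup is a placeholder):

import { identifyFrame } from "@/lib/on-device-ai/identifyFrame"

const video = document.querySelector("video")! // placeholder lookup

const onFrame = async () => {
  const result = await identifyFrame(video, performance.now())
  for (const detection of result.detections) {
    console.log(detection.categories[0]?.categoryName, detection.boundingBox)
  }
  video.requestVideoFrameCallback(onFrame)
}

video.requestVideoFrameCallback(onFrame)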
src/lib/on-device-ai/segmentFrameOnClick.ts
ADDED
@@ -0,0 +1,58 @@
+import { FilesetResolver, InteractiveSegmenter, InteractiveSegmenterResult, ImageSource } from "@mediapipe/tasks-vision"
+
+export type InteractiveVideoSegmenter = (videoFrame: ImageSource, x: number, y: number) => Promise<InteractiveSegmenterResult>
+
+const getInteractiveSegmenter = async (): Promise<InteractiveVideoSegmenter> => {
+  const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+  );
+
+  const interactiveSegmenter = await InteractiveSegmenter.createFromOptions(vision, {
+    baseOptions: {
+      modelAssetPath:
+        "https://storage.googleapis.com/mediapipe-tasks/interactive_segmenter/ptm_512_hdt_ptm_woid.tflite"
+    },
+    outputCategoryMask: true,
+    outputConfidenceMasks: false,
+  });
+
+  const segmenter: InteractiveVideoSegmenter = (
+    videoFrame: ImageSource,
+    x: number,
+    y: number
+  ): Promise<InteractiveSegmenterResult> => {
+    return new Promise((resolve, reject) => {
+      interactiveSegmenter.segment(
+        videoFrame,
+        {
+          keypoint: { x, y }
+        },
+        (results) => {
+          resolve(results)
+        })
+    })
+  }
+
+  return segmenter
+}
+
+
+const globalState: { segmenter?: InteractiveVideoSegmenter } = {};
+
+(async () => {
+  globalState.segmenter = globalState.segmenter || (await getInteractiveSegmenter())
+})();
+
+export async function segmentFrame(frame: ImageSource, x: number, y: number): Promise<InteractiveSegmenterResult> {
+  console.log("segmentFrame: loading segmenter..")
+  globalState.segmenter = globalState.segmenter || (await getInteractiveSegmenter())
+
+  console.log("segmentFrame: segmenting..")
+  return globalState.segmenter(frame, x, y)
+}
+
+// to run:
+
+// see doc:
+// https://developers.google.com/mediapipe/solutions/vision/image_segmenter/web_js#video
+// imageSegmenter.segmentForVideo(video, startTimeMs, callbackForVideo);
ADDED
File without changes
|
src/types/general.ts
CHANGED
@@ -637,9 +637,11 @@ export type InterfaceView =
|
|
637 |
| "user_account"
|
638 |
| "public_channels"
|
639 |
| "public_channel" // public view of a channel
|
640 |
-
| "public_media" // public view of
|
641 |
| "public_media_embed" // for integration into twitter etc
|
642 |
| "public_music_videos" // public music videos - it's a special category, because music is *cool*
|
|
|
|
|
643 |
| "public_gaming" // for AiTube Gaming
|
644 |
| "public_4d" // for AiTube 4D
|
645 |
| "public_live" // for AiTube Live
|
|
|
637 |
| "user_account"
|
638 |
| "public_channels"
|
639 |
| "public_channel" // public view of a channel
|
640 |
+
| "public_media" // public view of an individual media (video, gaussian splat, clap video)
|
641 |
| "public_media_embed" // for integration into twitter etc
|
642 |
| "public_music_videos" // public music videos - it's a special category, because music is *cool*
|
643 |
+
| "public_latent_media" // public view of an individual dream (a latent media, so it's not a "real" file)
|
644 |
+
| "public_latent_media_embed" // for integration into twitter etc (which would be hardcore for our server load.. so maybe not)
|
645 |
| "public_gaming" // for AiTube Gaming
|
646 |
| "public_4d" // for AiTube 4D
|
647 |
| "public_live" // for AiTube Live
|
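For context, a small sketch of how a string-union view type like this is typically consumed; the function is a placeholder, not code from this commit:

import { InterfaceView } from "@/types/general"

// hypothetical dispatch over a few of the new members
function getPageTitle(view: InterfaceView): string {
  switch (view) {
    case "public_media": return "Media"
    case "public_latent_media": return "Dream"
    case "public_latent_media_embed": return "Dream (embed)"
    default: return "AiTube"
  }
}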