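"""Gradio demo: text and image similarity search over video frames with Pixeltable.

The app ingests an uploaded video, extracts frames with FrameIterator, indexes
them with CLIP embeddings, and lets the user search the frames by text or by
an example image.
"""
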
import gradio as gr
import PIL.Image

import pixeltable as pxt
from pixeltable.functions.huggingface import clip_image, clip_text
from pixeltable.iterators import FrameIterator


# CLIP embedding UDFs used by the frame index. Images and text are mapped into
# the same vector space, so frames can be searched with either modality.
@pxt.expr_udf
def embed_image(img: PIL.Image.Image):
    return clip_image(img, model_id='openai/clip-vit-base-patch32')


@pxt.expr_udf
def str_embed(s: str):
    return clip_text(s, model_id='openai/clip-vit-base-patch32')


def process_video(video_file, progress=gr.Progress()):
    progress(0, desc="Initializing...")

    # Start from a clean slate: drop and recreate the Pixeltable directory.
    pxt.drop_dir('video_search', force=True)
    pxt.create_dir('video_search')

    # A table for uploaded videos, plus a view that extracts one frame per second.
    video_table = pxt.create_table('video_search.videos', {'video': pxt.VideoType()})
    frames_view = pxt.create_view(
        'video_search.frames',
        video_table,
        iterator=FrameIterator.create(video=video_table.video, fps=1)
    )

    progress(0.2, desc="Inserting video...")
    video_table.insert([{'video': video_file.name}])

    # Index the extracted frames so they can be searched by text or by image.
    progress(0.4, desc="Creating embedding index...")
    frames_view.add_embedding_index('frame', string_embed=str_embed, image_embed=embed_image)

    progress(1.0, desc="Processing complete")
    return "Good news! Your video has been processed. You can now find the moments you need by searching with text or images."


def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    frames_view = pxt.get_table('video_search.frames')

    progress(0.5, desc="Performing search...")
    # similarity() accepts either a text string or a PIL image and uses the
    # matching embedding function registered on the index, so no branching on
    # search_type is needed here.
    sim = frames_view.frame.similarity(query)

    results = (
        frames_view.order_by(sim, asc=False)
        .limit(num_results)
        .select(frames_view.frame, sim=sim)
        .collect()
    )

    progress(1.0, desc="Search complete")
    return [row['frame'] for row in results]
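

# A minimal sketch (not used by the UI) of querying the index programmatically,
# assuming a video has already been processed into 'video_search.frames';
# the query text is just an illustrative placeholder:
#
#   frames = pxt.get_table('video_search.frames')
#   sim = frames.frame.similarity('a person riding a bicycle')
#   top = frames.order_by(sim, asc=False).limit(3).select(frames.frame, sim=sim).collect()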


# Gradio UI
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(
        """
<div style="margin-bottom: 20px;">
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
<h2>Text and Image similarity search on video frames with embedding indexes</h2>
</div>
        """
    )
    gr.HTML(
        """
<p>
<a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
</p>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("<h3>1. Insert video</h3>")

            video_file = gr.File(label="Upload Video")
            process_button = gr.Button("Process Video")
            process_output = gr.Textbox(label="Status", lines=2)

            gr.Markdown("<h3>2. Search video frames</h3>")

            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
            text_input = gr.Textbox(label="Text Query")
            image_input = gr.Image(label="Image Query", type="pil", visible=False)
            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
            search_button = gr.Button("Search")

        with gr.Column(scale=2):
            gr.Markdown("<h3>3. Visualize results</h3>")

            results_gallery = gr.Gallery(label="Search Results", columns=3)

    gr.Examples(
        examples=[
            ["bangkok.mp4"],
            ["lotr.mp4"],
            ["mi.mp4"],
        ],
        label="Click one of the examples below to get started",
        inputs=[video_file],
        fn=process_video
    )

    # Show the text box for text search and the image upload for image search.
    def update_search_input(choice):
        return gr.update(visible=choice == "Text"), gr.update(visible=choice == "Image")

    search_type.change(update_search_input, search_type, [text_input, image_input])

    process_button.click(
        process_video,
        inputs=[video_file],
        outputs=[process_output]
    )

    def perform_search(search_type, text_query, image_query, num_results):
        query = text_query if search_type == "Text" else image_query
        return similarity_search(query, search_type, num_results)

    search_button.click(
        perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery]
    )


if __name__ == "__main__":
    demo.launch()