|
import gradio as gr |
|
import pixeltable as pxt |
|
from pixeltable.functions.huggingface import clip_image, clip_text |
|
from pixeltable.iterators import FrameIterator |
|
import PIL.Image |
|
import os |
|
|
|
|
|
@pxt.expr_udf
def embed_image(img: PIL.Image.Image):
    """Embed an image with CLIP.

    Delegates to ``clip_image`` using the ``openai/clip-vit-base-patch32``
    checkpoint.
    """
    clip_checkpoint = 'openai/clip-vit-base-patch32'
    return clip_image(img, model_id=clip_checkpoint)
|
|
|
@pxt.expr_udf
def str_embed(s: str):
    """Embed a text string with CLIP.

    Delegates to ``clip_text`` using the ``openai/clip-vit-base-patch32``
    checkpoint.
    """
    clip_checkpoint = 'openai/clip-vit-base-patch32'
    return clip_text(s, model_id=clip_checkpoint)
|
|
|
|
|
def process_video(video_file, progress=gr.Progress()):
    """Rebuild the ``video_search`` directory and index the uploaded video.

    Drops and recreates the ``video_search`` directory, creates a video table
    and a one-frame-per-second frames view on top of it, inserts the uploaded
    video, and adds an embedding index on the ``frame`` column.

    Args:
        video_file: Value delivered by the upload component. Either an object
            exposing the file path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A status message suitable for display in a textbox.
    """
    # Nothing uploaded: report it instead of failing on ``None.name``.
    if video_file is None:
        return "Please upload a video before processing."

    # Accept both a file wrapper carrying ``.name`` and a bare path string.
    video_path = getattr(video_file, 'name', video_file)

    progress(0, desc="Initializing...")

    # Start from a clean slate so repeated uploads do not collide with the
    # tables left behind by an earlier run.
    pxt.drop_dir('video_search', force=True)
    pxt.create_dir('video_search')

    video_table = pxt.create_table('video_search.videos', {'video': pxt.VideoType()})

    frames_view = pxt.create_view(
        'video_search.frames',
        video_table,
        iterator=FrameIterator.create(video=video_table.video, fps=1),
    )

    progress(0.2, desc="Inserting video...")
    video_table.insert([{'video': video_path}])

    progress(0.4, desc="Creating embedding index...")
    # Register both embedders so the same index can serve text and image queries.
    frames_view.add_embedding_index('frame', string_embed=str_embed, image_embed=embed_image)

    progress(1.0, desc="Processing complete")
    return "Video processed and indexed successfully!"
|
|
|
|
|
def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    """Return the frames most similar to ``query``.

    Args:
        query: The search query: a text string or an image, depending on the
            selected search type.
        search_type: ``"Text"`` or ``"Image"``. Kept for interface
            compatibility; the similarity expression is the same for both, so
            it does not alter the lookup.
        num_results: Maximum number of frames to return.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A list of the matching frames, most similar first. Empty when no
        query was supplied.
    """
    # An empty query has nothing to embed; return an empty result set rather
    # than handing ``None``/blank text to the index.
    if query is None or (isinstance(query, str) and not query.strip()):
        return []

    frames_view = pxt.get_table('video_search.frames')

    progress(0.5, desc="Performing search...")

    # The call is identical for text and image queries, so no branch on
    # ``search_type`` is needed.
    sim = frames_view.frame.similarity(query)

    # NOTE(review): slider values may arrive as floats; coerce so ``limit``
    # always receives an integer.
    top_k = int(num_results)
    results = (
        frames_view.order_by(sim, asc=False)
        .limit(top_k)
        .select(frames_view.frame, sim=sim)
        .collect()
    )

    progress(1.0, desc="Search complete")

    return [row['frame'] for row in results]
|
|
|
|
|
# Gradio UI: one tab to upload/index a video, one tab to search its frames.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(
        """
        <div style="margin: 0 auto;">
            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
            <h1 style="margin-bottom: 0.5em;">Text and image similarity search on video frames with embedding indexes</h1>
        </div>
        """
    )
    gr.HTML(
        """
        <p>
            <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #4D148C; text-decoration: none; font-weight: bold;">Pixeltable</a>
            enables storage, versioning, indexing, and similarity search on video frames.
        </p>
        """
    )

    with gr.Tab("1. Upload and Process Video"):
        video_file = gr.File(label="Upload Video")
        process_button = gr.Button("Process Video")
        process_output = gr.Textbox(label="Processing Status")

        process_button.click(
            process_video,
            inputs=[video_file],
            outputs=[process_output],
        )

    with gr.Tab("2. Text and Image Similarity Search on Frames"):
        with gr.Row():
            with gr.Column():
                search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
                text_input = gr.Textbox(label="Text Query")
                # Hidden at start so the initial layout matches the default
                # "Text" search type; the radio handler below toggles it.
                image_input = gr.Image(label="Image Query", type="pil", visible=False)
                num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
            with gr.Column():
                search_button = gr.Button("Search")
                results_gallery = gr.Gallery(label="Search Results")

        def update_search_input(choice):
            """Show only the query input that matches the selected search type."""
            return gr.update(visible=choice == "Text"), gr.update(visible=choice == "Image")

        search_type.change(update_search_input, search_type, [text_input, image_input])

        def perform_search(search_type, text_query, image_query, num_results, progress=gr.Progress()):
            """Pick the active query and run the similarity search.

            ``progress`` is declared on this handler and forwarded so that the
            progress updates issued inside ``similarity_search`` are attached
            to this event (assumes Gradio only injects a tracker into the
            event handler itself — TODO confirm).
            """
            query = text_query if search_type == "Text" else image_query
            return similarity_search(query, search_type, num_results, progress=progress)

        search_button.click(
            perform_search,
            inputs=[search_type, text_input, image_input, num_results],
            outputs=[results_gallery],
        )

if __name__ == "__main__":
    demo.launch(debug=True)