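"""Gradio demo: text and image similarity search over video frames with Pixeltable.

The app ingests an uploaded video, extracts frames with FrameIterator, indexes
them with CLIP embeddings, and lets the user search the frames by text or by
an example image.
"""
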
import gradio as gr
import PIL.Image

import pixeltable as pxt
from pixeltable.functions.huggingface import clip_image, clip_text
from pixeltable.iterators import FrameIterator


# CLIP embedding UDFs used by the frame index. Images and text are mapped into
# the same vector space, so frames can be searched with either modality.
@pxt.expr_udf
def embed_image(img: PIL.Image.Image):
    return clip_image(img, model_id='openai/clip-vit-base-patch32')


@pxt.expr_udf
def str_embed(s: str):
    return clip_text(s, model_id='openai/clip-vit-base-patch32')


def process_video(video_file, progress=gr.Progress()):
    progress(0, desc="Initializing...")

    # Start from a clean slate: drop and recreate the Pixeltable directory.
    pxt.drop_dir('video_search', force=True)
    pxt.create_dir('video_search')

    # A table for uploaded videos, plus a view that extracts one frame per second.
    video_table = pxt.create_table('video_search.videos', {'video': pxt.VideoType()})
    frames_view = pxt.create_view(
        'video_search.frames',
        video_table,
        iterator=FrameIterator.create(video=video_table.video, fps=1)
    )

    progress(0.2, desc="Inserting video...")
    video_table.insert([{'video': video_file.name}])

    # Index the extracted frames so they can be searched by text or by image.
    progress(0.4, desc="Creating embedding index...")
    frames_view.add_embedding_index('frame', string_embed=str_embed, image_embed=embed_image)

    progress(1.0, desc="Processing complete")
    return "Good news! Your video has been processed. You can now find the moments you need by searching with text or images."


def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    frames_view = pxt.get_table('video_search.frames')

    progress(0.5, desc="Performing search...")
    # similarity() accepts either a text string or a PIL image and uses the
    # matching embedding function registered on the index, so no branching on
    # search_type is needed here.
    sim = frames_view.frame.similarity(query)

    results = (
        frames_view.order_by(sim, asc=False)
        .limit(num_results)
        .select(frames_view.frame, sim=sim)
        .collect()
    )

    progress(1.0, desc="Search complete")
    return [row['frame'] for row in results]
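

# A minimal sketch (not used by the UI) of querying the index programmatically,
# assuming a video has already been processed into 'video_search.frames';
# the query text is just an illustrative placeholder:
#
#   frames = pxt.get_table('video_search.frames')
#   sim = frames.frame.similarity('a person riding a bicycle')
#   top = frames.order_by(sim, asc=False).limit(3).select(frames.frame, sim=sim).collect()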


# Gradio UI
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(
        """
<div style="margin-bottom: 20px;">
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
<h2>Text and Image similarity search on video frames with embedding indexes</h2>
</div>
        """
    )
    gr.HTML(
        """
<p>
<a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
</p>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("<h3>1. Insert video</h3>")

            video_file = gr.File(label="Upload Video")
            process_button = gr.Button("Process Video")
            process_output = gr.Textbox(label="Status", lines=2)

            gr.Markdown("<h3>2. Search video frames</h3>")

            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
            text_input = gr.Textbox(label="Text Query")
            image_input = gr.Image(label="Image Query", type="pil", visible=False)
            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
            search_button = gr.Button("Search")

        with gr.Column(scale=2):
            gr.Markdown("<h3>3. Visualize results</h3>")

            results_gallery = gr.Gallery(label="Search Results", columns=3)

    gr.Examples(
        examples=[
            ["bangkok.mp4"],
            ["lotr.mp4"],
            ["mi.mp4"],
        ],
        label="Click one of the examples below to get started",
        inputs=[video_file],
        fn=process_video
    )

    # Show the text box for text search and the image upload for image search.
    def update_search_input(choice):
        return gr.update(visible=choice == "Text"), gr.update(visible=choice == "Image")

    search_type.change(update_search_input, search_type, [text_input, image_input])

    process_button.click(
        process_video,
        inputs=[video_file],
        outputs=[process_output]
    )

    def perform_search(search_type, text_query, image_query, num_results):
        query = text_query if search_type == "Text" else image_query
        return similarity_search(query, search_type, num_results)

    search_button.click(
        perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery]
    )


if __name__ == "__main__":
    demo.launch()