socm / tasks.py
spencer's picture
add normal files
6df828c
import glob
from collections import namedtuple
from PIL import Image
from embeddings import FaissIndex, VectorSearch
class Summary:
def __init__(self, video_dir, llm):
self.video_dir = video_dir
self.llm = llm
self.vs = VectorSearch()
def flatten_list(self, s):
if s == []:
return s
if isinstance(s[0], list):
return self.flatten_list(s[0]) + self.flatten_list(s[1:])
return s[:1] + self.flatten_list(s[1:])
def parse_history(self):
history = []
with open(f"{self.video_dir}/history.txt") as f:
for line in f:
history.append(line.strip())
history_proc = []
proc = lambda x: list(map(str.strip, x.strip().split(",")))
Record = namedtuple("Record", "frame places objects activities".split(" "))
for hist in history:
hist_list = hist.split(":")
flat = self.flatten_list([x.split(".") for x in hist_list])
frame = flat[0]
places = proc(flat[3])
objects = proc(flat[5])
activities = proc(flat[-1])
history_proc.append(Record(*[frame, places, objects, activities]))
return history_proc
def create_prompts(self, history_proc):
split_idx = [i for i in range(len(history_proc)) if i % 5 == 0] + [
len(history_proc)
]
range_idx = [(split_idx[x - 1], split_idx[x]) for x in range(1, len(split_idx))]
prompts = []
for r in range_idx:
prompts.append(self.vs.prompt_summary(history_proc[r[0] : r[1]]))
return prompts
def call_model(self, prompts):
results = []
for prompt in prompts:
results.append(self.llm(prompt)[0]["generated_text"])
return zip(prompts, results)
def generate_summaries(self):
history_proc = self.parse_history()
prompts = self.create_prompts(history_proc)
results = self.call_model(prompts)
return results
class VideoSearch:
def __init__(self, video_dir, vlm, llm=None):
self.video_dir = video_dir
self.fi = FaissIndex(faiss_index_location=f"{self.video_dir}/video.index")
self.vlm = vlm
self.llm = llm
def find_nearest_frames(self, query):
test = self.vlm.get_text_emb(query)
D, I, frames = self.fi.search(test)
return D, frames
def get_images(self, frames, k=5):
images = []
for frame in frames[:k]:
loc = glob.glob(f"{self.video_dir}/*_{frame}.jpg")[0]
images.append(Image.open(loc))
return images
def search_engine(self, query):
D, frames = self.find_nearest_frames(query)
images = self.get_images(frames)
return images