ManBib
/

faster-whisper-readme

Inference Endpoints

Model card Files Files and versions Community

faster-whisper-readme / sorter.py

ManBib's picture

fixed processing through link or audio bytes

2b16bc4 about 1 year ago

history blame contribute delete

3.47 kB

	import argparse
	import os

	import cv2
	from tqdm import tqdm

	import imgcomparison as ic
	import mediaoutput
	import sources
	from analyzer import Analyzer
	from slides import SlideDataHelper


	class SlideSorter(Analyzer):
	    """
	    Groups the slides of a source into unique slides.

	    Slides are read in the order the source provides them. A slide that the
	    comparator considers identical to an earlier slide is marked as a
	    duplicate and its time is recorded on the slide it duplicates.
	    """

	    def __init__(self, source, comparator=None):
	        """
	        Default initializer
	        :param source: the source of the slides; ``contents()`` is used to
	        iterate the slides and ``len()`` to size the progress bar in ``sort``
	        :param comparator: the comparator to determine, if two slides
	        are duplicates. Defaults to ``ic.AbsDiffHistComparator(0.98)``.
	        """
	        # Build the default per instance rather than once at import time, so
	        # sorters never share one comparator object by accident.
	        if comparator is None:
	            comparator = ic.AbsDiffHistComparator(0.98)
	        self.comparator = comparator
	        self.source = source

	    def sort(self):
	        """
	        Collects the unique slides of the source while showing a progress bar.
	        :return: a list of the slides that are not duplicates, in the order
	        in which ``group_slides`` produced them
	        """
	        unique_slides = []
	        with tqdm(total=len(self.source), desc="Sorting Slides: ") as pbar:
	            for _, slide in self.group_slides():
	                # tqdm.update() expects an increment, not an absolute
	                # position; every yielded item is one processed slide.
	                pbar.update(1)
	                if slide is not None:
	                    unique_slides.append(slide)

	        return unique_slides

	    def group_slides(self):
	        """
	        Groups the slides by eliminating duplicates.

	        Slides that are already marked are skipped, but stay available as
	        comparison candidates for later slides. Every other slide is compared
	        with all slides seen before it; the first match wins.
	        :return: a generator of ``(counter, slide)`` tuples, where ``counter``
	        is the number of slides processed so far (skipped slides are not
	        counted) and ``slide`` is the unique slide, or ``None`` if the
	        processed slide turned out to be a duplicate
	        """
	        slides = []
	        loop_counter = 0
	        page_counter = 1
	        for slide in self.source.contents():
	            slides.append(slide)
	            if slide.marked:
	                continue

	            found = False
	            original = None
	            for other in slides[:-1]:
	                if self.comparator.are_same(slide.img, other.img):
	                    found = True
	                    # A marked slide is itself a duplicate, so follow its
	                    # reference to the slide it duplicates.
	                    original = other.reference if other.marked else other
	                    # Stop at the first match: scanning on would add the same
	                    # time to a reference once per matching earlier slide and
	                    # yield several entries for a single input slide.
	                    break

	            if found:
	                original.add_time(slide.time)
	                slide.reference = original
	                slide.marked = True
	                yield loop_counter, None
	            else:
	                slide.page_number = page_counter
	                yield loop_counter, slide
	                page_counter += 1
	            loop_counter += 1

	    def analyze(self):
	        """
	        Yields only the unique slides produced by ``group_slides``.
	        :return: a generator of slides without duplicates
	        """
	        for _, slide in self.group_slides():
	            if slide is not None:
	                yield slide


	if __name__ == '__main__':
	    # Command line entry point: read the slides from a directory and group
	    # them into unique slides.
	    parser = argparse.ArgumentParser(description="Slide Sorter")
	    parser.add_argument("-d", "--inputslides", help="path of the sequentially sorted slides", default="slides/")
	    parser.add_argument("-o", "--outpath", help="path to output slides", default="unique/", nargs='?')
	    parser.add_argument("-f", "--fileformat", help="file format of the output images e.g. '.jpg'",
	                        default=".jpg", nargs='?')
	    parser.add_argument("-t", "--timetable",
	                        help="path where the timetable should be written (default is the outpath+'timetable.txt')",
	                        nargs='?', default=None)
	    args = parser.parse_args()
	    if args.timetable is None:
	        args.timetable = os.path.join(args.outpath, "timetable.txt")

	    # SlideSorter.__init__ accepts only (source, comparator). Passing the
	    # output path, timetable and file format positionally raised a TypeError
	    # (and would otherwise have bound the output path to ``comparator``).
	    # NOTE(review): --outpath, --fileformat and --timetable are still parsed
	    # for command line compatibility, but nothing in this script consumes
	    # them; confirm whether the result of sort() should be written to disk.
	    sorter = SlideSorter(sources.ListSource(SlideDataHelper(args.inputslides).get_slides()))
	    sorter.sort()