# Vlogger-ShowMaker / vlogger / videocaption.py

import ast
import os

import cv2 as cv
import numpy as np
import torch
import torchvision
from decord import VideoReader, cpu
from PIL import Image, ImageDraw, ImageFont


def captioning(en_prompt_file, zh_prompt_file, input_video_dir, output_video_dir):
    # Read the English prompt file: a Python-literal list of dicts, each with a
    # "video fragment description" entry that becomes the caption text.
    prompt_list = []
    with open(en_prompt_file, 'r', encoding='utf-8') as f:
        video_prompts = f.read()
    video_fragments = ast.literal_eval(video_prompts)
    for video_fragment in video_fragments:
        prompt_list.append(video_fragment["video fragment description"])

    # Keep only clips whose file names are integers (e.g. 0.mp4, 1.mp4, ...)
    # and sort them by that index so captions line up with the fragments.
    video_fnames = []
    for fname in os.listdir(input_video_dir):
        try:
            int(fname.split('.')[0])
            video_fnames.append(fname)
        except ValueError:
            continue
    video_fnames.sort(key=lambda x: int(x.split('.')[0]))

    font_face = cv.FONT_HERSHEY_COMPLEX
    if not os.path.exists(output_video_dir):
        os.makedirs(output_video_dir)

    for i in range(len(video_fnames)):
        # font_zh is loaded for the Chinese captions (zh_prompt_file) but is not
        # used below; only the English caption is rendered.
        font_zh = ImageFont.truetype(font='MSYH.TTC', size=18)
        fontScale = 0.4
        video_path = os.path.join(input_video_dir, video_fnames[i])
        video = VideoReader(video_path, ctx=cpu(0))
        video = video[:].asnumpy()  # (num_frames, H, W, 3) uint8 frames

        # Center the caption horizontally; if it is wider than the frame,
        # shrink the font so the text spans the full frame width instead.
        (fw, fh), bh = cv.getTextSize(prompt_list[i], font_face, fontScale, 1)
        pos_en = (int((video[0].shape[1] - fw) / 2), 300)
        if pos_en[0] < 0:
            scale = video[0].shape[1] / fw
            fontScale *= scale
            pos_en = (0, 300)

        for j in range(video.shape[0]):
            cv.putText(video[j], prompt_list[i], pos_en, font_face, fontScale,
                       (255, 255, 255), 1, cv.LINE_AA)
            # The PIL round-trip below is a leftover hook for drawing the Chinese
            # caption with font_zh; `draw` is never used and the two cvtColor
            # calls cancel out, so the frame keeps only the English text.
            img = Image.fromarray(cv.cvtColor(video[j], cv.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(img)
            img = np.array(img)
            video[j] = cv.cvtColor(img, cv.COLOR_RGB2BGR)

        torchvision.io.write_video(os.path.join(output_video_dir, str(i) + '.mp4'), video, fps=8)

    print("Caption OK", flush=True)