Spaces:

Vchitect
/

Vlogger-ShowMaker

Running on Zero

App Files Files Community

Vlogger-ShowMaker / vlogger /videocaption.py

GrayShine

Upload 60 files

2e5e07d verified 5 months ago

raw history blame contribute delete

No virus

1.97 kB

	import torch
	import ast
	import os
	import cv2 as cv
	from PIL import Image, ImageDraw, ImageFont
	from decord import VideoReader, cpu
	import torchvision
	import numpy as np


	def captioning(en_prompt_file, zh_prompt_file, input_video_dir, output_video_dir):
	    """Burn the English fragment description into every frame of each video.

	    The prompt file is parsed with ``ast.literal_eval`` and must contain a
	    Python literal that iterates over mappings, each having a
	    ``"video fragment description"`` key. Input videos are the entries of
	    ``input_video_dir`` whose name up to the first ``.`` parses as an integer;
	    they are processed in ascending numeric order and the k-th video (0-based)
	    is captioned with the k-th prompt and written to
	    ``<output_video_dir>/<k>.mp4`` at 8 fps.

	    Args:
	        en_prompt_file: Path of the UTF-8 text file holding the prompt literal.
	        zh_prompt_file: Accepted for interface compatibility; not read here.
	        input_video_dir: Directory containing the numbered input videos.
	        output_video_dir: Directory for the captioned videos; created
	            (including parents) when it does not exist.

	    Raises:
	        IndexError: If there are more numbered videos than prompts. Raised
	            before any video is decoded or written.
	        ValueError, SyntaxError: If the prompt file is not a valid literal.
	        KeyError: If a fragment lacks ``"video fragment description"``.
	    """
	    with open(en_prompt_file, 'r', encoding='utf-8') as f:
	        video_fragments = ast.literal_eval(f.read())
	    prompt_list = [
	        fragment["video fragment description"] for fragment in video_fragments
	    ]

	    video_fnames = []
	    for fname in os.listdir(input_video_dir):
	        try:
	            int(fname.split('.')[0])
	        except ValueError:
	            # Not a numbered file (e.g. "notes.txt", ".hidden"): skip it.
	            continue
	        video_fnames.append(fname)
	    video_fnames.sort(key=lambda name: int(name.split('.')[0]))

	    # Fail early and clearly instead of midway through with some outputs
	    # already written.
	    if len(video_fnames) > len(prompt_list):
	        raise IndexError(
	            f"found {len(video_fnames)} numbered videos in {input_video_dir!r} "
	            f"but only {len(prompt_list)} prompts in {en_prompt_file!r}"
	        )

	    os.makedirs(output_video_dir, exist_ok=True)
	    for i, fname in enumerate(video_fnames):
	        reader = VideoReader(os.path.join(input_video_dir, fname), ctx=cpu(0))
	        # Decode the whole clip into one array so frames can be edited in place.
	        video = reader[:].asnumpy()
	        _draw_caption(video, prompt_list[i])
	        torchvision.io.write_video(
	            os.path.join(output_video_dir, f"{i}.mp4"), video, fps=8
	        )
	    print("Caption OK", flush=True)


	def _draw_caption(frames, text):
	    """Draw ``text`` in white on every frame of ``frames``, modifying it in place.

	    The text is centred horizontally using the width of the first frame and
	    its baseline is placed at y=300. When the text is wider than the frame at
	    the default scale, the font scale is reduced proportionally and the text
	    starts at the left edge.
	    """
	    font_face = cv.FONT_HERSHEY_COMPLEX
	    font_scale = 0.4
	    (text_width, _), _ = cv.getTextSize(text, font_face, font_scale, 1)
	    frame_width = frames[0].shape[1]

	    x = int((frame_width - text_width) / 2)
	    if x < 0:
	        # Shrink the font so the caption spans exactly the frame width.
	        font_scale *= frame_width / text_width
	        x = 0

	    for frame in frames:
	        # Each `frame` is a view into `frames`, so putText edits the clip itself.
	        cv.putText(frame, text, (x, 300), font_face, font_scale,
	                   (255, 255, 255), 1, cv.LINE_AA)