artificial-styletts2 / landscape2soundscape.py

fx PATH

0572d9a about 2 months ago

9.24 kB

	import numpy as np
	import subprocess
	import cv2

	# with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server

	# yt-dlp is installed in .d4
	# Download Part of Video
	# yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
	# ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1

	# https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg

	def _shift(x):
	    """Return a copy of ``x`` circularly shifted by a random offset.

	    The offset is drawn with ``np.random.randint`` from the half-open range
	    ``[int(.24 * n), int(.74 * n))`` where ``n = x.shape[0]``, and applied
	    with ``np.roll`` without an ``axis`` argument (so a multi-dimensional
	    input is rolled in flattened order and then reshaped back).

	    Args:
	        x: array exposing ``.shape`` and accepted by ``np.roll``.

	    Returns:
	        A new array with the same shape as ``x``. Inputs whose first
	        dimension is shorter than 2 are returned as an unshifted copy,
	        because the offset range is empty for them.
	    """
	    n = x.shape[0]
	    # Truncate explicitly instead of handing float bounds to randint.
	    low = int(.24 * n)
	    high = int(.74 * n)
	    if high <= low:
	        # n is 0 or 1: randint(low, high) would raise ValueError (low >= high).
	        return x.copy()
	    offset = np.random.randint(low, high)
	    return np.roll(x, offset)

	#___________________________________________________________________________________________________
	# VIDEO FROM IMAGE with CAPTIONS
	#
	# UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
	# __________________________________________________________________________________________________

	# To download the SRT subtitles from YouTube:
	# yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2

	# _voice = 'en_US/vctk_low#p330'
	# _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249' # 'en_US/vctk_low#p282'
	# _voice = ''en_US/vctk_low#p351''
	# _voice = 'en_US/vctk_low#p351' # avoid 318 it does the ghhhhhh
	# _voice = 'en_US/m-ailabs_low#judy_bieber' # Nice voice for ('Arta culinara romaneasca - Groza Irina [phIF0NxgwlQ].mkv' 'Arta culinara romaneasca - Groza Irina [phIF0NxgwlQ].en-GB.srt'),
	# _voice = 'en_UK/apope_low'
	# _voice = 'en_US/m-ailabs_low#mary_ann'
	# _voice = 'en_US/vctk_low#p351'
	# _voice = 'en_US/hifi-tts_low#92'
	# voice_str = f'_{_voice.replace("/", "")}'





	# image/descriptions provided by other SHIFT tool or Human curator

	# https://simaviro.sharepoint.com/sites/SHIFT/Shared%20Documents/Forms/AllItems.aspx?csf=1&web=1&e=JNK8dQ&cid=363c253d%2D4d61%2D4db1%2D8ffd%2Ddedda749da2d&RootFolder=%2Fsites%2FSHIFT%2FShared%20Documents%2FGENERAL%2FWORK%20PACKAGES%2FWP1%2FContent%20Repository%2Fshift%5FSPK%5Fuse%5Fcases%5Fshare%2F02%5Fuc%5Fspk%5FLandscape2Soundscape%2FLandscape2Soundscape%5F12%5FMasterpieces&FolderCTID=0x01200058F5037C0101524B82F6F0788C02A563
	# STATIC_FRAME = 'uc_spk_Landscape2Soundscape_Masterpieces_pics/01_Schick_AII840_001.jpg' #'assets/image_from_T31.jpg'




	# Directory holding the painting images and their description text files.
	PIC_DIR = 'uc_spk_Landscape2Soundscape_Masterpieces_pics/'

	# One entry per painting. Every entry MUST have exactly five fields because
	# the processing loop unpacks them positionally:
	#   [image file, description text file, soundscape prompt (audiocraft),
	#    title drawn on the image with cv2.putText, TTS voice]
	DESCRIPTIONS = [
	    # 1
	    [
	        '01_Schick_AII840_001.jpg',  # image
	        '01_Schick_AII840_001.txt',  # text
	        'Statue in shire hill on autumn beach.',  # audiocraft
	        'Gottlieb Schick - Bildnis der Heinrike Dannecker - 1802',  # cv2 puttext title
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 2
	    [
	        '02_Constable_AI555_001.jpg',
	        '02_Constable_AI555_001.txt',
	        'Meadows country farm village in sight',
	        'John Constable - Dorf an dem Flusse Stour - 1804',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 3
	    [
	        '03_Schinkel_WS200-002.jpg',
	        '03_Schinkel_WS200-002.txt',
	        'Arriving at the shore on horses',
	        'Karl Friedrich Schinkel - Gotische Kirche auf einem Felsen am Meer - 1815',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 4
	    [
	        '04_Friedrich_FV317_001.jpg',
	        '04_Friedrich_FV317_001.txt',
	        'Land steppes',
	        'Friedrich Caspar David - Der Watzmann - 1824/1825',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 5
	    [
	        '05_Blechen_FV40_001.jpg',
	        '05_Blechen_FV40_001.txt',
	        'fjords',
	        # Artist name was scrambled ("Blechen - Carl Unwetter ...").
	        'Carl Blechen - Unwetter in der römischen Campagna - 1829',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 6
	    [
	        # The comma after the image name was missing, which silently merged
	        # the image and text file names into one string (4 fields, not 5).
	        '06_Menzel_AI900_001.jpg',
	        '06_Menzel_AI900_001.txt',
	        'Olive trees in Seville',
	        'Adolph Menzel - Bauplatz mit Weiden - 1846',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 7
	    [
	        '07_Courbet_AI967_001.jpg',
	        '07_Courbet_AI967_001.txt',
	        'Storm at the strand of waves Tsunami',
	        'Gustave Courbet - Die Welle - 1869/1870',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 8
	    [
	        '08_Monet_AI1013_001.jpg',
	        '08_Monet_AI1013_001.txt',
	        'Mai flowers blossom picnic',
	        'Claude Monet - Sommertag - 1874',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 9
	    [
	        '09_Blechen_AII823_001.jpg',
	        '09_Blechen_AII823_001.txt',
	        'Cascade in Africa',
	        'Carl Blechen - Wasserfälle bei Tivoli - 1832',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 10
	    [
	        '10_Boecklin_967648_NG2-80_001_rsz.jpg',
	        '10_Boecklin_967648_NG2-80_001.txt',
	        'Hades ades at it sisland',
	        'Arnold Böcklin - Toteninsel - 1883',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 11
	    [
	        '11_Liebermann_NG4-94_001.jpg',
	        '11_Liebermann_NG4-94_001.txt',
	        'Tavern at the waterfront',
	        # Painter is Max Liebermann (cf. the file name); was "Tiebermann".
	        'Max Liebermann - Gartenlokal an der Havel. Nikolskoe - 1916',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	    # 12
	    [
	        '12_Slevogt_AII1022_001.jpg',
	        '12_Slevogt_AII1022_001.txt',
	        'toy sailing yachts pool',
	        'Max Slevogt - Segelboote auf der Alster am Abend -1905',
	        'en_US/m-ailabs_low#mary_ann',
	    ],
	]


	# Name of the intermediate silent clip; only referenced by the disabled
	# (commented-out) ffmpeg muxing step at the end of this file.
	SILENT_VIDEO = '_silent_video.mp4'


	# SILENT CLIP


	def _read_image(path):
	    """Read ``path`` with OpenCV, raising if it cannot be loaded.

	    ``cv2.imread`` signals failure by returning ``None`` instead of raising,
	    which would otherwise surface later as an unrelated-looking error on the
	    first attribute access or slice.

	    Raises:
	        FileNotFoundError: if OpenCV could not read/decode the file.
	    """
	    img = cv2.imread(path)
	    if img is None:
	        raise FileNotFoundError(f'cv2.imread could not read image: {path}')
	    return img


	def _half_size(img):
	    """Return ``img`` resized to half its width and half its height."""
	    return cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2))


	# The logos do not depend on the painting, so load and shrink them once
	# instead of re-reading both files on every loop iteration.
	_audeering_logo = _half_size(_read_image('assets/audeering_logo.jpg')[:740, :, :])
	_smb_logo = _half_size(_read_image('assets/SMB_logo.png'))


	# For every painting: draw the title band and the logos onto the image, save
	# it as 'pic_logo_emb.png', then let tts.py build the narrated video from it.
	for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:

	    # cv2put txt
	    im = _read_image(PIC_DIR + _img_)  # IMG must have EVEN shape
	    h, w, _ = im.shape
	    im = im[(h % 2):, (w % 2):, :]  # drop the first row/column when odd
	    print(im.shape, "GLOBAL IM\n\n\n\n")

	    # Black band as wide as the image; the title is drawn on it in white and
	    # the band is then alpha-blended over rows 24..118 of the image.
	    fram = np.zeros((94, im.shape[1], 3), dtype=np.uint8)
	    h, w, _ = fram.shape
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    bottomLeftCornerOfText = (240, 74)  # w,h
	    fontScale = 2
	    fontColor = (255, 255, 255)
	    thickness = 4
	    lineType = 2
	    # NOTE(review): Hershey fonts cover only a subset of ASCII, so umlauts in
	    # the titles (ö, ä) are presumably not rendered correctly - confirm.
	    cv2.putText(fram, _title_,  # 'LandScape 2 SoundScape',
	                bottomLeftCornerOfText,
	                font,
	                fontScale,
	                fontColor,
	                thickness,
	                lineType)
	    offset_h = 24
	    im[offset_h:h + offset_h, :w, :] = (
	        .4 * im[offset_h:h + offset_h, :w, :] + .6 * fram).astype(np.uint8)
	    # cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()

	    # logo aud - blended into the bottom-left corner
	    h, w, _ = _audeering_logo.shape
	    offset_h = im.shape[0] - h
	    im[offset_h:h + offset_h, :w, :] = (
	        .23 * im[offset_h:h + offset_h, :w, :]
	        + .77 * _audeering_logo).astype(np.uint8)

	    # logo SMB - also bottom-left, blended on top of the previous logo
	    h, w, _ = _smb_logo.shape
	    offset_h = im.shape[0] - h
	    # fill logo SMB with the pixels of im - where SMB is empty
	    ptc = im[offset_h:h + offset_h, :w, :]
	    logo = _smb_logo.copy()  # per-image copy: the fill below mutates it
	    empty = logo == 0
	    logo[empty] = ptc[empty]  # fill empty
	    # NOTE(review): the weights sum to .99, not 1 - kept as in the original.
	    im[offset_h:h + offset_h, :w, :] = (.13 * ptc + .86 * logo).astype(np.uint8)

	    # # logo shift

	    # logo = cv2.imread('assets/shift_logo.png')#[:740, :, :]
	    # logo = cv2.resize(logo, (logo.shape[1]//2, logo.shape[0]//2))
	    # h, w, _ = logo.shape
	    # offset_h = im.shape[0] - h #-274
	    # offset_w = im.shape[1] - w #400
	    # # # fill logo SMB with the pixels of im - where SMB is empty
	    # ptc = im[offset_h:h+offset_h, :w, :]
	    # # msk = np.tile(logo[:, :,0:1] > 252, [1,1,3])
	    # # logo[msk] = ptc[msk] # fill empty
	    # im[offset_h:h+offset_h, offset_w:w+offset_w, :] = (.0 * im[offset_h:h+offset_h, offset_w:w+offset_w, :] + 1 * logo).astype(np.uint8)

	    # silent video - img
	    # im = cv2.resize(im, (700, 700))
	    cv2.imwrite('pic_logo_emb.png', im)

	    # raw, _ = soundfile.read(soundscape_file) # 12345, 2

	    # # fill
	    # soundscape = []
	    # for _replica in range(math.ceil(len(total) / raw.shape[0])+1):
	    #     soundscape.append(raw) # _shift non defined for stereo
	    # soundscape = np.concatenate(soundscape, 0)

	    # total = .36 * np.concatenate([total[:, None],
	    #                               total[:, None]], 1) + .64 * soundscape[:len(total), :]

	    # outfile: image name with '.' replaced by '__', plus '.mp4'
	    OUT_FILE = _img_.split('/')[-1].replace('.', '__') + '.mp4'  # assets / -1
	    print(f'{OUT_FILE=}\n')

	    # call API passing img
	    result = subprocess.run(
	        [
	            "python",
	            "tts.py",
	            "--text", PIC_DIR + _text_,
	            '--image', 'pic_logo_emb.png',
	            # "--title", _title_,
	            # '--soundscape_text', soundscape_text,
	            '--voice', _voice_,
	            '--out_file', OUT_FILE,
	        ])
	    if result.returncode != 0:
	        # Report the failure but keep going with the remaining paintings.
	        print(f'tts.py exited with code {result.returncode} for {OUT_FILE}')

	# soundfile.write(AUDIO_TRACK, total, 22050)
	# subprocess.call(
	# ["ffmpeg",
	# "-y",
	# "-i",
	# SILENT_VIDEO,
	# "-i",
	# AUDIO_TRACK,
	# #"-c:v",
	# #"copy",
	# "-map",
	# "0:v:0",
	# "-map",
	# " 1:a:0",
	# "-vf",
	# "pad",
	# OUT_FILE])