Spaces:

aichina
/

youtube-whisper-09

Sleeping

App Files Files Community

youtube-whisper-09 / app.py

aichina

dfdsfs

e5a1d61 over 1 year ago

raw history blame contribute delete

No virus

4.11 kB

	import gradio as gr

	from pytube import YouTube
	import random
	import requests,json
	import subprocess,os


	def del_down_file():
	    """Delete every ``*.mp4`` entry in the current working directory.

	    This is the in-process equivalent of ``rm -rf *.mp4``. Names are matched
	    with ``glob`` and removed through ``os``/``shutil`` rather than a shell,
	    so a file whose name starts with ``-`` is removed instead of being
	    parsed as an ``rm`` option. Entries that cannot be removed are reported
	    and skipped; nothing is raised.
	    """
	    import glob
	    import shutil

	    for path in glob.glob('*.mp4'):
	        try:
	            if os.path.isdir(path) and not os.path.islink(path):
	                # ``rm -r`` also removed directories matching the pattern.
	                shutil.rmtree(path, ignore_errors=True)
	            else:
	                os.remove(path)
	        except FileNotFoundError:
	            # Already gone (e.g. removed concurrently): same as ``rm -f``.
	            pass
	        except OSError as err:
	            print(f'could not remove {path}: {err}')

	def get_video(url):
	    """Download the video at *url* with ``you-get`` and return its local path.

	    The download is written to the current directory and then renamed to
	    ``my_mp4.mp4``.

	    Args:
	        url: Address of the video page, passed to ``you-get`` as one argument.

	    Returns:
	        The string ``'my_mp4.mp4'``.

	    Raises:
	        ValueError: If *url* is blank or starts with ``-`` (it would be read
	            as a command-line option rather than a URL).
	        FileNotFoundError: If no ``.mp4`` file exists after the download, or
	            if the ``you-get`` executable itself cannot be found.
	    """
	    url = url.strip()
	    if not url or url.startswith('-'):
	        raise ValueError(f'not a downloadable URL: {url!r}')

	    output_dir = '.'
	    # Remember what was already here so a stale file is not mistaken for
	    # the new download.
	    already_present = {
	        name for name in os.listdir(output_dir) if name.endswith('.mp4')
	    }

	    # Argument list + no shell: the URL comes from the user and must never be
	    # interpreted by a shell.
	    command = ['you-get', '-o', output_dir, url]
	    print(command)
	    subprocess.call(command)

	    # Collect every .mp4 file now present in the directory.
	    mp4_files = sorted(
	        name for name in os.listdir(output_dir) if name.endswith('.mp4')
	    )
	    print(mp4_files)

	    # Prefer files created by this download; fall back to any .mp4 (you-get
	    # may skip the download when the target file already exists).
	    fresh = [name for name in mp4_files if name not in already_present]
	    candidates = fresh or mp4_files
	    if not candidates:
	        raise FileNotFoundError(f'you-get produced no .mp4 file for {url!r}')

	    # os.replace overwrites an existing my_mp4.mp4 on every platform.
	    os.replace(candidates[0], 'my_mp4.mp4')
	    return 'my_mp4.mp4'



	def create(prompt, openai_key, *, model="text-davinci-003", timeout=60):
	    """Send *prompt* to the OpenAI completions endpoint and return the reply.

	    Args:
	        prompt: Text to complete.
	        openai_key: OpenAI API key, sent as a bearer token. It is never
	            printed or logged.
	        model: Completions model name. Defaults to the value this function
	            has always used.
	        timeout: Seconds to wait for the HTTP call before ``requests``
	            raises a timeout error.

	    Returns:
	        The decoded JSON body of the response (whatever the API returned,
	        including error payloads).
	    """
	    # NOTE(review): OpenAI's deprecations page appears to list
	    # text-davinci-003 as retired - confirm and pass a supported
	    # completions model via ``model``.
	    headers = {
	        'Content-Type': 'application/json',
	        'Authorization': f'Bearer {openai_key}',
	    }
	    payload = {
	        "model": model,
	        "prompt": prompt,
	        "temperature": 0.7,
	        "max_tokens": 1024,
	        "top_p": 1.0,
	        "frequency_penalty": 0.0,
	        "presence_penalty": 0.0,
	    }
	    # Log the prompt only: the headers and the key are secrets.
	    print(prompt)
	    url = 'https://api.openai.com/v1/completions'
	    r = requests.post(url, headers=headers, json=payload, timeout=timeout)
	    print(r.text)
	    return r.json()

	def split_list(l, n):
	    """Lazily yield consecutive slices of *l*, each at most *n* items long.

	    The last slice is shorter when ``len(l)`` is not a multiple of *n*.
	    """
	    offsets = range(0, len(l), n)
	    yield from (l[start:start + n] for start in offsets)


	def convert(res, openai_key):
	    """Summarise a transcription response with the OpenAI API.

	    Args:
	        res: Response object whose ``.json()`` returns a dict with a
	            ``'prediction'`` list; each item carries a ``'transcription'``
	            string.
	        openai_key: OpenAI API key forwarded to ``create``.

	    Returns:
	        One string: for every group of ten transcript lines, the prompt that
	        was sent followed by the model's summary. If a request fails, the
	        summaries gathered so far are returned with a bracketed note
	        describing the failure, so the caller never gets a silently empty
	        result. An empty transcript yields ``''``.
	    """
	    data = res.json()
	    content = [item['transcription'] for item in data['prediction']]
	    if not content:
	        # Nothing to summarise.
	        return ''

	    parts = []
	    try:
	        for txt_line in split_list(content, 10):
	            txt_line_content = '\n'.join(txt_line)
	            prompt = f"\n\n将下面的内容使用简体中文总结5条要点出来：\n\n{txt_line_content}"
	            open_ai_res = create(prompt, openai_key)
	            if 'choices' not in open_ai_res:
	                # The API answered with an error payload; surface its text
	                # instead of an opaque KeyError('choices').
	                raise RuntimeError(open_ai_res.get('error', open_ai_res))
	            summary = open_ai_res['choices'][0]['text'].strip()
	            parts.append(prompt + '\n GPT3:\n' + summary)
	    except Exception as e:
	        # Best effort: keep what was summarised, but tell the user why the
	        # output is incomplete.
	        print('open ai api error', e)
	        parts.append(f'\n\n[OpenAI request failed: {e}]')

	    return ''.join(parts)




	def get_audio(url):
	    """Download the first audio-only stream of a YouTube video.

	    The stream is saved in the current directory under a random numeric
	    name with an ``.mp4`` extension.

	    Args:
	        url: YouTube video URL handed to ``pytube.YouTube``.

	    Returns:
	        The file name the audio was saved as.
	    """
	    video = YouTube(url)
	    number = random.randint(10000, 100000)
	    target_name = f'{number}.mp4'
	    print(f'{url} {target_name} start get audio ...')

	    audio_streams = video.streams.filter(only_audio=True)
	    first_stream = audio_streams[0]
	    first_stream.download(filename=target_name)

	    print('aodio over ..')
	    return target_name

	def get_transcript(url, openai_key):
	    """Transcribe a YouTube video with Gladia and summarise it with OpenAI.

	    Args:
	        url: YouTube video URL.
	        openai_key: OpenAI API key forwarded to ``convert``.

	    Returns:
	        The summary text produced by ``convert``.
	    """
	    headers = {
	        'accept': 'application/json',
	        # NOTE(review): this key is committed in source. The environment
	        # variable lets a deployment override it; the literal fallback only
	        # keeps existing deployments working and should be rotated and
	        # removed.
	        'x-gladia-key': os.environ.get(
	            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
	        ),
	        # requests won't add a boundary if this header is set when you pass files=
	        # 'Content-Type': 'multipart/form-data',
	    }
	    audio_file = get_audio(url)

	    print(audio_file)

	    try:
	        # The context manager closes the upload handle even when the request
	        # fails, and before the file is deleted below.
	        with open(audio_file, 'rb') as audio_fh:
	            files = {
	                'audio': (audio_file, audio_fh, 'video/mp4'),
	                'language': (None, 'english'),
	                'language_behaviour': (None, 'automatic single language'),
	            }
	            print('get transcription from api.gladia.io ...')
	            # (connect, read) timeouts: transcription can be slow, but a
	            # dead connection must not block the worker forever.
	            response = requests.post(
	                'https://api.gladia.io/audio/text/audio-transcription/',
	                headers=headers,
	                files=files,
	                timeout=(10, 600),
	            )
	    finally:
	        # Always remove the downloaded audio, including on failure.
	        del_down_file()

	    print(response.text)
	    return convert(response, openai_key)





	# Gradio UI: inputs and the trigger button in one column, the result in
	# another.
	# NOTE(review): the nesting below is reconstructed from statement order
	# because the original indentation was lost - confirm against the app.
	with gr.Blocks() as demo:

	    with gr.Row():

	        with gr.Column():

	            with gr.Row():
	                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
	                # type='password' masks the key so it is not shown in clear
	                # text on screen.
	                openai_key = gr.Textbox(
	                    placeholder='Your openai key',
	                    label='OPENAI KEY',
	                    type='password',
	                )

	            with gr.Row():
	                gr.Markdown("自动从youtube视频中，获取音频内容，并使用GPT总结其要点")
	                transcribe_btn = gr.Button('Transcribe')

	        with gr.Column():
	            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')

	    # Event wiring is done inside the Blocks context.
	    transcribe_btn.click(get_transcript, inputs=[url, openai_key], outputs=outputs)

	demo.launch(debug=True)