vivekvar committed on
Commit
b1b430b
1 Parent(s): b0bf571

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -74
app.py DELETED
@@ -1,74 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- import soundfile as sf
4
- import librosa
5
- from moviepy.editor import VideoFileClip
6
- import os
7
-
8
# The model classes used below are provided by Hugging Face `transformers`.
# They are imported here because the file's top-level import block does not
# bring them into scope; without this import the module fails with NameError.
from transformers import (
    RagRetriever,
    RagSequenceForGeneration,
    RagTokenizer,
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

# Load Whisper base model and processor
whisper_model_name = "openai/whisper-base"
whisper_processor = WhisperProcessor.from_pretrained(whisper_model_name)
whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)

# Load RAG sequence model and tokenizer
rag_model_name = "facebook/rag-sequence-nq"
rag_tokenizer = RagTokenizer.from_pretrained(rag_model_name)
# The retriever is built with index_name="exact" and use_dummy_dataset=True,
# then handed to the generator so retrieval happens inside `generate`.
rag_retriever = RagRetriever.from_pretrained(
    rag_model_name,
    index_name="exact",
    use_dummy_dataset=True,
)
rag_model = RagSequenceForGeneration.from_pretrained(
    rag_model_name,
    retriever=rag_retriever,
)
18
-
19
def transcribe_audio(audio_path, language="ru"):
    """Decode the speech in an audio file to text with the Whisper model.

    The decoder prompt is built with ``task="translate"``, so the model is
    asked to translate from ``language`` rather than transcribe verbatim.

    Args:
        audio_path: Path of the audio file to load.
        language: Source-language code passed to
            ``whisper_processor.get_decoder_prompt_ids``.

    Returns:
        The first decoded string of the batch, with special tokens removed.
    """
    # Load at 16 kHz, the same rate that is declared to the feature extractor.
    speech, _ = librosa.load(audio_path, sr=16000)
    # Features are extracted exactly once; a second, discarded pass through
    # the processor only duplicated this work.
    input_features = whisper_processor.feature_extractor(
        speech,
        return_tensors="pt",
        sampling_rate=16000,
    ).input_features
    forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(
        language=language,
        task="translate",
    )
    predicted_ids = whisper_model.generate(
        input_features,
        forced_decoder_ids=forced_decoder_ids,
    )
    return whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
26
-
27
def translate_and_summarize(text):
    """Run ``text`` through the RAG tokenizer and generator.

    Args:
        text: Input handed to ``rag_tokenizer``.

    Returns:
        The list produced by ``rag_tokenizer.batch_decode`` for the generated
        ids, with special tokens removed.
    """
    encoded = rag_tokenizer(text, return_tensors="pt")
    generated_ids = rag_model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
    )
    return rag_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
33
-
34
def extract_audio_from_video(video_path, output_audio_path):
    """Write the audio track of a video file to ``output_audio_path``.

    Args:
        video_path: Path of the video file to open.
        output_audio_path: Destination path for the extracted audio.

    Returns:
        ``output_audio_path`` when the video has an audio track, otherwise
        ``None`` (nothing is written in that case).
    """
    video_clip = VideoFileClip(video_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            return None
        audio_clip.write_audiofile(output_audio_path)
        return output_audio_path
    finally:
        # Always release the clip, including when there is no audio track or
        # when writing the audio file raises.
        video_clip.close()
42
-
43
def transcribe_audio_interface(audio_file):
    """Save an uploaded audio file under ``/tmp`` and transcribe it.

    NOTE(review): assumes ``audio_file`` is an uploaded-file-like object that
    exposes ``.name`` and ``.getvalue()``; confirm this matches what the
    Gradio ``audio`` input actually passes in.

    Args:
        audio_file: Uploaded audio object.

    Returns:
        The text returned by ``transcribe_audio`` for the saved file.
    """
    # Keep only the final path component: an absolute name would make
    # os.path.join discard "/tmp", and "../" segments would escape it.
    safe_name = os.path.basename(audio_file.name)
    audio_path = os.path.join("/tmp", safe_name)
    with open(audio_path, "wb") as f:
        f.write(audio_file.getvalue())
    return transcribe_audio(audio_path)
49
-
50
def summarize_text_interface(text):
    """Gradio callback: forward ``text`` to ``translate_and_summarize``.

    Args:
        text: Text entered in the interface.

    Returns:
        Whatever ``translate_and_summarize`` returns for ``text``.
    """
    return translate_and_summarize(text)
53
-
54
def summarize_video_interface(video_file):
    """Save an uploaded video, extract its audio, transcribe and summarize it.

    NOTE(review): assumes ``video_file`` is an uploaded-file-like object that
    exposes ``.name`` and ``.getvalue()``; confirm this matches what the
    Gradio ``video`` input actually passes in.

    Args:
        video_file: Uploaded video object.

    Returns:
        The result of ``translate_and_summarize`` on the transcription, or the
        message ``"No audio track found in the video file."`` when the video
        has no audio track.
    """
    # Keep only the final path component: an absolute name would make
    # os.path.join discard "/tmp", and "../" segments would escape it.
    safe_name = os.path.basename(video_file.name)
    video_path = os.path.join("/tmp", safe_name)
    with open(video_path, "wb") as f:
        f.write(video_file.getvalue())

    audio_path = extract_audio_from_video(video_path, "/tmp/extracted_audio.wav")
    if audio_path is None:
        return "No audio track found in the video file."

    transcription = transcribe_audio(audio_path)
    return translate_and_summarize(transcription)
65
-
66
# Create interfaces
audio_transcription_interface = gr.Interface(
    transcribe_audio_interface,
    inputs="audio",
    outputs="text",
    title="Audio Transcription",
)
text_summarization_interface = gr.Interface(
    summarize_text_interface,
    inputs="text",
    outputs="text",
    title="Text Summarization",
)
video_summarization_interface = gr.Interface(
    summarize_video_interface,
    inputs="video",
    outputs="text",
    title="Video Summarization",
)

# Launch the interfaces
# All three are served from a single tabbed app. When run as a script,
# `launch()` blocks the main thread, so calling it on each interface in turn
# would only ever serve the first one.
gr.TabbedInterface(
    [
        audio_transcription_interface,
        text_summarization_interface,
        video_summarization_interface,
    ],
    ["Audio Transcription", "Text Summarization", "Video Summarization"],
).launch()