Dhruv Pai Dukle committed on
Commit
1947bbe
1 Parent(s): 6e76c51

Add application file

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: download a YouTube audio track, transcribe it, and headline it.

The audio is cut into fixed-length segments; each segment is transcribed with
Whisper and a T5 model generates a short heading for the transcript.  The
headings and transcripts are written to a small HTML file and shown on the
page.
"""

import html
import os
import shutil
import subprocess

import streamlit as st
import torch
import whisper
from pydub import AudioSegment
from transformers import T5ForConditionalGeneration, T5Tokenizer

AUDIO_PATH = "audio.mp3"
SEGMENTS_DIR = "segments"
# Segment length in milliseconds (pydub slices by milliseconds).
SEGMENT_LENGTH_MS = 30 * 1000
HEADLINE_MODEL_NAME = "Michau/t5-base-en-generate-headline"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE: `models` is the T5 headline generator, `model` is the Whisper
# speech-to-text model.  The names are kept as in the original script.
models = T5ForConditionalGeneration.from_pretrained(HEADLINE_MODEL_NAME)
tokenizer = T5Tokenizer.from_pretrained(HEADLINE_MODEL_NAME)
models = models.to(device)
model = whisper.load_model("base")


def _run_tool(args):
    """Run an external command without a shell; return True on exit status 0.

    A missing executable is reported as failure instead of raising, so the
    caller can show a message rather than crash.
    """
    try:
        return subprocess.run(args, check=False).returncode == 0
    except FileNotFoundError:
        return False


def _download_audio(url):
    """Download the audio track of *url* to AUDIO_PATH; return True on success."""
    if os.path.exists(AUDIO_PATH):
        os.remove(AUDIO_PATH)

    # Arguments are passed as a list (no shell), and "--" ends option parsing,
    # so the user-supplied URL can neither inject shell commands nor be read
    # as a command-line flag.
    # The thumbnail is best effort: its result is not used later on.
    _run_tool(
        ["youtube-dl", "--write-thumbnail", "--skip-download",
         "-o", "logo.png", "--", url]
    )
    downloaded = _run_tool(["yt-dlp", "-f", "140", "-o", AUDIO_PATH, "--", url])

    # The command has already finished here, so a missing file means the
    # download failed; report that instead of waiting for it forever.
    return downloaded and os.path.exists(AUDIO_PATH)


def _split_audio():
    """Cut AUDIO_PATH into SEGMENT_LENGTH_MS chunks; return their paths in order."""
    # Start from an empty directory so segments of a previous run are not mixed in.
    shutil.rmtree(SEGMENTS_DIR, ignore_errors=True)
    os.makedirs(SEGMENTS_DIR, exist_ok=True)

    audio = AudioSegment.from_file(AUDIO_PATH)
    segment_paths = []
    for i, segment in enumerate(audio[::SEGMENT_LENGTH_MS]):
        path = f"{SEGMENTS_DIR}/{i}.mp3"
        segment.export(path, format="mp3")
        segment_paths.append(path)
    return segment_paths


def _transcribe(path):
    """Return the Whisper transcript of the audio file at *path*."""
    audio = whisper.pad_or_trim(whisper.load_audio(path))
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # No language is set in the options, so decode() is left to pick it; the
    # separate detect_language() call whose result was discarded is dropped.
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text


def _generate_heading(text):
    """Return a headline for *text* generated by the T5 model."""
    encoding = tokenizer.encode_plus("headline: " + text, return_tensors="pt")
    beam_outputs = models.generate(
        input_ids=encoding["input_ids"].to(device),
        attention_mask=encoding["attention_mask"].to(device),
        max_length=64,
        num_beams=3,
        early_stopping=True,
    )
    # Drop special tokens so markers such as <pad> and </s> do not end up in
    # the displayed heading.
    return tokenizer.decode(beam_outputs[0], skip_special_tokens=True)


def _write_report(file_name, headings, texts):
    """Write one <h3>/<p> pair per segment to *file_name* as UTF-8."""
    # Escape the model output so characters like "<" or "&" cannot break the markup.
    report = "".join(
        "\n<h3>" + html.escape(heading, quote=False) + "</h3>"
        "\n<p>" + html.escape(text, quote=False) + "</p>"
        for heading, text in zip(headings, texts)
    )
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(report)


def _process(url, name):
    """Run the whole pipeline for one button press and render the results."""
    # Keep only the final path component so the report stays in the working
    # directory whatever the user typed.
    output_name = os.path.basename(name)
    if not url or not output_name:
        st.error("Please provide both a YouTube link and a file name.")
        return

    if not _download_audio(url):
        st.error("Could not download audio from the given link.")
        return

    segment_paths = _split_audio()
    headings = []
    original_texts = []
    for i, path in enumerate(segment_paths):
        st.write(f"Processing segment {i+1}/{len(segment_paths)}")
        text = _transcribe(path)
        headings.append(_generate_heading(text))
        original_texts.append(text)

    try:
        _write_report(output_name, headings, original_texts)
    except OSError as err:
        st.error(f"Could not write {output_name}: {err}")
        return

    st.success("Audio processing completed!")

    # Display results
    st.subheader("Generated Headings and Text:")
    for i, (heading, text) in enumerate(zip(headings, original_texts)):
        st.write(f"Segment {i+1}:")
        st.write("Heading:", heading)
        st.write("Text:", text)
        st.write("-----------")


st.title("Audio Analysis")

# Arguments input
st.subheader("Enter YouTube link and file name:")
url = st.text_input("YouTube link")
name = st.text_input("File name")

# Process audio and generate headings
if st.button("Process"):
    _process(url.strip(), name.strip())