Spaces:
Sleeping
Sleeping
tensorkelechi
committed on
Reproduce Vidtext with distilled whisper
Browse files
app.py
CHANGED
@@ -4,25 +4,17 @@ from transformers import pipeline
|
|
4 |
from pytube import YouTube
|
5 |
from pydub import AudioSegment
|
6 |
from audio_extract import extract_audio
|
7 |
-
import google.generativeai as google_genai
|
8 |
-
|
9 |
import os
|
10 |
from dotenv import load_dotenv
|
11 |
|
12 |
-
|
13 |
-
|
14 |
load_dotenv()
|
15 |
|
16 |
-
GOOGLE_API_KEY =os.getenv("GOOGLE_API_KEY")
|
17 |
-
|
18 |
-
google_genai.configure(api_key=GOOGLE_API_KEY)
|
19 |
-
|
20 |
st.set_page_config(
|
21 |
-
page_title="
|
22 |
)
|
23 |
|
24 |
-
st.title('
|
25 |
-
st.write('A web app for video/audio transcription(Youtube, mp4, mp3)')
|
26 |
|
27 |
|
28 |
def youtube_video_downloader(url):
|
@@ -57,7 +49,7 @@ def audio_processing(mp3_audio):
|
|
57 |
|
58 |
@st.cache_resource
|
59 |
def load_asr_model():
|
60 |
-
asr_model = pipeline(task="automatic-speech-recognition", model="
|
61 |
return asr_model
|
62 |
|
63 |
transcriber_model = load_asr_model()
|
@@ -66,11 +58,6 @@ def transcriber_pass(processed_audio):
|
|
66 |
text_extract = transcriber_model(processed_audio)
|
67 |
return text_extract['text']
|
68 |
|
69 |
-
def generate_ai_summary(transcript):
|
70 |
-
model = google_genai.GenerativeModel('gemini-pro')
|
71 |
-
model_response = model.generate_content([f"Give a summary of the text {transcript}"], stream=True)
|
72 |
-
return model_response.text
|
73 |
-
|
74 |
|
75 |
|
76 |
# Streamlit UI
|
|
|
4 |
from pytube import YouTube
|
5 |
from pydub import AudioSegment
|
6 |
from audio_extract import extract_audio
|
|
|
|
|
7 |
import os
|
8 |
from dotenv import load_dotenv
|
9 |
|
|
|
|
|
10 |
load_dotenv()
|
11 |
|
|
|
|
|
|
|
|
|
12 |
st.set_page_config(
|
13 |
+
page_title="VidText_distilled"
|
14 |
)
|
15 |
|
16 |
+
st.title('Vidtext_distilwhisper')
|
17 |
+
st.write('A web app for video/audio transcription(Youtube, mp4, mp3). Using distilled Whisper')
|
18 |
|
19 |
|
20 |
def youtube_video_downloader(url):
|
|
|
49 |
|
50 |
@st.cache_resource
|
51 |
def load_asr_model():
|
52 |
+
asr_model = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-large-v3")
|
53 |
return asr_model
|
54 |
|
55 |
transcriber_model = load_asr_model()
|
|
|
58 |
text_extract = transcriber_model(processed_audio)
|
59 |
return text_extract['text']
|
60 |
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
|
63 |
# Streamlit UI
|