Spaces:
Sleeping
Sleeping
app-08-04-2024-19u00m.py
Browse files- app-08-04-2024-19u00m.py +204 -0
app-08-04-2024-19u00m.py
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 08-04-2024, 19u00m
|
2 |
+
# ALREADY WORKS: the audio recorded with this app (audio.wav) does not necessarily have to be persisted.
|
3 |
+
|
4 |
+
# https://github.com/theevann/streamlit-audiorecorder
|
5 |
+
# An audio Recorder for streamlit
|
6 |
+
#
|
7 |
+
# Description
|
8 |
+
# Audio recorder component for streamlit.
|
9 |
+
# It creates a button to start the recording and takes three arguments:
|
10 |
+
# the start button text, the stop button text, and the pause button text.
|
11 |
+
# If the pause button text is not specified, the pause button is not displayed.
|
12 |
+
#
|
13 |
+
# Parameters
|
14 |
+
# The signature of the component is:
|
15 |
+
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
|
16 |
+
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
|
17 |
+
# to properly distinguish multiple audiorecorders on the page.
|
18 |
+
#
|
19 |
+
# Return value
|
20 |
+
# The component's return value is a pydub AudioSegment.
|
21 |
+
#
|
22 |
+
# All AudioSegment methods are available, in particular you can:
|
23 |
+
# - Play the audio in the frontend with st.audio(audio.export().read())
|
24 |
+
# - Save the audio to a file with audio.export("audio.wav", format="wav")
|
25 |
+
# JB: Waarom zie ik in mijn HF Spaces omgeving de file "audio.wav" niet terug ?
|
26 |
+
# JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
|
27 |
+
# Zie hiervoor:
|
28 |
+
#
|
29 |
+
# CACHING:
|
30 |
+
# ========
|
31 |
+
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
|
32 |
+
# https://docs.streamlit.io/develop/concepts/architecture/caching
|
33 |
+
#
|
34 |
+
# EVERNOTE :
|
35 |
+
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
|
36 |
+
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
|
37 |
+
#
|
38 |
+
# 08-04-2024
|
39 |
+
#
|
40 |
+
# EN
|
41 |
+
#
|
42 |
+
# PERSISTENCE:
|
43 |
+
# ============
|
44 |
+
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
|
45 |
+
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
|
46 |
+
#
|
47 |
+
# --->
|
48 |
+
#
|
49 |
+
# DUPLICATED TO:
|
50 |
+
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
|
51 |
+
#
|
52 |
+
# EVERNOTE :
|
53 |
+
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
|
54 |
+
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
|
55 |
+
#
|
56 |
+
# 08-04-2024
|
57 |
+
#
|
58 |
+
|
59 |
+
|
60 |
+
###########################################################################################################
|
61 |
+
#
|
62 |
+
# Installation:
|
63 |
+
# pip install streamlit-audiorecorder
|
64 |
+
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
|
65 |
+
#
|
66 |
+
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
|
67 |
+
# On mac: brew install ffmpeg
|
68 |
+
|
69 |
+
import streamlit as st
|
70 |
+
from audiorecorder import audiorecorder
|
71 |
+
|
72 |
+
# Page heading displayed above the recorder widget.
st.title("Audio Recorder")

# Component signature (from the streamlit-audiorecorder README):
#   audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording",
#                 pause_prompt="", key=None)
# Per that README the return value is a pydub AudioSegment.
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)
|
75 |
+
|
76 |
+
|
77 |
+
# JB:
|
78 |
+
# https://docs.streamlit.io/develop/concepts/architecture/caching
|
79 |
+
# @st.cache_data
|
80 |
+
def audio_export(audio_wav_file, format, segment=None):
    """Export a recording to ``audio_wav_file`` in the given ``format``.

    This function is intentionally NOT wrapped in a Streamlit cache
    decorator. Streamlit's caches key on the function arguments, and the
    arguments here are always ``("audio.wav", "wav")``. A cached version
    would therefore run only for the very first recording and silently skip
    every later export, leaving a stale file on disk.

    Args:
        audio_wav_file: Destination path (or file-like object) handed to the
            recording's ``export`` method.
        format: Output container/codec name, e.g. ``"wav"``. The name shadows
            the builtin but is kept because callers pass it by keyword.
        segment: Optional object exposing ``export(out_f, format=...)``.
            When omitted, the module-level ``audio`` recording is used,
            which is the original behaviour.

    Returns:
        None.
    """
    # Resolve the recording at call time so each rerun exports the latest one.
    recording = audio if segment is None else segment
    recording.export(audio_wav_file, format=format)
|
84 |
+
|
85 |
+
# Only handle the recording once the widget has actually returned some audio.
has_recording = len(audio) > 0
if has_recording:
    # Play the recording back in the browser: export() without arguments
    # yields a readable file object whose bytes are passed to st.audio.
    playback_bytes = audio.export().read()
    st.audio(playback_bytes)

    # Save the recording to disk via the audio_export helper
    # (replaces the README's direct audio.export("audio.wav", format="wav")).
    audio_export("audio.wav", format="wav")  # JB 08-04-2024

    # Show basic properties read from the recording object.
    properties_line = f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds"
    st.write(properties_line)


# Button whose click makes Streamlit execute the script again.
st.button("Rerun")
|
101 |
+
###########################################################################################################
|
102 |
+
|
103 |
+
|
104 |
+
###########################################################################################################
|
105 |
+
# TEST
|
106 |
+
# ZIE:
|
107 |
+
# infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
|
108 |
+
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5
|
109 |
+
|
110 |
+
from pathlib import Path

from faster_whisper import WhisperModel

model_size = "large-v2"

# File written by audio_export() earlier in this script.
AUDIO_PATH = "audio.wav"


@st.cache_resource  # keep a single model instance alive across Streamlit reruns
def load_whisper_model(size):
    """Create the faster-whisper model for CPU inference.

    Streamlit re-executes the whole script on every interaction; without
    caching the model would be rebuilt each time (the author measured roughly
    33-60 seconds per load on the HF Spaces Streamlit free tier).

    Args:
        size: Whisper model size name, e.g. ``"large-v2"``.

    Returns:
        A ``WhisperModel`` configured with ``device="cpu"`` and
        ``compute_type="int8"``.
    """
    # The original GPU setup was device="cuda", compute_type="float16".
    # On CPU float16 failed with:
    #   ValueError: Requested float16 compute type, but the target device or
    #   backend do not support efficient float16 computation.
    # NOTE (JB): loading/downloading the model uses the owner's HF token.
    return WhisperModel(size, device="cpu", compute_type="int8")


st.write("Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")
model = load_whisper_model(model_size)
st.write("Ready Loading the WhisperModel: model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")")

# Always defined so later sections can read it, even when nothing was recorded.
text_to_transcribe = ""

# Guard against the first page load: before anything has been recorded there
# is no audio.wav on disk and model.transcribe() would fail.
if Path(AUDIO_PATH).exists():
    # The file stays available for the rest of the session once written.
    # Other inputs used during testing: "sam_altman_lex_podcast_367.flac" and
    # "Ukrainian podcast #10 Traveling to Lviv - Подорож до Льова. SLOW UKRAINIAN.mp3".
    segments, info = model.transcribe(AUDIO_PATH, beam_size=1)

    st.write("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    st.write("")
    st.write("info.all_language_probs : ", info.all_language_probs)
    st.write("len(info.all_language_probs): ", len(info.all_language_probs))  # 99 in the author's test run

    st.write("")

    st.write("info: ", info)

    # Example from the author's test with the Ukrainian podcast: 233.8249375
    st.write("info.duration: ", info.duration)

    # Split the duration (seconds) into whole minutes plus remaining seconds.
    minutes = int(info.duration / 60)
    seconds = info.duration - minutes*60

    st.write(minutes," minutes and ", seconds, " seconds")

    # Per the faster-whisper README, `segments` is a generator and the
    # transcription work happens while iterating it. Collect the pieces in a
    # list and join once; each piece keeps the leading space the original
    # concatenation produced.
    transcript_parts = []
    for segment in segments:
        st.write("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        transcript_parts.append(" " + segment.text)
    text_to_transcribe = "".join(transcript_parts)

    st.write("---------------------------------------------------------------------")

    st.write("text_to_transcribe: ", text_to_transcribe)
else:
    st.info("No recording found yet: record some audio above to get a transcription.")
|
185 |
+
# DAADWERKELIJK MET MIC OPGENOMEN EN GETRANSCRIBEERD STUKJE OEKRAÍENSE TEKST TER TEST
|
186 |
+
# OM HIERONDER NAAR NEDERLANDS TE VERTALEN MBV LLM MIXTRAL-8x7b-GROQ! :
|
187 |
+
# text_to_transcribe:
|
188 |
+
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
|
189 |
+
|
190 |
+
|
191 |
+
###########################################################################################################
|
192 |
+
# VERTALING
|
193 |
+
# DAADWERKELIJK MET MIC OPGENOMEN EN GETRANSCRIBEERD STUKJE OEKRAÍENSE TEKST TER TEST
|
194 |
+
# OM HIERONDER NAAR NEDERLANDS TE VERTALEN MBV LLM MIXTRAL-8x7b-GROQ! :
|
195 |
+
# text_to_transcribe:
|
196 |
+
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
|
197 |
+
# ...
|
198 |
+
|
199 |
+
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
###########################################################################################################
|