Spaces:
Sleeping
Sleeping
ASG Models
committed on
Commit
•
6cddb08
1
Parent(s):
afef005
Update app.py
Browse files
app.py
CHANGED
@@ -99,21 +99,22 @@ class DataViewerApp:
|
|
99 |
text = re.sub(r"\d", " ", text)
|
100 |
return text
|
101 |
def save_row(self, text,data_oudio):
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
117 |
return self.get_page_data(self.current_page),None,""
|
118 |
def GetDataset_2(self,filename,ds=1.5):
|
119 |
audios_data = []
|
@@ -142,27 +143,28 @@ class DataViewerApp:
|
|
142 |
|
143 |
return self.get_page_data(self.current_page)
|
144 |
def trim_audio(self, text,data_oudio):
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
166 |
return self.get_page_data(self.current_page),None,""
|
167 |
def order_data(self):
|
168 |
self.df[['text','speaker_id','secs','flag']]=self.data
|
@@ -260,15 +262,15 @@ class DataViewerApp:
|
|
260 |
return namedata
|
261 |
|
262 |
def delete_row(self):
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
|
273 |
return self.get_page_data(self.current_page),None,""
|
274 |
def login(self, token):
|
|
|
99 |
text = re.sub(r"\d", " ", text)
|
100 |
return text
|
101 |
def save_row(self, text,data_oudio):
|
102 |
+
if text!="" :
|
103 |
+
row = self.data.iloc[self.current_selected]
|
104 |
+
row['text'] = text
|
105 |
+
row['flag']=1
|
106 |
+
self.data.iloc[self.current_selected] = row
|
107 |
+
sr,audio=data_oudio
|
108 |
+
if sr!=16000:
|
109 |
+
audio=audio.astype(np.float32)
|
110 |
+
audio/=np.max(np.abs(audio))
|
111 |
+
audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
|
112 |
+
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
+
self.sdata[self.current_selected] = audio
|
118 |
return self.get_page_data(self.current_page),None,""
|
119 |
def GetDataset_2(self,filename,ds=1.5):
|
120 |
audios_data = []
|
|
|
143 |
|
144 |
return self.get_page_data(self.current_page)
|
145 |
def trim_audio(self, text,data_oudio):
|
146 |
+
if text!="" :
|
147 |
+
audios_data = []
|
148 |
+
audios_samplerate = []
|
149 |
+
sr,audio=data_oudio
|
150 |
+
audio=audio.astype(np.float32)
|
151 |
+
audio/=np.max(np.abs(audio))
|
152 |
+
audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
|
153 |
+
audios_data.append(audio)
|
154 |
+
secs=round(len(audios_data)/16000,2)
|
155 |
+
audios_samplerate.append(16000)
|
156 |
+
df = pd.DataFrame()
|
157 |
+
df['secs'] = secs
|
158 |
+
df['audio'] =[ audio]
|
159 |
+
df['samplerate'] = 16000
|
160 |
+
df['text'] =text
|
161 |
+
df['speaker_id'] =self.speaker_id
|
162 |
+
df['_speaker_id'] =self.speaker_id
|
163 |
+
df['flag']=1
|
164 |
+
df = df[['text','audio','samplerate','secs','speaker_id','_speaker_id','flag']]
|
165 |
+
self.df = pd.concat([self.df, df], axis=0, ignore_index=True)
|
166 |
+
self.data =self.df[['text','speaker_id','secs','flag']]
|
167 |
+
self.sdata =self.df['audio'].to_list()
|
168 |
return self.get_page_data(self.current_page),None,""
|
169 |
def order_data(self):
|
170 |
self.df[['text','speaker_id','secs','flag']]=self.data
|
|
|
262 |
return namedata
|
263 |
|
264 |
def delete_row(self):
|
265 |
+
if len(self.data)!=0 or self.current_selected != -1 :
|
266 |
+
self.data.drop(self.current_selected, inplace=True)
|
267 |
+
self.data.reset_index(drop=True, inplace=True)
|
268 |
+
self.df.drop(self.current_selected, inplace=True)
|
269 |
+
self.df.reset_index(drop=True, inplace=True)
|
270 |
+
self.sdata.pop(self.current_selected)
|
271 |
+
self.current_selected = -1
|
272 |
+
# self.audio_player.update(None) # Clear audio player
|
273 |
+
# self.txt_audio.update("") # Clear text input
|
274 |
|
275 |
return self.get_page_data(self.current_page),None,""
|
276 |
def login(self, token):
|