ASG Models committed on
Commit
6cddb08
1 Parent(s): afef005

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -45
app.py CHANGED
@@ -99,21 +99,22 @@ class DataViewerApp:
99
  text = re.sub(r"\d", " ", text)
100
  return text
101
  def save_row(self, text,data_oudio):
102
- row = self.data.iloc[self.current_selected]
103
- row['text'] = text
104
- row['flag']=1
105
- self.data.iloc[self.current_selected] = row
106
- sr,audio=data_oudio
107
- if sr!=16000:
108
- audio=audio.astype(np.float32)
109
- audio/=np.max(np.abs(audio))
110
- audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
111
-
112
-
113
-
114
-
115
-
116
- self.sdata[self.current_selected] = audio
 
117
  return self.get_page_data(self.current_page),None,""
118
  def GetDataset_2(self,filename,ds=1.5):
119
  audios_data = []
@@ -142,27 +143,28 @@ class DataViewerApp:
142
 
143
  return self.get_page_data(self.current_page)
144
  def trim_audio(self, text,data_oudio):
145
- audios_data = []
146
- audios_samplerate = []
147
- sr,audio=data_oudio
148
- audio=audio.astype(np.float32)
149
- audio/=np.max(np.abs(audio))
150
- audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
151
- audios_data.append(audio)
152
- secs=round(len(audios_data)/16000,2)
153
- audios_samplerate.append(16000)
154
- df = pd.DataFrame()
155
- df['secs'] = secs
156
- df['audio'] =[ audio]
157
- df['samplerate'] = 16000
158
- df['text'] =text
159
- df['speaker_id'] =self.speaker_id
160
- df['_speaker_id'] =self.speaker_id
161
- df['flag']=1
162
- df = df[['text','audio','samplerate','secs','speaker_id','_speaker_id','flag']]
163
- self.df = pd.concat([self.df, df], axis=0, ignore_index=True)
164
- self.data =self.df[['text','speaker_id','secs','flag']]
165
- self.sdata =self.df['audio'].to_list()
 
166
  return self.get_page_data(self.current_page),None,""
167
  def order_data(self):
168
  self.df[['text','speaker_id','secs','flag']]=self.data
@@ -260,15 +262,15 @@ class DataViewerApp:
260
  return namedata
261
 
262
  def delete_row(self):
263
-
264
- self.data.drop(self.current_selected, inplace=True)
265
- self.data.reset_index(drop=True, inplace=True)
266
- self.df.drop(self.current_selected, inplace=True)
267
- self.df.reset_index(drop=True, inplace=True)
268
- self.sdata.pop(self.current_selected)
269
- self.current_selected = -1
270
- # self.audio_player.update(None) # Clear audio player
271
- # self.txt_audio.update("") # Clear text input
272
 
273
  return self.get_page_data(self.current_page),None,""
274
  def login(self, token):
 
99
  text = re.sub(r"\d", " ", text)
100
  return text
101
  def save_row(self, text,data_oudio):
102
+ if text!="" :
103
+ row = self.data.iloc[self.current_selected]
104
+ row['text'] = text
105
+ row['flag']=1
106
+ self.data.iloc[self.current_selected] = row
107
+ sr,audio=data_oudio
108
+ if sr!=16000:
109
+ audio=audio.astype(np.float32)
110
+ audio/=np.max(np.abs(audio))
111
+ audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
112
+
113
+
114
+
115
+
116
+
117
+ self.sdata[self.current_selected] = audio
118
  return self.get_page_data(self.current_page),None,""
119
  def GetDataset_2(self,filename,ds=1.5):
120
  audios_data = []
 
143
 
144
  return self.get_page_data(self.current_page)
145
  def trim_audio(self, text,data_oudio):
146
+ if text!="" :
147
+ audios_data = []
148
+ audios_samplerate = []
149
+ sr,audio=data_oudio
150
+ audio=audio.astype(np.float32)
151
+ audio/=np.max(np.abs(audio))
152
+ audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
153
+ audios_data.append(audio)
154
+ secs=round(len(audios_data)/16000,2)
155
+ audios_samplerate.append(16000)
156
+ df = pd.DataFrame()
157
+ df['secs'] = secs
158
+ df['audio'] =[ audio]
159
+ df['samplerate'] = 16000
160
+ df['text'] =text
161
+ df['speaker_id'] =self.speaker_id
162
+ df['_speaker_id'] =self.speaker_id
163
+ df['flag']=1
164
+ df = df[['text','audio','samplerate','secs','speaker_id','_speaker_id','flag']]
165
+ self.df = pd.concat([self.df, df], axis=0, ignore_index=True)
166
+ self.data =self.df[['text','speaker_id','secs','flag']]
167
+ self.sdata =self.df['audio'].to_list()
168
  return self.get_page_data(self.current_page),None,""
169
  def order_data(self):
170
  self.df[['text','speaker_id','secs','flag']]=self.data
 
262
  return namedata
263
 
264
  def delete_row(self):
265
+ if len(self.data)!=0 or self.current_selected != -1 :
266
+ self.data.drop(self.current_selected, inplace=True)
267
+ self.data.reset_index(drop=True, inplace=True)
268
+ self.df.drop(self.current_selected, inplace=True)
269
+ self.df.reset_index(drop=True, inplace=True)
270
+ self.sdata.pop(self.current_selected)
271
+ self.current_selected = -1
272
+ # self.audio_player.update(None) # Clear audio player
273
+ # self.txt_audio.update("") # Clear text input
274
 
275
  return self.get_page_data(self.current_page),None,""
276
  def login(self, token):