Helw150 committed on
Commit
e2607b6
1 Parent(s): 67da1a1

Revert "Add Buffering to Avoid Speech Gaps due to Orca Slowdown"

Browse files

This reverts commit 67da1a10bcbad20268886cfbe19245c983e72846.

Files changed (2) hide show
  1. app.py +5 -9
  2. requirements.txt +1 -1
app.py CHANGED
@@ -69,7 +69,7 @@ def response(state: AppState, audio: tuple):
69
  if not audio:
70
  return AppState()
71
 
72
- file_name = f"./{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
73
 
74
  sf.write(file_name, audio[1], audio[0], format="wav")
75
 
@@ -103,8 +103,7 @@ def response(state: AppState, audio: tuple):
103
  state.model_outs = None
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
- i = 0
107
- buff = []
108
  for resp, outs in diva_audio(
109
  (audio[0], audio[1]),
110
  prev_outs=(prev_outs if prev_outs is not None else None),
@@ -113,18 +112,15 @@ def response(state: AppState, audio: tuple):
113
  if prev_resp == LOADER_STR:
114
  prev_resp = ""
115
  state.conversation[-1]["content"] = resp
116
- audio_chunk = None
117
  pcm = stream.synthesize(resp[len(prev_resp) :])
 
118
  if pcm is not None:
119
- buff.extend(pcm)
120
- if len(buff) > (orca.sample_rate*2):
121
  mp3_io = io.BytesIO()
122
  sf.write(
123
- mp3_io, np.asarray(buff[:orca.sample_rate]).astype(np.int16), orca.sample_rate, format="mp3"
124
  )
125
  audio_chunk = mp3_io.getvalue()
126
  mp3_io.close()
127
- buff = buff[orca.sample_rate:]
128
  yield state, state.conversation, audio_chunk
129
 
130
  del outs.logits
@@ -260,4 +256,4 @@ with gr.Blocks(theme=theme, js=js) as demo:
260
  )
261
 
262
  if __name__ == "__main__":
263
- demo.launch(share=True)
 
69
  if not audio:
70
  return AppState()
71
 
72
+ file_name = f"/tmp/{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
73
 
74
  sf.write(file_name, audio[1], audio[0], format="wav")
75
 
 
103
  state.model_outs = None
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
+
 
107
  for resp, outs in diva_audio(
108
  (audio[0], audio[1]),
109
  prev_outs=(prev_outs if prev_outs is not None else None),
 
112
  if prev_resp == LOADER_STR:
113
  prev_resp = ""
114
  state.conversation[-1]["content"] = resp
 
115
  pcm = stream.synthesize(resp[len(prev_resp) :])
116
+ audio_chunk = None
117
  if pcm is not None:
 
 
118
  mp3_io = io.BytesIO()
119
  sf.write(
120
+ mp3_io, np.asarray(pcm).astype(np.int16), orca.sample_rate, format="mp3"
121
  )
122
  audio_chunk = mp3_io.getvalue()
123
  mp3_io.close()
 
124
  yield state, state.conversation, audio_chunk
125
 
126
  del outs.logits
 
256
  )
257
 
258
  if __name__ == "__main__":
259
+ demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  transformers==4.43.3
2
- gradio==5.1.0
3
  spaces
4
  accelerate
5
 
 
1
  transformers==4.43.3
2
+ gradio==5.0.1
3
  spaces
4
  accelerate
5