freddyaboulton (HF staff) committed
Commit 042390d · 1 Parent(s): 3202126
Files changed (1)
  1. app.py +23 -15
app.py CHANGED
@@ -32,7 +32,11 @@ else:
     rtc_configuration = None


-def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict]):
+def transcribe(
+    audio: tuple[int, np.ndarray],
+    transformers_chat: list[dict],
+    conversation: list[dict],
+):
     original_sr = audio[0]
     target_sr = 16000

@@ -41,15 +45,17 @@ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict]):
     )

     output = pipe(
-        {"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
+        {"audio": audio_sr, "turns": transformers_chat, "sampling_rate": target_sr},
         max_new_tokens=512,
     )
     transcription = whisper({"array": audio_sr.squeeze(), "sampling_rate": target_sr})

     conversation.append({"role": "user", "content": transcription["text"]})
     conversation.append({"role": "assistant", "content": output})
+    transformers_chat.append({"role": "user", "content": transcription["text"]})
+    transformers_chat.append({"role": "assistant", "content": output})

-    yield AdditionalOutputs(conversation)
+    yield AdditionalOutputs(transformers_chat, conversation)


 with gr.Blocks() as demo:
@@ -68,17 +74,16 @@ with gr.Blocks() as demo:
     """
     )
     with gr.Row():
+        transformers_chat = gr.State(
+            value=[
+                {
+                    "role": "system",
+                    "content": "You are a friendly and helpful character. You love to answer questions for people.",
+                }
+            ]
+        )
         with gr.Group():
-            transcript = gr.Chatbot(
-                label="transcript",
-                type="messages",
-                value=[
-                    {
-                        "role": "system",
-                        "content": "You are a friendly and helpful character. You love to answer questions for people.",
-                    }
-                ],
-            )
+            transcript = gr.Chatbot(label="transcript", type="messages")
             audio = WebRTC(
                 rtc_configuration=rtc_configuration,
                 label="Stream",
@@ -88,12 +93,15 @@

     audio.stream(
         ReplyOnPause(transcribe),
-        inputs=[audio, transcript],
+        inputs=[audio, transformers_chat, transcript],
         outputs=[audio],
         time_limit=90,
     )
     audio.on_additional_outputs(
-        lambda s: s, outputs=[transcript], queue=False, show_progress="hidden"
+        lambda t, g: (t, g),
+        outputs=[transformers_chat, transcript],
+        queue=False,
+        show_progress="hidden",
     )

 if __name__ == "__main__":
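For reference, the pattern this diff converges on can be sketched in isolation: the model-facing history (which carries the system prompt) lives in a gr.State, the visible gr.Chatbot holds only the rendered transcript, and both are pushed back to the UI through AdditionalOutputs. This is a minimal sketch, not the full app: it assumes the gradio_webrtc imports already used by app.py, `fake_turn` is a hypothetical stub standing in for the real whisper and pipe calls, and the WebRTC mode/modality arguments are assumptions (that part of app.py is outside the diff context).

import numpy as np
import gradio as gr
from gradio_webrtc import AdditionalOutputs, ReplyOnPause, WebRTC


def fake_turn(
    audio: tuple[int, np.ndarray],
    transformers_chat: list[dict],
    conversation: list[dict],
):
    # Hypothetical stand-in for the real whisper(...) and pipe(...) calls.
    user_text = "<transcription>"
    reply_text = "<model reply>"
    # Keep both histories in sync, as transcribe() does after this commit:
    # `conversation` drives the visible Chatbot, while `transformers_chat`
    # (which includes the system prompt) is what the model consumes.
    for history in (conversation, transformers_chat):
        history.append({"role": "user", "content": user_text})
        history.append({"role": "assistant", "content": reply_text})
    yield AdditionalOutputs(transformers_chat, conversation)


with gr.Blocks() as demo:
    # The system prompt sits in gr.State, so it is never rendered in the UI.
    transformers_chat = gr.State(
        value=[{"role": "system", "content": "You are a friendly and helpful character."}]
    )
    transcript = gr.Chatbot(label="transcript", type="messages")
    audio = WebRTC(
        rtc_configuration=None,  # assumption: no TURN config needed locally
        label="Stream",
        mode="send-receive",     # assumption: not shown in the diff context
        modality="audio",
    )
    audio.stream(
        ReplyOnPause(fake_turn),
        inputs=[audio, transformers_chat, transcript],
        outputs=[audio],
    )
    # on_additional_outputs receives values in the order they were yielded.
    audio.on_additional_outputs(
        lambda chat, msgs: (chat, msgs),
        outputs=[transformers_chat, transcript],
        queue=False,
        show_progress="hidden",
    )

if __name__ == "__main__":
    demo.launch()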