zzk1st committed on
Commit
4c4a1b3
1 Parent(s): 4146809

Another big commit

Browse files
README.md CHANGED
@@ -45,7 +45,14 @@ export WAVJOURNEY_OPENAI_KEY=your_openai_key_here
45
 
46
  6. Set environment variables for using API services
47
  ```bash
48
- export WAVJOURNEY_SERVICE_PORT=8021 WAVJOURNEY_SERVICE_URL=127.0.0.1
 
 
 
 
 
 
 
49
  ```
50
 
51
 
 
45
 
46
  6. Set environment variables for using API services
47
  ```bash
48
+ # Set the port for the WAVJOURNEY service to 8021
49
+ export WAVJOURNEY_SERVICE_PORT=8021
50
+
51
+ # Set the URL for the WAVJOURNEY service to 127.0.0.1
52
+ export WAVJOURNEY_SERVICE_URL=127.0.0.1
53
+
54
+ # Limit the maximum script lines for WAVJOURNEY to 999
55
+ export WAVJOURNEY_MAX_SCRIPT_LINES=999
56
  ```
57
 
58
 
data/voice_presets/metadata.json CHANGED
@@ -34,13 +34,13 @@
34
  "desc": "a female voice of a off-site news reporter, suitable for news scenario",
35
  "npz_path": "data/voice_presets/npz/news_female_speaker_outside.npz"
36
  },
37
- "child": {
38
- "id": "child",
39
  "desc": "a small young boy voice",
40
  "npz_path": "data/voice_presets/npz/child_boy.npz"
41
  },
42
- "old_man": {
43
- "id": "old_man",
44
  "desc": "a voice of an old man",
45
  "npz_path": "data/voice_presets/npz/elder_morgen.npz"
46
  }
 
34
  "desc": "a female voice of a off-site news reporter, suitable for news scenario",
35
  "npz_path": "data/voice_presets/npz/news_female_speaker_outside.npz"
36
  },
37
+ "Child": {
38
+ "id": "Child",
39
  "desc": "a small young boy voice",
40
  "npz_path": "data/voice_presets/npz/child_boy.npz"
41
  },
42
+ "Old_man": {
43
+ "id": "Old_man",
44
  "desc": "a voice of an old man",
45
  "npz_path": "data/voice_presets/npz/elder_morgen.npz"
46
  }
examples/1.mp4 ADDED
Binary file (365 kB). View file
 
examples/2.mp4 ADDED
Binary file (241 kB). View file
 
examples/3.mp4 ADDED
Binary file (346 kB). View file
 
examples/examples.py CHANGED
@@ -1,24 +1,87 @@
1
 
2
  example1 = {
3
- 'text': "A hammer is hitting a wooden surface",
4
- 'table_text': """
5
- | Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
6
- |--------------|------------|------|-------------|----------|----------|---------------|----------|
7
- | speech | foreground | N/A | Character | N/A | -15 | Hi | Auto |
 
 
 
 
 
 
 
 
 
 
8
 
9
  """,
10
- 'wav_file': 'examples/example1.wav',
 
 
 
 
 
 
11
  }
12
 
13
  example2 = {
14
- 'text': "Two hammer is hitting a wooden surface",
15
- 'table_text': """
16
- | Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
17
- |--------------|------------|------|-------------|----------|----------|---------------|----------|
18
- | speech | foreground | N/A | Character | N/A | -15 | Hi | Auto |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  """,
21
- 'wav_file': 'examples/example1.wav',
22
  }
23
 
24
- examples = [example1,example2]
 
 
 
1
 
2
  example1 = {
3
+ 'text': "An introduction to AI-assisted audio content creation.",
4
+ 'table_script': """
5
+ | Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
6
+ |--------------|------------|----|-----------|--------|--------|------------------------------------------------------------------|--------|
7
+ | music | background | 1 | N/A | begin | -35 | Inspirational technology-themed music | Auto |
8
+ | speech | foreground | N/A| Narrator | N/A | -15 | Welcome to the future of audio content creation. | Auto |
9
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Digital startup sound | 2 |
10
+ | speech | foreground | N/A| Narrator | N/A | -15 | With evolving technology, we are introducing AI-assisted tools for pristine audio production. | Auto |
11
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Keyboard typing noise | 3 |
12
+ | speech | foreground | N/A| Narrator | N/A | -15 | Imagine crafting audio content with the power of AI at your fingertips. | Auto |
13
+ | sound_effect | background | 2 | N/A | begin | -35 | Ambiance of a busy control room | Auto |
14
+ | speech | foreground | N/A| Narrator | N/A | -15 | Enhanced quality, efficient production and limitless creativity, all under one roof. | Auto |
15
+ | sound_effect | background | 2 | N/A | end | N/A | N/A | Auto |
16
+ | speech | foreground | N/A| Narrator | N/A | -15 | Unleash your potential with AI-assisted audio content creation. | Auto |
17
+ | music | background | 1 | N/A | end | N/A | N/A | Auto |
18
 
19
  """,
20
+ 'table_voice': """
21
+ | Character | Voice |
22
+ |-------------|-----------|
23
+ | Narrator | News_Male |
24
+
25
+ """,
26
+ 'wav_file': 'examples/1.mp4',
27
  }
28
 
29
  example2 = {
30
+ 'text': "A couple dating in a cafe.",
31
+ 'table_script': """
32
+ | Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
33
+ |--------------|------------|----|-----------|--------|--------|-----------------------------------------------|--------|
34
+ | sound_effect | background | 1 | N/A | begin | -35 | Soft chattering in a cafe | Auto |
35
+ | sound_effect | background | 2 | N/A | begin | -38 | Coffee brewing noises | Auto |
36
+ | music | background | 3 | N/A | begin | -35 | Soft jazz playing in the background | Auto |
37
+ | speech | foreground | N/A| Man | N/A | -15 | It’s really nice to finally get out and relax a little, isn’t it? | Auto |
38
+ | speech | foreground | N/A| Woman | N/A | -15 | I know, right? We should do this more often. | Auto |
39
+ | sound_effect | background | 2 | N/A | end | N/A | N/A | Auto |
40
+ | speech | foreground | N/A| Man | N/A | -15 | Here’s your coffee, just as you like it. | Auto |
41
+ | speech | foreground | N/A| Woman | N/A | -15 | Thank you, it smells wonderful. | Auto |
42
+ | music | background | 3 | N/A | end | N/A | N/A | Auto |
43
+ | sound_effect | background | 1 | N/A | end | N/A | N/A | Auto |
44
+
45
+ """,
46
+ 'table_voice': """
47
+ | Character | Voice |
48
+ |-------------|-----------|
49
+ | Man | Male1 |
50
+ | Woman | Female1 |
51
+
52
+ """,
53
+ 'wav_file': 'examples/2.mp4',
54
+ }
55
+
56
+
57
+ example3 = {
58
+ 'text': "A child is participating in a farting contest.",
59
+ 'table_script': """
60
+ | Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
61
+ |--------------|------------|----|-----------|--------|--------|------------------------------------------------------|--------|
62
+ | sound_effect | background | 1 | N/A | begin | -35 | Outdoor park ambiance, people chattering | Auto |
63
+ | music | background | 2 | N/A | begin | -35 | Light comedy theme music, quirky | Auto |
64
+ | speech | foreground | N/A| Host | N/A | -15 | Welcome to the annual Fart Competition. | Auto |
65
+ | speech | foreground | N/A| Host | N/A | -15 | Now, let’s welcome our youngest participant. | Auto |
66
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Clapping sound | 2 |
67
+ | speech | foreground | N/A| Child | N/A | -15 | Hi, I’m excited to be here. | Auto |
68
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Short, cartoonish duration of a fart sound | 4 |
69
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Audience laughing and applauding | 2 |
70
+ | speech | foreground | N/A| Host | N/A | -15 | Wow, that was impressive! Let’s give another round of applause! | Auto |
71
+ | sound_effect | foreground | N/A| N/A | N/A | -35 | Audience clapping and cheering | 3 |
72
+ | music | background | 2 | N/A | end | N/A | N/A | Auto |
73
+ | sound_effect | background | 1 | N/A | end | N/A | N/A | Auto |
74
+ """,
75
+ 'table_voice': """
76
+ | Character | Voice |
77
+ |-------------|-----------|
78
+ | Host | Male1 |
79
+ | Child | Child |
80
 
81
  """,
82
+ 'wav_file': 'examples/3.mp4',
83
  }
84
 
85
+
86
+
87
+ examples = [example1, example2, example3]
pipeline.py CHANGED
@@ -194,6 +194,14 @@ def generate_json_file(session_id, input_text, api_key):
194
 
195
  # Function call used by Gradio: json to result wav
196
  def generate_audio(session_id, json_script, api_key):
 
 
 
 
 
 
 
 
197
  output_path = utils.get_session_path(session_id)
198
  output_audio_path = utils.get_session_audio_path(session_id)
199
  voices = voice_presets.get_merged_voice_presets(session_id)
 
194
 
195
  # Function call used by Gradio: json to result wav
196
  def generate_audio(session_id, json_script, api_key):
197
+ def count_lines(content):
198
+ # Split the string using the newline character and count the non-empty lines
199
+ return sum(1 for line in content.split('\n') if line.strip())
200
+
201
+ max_lines = utils.get_max_script_lines()
202
+ if count_lines(json_script) > max_lines:
203
+ raise ValueError(f'The number of lines of the JSON script has exceeded {max_lines}!')
204
+
205
  output_path = utils.get_session_path(session_id)
206
  output_audio_path = utils.get_session_audio_path(session_id)
207
  voices = voice_presets.get_merged_voice_presets(session_id)
services.py CHANGED
@@ -227,4 +227,5 @@ def parse_voice():
227
 
228
  if __name__ == '__main__':
229
  service_port = get_service_port()
230
- app.run(debug=False, port=service_port)
 
 
227
 
228
  if __name__ == '__main__':
229
  service_port = get_service_port()
230
+ # We disable multithreading to force services to process one request at a time and avoid CUDA OOM
231
+ app.run(debug=False, threaded=False, port=service_port)
ui_client.py CHANGED
@@ -54,7 +54,7 @@ def generate_script_fn(instruction, _state: gr.State):
54
  json_script = generate_json_file(session_id, instruction, api_key)
55
  table_text = convert_json_to_md(json_script)
56
  except Exception as e:
57
- gr.Warning(str(e) + traceback.format_exc())
58
  print(f"Generating script error: {str(e)}")
59
  traceback.print_exc()
60
  return [
@@ -99,7 +99,7 @@ def generate_audio_fn(state):
99
  except Exception as e:
100
  print(f"Generation audio error: {str(e)}")
101
  traceback.print_exc()
102
- gr.Warning(str(e) + traceback.format_exc())
103
 
104
  return [
105
  None,
@@ -210,7 +210,7 @@ def add_voice_preset(vp_id, vp_desc, file, ui_state, added_voice_preset):
210
  except Exception as exception:
211
  print(exception)
212
  traceback.print_exc()
213
- gr.Warning(str(exception) + traceback.format_exc())
214
 
215
  # After added
216
  dataframe = get_voice_preset_to_list(ui_state)
@@ -451,10 +451,29 @@ with gr.Blocks(css=css) as interface:
451
  loading_icon = gr.HTML(loading_icon_html)
452
  share_button = gr.Button(value="Share to community", elem_id="share-btn")
453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
  # System Voice Presets
455
  gr.Markdown(label='System Voice Presets', value='# System Voice Presets')
456
- system_markdown_voice_presets = gr.Dataframe(label='System Voice Presets', headers=VOICE_PRESETS_HEADERS,
457
- value=system_voice_presets)
 
458
  # User Voice Preset Related
459
  gr.Markdown('# (Optional) Speaker Customization ')
460
  with gr.Accordion("Click to add speakers", open=False):
@@ -476,22 +495,7 @@ with gr.Blocks(css=css) as interface:
476
  vp_file = gr.File(label='Wav File', type='file', file_types=['.wav'],
477
  interactive=True)
478
  vp_submit = gr.Button(label='Upload Voice Preset', value="Upload Voice Preset")
479
- # add examples
480
- from examples.examples import examples as WJExamples
481
- def example_fn(idx, _text_input):
482
- print('from example', idx, _text_input)
483
- example = WJExamples[int(idx)-1]
484
- return example['table_text'], gr.make_waveform(example['wav_file'])
485
-
486
- _idx_input = gr.Textbox(label='Example No')
487
- _idx_input.visible=False
488
- gr.Examples(
489
- [[idx+1, x['text']] for idx, x in enumerate(WJExamples)],
490
- fn=example_fn,
491
- inputs=[_idx_input, text_input],
492
- outputs=[char_voice_map_markdown, audio_output],
493
- cache_examples=True,
494
- )
495
  # clear btn, will re-new a session
496
  clear_btn = gr.ClearButton(value='Clear All')
497
 
@@ -579,5 +583,5 @@ with gr.Blocks(css=css) as interface:
579
  # debug only
580
  # print_state_btn = gr.Button(value='Print State')
581
  # print_state_btn.click(fn=lambda state, state2: print(state, state2), inputs=[ui_state, selected_voice_presets])
582
- interface.queue(concurrency_count=5, max_size=20)
583
  interface.launch()
 
54
  json_script = generate_json_file(session_id, instruction, api_key)
55
  table_text = convert_json_to_md(json_script)
56
  except Exception as e:
57
+ gr.Warning(str(e))
58
  print(f"Generating script error: {str(e)}")
59
  traceback.print_exc()
60
  return [
 
99
  except Exception as e:
100
  print(f"Generation audio error: {str(e)}")
101
  traceback.print_exc()
102
+ gr.Warning(str(e))
103
 
104
  return [
105
  None,
 
210
  except Exception as exception:
211
  print(exception)
212
  traceback.print_exc()
213
+ gr.Warning(str(exception))
214
 
215
  # After added
216
  dataframe = get_voice_preset_to_list(ui_state)
 
451
  loading_icon = gr.HTML(loading_icon_html)
452
  share_button = gr.Button(value="Share to community", elem_id="share-btn")
453
 
454
+ # add examples
455
+ from examples.examples import examples as WJExamples
456
+ def example_fn(idx, _text_input):
457
+ print('from example', idx, _text_input)
458
+ example = WJExamples[int(idx)-1]
459
+ print(example['table_script'], example['table_voice'], gr.make_waveform(example['wav_file']))
460
+ return example['table_script'], example['table_voice'], gr.make_waveform(example['wav_file'])
461
+
462
+ _idx_input = gr.Textbox(label='Example No.')
463
+ _idx_input.visible=False
464
+ gr.Examples(
465
+ [[idx+1, x['text']] for idx, x in enumerate(WJExamples)],
466
+ fn=example_fn,
467
+ inputs=[_idx_input, text_input],
468
+ outputs=[audio_script_markdown, char_voice_map_markdown, audio_output],
469
+ cache_examples=True,
470
+ )
471
+
472
  # System Voice Presets
473
  gr.Markdown(label='System Voice Presets', value='# System Voice Presets')
474
+ with gr.Accordion("Click to see system speakers", open=False):
475
+ system_markdown_voice_presets = gr.Dataframe(label='System Voice Presets', headers=VOICE_PRESETS_HEADERS,
476
+ value=system_voice_presets)
477
  # User Voice Preset Related
478
  gr.Markdown('# (Optional) Speaker Customization ')
479
  with gr.Accordion("Click to add speakers", open=False):
 
495
  vp_file = gr.File(label='Wav File', type='file', file_types=['.wav'],
496
  interactive=True)
497
  vp_submit = gr.Button(label='Upload Voice Preset', value="Upload Voice Preset")
498
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  # clear btn, will re-new a session
500
  clear_btn = gr.ClearButton(value='Clear All')
501
 
 
583
  # debug only
584
  # print_state_btn = gr.Button(value='Print State')
585
  # print_state_btn.click(fn=lambda state, state2: print(state, state2), inputs=[ui_state, selected_voice_presets])
586
+ interface.queue(concurrency_count=1, max_size=20)
587
  interface.launch()
utils.py CHANGED
@@ -77,3 +77,6 @@ def get_api_key():
77
  api_key = os.environ.get('WAVJOURNEY_OPENAI_KEY')
78
  return api_key
79
 
 
 
 
 
77
  api_key = os.environ.get('WAVJOURNEY_OPENAI_KEY')
78
  return api_key
79
 
80
+ def get_max_script_lines():
81
+ max_lines = int(os.environ.get('WAVJOURNEY_MAX_SCRIPT_LINES', 999))
82
+ return max_lines