Tonic committed on
Commit
16c1c4f
1 Parent(s): 342fcb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -1,7 +1,6 @@
1
  # Welcome to Team Tonic's MultiMed
2
 
3
  from gradio_client import Client
4
- import os
5
  import numpy as np
6
  import base64
7
  import gradio as gr
@@ -10,10 +9,10 @@ import requests
10
  import json
11
  import dotenv
12
  from scipy.io.wavfile import write
13
- import PIL
14
  import soundfile as sf
15
  from openai import OpenAI
16
  import time
 
17
  from PIL import Image
18
  import io
19
  import hashlib
@@ -28,6 +27,8 @@ from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoM
28
  from peft import PeftModel, PeftConfig
29
  import torch
30
  import os
 
 
31
 
32
  # Global variables to hold component references
33
  components = {}
@@ -120,9 +121,10 @@ def process_speech(input_language, audio_input):
120
  except Exception as e :
121
  return f"{e}"
122
 
 
123
  def convert_text_to_speech(input_text, target_language):
124
  """
125
- Convert text to speech in the specified language and return the audio file path and the input text.
126
  """
127
  try:
128
  text_to_speech_result = seamless_client.predict(
@@ -136,17 +138,16 @@ def convert_text_to_speech(input_text, target_language):
136
  api_name="/run" # API name
137
  )
138
 
139
- # Assuming the audio file path is in the second position of the result
140
- audio_file = text_to_speech_result[1]
 
 
 
141
 
142
- max_length = 25
143
- dir_name, file_name = os.path.split(audio_file)
144
- file_extension = os.path.splitext(file_name)[1]
145
- shortened_file_name = file_name[:max_length - len(file_extension)] + file_extension
146
- shortened_audio_file = os.path.join(dir_name, shortened_file_name)
147
 
148
- # Return the shortened audio file path and the input text
149
- return shortened_audio_file, input_text
150
  except Exception as e:
151
  return f"An error occurred during text-to-speech conversion: {e}", input_text
152
 
 
1
  # Welcome to Team Tonic's MultiMed
2
 
3
  from gradio_client import Client
 
4
  import numpy as np
5
  import base64
6
  import gradio as gr
 
9
  import json
10
  import dotenv
11
  from scipy.io.wavfile import write
 
12
  import soundfile as sf
13
  from openai import OpenAI
14
  import time
15
+ import PIL
16
  from PIL import Image
17
  import io
18
  import hashlib
 
27
  from peft import PeftModel, PeftConfig
28
  import torch
29
  import os
30
+ import uuid
31
+
32
 
33
  # Global variables to hold component references
34
  components = {}
 
121
  except Exception as e :
122
  return f"{e}"
123
 
124
+
125
  def convert_text_to_speech(input_text, target_language):
126
  """
127
+ Convert text to speech in the specified language, rename the audio file with a unique identifier, and return both the new audio file path and the input text.
128
  """
129
  try:
130
  text_to_speech_result = seamless_client.predict(
 
138
  api_name="/run" # API name
139
  )
140
 
141
+ original_audio_file = text_to_speech_result[1] # Assuming the audio file path is in the second position
142
+
143
+ # Generate a new file name with a random UUID
144
+ new_file_name = f"audio_output_{uuid.uuid4()}.wav"
145
+ new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
146
 
147
+ # Rename the file
148
+ os.rename(original_audio_file, new_file_path)
 
 
 
149
 
150
+ return new_file_path, input_text
 
151
  except Exception as e:
152
  return f"An error occurred during text-to-speech conversion: {e}", input_text
153