sim04ful committed
Commit 8a5d43b
1 Parent(s): 5e4143f

type casting

Files changed (2)
  1. arible_schema_power.json +78 -0
  2. handler.py +6 -6
arible_schema_power.json ADDED
@@ -0,0 +1,78 @@
+{
+  "title": "AI Voice Cloner",
+  "description": "Clone a voice using AI",
+  "inputs": [
+    {
+      "name": "text",
+      "type": "text",
+      "description": "Text to be narrated",
+      "area": true,
+      "options": {
+        "min": 100,
+        "max": 50000
+      },
+      "title": "Content"
+    },
+    {
+      "name": "audio_urls",
+      "type": "constant",
+      "value": [
+        "https://pub-93685b189ac24b30839990a7d9a14391.r2.dev/attenborough_short.wav"
+      ]
+    },
+    {
+      "name": "gpt_cond_len",
+      "type": "number",
+      "description": "Length of audio used for gpt latents.",
+      "title": "GPT Conditioning Length",
+      "options": {
+        "min": 6,
+        "max": 60
+      },
+      "slider_step": 0.5
+    },
+    {
+      "name": "gpt_cond_chunk_len",
+      "type": "number",
+      "description": "Length of audio chunks used for gpt latents.",
+      "title": "GPT Conditioning Chunk Length",
+      "options": {
+        "min": 6,
+        "max": 60
+      },
+      "slider_step": 0.5
+    },
+    {
+      "name": "max_ref_length",
+      "type": "constant",
+      "value": 30
+    },
+    {
+      "name": "temperature",
+      "type": "number",
+      "description": "Temperature for sampling.",
+      "title": "Temperature",
+      "options": {
+        "min": 0.0,
+        "max": 1.0
+      },
+      "slider_step": 0.1
+    },
+    {
+      "name": "repetition_penalty",
+      "type": "number",
+      "description": "Penalty for repetition.",
+      "title": "Repetition Penalty",
+      "options": {
+        "min": 1.0,
+        "max": 10.0
+      },
+      "slider_step": 0.1
+    },
+    {
+      "name": "language",
+      "type": "constant",
+      "value": "en"
+    }
+  ]
+}
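For context, the sketch below shows a hypothetical request payload that satisfies this schema. The field names and bounds come from the schema above; the flat-dict shape and the sample values are assumptions for illustration, not something this commit defines. The "number" fields are driven by sliders with fractional steps, which is why the handler.py diff below casts them before use.

# Hypothetical payload matching arible_schema_power.json (illustrative only).
# Slider-driven "number" fields may arrive as floats rather than ints.
example_model_input = {
    "text": "A short passage of at least one hundred characters describing the scene in calm, measured narration, ready to be voiced.",
    "audio_urls": [
        "https://pub-93685b189ac24b30839990a7d9a14391.r2.dev/attenborough_short.wav"
    ],
    "gpt_cond_len": 30.0,
    "gpt_cond_chunk_len": 6.0,
    "max_ref_length": 30,
    "temperature": 0.7,
    "repetition_penalty": 2.0,
    "language": "en",
}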
handler.py CHANGED
@@ -69,9 +69,9 @@ class EndpointHandler:
             speaker_embedding,
         ) = self.model.get_conditioning_latents(
             audio_path=audio_paths,
-            gpt_cond_len=model_input["gpt_cond_len"],
-            gpt_cond_chunk_len=model_input["gpt_cond_chunk_len"],
-            max_ref_length=model_input["max_ref_length"],
+            gpt_cond_len=int(model_input["gpt_cond_len"]),
+            gpt_cond_chunk_len=int(model_input["gpt_cond_chunk_len"]),
+            max_ref_length=int(model_input["max_ref_length"]),
         )
 
         print("Generating audio")
@@ -81,10 +81,10 @@
             text=model_input["text"],
             speaker_embedding=speaker_embedding,
             gpt_cond_latent=gpt_cond_latent,
-            temperature=model_input["temperature"],
-            repetition_penalty=model_input["repetition_penalty"],
+            temperature=float(model_input["temperature"]),
+            repetition_penalty=float(model_input["repetition_penalty"]),
             language=model_input["language"],
-            enable_text_splitting=True,
+            enable_text_splitting=False,
         )
         audio_file = io.BytesIO()
         torchaudio.save(
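The casts above handle numeric schema fields that deserialize as JSON floats (from the fractional slider steps) or as strings, while the conditioning and inference calls appear to expect plain ints and floats, judging by the int()/float() conversions in this diff. Below is a minimal standalone sketch of the same coercion, using a hypothetical coerce_numeric_inputs helper that is not part of handler.py; the field-to-type mapping is inferred from arible_schema_power.json and the diff, and handler.py itself casts inline instead.

from typing import Any, Dict

# Hypothetical helper mirroring the inline casts added in this commit.
INT_FIELDS = ("gpt_cond_len", "gpt_cond_chunk_len", "max_ref_length")
FLOAT_FIELDS = ("temperature", "repetition_penalty")

def coerce_numeric_inputs(model_input: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of model_input with numeric fields cast to the types
    the conditioning and inference calls are given in handler.py."""
    coerced = dict(model_input)
    for key in INT_FIELDS:
        if key in coerced:
            coerced[key] = int(float(coerced[key]))  # tolerates 30, 30.0, or "30"
    for key in FLOAT_FIELDS:
        if key in coerced:
            coerced[key] = float(coerced[key])
    return coerced

# Example (hypothetical): normalize the payload once, then pass it on.
# model_input = coerce_numeric_inputs(model_input)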