CosyVoice committed on
Commit
7123846
1 Parent(s): 8340539

remove academic and change to iic/CosyVoice_ttsfrd

Browse files
.gitmodules CHANGED
@@ -1,6 +1,3 @@
1
- [submodule "third_party/AcademiCodec"]
2
- path = third_party/AcademiCodec
3
- url = https://github.com/yangdongchao/AcademiCodec.git
4
  [submodule "third_party/Matcha-TTS"]
5
  path = third_party/Matcha-TTS
6
  url = https://github.com/shivammehta25/Matcha-TTS.git
 
 
 
 
1
  [submodule "third_party/Matcha-TTS"]
2
  path = third_party/Matcha-TTS
3
  url = https://github.com/shivammehta25/Matcha-TTS.git
README.md CHANGED
@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
33
 
34
  **Model download**
35
 
36
- We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` models and `speech_kantts_ttsfrd` resource.
37
 
38
  If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
 
@@ -43,7 +43,7 @@ from modelscope import snapshot_download
43
  snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
  snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
  snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
- snapshot_download('speech_tts/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd')
47
  ```
48
 
49
  ``` sh
@@ -52,12 +52,12 @@ mkdir -p pretrained_models
52
  git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
  git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
  git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
- git clone https://www.modelscope.cn/speech_tts/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd
56
  ```
57
 
58
  Unzip `ttsfrd` resource and install `ttsfrd` package
59
  ``` sh
60
- cd pretrained_models/speech_kantts_ttsfrd/
61
  unzip resource.zip -d .
62
  pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
  ```
 
33
 
34
  **Model download**
35
 
36
+ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` models and `CosyVoice-ttsfrd` resource.
37
 
38
  If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
 
 
43
  snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
  snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
  snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
+ snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
47
  ```
48
 
49
  ``` sh
 
52
  git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
  git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
  git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
+ git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
56
  ```
57
 
58
  Unzip `ttsfrd` resource and install `ttsfrd` package
59
  ``` sh
60
+ cd pretrained_models/CosyVoice-ttsfrd/
61
  unzip resource.zip -d .
62
  pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
  ```
cosyvoice/cli/frontend.py CHANGED
@@ -50,7 +50,7 @@ class CosyVoiceFrontEnd:
50
  self.inflect_parser = inflect.engine()
51
  self.frd = ttsfrd.TtsFrontendEngine()
52
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
53
- assert self.frd.initialize('{}/../../pretrained_models/speech_kantts_ttsfrd/resource'.format(ROOT_DIR)) is True, 'failed to initialize ttsfrd resource'
54
  self.frd.set_lang_type('pinyin')
55
  self.frd.enable_pinyin_mix(True)
56
  self.frd.set_breakmodel_index(1)
 
50
  self.inflect_parser = inflect.engine()
51
  self.frd = ttsfrd.TtsFrontendEngine()
52
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
53
+ assert self.frd.initialize('{}/../../pretrained_models/CosyVoice-ttsfrd/resource'.format(ROOT_DIR)) is True, 'failed to initialize ttsfrd resource'
54
  self.frd.set_lang_type('pinyin')
55
  self.frd.enable_pinyin_mix(True)
56
  self.frd.set_breakmodel_index(1)
cosyvoice/hifigan/generator.py CHANGED
@@ -27,8 +27,8 @@ from torch.nn.utils import weight_norm
27
  from torch.distributions.uniform import Uniform
28
 
29
  from cosyvoice.transformer.activation import Snake
30
- from academicodec.utils import get_padding
31
- from academicodec.utils import init_weights
32
 
33
 
34
  """hifigan based generator implementation.
 
27
  from torch.distributions.uniform import Uniform
28
 
29
  from cosyvoice.transformer.activation import Snake
30
+ from cosyvoice.utils.common import get_padding
31
+ from cosyvoice.utils.common import init_weights
32
 
33
 
34
  """hifigan based generator implementation.
cosyvoice/utils/common.py CHANGED
@@ -91,3 +91,13 @@ def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor,
91
  pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
92
  denominator = torch.sum(mask)
93
  return (numerator / denominator).detach()
 
 
 
 
 
 
 
 
 
 
 
91
  pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
92
  denominator = torch.sum(mask)
93
  return (numerator / denominator).detach()
94
+
95
+
96
+ def get_padding(kernel_size, dilation=1):
97
+ return int((kernel_size * dilation - dilation) / 2)
98
+
99
+
100
+ def init_weights(m, mean=0.0, std=0.01):
101
+ classname = m.__class__.__name__
102
+ if classname.find("Conv") != -1:
103
+ m.weight.data.normal_(mean, std)
runtime/python/Dockerfile CHANGED
@@ -5,8 +5,11 @@ WORKDIR /opt/CosyVoice
5
 
6
  RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
7
  RUN apt-get update -y
8
- RUN apt-get -y install python3-dev cmake python3-pip git
9
  RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
10
- RUN cd CosyVoice && pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
 
 
 
11
  RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
12
  CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"]
 
5
 
6
  RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
7
  RUN apt-get update -y
8
+ RUN apt-get -y install python3-dev cmake python3-pip git unzip
9
  RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
10
+ RUN cd CosyVoice && pip3 install --default-timeout=3600 -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
11
+ RUN apt install git-lfs && git lfs install
12
+ RUN cd CosyVoice && git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
13
+ RUN cd CosyVoice/pretrained_models/CosyVoice-ttsfrd && unzip resource.zip -d . && pip3 install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
14
  RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
15
  CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"]
third_party/AcademiCodec DELETED
@@ -1 +0,0 @@
1
- Subproject commit b6ac134735f6079543db959a60eb77a7bab4277b