Dy3257 committed on
Commit
535a983
1 Parent(s): 12daa16

Upload 19 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ en-zh_model/target_vocabulary.json filter=lfs diff=lfs merge=lfs -text
37
+ zh-en_model/source_vocabulary.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #该应用创建工具共包含三个区域,顶部工具栏,左侧代码区,右侧交互效果区,其中右侧交互效果是通过左侧代码生成的,存在对照关系。
2
+ #顶部工具栏:运行、保存、新开浏览器打开、实时预览开关,针对运行和在浏览器打开选项进行重要说明:
3
+ #[运行]:交互效果并非实时更新,代码变更后,需点击运行按钮获得最新交互效果。
4
+ #[在浏览器打开]:新建页面查看交互效果。
5
+ #以下为应用创建工具的示例代码
6
+
7
+ import gradio as gr
8
+
9
+ import ctranslate2
10
+ from split import split_string
11
+
12
# Model directories are given relative to the working directory, and every
# model is loaded on the CPU. Each direction has two models; `translate`
# returns one output per model for the selected direction.

# Chinese -> English
translator_zh2en = ctranslate2.Translator("zh-en_model/", device="cpu")
translator2_zh2en = ctranslate2.Translator("zh2en_cmodel/", device="cpu")

# English -> Chinese
translator_en2zh = ctranslate2.Translator("en-zh_model/", device="cpu")
translator2_en2zh = ctranslate2.Translator("en2zh_cmodel", device="cpu")
16
+
17
def _translate_text(translator, text):
    """Translate *text* with *translator* and return the joined output tokens.

    The text is split on whitespace, grouped into sentence-sized chunks by
    ``split_string``, translated as one batch, and the best hypothesis of
    every chunk is concatenated with single spaces.
    """
    chunks = split_string(text.split())
    if not chunks:
        # Nothing to translate; joining zero tokens would give "" anyway.
        return ""
    results = translator.translate_batch(chunks)
    # hypotheses[0] is the top-ranked token list for each chunk.
    return " ".join(
        token for result in results for token in result.hypotheses[0]
    )


def translate(input_tokens, input_tokens2, mode):
    """Translate two whitespace-tokenized texts, one per model.

    Args:
        input_tokens: Text fed to the first model of the chosen direction.
        input_tokens2: Text fed to the second model of the chosen direction.
        mode: ``"汉译英"`` selects the Chinese-to-English models; any other
            value selects the English-to-Chinese models.

    Returns:
        A ``(first_output, second_output)`` tuple of space-joined token
        strings.
    """
    if mode == "汉译英":
        first_model, second_model = translator_zh2en, translator2_zh2en
    else:
        first_model, second_model = translator_en2zh, translator2_en2zh

    return (
        _translate_text(first_model, input_tokens),
        _translate_text(second_model, input_tokens2),
    )
50
+
51
# Two text boxes (one per model) and a direction selector go in; the two
# translations come out as text.
demo = gr.Interface(
    fn=translate,
    inputs=[
        "text",
        "text",
        gr.Dropdown(["汉译英", "英译汉"]),
    ],
    outputs=["text", "text"],
)

demo.launch()
en-zh_model/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
en-zh_model/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48dcd96223ed75212fa052d1ea1cf19346c3e529e2e7ee21eb9b35dcb71df4d6
3
+ size 3465739918
en-zh_model/source_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
en-zh_model/target_vocabulary.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d939e964beab69401ddbc855fd6122007754cab6313906d4db73c351522894
3
+ size 11621611
en2zh_cmodel/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "</s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
en2zh_cmodel/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:151b3fde80e90cdd20552f0cf34e65a26b61e350405f7ed5d738b912544fd6bf
3
+ size 468479022
en2zh_cmodel/source_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
en2zh_cmodel/target_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ctranslate2==4.1.0
split.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Tokens that end a sentence. ASCII and full-width forms are both listed so
# that Chinese as well as English punctuation is recognised.
_SENTENCE_END = frozenset({".", "。", "!", "!", "?", "?"})
# Weaker break points, only honoured once a chunk has grown long.
_CLAUSE_BREAK = _SENTENCE_END | frozenset({",", ",", "……"})


def split_string(tokens, min_len=10, soft_len=30):
    """Split a token sequence into sentence-sized chunks.

    Scanning of each chunk starts ``min_len`` tokens after the chunk's
    beginning, so punctuation earlier than that never ends a chunk. Up to
    and including offset ``soft_len`` only sentence-ending punctuation cuts
    the chunk; beyond it, commas and ``"……"`` cut it as well. The
    punctuation token stays at the end of the chunk it closes.

    Args:
        tokens: Sequence of string tokens (punctuation as separate tokens).
        min_len: Offset within a chunk at which scanning for a break starts.
        soft_len: Offset beyond which clause-level punctuation also breaks.

    Returns:
        A list of chunks (slices of ``tokens``). When no further break is
        found, all remaining tokens form the final chunk. An empty input
        yields an empty list.
    """
    chunks = []
    total = len(tokens)
    start = 0

    while total - start > min_len:
        cut = None
        for pos in range(start + min_len, total):
            # Long chunks may also be cut at clause-level punctuation.
            if pos - start > soft_len:
                breakers = _CLAUSE_BREAK
            else:
                breakers = _SENTENCE_END
            if tokens[pos] in breakers:
                cut = pos + 1
                break
        if cut is None:
            # No usable break point in the remainder; stop scanning.
            break
        chunks.append(tokens[start:cut])
        start = cut

    if start < total:
        chunks.append(tokens[start:])
    return chunks
zh-en_model/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
zh-en_model/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03bfefa51ffe9e9e4e5f588373aaa81440eea4f082b8771a2e1523bfa81b581d
3
+ size 3055339918
zh-en_model/source_vocabulary.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d939e964beab69401ddbc855fd6122007754cab6313906d4db73c351522894
3
+ size 11621611
zh-en_model/target_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
zh2en_cmodel/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "</s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
zh2en_cmodel/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e8c2c3b0afd5be0941250da5ded3ae3b51dd08667f55d9b072f41fb9a5e8c9
3
+ size 458517550
zh2en_cmodel/source_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
zh2en_cmodel/target_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff