datasetsANDmodels committed on
Commit
b42624d
·
verified ·
1 Parent(s): a109898

Upload 4 files

Browse files
Files changed (4) hide show
  1. model.bin +3 -0
  2. sentencepiece.model +3 -0
  3. shared_vocabulary.txt +0 -0
  4. usage_spm.py +13 -0
model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc499862b9e8e9deb73458976e24efb182e2b57c15ef35f8e55cb1a1dd264f9
3
+ size 81165818
sentencepiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6738ed8e5f56b2c3942cc11fd8c4890c9d589dfa806a897a84f67852a4150cdb
3
+ size 847832
shared_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
usage_spm.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Usage example: translate one Urdu sentence to English with the CTranslate2
# model and the SentencePiece tokenizer stored in the "ur_en/" directory.
#
# Shell setting kept from the original script (presumably a workaround for
# duplicate OpenMP runtime aborts -- confirm before relying on it):
#   export KMP_DUPLICATE_LIB_OK=TRUE
import ctranslate2
import sentencepiece as spm
from translate import Translator  # NOTE(review): not used below; kept so the import set is unchanged.

# Sentence to translate. The original script first assigned an English and a
# Korean sample to `text`, but both were overwritten by this one before use.
text = "میں ٹیکس آفس سے کال کر رہا ہوں۔"

translator = ctranslate2.Translator("ur_en/")
sp = spm.SentencePieceProcessor(model_file="ur_en/sentencepiece.model")

# Encode to SentencePiece pieces (strings); translate_batch takes a batch of
# token lists, so the single sentence is wrapped in a list.
tokens = sp.encode(text, out_type=str)
translated_tokens = translator.translate_batch([tokens])

# Read the first hypothesis of the first (only) result through the
# `hypotheses` attribute. The legacy `result[0]["tokens"]` indexing is not
# supported by current CTranslate2 releases, whose TranslationResult objects
# are not subscriptable.
best_hypothesis = translated_tokens[0].hypotheses[0]

# Any leftover "▁" word-boundary markers are turned into plain spaces.
output = sp.decode(best_hypothesis).replace("▁", " ")
print(output)