Upload 4 files
Browse files- model.bin +3 -0
- sentencepiece.model +3 -0
- shared_vocabulary.txt +0 -0
- usage_spm.py +13 -0
model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdc499862b9e8e9deb73458976e24efb182e2b57c15ef35f8e55cb1a1dd264f9
|
| 3 |
+
size 81165818
|
sentencepiece.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6738ed8e5f56b2c3942cc11fd8c4890c9d589dfa806a897a84f67852a4150cdb
|
| 3 |
+
size 847832
|
shared_vocabulary.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
usage_spm.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Minimal usage example: translate one sentence with a CTranslate2 model,
# using SentencePiece for tokenization and detokenization.
#
# Shell hint left by the original author (not Python). NOTE(review): presumably
# a workaround for a duplicate OpenMP runtime error on some platforms - confirm
# before relying on it:
#   export KMP_DUPLICATE_LIB_OK=TRUE
import ctranslate2
import sentencepiece as spm
from translate import Translator  # NOTE(review): unused in this script; kept so existing imports are unchanged

# Directory holding the converted model; sentencepiece.model is loaded from
# the same directory.
MODEL_DIR = "ur_en/"

# Sentence to translate. The original script first assigned an English and a
# Korean sample to `text` and immediately overwrote both, so only this Urdu
# sentence was ever used; those dead assignments are dropped.
text = "میں ٹیکس آفس سے کال کر رہا ہوں۔"

translator = ctranslate2.Translator(MODEL_DIR)
sp = spm.SentencePieceProcessor(model_file=MODEL_DIR + "sentencepiece.model")

# Split the sentence into subword pieces (strings rather than ids), which is
# the input form passed to translate_batch below.
tokens = sp.encode(text, out_type=str)

# translate_batch takes a list of token lists; a single sentence is passed here.
translated_tokens = translator.translate_batch([tokens])

# Take the first hypothesis of the first (only) result, join the pieces back
# into text, and replace any remaining "▁" word-boundary markers with spaces.
output = sp.decode(translated_tokens[0][0]["tokens"]).replace("▁", " ")
print(f"{output}")
|