mehran commited on
Commit
dfd4faa
1 Parent(s): 4cba973

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -3
README.md CHANGED
@@ -64,15 +64,15 @@ from model import KenlmModel
64
 
65
 
66
  # Load the model
67
- model = KenlmModel.from_pretrained("32000", "5", "01111")
68
 
69
  # Get perplexity
70
  print(model.perplexity("من در را بستم"))
71
- # Outputs: 19.0
72
 
73
  # Get score
74
  print(model.score("من در را بستم"))
75
- # Outputs: -8.94505500793457
76
  ```
77
 
78
  # What are the different files you can find in this repository?
@@ -124,3 +124,9 @@ using the `build_binary` program, as shown below:
124
  ```
125
  build_binary -T /tmp -S 80% probing jomleh-sp-32000-o5-prune01111.arpa jomleh-sp-32000-o5-prune01111.probing
126
  ```
 
 
 
 
 
 
64
 
65
 
66
  # Load the model
67
+ model = KenlmModel.from_pretrained("57218", "3", "011")
68
 
69
  # Get perplexity
70
  print(model.perplexity("من در را بستم"))
71
+ # Outputs: 72.5
72
 
73
  # Get score
74
  print(model.score("من در را بستم"))
75
+ # Outputs: -11.160577774047852
76
  ```
77
 
78
  # What are the different files you can find in this repository?
124
  ```
125
  build_binary -T /tmp -S 80% probing jomleh-sp-32000-o5-prune01111.arpa jomleh-sp-32000-o5-prune01111.probing
126
  ```
127
+
128
+ # Which model to use?
129
+
130
+ Based on my personal evaluation, I recommend using the `jomleh-sp-57218-o3-prune011.probing`.
131
+ It's the perfect balance between file size (6GB) and accuracy (80%). But if you have no concern for file
132
+ size, then go for the largest model, `jomleh-sp-57218-o5-prune00011.probing` (size: 36GB, accuracy: 82%).