Spaces:

efeperro
/

Movie_Analyzer

Paused

efeperro committed on Mar 11

Commit

57ba920

•

1 Parent(s): 1dee31e

Update functions_preprocess.py

Files changed (1) hide show

functions_preprocess.py CHANGED Viewed

@@ -17,6 +17,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 import torch.nn.functional as F
 def download_if_non_existent(res_path, res_name):
   try:
@@ -132,3 +133,16 @@ class CNN(nn.Module):
         pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
         cat = self.dropout(torch.cat(pooled, dim=1))
         return self.fc1(cat)

 import torch.nn as nn
 import torch.optim as optim
 import torch.nn.functional as F
+from torchtext.data.utils import get_tokenizer
 def download_if_non_existent(res_path, res_name):
   try:
         pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
         cat = self.dropout(torch.cat(pooled, dim=1))
         return self.fc1(cat)
+def build_vocab(data_iter):
+    """Create a vocabulary and tokenizer from an iterable of examples.
+
+    Each example's ``'text'`` entry is passed through ``clean_text`` and then
+    split with the "basic_english" tokenizer; the resulting token lists are
+    fed to ``build_vocab_from_iterator`` together with the ``"<unk>"`` and
+    ``"<pad>"`` special tokens.
+
+    Args:
+        data_iter: Iterable of examples indexable by the key ``'text'``.
+            It is consumed once while the vocabulary is built.
+
+    Returns:
+        A ``(vocab, tokenizer)`` tuple; the vocabulary's default index is
+        set to the index of ``"<unk>"``.
+    """
+    tokenizer = get_tokenizer("basic_english")
+    # Clean and tokenize one example at a time instead of materializing a list.
+    token_stream = (tokenizer(clean_text(item['text'])) for item in data_iter)
+    vocab = build_vocab_from_iterator(token_stream, specials=["<unk>", "<pad>"])
+    # Use the "<unk>" entry as the vocabulary's default index.
+    vocab.set_default_index(vocab["<unk>"])
+    return vocab, tokenizer