Projeto committed on
Commit
deda615
1 Parent(s): 1f38760

Create get_premodel.py

Browse files
Files changed (1) hide show
  1. legalnlp/get_premodel.py +77 -0
legalnlp/get_premodel.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wget
2
+ import zipfile
3
+
4
+
5
# Maps each supported model key to ``(archive_url, extra_urls)``.
# ``archive_url`` points at the zip that is downloaded and extracted;
# ``extra_urls`` are additional files downloaded next to it, not extracted.
_PREMODEL_SOURCES = {
    # BERTikal
    'bert': (
        'https://ndownloader.figshare.com/files/30446754',
        (),
    ),
    # Files to use in Word2Vec and Doc2Vec
    'wodc': (
        'https://ndownloader.figshare.com/files/30446736',
        (),
    ),
    # Word2Vec of NILC
    'w2vnilc': (
        'http://143.107.183.175:22980/download.php?file=embeddings/word2vec/cbow_s100.zip',
        (),
    ),
    # Files to use the Phraser model
    'phraser': (
        'https://ndownloader.figshare.com/files/30446727',
        (),
    ),
    # Files to use the Fast Text model
    'fasttext': (
        'https://ndownloader.figshare.com/files/30446739',
        (),
    ),
    # NeuralMind pre-model, base
    'neuralmindbase': (
        'https://neuralmind-ai.s3.us-east-2.amazonaws.com/nlp/bert-base-portuguese-cased/bert-base-portuguese-cased_pytorch_checkpoint.zip',
        ('https://neuralmind-ai.s3.us-east-2.amazonaws.com/nlp/bert-base-portuguese-cased/vocab.txt',),
    ),
    # NeuralMind pre-model, large
    'neuralmindlarge': (
        'https://neuralmind-ai.s3.us-east-2.amazonaws.com/nlp/bert-large-portuguese-cased/bert-large-portuguese-cased_pytorch_checkpoint.zip',
        ('https://neuralmind-ai.s3.us-east-2.amazonaws.com/nlp/bert-large-portuguese-cased/vocab.txt',),
    ),
}


def _download_and_extract(archive_url, extra_urls=()):
    """Download a zip archive (plus any extra files) and extract the archive.

    Every file is fetched with ``wget.download(..., out=None)``, i.e. to
    wget's default destination. The archive is extracted into a directory
    whose name is the downloaded file name with ``'.zip'`` removed.
    """
    archive_path = wget.download(archive_url, out=None)
    # Extras are fetched before extraction, matching the original ordering.
    for extra_url in extra_urls:
        wget.download(extra_url, out=None)
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall(archive_path.replace('.zip', ''))


def get_premodel(model):
    """Download and unpack the pre-trained model identified by ``model``.

    Args:
        model: One of ``'bert'``, ``'wodc'``, ``'w2vnilc'``, ``'phraser'``,
            ``'fasttext'``, ``'neuralmindbase'`` or ``'neuralmindlarge'``.

    Returns:
        ``True`` if ``model`` named a known pre-model and its files were
        downloaded and extracted; ``False`` if the key is not recognised
        (nothing is downloaded in that case).

    Errors raised by ``wget`` or ``zipfile`` during download or extraction
    are not caught here and propagate to the caller.
    """
    # Non-string keys (including unhashable ones) can never match a model
    # name; reject them up front so the dict lookup cannot raise.
    if not isinstance(model, str):
        return False

    source = _PREMODEL_SOURCES.get(model)
    if source is None:
        return False

    archive_url, extra_urls = source
    _download_and_extract(archive_url, extra_urls)
    return True