Spaces:
Runtime error
Runtime error
add
Browse files- .gitignore +163 -0
- source/services/ner/train/train.py +2 -2
.gitignore
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# .gitignore specific to this project
|
2 |
+
# ===================================
|
3 |
+
source/services/ner/model/*
|
4 |
+
dist
|
5 |
+
build
|
6 |
+
comlib.egg-info
|
7 |
+
|
8 |
+
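# TeX auxiliary files (NOTE: bare names such as ".log" match only files literally named ".log"; "*.log", "*.aux", etc. are probably intended - confirm)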
|
9 |
+
.log
|
10 |
+
.synctex.gz
|
11 |
+
.toc
|
12 |
+
.aux
|
13 |
+
.out
|
14 |
+
.idx
|
15 |
+
.bbl
|
16 |
+
.blg
|
17 |
+
|
18 |
+
# env file
|
19 |
+
.env
|
20 |
+
|
21 |
+
# data
|
22 |
+
sensitive/*
|
23 |
+
.cache/*
|
24 |
+
router/cache/*
|
25 |
+
database.json
|
26 |
+
data/*
|
27 |
+
log/*
|
28 |
+
!log/.empty
|
29 |
+
uploads/*
|
30 |
+
|
31 |
+
# models
|
32 |
+
model/*
|
33 |
+
|
34 |
+
# IDE configuration
|
35 |
+
.idea/*
|
36 |
+
.ropeproject
|
37 |
+
|
38 |
+
# python generated files
|
39 |
+
__pycache__
|
40 |
+
.mypy_cache
|
41 |
+
*.egg-info
|
42 |
+
*.pyc
|
43 |
+
dependencies/python-pdfbox/build
|
44 |
+
dependencies/python-pdfbox/dist
|
45 |
+
|
46 |
+
# log files
|
47 |
+
corpus.log
|
48 |
+
|
49 |
+
# local config files
|
50 |
+
|
51 |
+
|
52 |
+
# Default .gitignore file
|
53 |
+
# =======================
|
54 |
+
|
55 |
+
# Mac OS X
|
56 |
+
.DS_Store
|
57 |
+
|
58 |
+
# Windows image file caches
|
59 |
+
Thumbs.db
|
60 |
+
ehthumbs.db
|
61 |
+
|
62 |
+
# Folder config file
|
63 |
+
Desktop.ini
|
64 |
+
|
65 |
+
# Recycle Bin used on file shares
|
66 |
+
$RECYCLE.BIN/
|
67 |
+
|
68 |
+
# Windows Installer files
|
69 |
+
*.cab
|
70 |
+
*.msi
|
71 |
+
*.msm
|
72 |
+
*.msp
|
73 |
+
|
74 |
+
# Windows shortcuts
|
75 |
+
*.lnk
|
76 |
+
|
77 |
+
# Vagrant
|
78 |
+
.vagrant/
|
79 |
+
|
80 |
+
# IntelliJ
|
81 |
+
.idea/
|
82 |
+
*.iml
|
83 |
+
*.iws
|
84 |
+
|
85 |
+
# Eclipse
|
86 |
+
.classpath
|
87 |
+
.project
|
88 |
+
.settings/
|
89 |
+
|
90 |
+
# Maven
|
91 |
+
log/
|
92 |
+
target/
|
93 |
+
|
94 |
+
# Gradle
|
95 |
+
.gradle/
|
96 |
+
build/
|
97 |
+
|
98 |
+
# SASS
|
99 |
+
**/.sass-cache
|
100 |
+
**/.sass-cache/*
|
101 |
+
|
102 |
+
# Byte-compiled / optimized / DLL files
|
103 |
+
__pycache__/
|
104 |
+
*.py[cod]
|
105 |
+
|
106 |
+
# C extensions
|
107 |
+
*.so
|
108 |
+
|
109 |
+
# Distribution / packaging
|
110 |
+
bin/
|
111 |
+
build/
|
112 |
+
develop-eggs/
|
113 |
+
dist/
|
114 |
+
eggs/
|
115 |
+
lib64/
|
116 |
+
parts/
|
117 |
+
sdist/
|
118 |
+
var/
|
119 |
+
*.egg-info/
|
120 |
+
.installed.cfg
|
121 |
+
*.egg
|
122 |
+
|
123 |
+
# Installer logs
|
124 |
+
pip-log.txt
|
125 |
+
pip-delete-this-directory.txt
|
126 |
+
|
127 |
+
# Unit test / coverage reports
|
128 |
+
.tox/
|
129 |
+
.coverage
|
130 |
+
.cache
|
131 |
+
nosetests.xml
|
132 |
+
coverage.xml
|
133 |
+
|
134 |
+
# Translations
|
135 |
+
*.mo
|
136 |
+
|
137 |
+
# Mr Developer
|
138 |
+
.mr.developer.cfg
|
139 |
+
.project
|
140 |
+
.pydevproject
|
141 |
+
|
142 |
+
# Rope
|
143 |
+
.ropeproject
|
144 |
+
|
145 |
+
# Django stuff:
|
146 |
+
*.log
|
147 |
+
*.pot
|
148 |
+
|
149 |
+
# Sphinx documentation
|
150 |
+
docs/_build/
|
151 |
+
|
152 |
+
# VSCode
|
153 |
+
.vscode
|
154 |
+
|
155 |
+
# Jupyter Notebook
|
156 |
+
.ipynb_checkpoints
|
157 |
+
|
158 |
+
data_exploration/data/anonymization.xlsx
|
159 |
+
data_exploration/data/~$anonymization.xlsx
|
160 |
+
data_exploration/data/query4_results.pdf
|
161 |
+
data_exploration/data/query5_results.pdf
|
162 |
+
data_exploration/src/data_versioning_problem_illustrate.pptx
|
163 |
+
data_exploration/src/
|
source/services/ner/train/train.py
CHANGED
@@ -173,7 +173,7 @@ model.config.num_labels
|
|
173 |
from transformers import TrainingArguments
|
174 |
|
175 |
args = TrainingArguments(
|
176 |
-
"bert-finetuned-legalentity-ner",
|
177 |
evaluation_strategy="epoch",
|
178 |
save_strategy="epoch",
|
179 |
learning_rate=2e-5,
|
@@ -323,7 +323,7 @@ unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
|
|
323 |
from transformers import pipeline
|
324 |
|
325 |
# Replace this with your own checkpoint
|
326 |
-
model_checkpoint = "
|
327 |
token_classifier = pipeline(
|
328 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
329 |
)
|
|
|
173 |
from transformers import TrainingArguments
|
174 |
|
175 |
args = TrainingArguments(
|
176 |
+
output_dir="source/services/ner/model/hf_tokenclassification/bert-finetuned-legalentity-ner",
|
177 |
evaluation_strategy="epoch",
|
178 |
save_strategy="epoch",
|
179 |
learning_rate=2e-5,
|
|
|
323 |
from transformers import pipeline
|
324 |
|
325 |
# Replace this with your own checkpoint
|
326 |
+
model_checkpoint = "aimlnerd/bert-finetuned-legalentity-ner"
|
327 |
token_classifier = pipeline(
|
328 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
329 |
)
|