alanakbik committed on
Commit
6ffb32d
1 Parent(s): 772cf50

initial model commit

Browse files
Files changed (1) hide show
  1. README.md +15 -11
README.md CHANGED
@@ -79,7 +79,7 @@ from flair.models import SequenceTagger
79
  tagger = SequenceTagger.load("flair/pos-english")
80
 
81
  # make example sentence
82
- sentence = Sentence("I love Berlin")
83
 
84
  # predict POS tags
85
  tagger.predict(sentence)
@@ -97,14 +97,14 @@ for entity in sentence.get_spans('pos'):
97
 
98
  This yields the following output:
99
  ```
100
- Span [1,2,3]: "The happy man" [− Labels: NP (0.9958)]
101
- Span [4,5,6]: "has been eating" [− Labels: VP (0.8759)]
102
- Span [7]: "at" [− Labels: PP (1.0)]
103
- Span [8,9]: "the diner" [− Labels: NP (0.9991)]
104
 
105
  ```
106
 
107
- So, the spans "*The happy man*" and "*the diner*" are labeled as **noun phrases** (NP) and "*has been eating*" is labeled as a **verb phrase** (VP) in the sentence "*The happy man has been eating at the diner*".
108
 
109
 
110
  ---
@@ -115,14 +115,18 @@ The following Flair script was used to train this model:
115
 
116
  ```python
117
  from flair.data import Corpus
118
- from flair.datasets import CONLL_2000
119
  from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
120
 
121
- # 1. get the corpus
122
- corpus: Corpus = CONLL_2000()
 
 
 
 
123
 
124
  # 2. what tag do we want to predict?
125
- tag_type = 'np'
126
 
127
  # 3. make the tag dictionary from the corpus
128
  tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
@@ -154,7 +158,7 @@ from flair.trainers import ModelTrainer
154
  trainer = ModelTrainer(tagger, corpus)
155
 
156
  # 7. run training
157
- trainer.train('resources/taggers/chunk-english',
158
  train_with_dev=True,
159
  max_epochs=150)
160
  ```
79
  tagger = SequenceTagger.load("flair/pos-english")
80
 
81
  # make example sentence
82
+ sentence = Sentence("I love Berlin.")
83
 
84
  # predict POS tags
85
  tagger.predict(sentence)
97
 
98
  This yields the following output:
99
  ```
100
+ Span [1]: "I" [− Labels: PRP (1.0)]
101
+ Span [2]: "love" [− Labels: VBP (1.0)]
102
+ Span [3]: "Berlin" [− Labels: NNP (0.9999)]
103
+ Span [4]: "." [− Labels: . (1.0)]
104
 
105
  ```
106
 
107
+ So, the word "*I*" is labeled as a **pronoun** (PRP), "*love*" is labeled as a **verb** (VBP) and "*Berlin*" is labeled as a **proper noun** (NNP) in the sentence "*I love Berlin.*".
108
 
109
 
110
  ---
115
 
116
  ```python
117
  from flair.data import Corpus
118
+ from flair.datasets import ColumnCorpus
119
  from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
120
 
121
+ # 1. load the corpus (OntoNotes does not ship with Flair; you need to download it and reformat it into a column format yourself)
122
+ corpus: Corpus = ColumnCorpus(
123
+ "resources/tasks/onto-ner",
124
+ column_format={0: "text", 1: "pos", 2: "upos", 3: "ner"},
125
+ tag_to_bioes="ner",
126
+ )
127
 
128
  # 2. what tag do we want to predict?
129
+ tag_type = 'pos'
130
 
131
  # 3. make the tag dictionary from the corpus
132
  tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
158
  trainer = ModelTrainer(tagger, corpus)
159
 
160
  # 7. run training
161
+ trainer.train('resources/taggers/pos-english',
162
  train_with_dev=True,
163
  max_epochs=150)
164
  ```