flair
/

pos-english

alanakbik committed on Feb 21, 2021

Commit

6ffb32d

•

1 Parent(s): 772cf50

initial model commit

Files changed (1) hide show

README.md CHANGED Viewed

@@ -79,7 +79,7 @@ from flair.models import SequenceTagger
 tagger = SequenceTagger.load("flair/pos-english")
 # make example sentence
-sentence = Sentence("I love Berlin")
 # predict POS tags
 tagger.predict(sentence)
@@ -97,14 +97,14 @@ for entity in sentence.get_spans('pos'):
 This yields the following output:
 ```
-Span [1,2,3]: "The happy man"   [− Labels: NP (0.9958)]
-Span [4,5,6]: "has been eating"   [− Labels: VP (0.8759)]
-Span [7]: "at"   [− Labels: PP (1.0)]
-Span [8,9]: "the diner"   [− Labels: NP (0.9991)]
 ```
-So, the spans "*The happy man*" and "*the diner*" are labeled as **noun phrases** (NP) and "*has been eating*" is labeled as a **verb phrase** (VP) in the sentence "*The happy man has been eating at the diner*".
 ---
@@ -115,14 +115,18 @@ The following Flair script was used to train this model:
 ```python
 from flair.data import Corpus
-from flair.datasets import CONLL_2000
 from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
-# 1. get the corpus
-corpus: Corpus = CONLL_2000()
 # 2. what tag do we want to predict?
-tag_type = 'np'
 # 3. make the tag dictionary from the corpus
 tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
@@ -154,7 +158,7 @@ from flair.trainers import ModelTrainer
 trainer = ModelTrainer(tagger, corpus)
 # 7. run training
-trainer.train('resources/taggers/chunk-english',
               train_with_dev=True,
               max_epochs=150)
 ```

 tagger = SequenceTagger.load("flair/pos-english")
 # make example sentence
+sentence = Sentence("I love Berlin.")
 # predict POS tags
 tagger.predict(sentence)
 This yields the following output:
 ```
+Span [1]: "I"   [− Labels: PRP (1.0)]
+Span [2]: "love"   [− Labels: VBP (1.0)]
+Span [3]: "Berlin"   [− Labels: NNP (0.9999)]
+Span [4]: "."   [− Labels: . (1.0)]
 ```
+So, the word "*I*" is labeled as a **pronoun** (PRP), "*love*" is labeled as a **verb** (VBP) and "*Berlin*" is labeled as a **proper noun** (NNP) in the sentence "*I love Berlin*".
 ---
 ```python
 from flair.data import Corpus
+from flair.datasets import ColumnCorpus
 from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
+# 1. load the corpus (OntoNotes does not ship with Flair; you need to download it and reformat it into a column format yourself)
+corpus: Corpus = ColumnCorpus(
+                "resources/tasks/onto-ner",
+                column_format={0: "text", 1: "pos", 2: "upos", 3: "ner"},
+                tag_to_bioes="ner",
+            )
 # 2. what tag do we want to predict?
+tag_type = 'pos'
 # 3. make the tag dictionary from the corpus
 tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
 trainer = ModelTrainer(tagger, corpus)
 # 7. run training
+trainer.train('resources/taggers/pos-english',
               train_with_dev=True,
               max_epochs=150)
 ```