elozano committed on
Commit
a59a2c1
1 Parent(s): 2ed1ed2

specific tokenizer defined

Browse files
Files changed (1) hide show
  1. analyzer.py +4 -14
analyzer.py CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, Optional, Union
3
  from transformers import (
4
  AutoModelForSequenceClassification,
5
  AutoModelForTokenClassification,
 
6
  AutoTokenizer,
7
  TokenClassificationPipeline,
8
  )
@@ -34,19 +35,19 @@ class NewsAnalyzer:
34
  model=AutoModelForSequenceClassification.from_pretrained(
35
  category_model_name
36
  ),
37
- tokenizer=AutoTokenizer.from_pretrained(category_model_name),
38
  emojis=CATEGORY_EMOJIS,
39
  )
40
  self.fake_pipe = NewsPipeline(
41
  model=AutoModelForSequenceClassification.from_pretrained(fake_model_name),
42
- tokenizer=AutoTokenizer.from_pretrained(fake_model_name),
43
  emojis=FAKE_EMOJIS,
44
  )
45
  self.clickbait_pipe = NewsPipeline(
46
  model=AutoModelForSequenceClassification.from_pretrained(
47
  clickbait_model_name
48
  ),
49
- tokenizer=AutoTokenizer.from_pretrained(clickbait_model_name),
50
  emojis=CLICKBAIT_EMOJIS,
51
  )
52
  self.ner_pipe = TokenClassificationPipeline(
@@ -67,14 +68,3 @@ class NewsAnalyzer:
67
  "content": self.ner_pipe(content) if content else None,
68
  },
69
  }
70
-
71
-
72
- if __name__ == "__main__":
73
- analyzer = NewsAnalyzer(
74
- category_model_name="elozano/news-category",
75
- fake_model_name="elozano/news-fake",
76
- clickbait_model_name="elozano/news-clickbait",
77
- ner_model_name="dslim/bert-base-NER",
78
- )
79
- prediction = analyzer(headline="Lakers Won!")
80
- print(prediction)
 
3
  from transformers import (
4
  AutoModelForSequenceClassification,
5
  AutoModelForTokenClassification,
6
+ BertTokenizer,
7
  AutoTokenizer,
8
  TokenClassificationPipeline,
9
  )
 
35
  model=AutoModelForSequenceClassification.from_pretrained(
36
  category_model_name
37
  ),
38
+ tokenizer=BertTokenizer.from_pretrained(category_model_name),
39
  emojis=CATEGORY_EMOJIS,
40
  )
41
  self.fake_pipe = NewsPipeline(
42
  model=AutoModelForSequenceClassification.from_pretrained(fake_model_name),
43
+ tokenizer=BertTokenizer.from_pretrained(fake_model_name),
44
  emojis=FAKE_EMOJIS,
45
  )
46
  self.clickbait_pipe = NewsPipeline(
47
  model=AutoModelForSequenceClassification.from_pretrained(
48
  clickbait_model_name
49
  ),
50
+ tokenizer=BertTokenizer.from_pretrained(clickbait_model_name),
51
  emojis=CLICKBAIT_EMOJIS,
52
  )
53
  self.ner_pipe = TokenClassificationPipeline(
 
68
  "content": self.ner_pipe(content) if content else None,
69
  },
70
  }