elozano committed on
Commit
685ba0e
1 Parent(s): ce5c5cb

App updated

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. analyzer.py +80 -0
  3. app.py +49 -43
  4. news_pipeline.py +0 -61
  5. pipeline.py +16 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
analyzer.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, Union
2
+
3
+ from transformers import (
4
+ AutoModelForSequenceClassification,
5
+ AutoModelForTokenClassification,
6
+ AutoTokenizer,
7
+ TokenClassificationPipeline,
8
+ )
9
+
10
+ from pipeline import NewsPipeline
11
+
12
# Emoji displayed for each news-category label (looked up by predicted label).
CATEGORY_EMOJIS = {
    "Automobile": "🚗",
    "Entertainment": "🍿",
    "Politics": "⚖️",
    "Science": "🧪",
    "Sports": "🏀",
    "Technology": "💻",
    "World": "🌍",
}

# Emoji displayed for each fake-news label.
FAKE_EMOJIS = {
    "Fake": "👻",
    "Real": "👍",
}

# Emoji displayed for each clickbait label.
CLICKBAIT_EMOJIS = {
    "Clickbait": "🎣",
    "Normal": "✅",
}
23
+
24
+
25
class NewsAnalyzer:
    """Run category, fake-news, clickbait and NER models on a news article.

    The three classifiers are ``NewsPipeline`` instances (each one decorates
    its prediction with an emoji); named-entity recognition uses a plain
    ``TokenClassificationPipeline`` with ``aggregation_strategy="simple"``.
    """

    def __init__(
        self,
        category_model_name: str,
        fake_model_name: str,
        clickbait_model_name: str,
        ner_model_name: str,
    ) -> None:
        """Load every model/tokenizer pair by its ``from_pretrained`` name.

        Args:
            category_model_name: Sequence-classification model for the category.
            fake_model_name: Sequence-classification model for fake detection.
            clickbait_model_name: Sequence-classification model for clickbait.
            ner_model_name: Token-classification model for entity recognition.
        """
        self.category_pipe = self._build_classifier(
            category_model_name, CATEGORY_EMOJIS
        )
        self.fake_pipe = self._build_classifier(fake_model_name, FAKE_EMOJIS)
        self.clickbait_pipe = self._build_classifier(
            clickbait_model_name, CLICKBAIT_EMOJIS
        )
        self.ner_pipe = TokenClassificationPipeline(
            model=AutoModelForTokenClassification.from_pretrained(ner_model_name),
            tokenizer=AutoTokenizer.from_pretrained(ner_model_name),
            aggregation_strategy="simple",
        )

    @staticmethod
    def _build_classifier(model_name: str, emojis: Dict[str, str]) -> NewsPipeline:
        """Create a ``NewsPipeline`` whose model and tokenizer share one name."""
        return NewsPipeline(
            model=AutoModelForSequenceClassification.from_pretrained(model_name),
            tokenizer=AutoTokenizer.from_pretrained(model_name),
            emojis=emojis,
        )

    def __call__(self, headline: str, content: Optional[str] = None) -> Dict[str, Dict]:
        """Analyze an article and return one nested result per task.

        Args:
            headline: Article headline; used by every model.
            content: Optional article body. It is passed to the category and
                fake classifiers and to NER, but never to the clickbait
                classifier, which always receives the headline alone.

        Returns:
            A dict with keys ``"category"``, ``"fake"`` and ``"clickbait"``
            (each the dict returned by the matching ``NewsPipeline``) and
            ``"ner"``, a dict with the NER output for ``"headline"`` and for
            ``"content"`` (``None`` when no content was given).
        """
        return {
            "category": self.category_pipe(headline=headline, content=content),
            "fake": self.fake_pipe(headline=headline, content=content),
            # Clickbait is judged on the headline only.
            "clickbait": self.clickbait_pipe(headline=headline, content=None),
            "ner": {
                "headline": self.ner_pipe(headline),
                "content": self.ner_pipe(content) if content else None,
            },
        }
70
+
71
+
72
if __name__ == "__main__":
    # Manual smoke test: load the four models and analyze a single headline.
    model_names = {
        "category_model_name": "elozano/news-category",
        "fake_model_name": "elozano/news-fake",
        "clickbait_model_name": "elozano/news-clickbait",
        "ner_model_name": "dslim/bert-base-NER",
    }
    analyzer = NewsAnalyzer(**model_names)
    prediction = analyzer(headline="Lakers Won!")
    print(prediction)
app.py CHANGED
@@ -1,68 +1,74 @@
 
 
1
  import streamlit as st
2
  from annotated_text import annotated_text
3
- from news_pipeline import NewsPipeline
4
 
5
- CATEGORY_EMOJIS = {
6
- "Automobile": "🚗",
7
- "Entertainment": "🍿",
8
- "Politics": "⚖️",
9
- "Science": "🧪",
10
- "Sports": "🏀",
11
- "Technology": "💻",
12
- "World": "🌍",
13
- }
14
- FAKE_EMOJIS = {"Fake": "👻", "Real": "👍"}
15
- CLICKBAIT_EMOJIS = {"Clickbait": "🎣", "Normal": "✅"}
16
 
17
 
18
- def app():
19
- news_pipe = NewsPipeline()
 
 
 
 
 
20
  st.title("📰 News Analyzer")
21
- headline = st.text_input("Article headline:")
22
- content = st.text_area("Article content:")
23
- button = st.button("Analyze")
24
- if button:
25
- if headline == "" and content == "":
26
- st.error("Please, introduce an article headline and content.")
27
- else:
28
- if headline == "" or content == "":
29
- st.warning(
30
- "Please, provide both headline and content to achieve better results."
31
- )
32
- else:
33
- st.success("Article successfully analyzed!")
34
 
35
- with st.spinner("Analyzing article..."):
36
- prediction = news_pipe(headline, content)
37
- col1, _, col2 = st.columns([2, 1, 6])
38
  with col1:
39
  st.subheader("Analysis:")
 
40
  st.markdown(
41
- f"{CATEGORY_EMOJIS[prediction['category']]} **Category**: {prediction['category']}"
42
  )
 
43
  st.markdown(
44
- f"{FAKE_EMOJIS[prediction['fake']]} **Fake**: {'Yes' if prediction['fake'] == 'Fake' else 'No'}"
45
  )
 
46
  st.markdown(
47
- f"{CLICKBAIT_EMOJIS[prediction['clickbait']]} **Clickbait**: {'Yes' if prediction['clickbait'] == 'Clickbait' else 'No'}"
48
  )
 
49
  with col2:
50
- st.subheader("Headline")
51
- annotated_text(*parse_text(headline, prediction["ner"]["headline"]))
52
- st.subheader("Content")
53
- annotated_text(*parse_text(content, prediction["ner"]["content"]))
 
 
 
 
 
 
 
54
 
55
 
56
- def parse_text(text, prediction):
 
 
57
  start = 0
58
  parsed_text = []
59
- for p in prediction:
60
- parsed_text.append(text[start : p["start"]])
61
- parsed_text.append((p["word"], p["entity_group"]))
62
- start = p["end"]
63
  parsed_text.append(text[start:])
64
  return parsed_text
65
 
66
 
67
  if __name__ == "__main__":
68
- app()
 
1
+ from typing import Dict, List, Tuple, Union
2
+
3
  import streamlit as st
4
  from annotated_text import annotated_text
 
5
 
6
+ from analyzer import NewsAnalyzer
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
def run() -> None:
    """Render the Streamlit page: collect an article, analyze it, show results.

    A headline is mandatory. When the content is missing a warning is shown
    but the analysis can still be requested through the "Analyze" button.
    """
    analyzer = NewsAnalyzer(
        category_model_name="elozano/news-category",
        fake_model_name="elozano/news-fake",
        clickbait_model_name="elozano/news-clickbait",
        ner_model_name="dslim/bert-base-NER",
    )
    st.title("📰 News Analyzer")
    headline = st.text_input("Headline:")
    content = st.text_input("Content:")

    # Without a headline there is nothing to analyze.
    if headline == "":
        st.error("Please, provide a headline.")
        return

    if content == "":
        st.warning(
            "Please, provide both headline and content to achieve better results."
        )

    if not st.button("Analyze"):
        return

    predictions = analyzer(headline=headline, content=content)
    summary_col, _, text_col = st.columns([2, 1, 5])

    with summary_col:
        st.subheader("Analysis:")

        category = predictions["category"]
        st.markdown(f"{category['emoji']} **Category**: {category['label']}")

        clickbait = predictions["clickbait"]
        clickbait_answer = "Yes" if clickbait["label"] == "Clickbait" else "No"
        st.markdown(f"{clickbait['emoji']} **Clickbait**: {clickbait_answer}")

        fake = predictions["fake"]
        fake_answer = "Yes" if fake["label"] == "Fake" else "No"
        st.markdown(f"{fake['emoji']} **Fake**: {fake_answer}")

    with text_col:
        ner = predictions["ner"]
        st.subheader("Headline:")
        annotated_text(*parse_entities(headline, ner["headline"]))
        st.subheader("Content:")
        if content:
            annotated_text(*parse_entities(content, ner["content"]))
        else:
            st.error("Content not provided.")
58
 
59
 
60
def parse_entities(
    text: str, entities: Union[List[Dict[str, Union[str, int, float]]], None]
) -> List[Union[str, Tuple[str, str]]]:
    """Split ``text`` into plain segments and ``(word, entity_group)`` tuples.

    Args:
        text: The string the entities were extracted from.
        entities: Entity dicts, each providing ``"start"``, ``"end"``,
            ``"word"`` and ``"entity_group"``. ``None`` or an empty list
            means "no entities". Entities are consumed in the given order,
            so they are presumably expected to be sorted by position and
            non-overlapping -- confirm against the NER pipeline output.

    Returns:
        Alternating plain-text pieces (possibly empty strings) and
        ``(word, entity_group)`` tuples, ending with the text that follows
        the last entity. Suitable for unpacking into ``annotated_text``.
    """
    parsed_text: List[Union[str, Tuple[str, str]]] = []
    start = 0
    # `entities or []` tolerates None, which the analyzer returns for
    # missing content.
    for entity in entities or []:
        parsed_text.append(text[start : entity["start"]])
        parsed_text.append((entity["word"], entity["entity_group"]))
        start = entity["end"]
    parsed_text.append(text[start:])
    return parsed_text
71
 
72
 
73
# Script entry point: build and render the Streamlit page.
if __name__ == "__main__":
    run()
news_pipeline.py DELETED
@@ -1,61 +0,0 @@
1
- from typing import Dict
2
-
3
- from transformers import (
4
- AutoModelForSequenceClassification,
5
- AutoModelForTokenClassification,
6
- AutoTokenizer,
7
- TextClassificationPipeline,
8
- TokenClassificationPipeline,
9
- )
10
-
11
-
12
- class NewsPipeline:
13
- def __init__(self) -> None:
14
- self.category_tokenizer = AutoTokenizer.from_pretrained("elozano/news-category")
15
- self.category_pipeline = TextClassificationPipeline(
16
- model=AutoModelForSequenceClassification.from_pretrained(
17
- "elozano/news-category"
18
- ),
19
- tokenizer=self.category_tokenizer,
20
- )
21
- self.fake_tokenizer = AutoTokenizer.from_pretrained("elozano/news-fake")
22
- self.fake_pipeline = TextClassificationPipeline(
23
- model=AutoModelForSequenceClassification.from_pretrained(
24
- "elozano/news-fake"
25
- ),
26
- tokenizer=self.fake_tokenizer,
27
- )
28
- self.clickbait_pipeline = TextClassificationPipeline(
29
- model=AutoModelForSequenceClassification.from_pretrained(
30
- "elozano/news-clickbait"
31
- ),
32
- tokenizer=AutoTokenizer.from_pretrained("elozano/news-clickbait"),
33
- )
34
- self.ner_pipeline = TokenClassificationPipeline(
35
- tokenizer=AutoTokenizer.from_pretrained("dslim/bert-base-NER"),
36
- model=AutoModelForTokenClassification.from_pretrained(
37
- "dslim/bert-base-NER"
38
- ),
39
- aggregation_strategy="simple",
40
- )
41
-
42
- def __call__(self, headline: str, content: str) -> Dict[str, str]:
43
- category_article_text = f" {self.category_tokenizer.sep_token} ".join(
44
- [headline, content]
45
- )
46
- fake_article_text = f" {self.fake_tokenizer.sep_token} ".join(
47
- [headline, content]
48
- )
49
- return {
50
- "category": self.category_pipeline(category_article_text)[0]["label"],
51
- "fake": self.fake_pipeline(fake_article_text)[0]["label"],
52
- "clickbait": self.clickbait_pipeline(headline)[0]["label"],
53
- "ner": {
54
- "headline": list(
55
- filter(lambda x: x["score"] > 0.8, self.ner_pipeline(headline))
56
- ),
57
- "content": list(
58
- filter(lambda x: x["score"] > 0.8, self.ner_pipeline(content))
59
- ),
60
- },
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pipeline.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import TextClassificationPipeline
2
+ from typing import Dict, Optional
3
+
4
+
5
class NewsPipeline(TextClassificationPipeline):
    """Text-classification pipeline that adds an emoji to its prediction."""

    def __init__(self, emojis: Dict[str, str], **kwargs) -> None:
        """Store the label-to-emoji map and forward the rest to the base class.

        Args:
            emojis: Maps a predicted label to the emoji reported with it.
            **kwargs: Passed unchanged to ``TextClassificationPipeline``
                (e.g. ``model`` and ``tokenizer``).
        """
        self.emojis = emojis
        super().__init__(**kwargs)

    def __call__(
        self, headline: str, content: Optional[str] = None
    ) -> Dict[str, object]:
        """Classify an article and return the prediction plus its emoji.

        Args:
            headline: Article headline.
            content: Optional article body. When non-empty it is appended to
                the headline, separated by the tokenizer's ``sep_token``.

        Returns:
            The first prediction dict produced by the base pipeline, extended
            with an ``"emoji"`` key. The emoji is an empty string when the
            predicted label is not present in ``emojis``.
        """
        if content:
            text = f" {self.tokenizer.sep_token} ".join([headline, content])
        else:
            text = headline
        prediction = super().__call__(text)[0]
        # Fall back to no emoji instead of raising KeyError on an unmapped label.
        emoji = self.emojis.get(prediction["label"], "")
        return {**prediction, "emoji": emoji}