Commit
Β·
39c1ac4
1
Parent(s):
7de6206
add: app.py
Browse files
app.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import pipeline
|
3 |
+
|
4 |
+
############ SETTING UP THE PAGE LAYOUT AND TITLE ############
|
5 |
+
|
6 |
+
# `st.set_page_config` is used to display the default layout width, the title of the app, and the emoticon in the browser tab.
|
7 |
+
|
8 |
+
st.set_page_config(layout="centered", page_title="X_G85 Fake News", page_icon="π")
|
9 |
+
|
10 |
+
############ CREATE THE LOGO AND HEADING ############
|
11 |
+
|
12 |
+
# We create a set of columns to display the logo and the heading next to each other.
|
13 |
+
|
14 |
+
c1, c2 = st.columns([0.32, 2])
|
15 |
+
|
16 |
+
# The logo (an emoji rendered as a title) will be displayed in the first column, on the left.
|
17 |
+
|
18 |
+
with c1:
|
19 |
+
st.caption("")
|
20 |
+
st.title("π")
|
21 |
+
|
22 |
+
# The heading will be on the right.
|
23 |
+
|
24 |
+
with c2:
|
25 |
+
|
26 |
+
st.caption("")
|
27 |
+
st.title("X_G85 Fake News")
|
28 |
+
|
29 |
+
# We need to set up session state via st.session_state so that app interactions don't reset the app.
|
30 |
+
|
31 |
+
############ SIDEBAR CONTENT ############
|
32 |
+
|
33 |
+
st.sidebar.subheader("Model Options")
|
34 |
+
st.sidebar.write("")
|
35 |
+
|
36 |
+
|
37 |
+
# Model selection
|
38 |
+
SELECTED_MODEL = st.sidebar.selectbox(
|
39 |
+
"Choose a model",
|
40 |
+
("Bert", "Roberta", "Lstm")
|
41 |
+
)
|
42 |
+
|
43 |
+
MODEL_INFO = {
|
44 |
+
"Bert": """
|
45 |
+
#### [BERT base model (uncased)](https://huggingface.co/google-bert/bert-base-uncased)
|
46 |
+
Pretrained model on English language using a masked language modeling (MLM) objective. It was introduced in this paper and first released in this repository. This model is uncased: it does not make a difference between english and English.
|
47 |
+
""",
|
48 |
+
|
49 |
+
"Roberta": """
|
50 |
+
#### [jy46604790/Fake-News-Bert-Detect](https://huggingface.co/jy46604790/Fake-News-Bert-Detect)
|
51 |
+
This model is trained by over 40,000 news from different medias based on the 'roberta-base'. It can give result by simply entering the text of the news less than 500 words(the excess will be truncated automatically).
|
52 |
+
""",
|
53 |
+
|
54 |
+
"Lstm": """
|
55 |
+
#### [X_G85 Fake News LSTM MODEL](https://huggingface.co/x-g85)
|
56 |
+
It is trained on the provided datasets\n
|
57 |
+
Notebook: [Fake News using Lstm](https://www.kaggle.com/code/adamalrahman/fake-news-using-lstm)
|
58 |
+
""",
|
59 |
+
None: "NO MODEL SELECTED"
|
60 |
+
}
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
model_info_container = st.sidebar.container(border=True)
|
65 |
+
model_info_container.markdown("### Model Information\n")
|
66 |
+
model_info_container.markdown(MODEL_INFO[SELECTED_MODEL ])
|
67 |
+
|
68 |
+
|
69 |
+
copyright_container = st.sidebar.container(border=True)
|
70 |
+
copyright_container.markdown("Copyright Β©οΈ 2024 [X_G85](https://huggingface.co/x-g85)")
|
71 |
+
|
72 |
+
|
73 |
+
############ TABBED NAVIGATION ############
|
74 |
+
|
75 |
+
|
76 |
+
MainTab, InfoTab = st.tabs(["Main", "Info"])
|
77 |
+
|
78 |
+
with InfoTab:
|
79 |
+
|
80 |
+
st.subheader("X_G85 Fake News")
|
81 |
+
st.markdown("It is fake news detection based on the following models trained on datasets")
|
82 |
+
|
83 |
+
st.subheader("Datasets")
|
84 |
+
st.markdown(
|
85 |
+
"""
|
86 |
+
We have used following datasets to create our own datasets and train models.
|
87 |
+
- [Kaggle: Fake news detection dataset english](https://www.kaggle.com/datasets/sadikaljarif/fake-news-detection-dataset-english)
|
88 |
+
- [Kaggle: Liar twitter](https://www.kaggle.com/datasets/muhammadimran112233/liar-twitter-dataset)
|
89 |
+
- [Kaggle: Liar Preprocessed](https://www.kaggle.com/datasets/khandalaryan/liar-preprocessed-dataset)
|
90 |
+
- [Kaggle: Stocknews](https://www.kaggle.com/datasets/aaron7sun/stocknews)
|
91 |
+
"""
|
92 |
+
)
|
93 |
+
|
94 |
+
st.subheader("Credits")
|
95 |
+
st.markdown(
|
96 |
+
"""
|
97 |
+
- Bert: [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased)
|
98 |
+
- Roberta: [jy46604790/Fake-News-Bert-Detect](https://huggingface.co/jy46604790/Fake-News-Bert-Detect)
|
99 |
+
""")
|
100 |
+
st.write("")
|
101 |
+
copyright_container = st.container(border=True)
|
102 |
+
copyright_container.markdown("Copyright Β©οΈ 2024 [X_G85](https://huggingface.co/x-g85)")
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
def MODEL_RESULT(model: str, news: str) -> str | None:
|
107 |
+
if model == "Roberta":
|
108 |
+
MODEL_jy46604790 = "jy46604790/Fake-News-Bert-Detect"
|
109 |
+
classifier = pipeline("text-classification", model=MODEL_jy46604790, tokenizer=MODEL_jy46604790)
|
110 |
+
result = classifier(news)
|
111 |
+
|
112 |
+
if result[0]["label"] == "LABEL_1":
|
113 |
+
return "Real NEWS"
|
114 |
+
else:
|
115 |
+
return "FAKE NEWS"
|
116 |
+
|
117 |
+
# TODO(Adam-Al-Rahman): Complete the statement
|
118 |
+
if model == "Bert":
|
119 |
+
pass
|
120 |
+
|
121 |
+
if model == "Lstm":
|
122 |
+
pass
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
with MainTab:

  # Intro text for the app, rendered through st.markdown().

  st.write("")
  st.markdown("Classify News based on the selected ml model.")
  st.write("")
  container = st.container(border=True)
  container.write(f"Selected model: {SELECTED_MODEL}")

  # --------------------

  news = st.text_area("Enter News",
                      height=200,
                      help="Please provide the news that you need to verify for its truthfulness.\n Press Ctrl+Enter to apply",
                      key="news")

  result_container = st.container(border=True)

  # Run the model only once there is real text to classify; previously the
  # classifier was invoked on every rerun, including with an empty text area.
  if news and news.strip():
    # Default Model: Bert
    result = MODEL_RESULT(model=SELECTED_MODEL if SELECTED_MODEL else "Bert", news=news)
    if result:
      result_container.markdown(f"Result: {result}")
    else:
      # MODEL_RESULT returned nothing for this model/input combination.
      result_container.markdown("ML MODEL ERROR")
|
152 |
+
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
|
setup.cfg
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Project-wide configuration file, can be used for package metadata and other tool configurations
|
2 |
+
# Example usage: global configuration for PEP8 (via flake8) setting or default pytest arguments
|
3 |
+
# Local usage: pip install pre-commit, pre-commit run --all-files
|
4 |
+
|
5 |
+
[metadata]
|
6 |
+
license_files = LICENSE
|
7 |
+
description_file = README.md
|
8 |
+
|
9 |
+
[tool:pytest]
|
10 |
+
norecursedirs =
|
11 |
+
.git
|
12 |
+
dist
|
13 |
+
build
|
14 |
+
addopts =
|
15 |
+
--doctest-modules
|
16 |
+
--durations=30
|
17 |
+
--color=yes
|
18 |
+
|
19 |
+
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
indent-size = 2
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
# NOTE: `ignore` is defined exactly once. The section previously declared it twice
# (`ignore = E266` and the list below); strict INI parsers such as Python's
# configparser reject a repeated key, and lenient ones keep only the last value,
# which would silently drop E266.
ignore = E266,E731,F405,E402,W504,E501
# E266: too many leading '#' for block comment
# E731: Do not assign a lambda expression, use a def
# F405: name may be undefined, or defined from star imports: module
# E402: module level import not at top of file
# W504: line break after binary operator
# E501: line too long
# removed:
# F401: module imported but unused
# E231: missing whitespace after ',', ';', or ':'
# E127: continuation line over-indented for visual indent
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
[isort]
|
44 |
+
# https://pycqa.github.io/isort/docs/configuration/options.html
|
45 |
+
line_length = 120
|
46 |
+
# see: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html
|
47 |
+
multi_line_output = 0
|
48 |
+
|
49 |
+
[yapf]
|
50 |
+
based_on_style = google
|
51 |
+
spaces_before_comment = 2
|
52 |
+
indent_width = 2
|
53 |
+
CONTINUATION_INDENT_WIDTH = 2
|
54 |
+
COLUMN_LIMIT = 120
|
55 |
+
COALESCE_BRACKETS = True
|
56 |
+
SPACES_AROUND_POWER_OPERATOR = True
|
57 |
+
SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
|
58 |
+
SPLIT_BEFORE_CLOSING_BRACKET = False
|
59 |
+
SPLIT_BEFORE_FIRST_ARGUMENT = False
|
60 |
+
# EACH_DICT_ENTRY_ON_SEPARATE_LINE = False
|
61 |
+
|
62 |
+
[docformatter]
|
63 |
+
wrap-summaries = 120
|
64 |
+
wrap-descriptions = 120
|
65 |
+
in-place = true
|
66 |
+
make-summary-multi-line = false
|
67 |
+
pre-summary-newline = true
|
68 |
+
force-wrap = false
|
69 |
+
close-quotes-on-newline = true
|