remove data
Browse files
- Scraper.py +1 -1
- app.py +13 -8
- data/turnbackhoax.json +0 -3
Scraper.py
CHANGED
@@ -20,4 +20,4 @@ def Scrap(url):
|
|
20 |
print("Can't Scrap this article link")
|
21 |
return None
|
22 |
|
23 |
-
return article
|
|
|
20 |
print("Can't Scrap this article link")
|
21 |
return None
|
22 |
|
23 |
+
return article
|
app.py
CHANGED
@@ -1,21 +1,25 @@
|
|
1 |
import streamlit as st
|
2 |
import numpy as np
|
|
|
3 |
import re
|
4 |
import time
|
|
|
5 |
|
6 |
-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
7 |
from Scraper import Scrap
|
8 |
|
9 |
st.set_page_config(layout="wide")
|
10 |
|
11 |
model_checkpoint = "Rifky/FND"
|
|
|
12 |
label = {0: "valid", 1: "fake"}
|
13 |
|
14 |
@st.cache(show_spinner=False, allow_output_mutation=True)
|
15 |
def load_model():
|
16 |
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
|
|
|
17 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, fast=True)
|
18 |
-
return model, tokenizer
|
19 |
|
20 |
def sigmoid(x):
|
21 |
return 1 / (1 + np.exp(-x))
|
@@ -24,7 +28,8 @@ input_column, reference_column = st.columns(2)
|
|
24 |
input_column.write('# Fake News Detection AI')
|
25 |
|
26 |
with st.spinner("Loading Model..."):
|
27 |
-
model, tokenizer = load_model()
|
|
|
28 |
|
29 |
user_input = input_column.text_input("Article url")
|
30 |
submit = input_column.button("submit")
|
@@ -43,13 +48,13 @@ if submit:
|
|
43 |
text = re.sub(r'\n', ' ', text)
|
44 |
|
45 |
with st.spinner("Computing..."):
|
46 |
-
|
47 |
-
text_len = len(
|
48 |
|
49 |
sequences = []
|
50 |
for i in range(text_len // 512):
|
51 |
-
sequences.append(" ".join(
|
52 |
-
sequences.append(" ".join(
|
53 |
sequences = tokenizer(sequences, max_length=512, truncation=True, padding="max_length", return_tensors='pt')
|
54 |
|
55 |
predictions = model(**sequences)[0].detach().numpy()
|
@@ -63,4 +68,4 @@ if submit:
|
|
63 |
prediction = np.argmax(result, axis=-1)
|
64 |
input_column.success(f"This news is {label[prediction]}.")
|
65 |
st.text(f"{int(result[prediction]*100)}% confidence")
|
66 |
-
input_column.progress(result[prediction])
|
|
|
1 |
import streamlit as st
|
2 |
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
import re
|
5 |
import time
|
6 |
+
import os
|
7 |
|
8 |
+
from transformers import AutoModelForSequenceClassification, AutoModel, AutoTokenizer
|
9 |
from Scraper import Scrap
|
10 |
|
11 |
st.set_page_config(layout="wide")
|
12 |
|
13 |
model_checkpoint = "Rifky/FND"
|
14 |
+
data_checkpoint = os.path.join(os.getcwd(), "data", "turnbackhoax.csv")
|
15 |
label = {0: "valid", 1: "fake"}
|
16 |
|
17 |
@st.cache(show_spinner=False, allow_output_mutation=True)
|
18 |
def load_model():
|
19 |
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
|
20 |
+
base_model = AutoModel.from_pretrained(model_checkpoint)
|
21 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, fast=True)
|
22 |
+
return model, base_model, tokenizer
|
23 |
|
24 |
def sigmoid(x):
|
25 |
return 1 / (1 + np.exp(-x))
|
|
|
28 |
input_column.write('# Fake News Detection AI')
|
29 |
|
30 |
with st.spinner("Loading Model..."):
|
31 |
+
model, base_model, tokenizer = load_model()
|
32 |
+
data = pd.read_csv(data_checkpoint)
|
33 |
|
34 |
user_input = input_column.text_input("Article url")
|
35 |
submit = input_column.button("submit")
|
|
|
48 |
text = re.sub(r'\n', ' ', text)
|
49 |
|
50 |
with st.spinner("Computing..."):
|
51 |
+
token = text.split()
|
52 |
+
text_len = len(token)
|
53 |
|
54 |
sequences = []
|
55 |
for i in range(text_len // 512):
|
56 |
+
sequences.append(" ".join(token[i * 512: (i + 1) * 512]))
|
57 |
+
sequences.append(" ".join(token[text_len - (text_len % 512) : text_len]))
|
58 |
sequences = tokenizer(sequences, max_length=512, truncation=True, padding="max_length", return_tensors='pt')
|
59 |
|
60 |
predictions = model(**sequences)[0].detach().numpy()
|
|
|
68 |
prediction = np.argmax(result, axis=-1)
|
69 |
input_column.success(f"This news is {label[prediction]}.")
|
70 |
st.text(f"{int(result[prediction]*100)}% confidence")
|
71 |
+
input_column.progress(result[prediction])
|
data/turnbackhoax.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9467d1df1c6b0fe8d927933bf753dd8ca0bbf43ab24c37d216f6c59ca3ca02ff
|
3 |
-
size 34821564
|
|
|
|
|
|
|
|