Rifky committed
Commit 72eedb2
1 Parent(s): a04244f

Initial Commit

Files changed (3):
  1. Scraper.py +30 -0
  2. app.py +77 -0
  3. requirements.txt +5 -0
Scraper.py ADDED
@@ -0,0 +1,30 @@
+ from newspaper import Article
+
+ """
+ This script can be used to scrape an article from a given link
+
+ Author: Rifky Bujana Bisri
+ email : rifkybujanabisri@gmail.com
+ """
+
+ def Scrap(url):
+     """
+     Scrape the article at the given url
+
+     ### Parameter
+     url : article url (dtype: `string`)
+
+     ### Result
+     return the article text (dtype: `string`), or `None` if nothing could be extracted
+     """
+
+     # Download and parse the page with newspaper3k (Indonesian language)
+     article = Article(url, language='id')
+     article.download()
+     article.parse()
+
+     # newspaper3k leaves `text` empty when it can't find an article body
+     if not article.text:
+         print("Can't scrape this article link")
+         return None
+
+     return article.text
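For reference, `Scrap` can be exercised on its own before wiring it into the app. A minimal sketch, assuming `newspaper3k` is installed and network access is available; the URL is a placeholder for illustration only:

    from Scraper import Scrap

    # Placeholder link; substitute a real Indonesian news article URL
    text = Scrap("https://example.com/some-article")
    if text:
        print(text[:200])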
app.py ADDED
@@ -0,0 +1,77 @@
+ import streamlit as st
+ import numpy as np
+ import re
+ import time
+
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
+ from Scraper import Scrap
+
+
+ model_checkpoint = "Rifky/FND"
+ label = {0: "Fakta", 1: "Hoax"}  # class index -> display label
+
+
+ # Cache the model across Streamlit reruns so it is only loaded once
+ @st.cache(show_spinner=False, allow_output_mutation=True)
+ def load_model():
+     model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
+     tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, fast=True)
+     return Trainer(model=model), tokenizer
+
+
+ st.write('# Fake News Detection AI')
+
+ with st.spinner("Loading Model..."):
+     model, tokenizer = load_model()
+
+ user_input = st.text_area("Put article url or the full text", help="the text you want to analyze", height=200)
+ submit = st.button("submit")
+
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
+
+ if submit:
+     last_time = time.time()
+
+     text = ""
+
+     with st.spinner("Reading Article..."):
+         # Treat input that looks like a URL as a link to scrape,
+         # otherwise use it directly as the article text
+         if user_input:
+             if user_input.startswith('http'):
+                 text = Scrap(user_input)
+             else:
+                 text = user_input
+
+     if text:
+         text = re.sub(r'\n', ' ', text)
+
+         with st.spinner("Computing..."):
+             words = text.split(" ")
+             text_len = len(words)
+             if text_len > 512:
+                 # Split long articles into 512-word chunks, classify each
+                 # chunk, and average the per-class probabilities
+                 texts = []
+                 for i in range(text_len // 512):
+                     texts.append(" ".join(words[i * 512:(i + 1) * 512]))
+
+                 # Remaining words after the last full 512-word chunk
+                 texts.append(" ".join(words[(text_len // 512) * 512:]))
+
+                 for i in range(len(texts)):
+                     texts[i] = tokenizer(texts[i], max_length=512, truncation=True, padding="max_length")
+
+                 results = model.predict(texts)[0]
+                 result = [0, 0]
+                 for i in range(len(results)):
+                     result[0] += sigmoid(results[i][0])
+                     result[1] += sigmoid(results[i][1])
+
+                 result[0] /= len(results)
+                 result[1] /= len(results)
+
+             else:
+                 text = tokenizer(text, max_length=512, truncation=True, padding="max_length")
+                 result = model.predict([text])[0][0]
+
+             print(f'\nresult: {result}')
+
+             st.markdown(f"<small>Compute Finished in {int(time.time() - last_time)} seconds</small>", unsafe_allow_html=True)
+
+             prediction = np.argmax(result, axis=-1)
+             st.success(f"Prediction: {label[prediction]}")
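The long-text branch above averages per-chunk class probabilities before taking the argmax. A self-contained sketch of just that aggregation step, with made-up logits standing in for the `model.predict` output:

    import numpy as np

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Made-up [Fakta, Hoax] logits for a two-chunk article
    chunk_logits = [np.array([2.0, -1.0]), np.array([-0.5, 1.5])]

    result = [0.0, 0.0]
    for logits in chunk_logits:
        result[0] += sigmoid(logits[0])
        result[1] += sigmoid(logits[1])
    result = [p / len(chunk_logits) for p in result]

    print({0: "Fakta", 1: "Hoax"}[int(np.argmax(result))])

Averaging sigmoid probabilities rather than raw logits keeps each chunk's vote bounded, so a single extreme chunk cannot dominate the final label.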
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ newspaper3k==0.2.8
+ numpy==1.23.1
+ streamlit==1.11.1
+ transformers==4.21.0
+ torch
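To try the app locally, the standard Streamlit workflow should work: install the pinned dependencies with `pip install -r requirements.txt`, then launch the UI with `streamlit run app.py`.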