Rifky committed
Commit 2088fe5
1 Parent(s): e334543

remove data

Files changed (3)
  1. Scraper.py +1 -1
  2. app.py +13 -8
  3. data/turnbackhoax.json +0 -3
Scraper.py CHANGED
@@ -20,4 +20,4 @@ def Scrap(url):
         print("Can't Scrap this article link")
         return None
 
-    return article.text
+    return article
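The diff shows only the tail of Scrap. For context, a minimal sketch of the full function, assuming it is built on the newspaper3k library; the import, the download()/parse() calls, and the try/except framing are assumptions, since only the four lines above appear in the commit:

from newspaper import Article  # assumption: newspaper3k-based scraper, not shown in the diff

def Scrap(url):
    try:
        article = Article(url)
        article.download()   # fetch the page HTML
        article.parse()      # extract title, body text, authors, ...
    except Exception:
        print("Can't Scrap this article link")
        return None

    return article           # previously returned `article.text`

Returning the parsed article object rather than its .text attribute gives the caller access to the other parsed fields (title, authors, publish date) in addition to the body text.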
app.py CHANGED
@@ -1,21 +1,25 @@
 import streamlit as st
 import numpy as np
+import pandas as pd
 import re
 import time
+import os
 
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoModelForSequenceClassification, AutoModel, AutoTokenizer
 from Scraper import Scrap
 
 st.set_page_config(layout="wide")
 
 model_checkpoint = "Rifky/FND"
+data_checkpoint = os.path.join(os.getcwd(), "data", "turnbackhoax.csv")
 label = {0: "valid", 1: "fake"}
 
 @st.cache(show_spinner=False, allow_output_mutation=True)
 def load_model():
     model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
+    base_model = AutoModel.from_pretrained(model_checkpoint)
     tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, fast=True)
-    return model, tokenizer
+    return model, base_model, tokenizer
 
 def sigmoid(x):
     return 1 / (1 + np.exp(-x))
@@ -24,7 +28,8 @@ input_column, reference_column = st.columns(2)
 input_column.write('# Fake News Detection AI')
 
 with st.spinner("Loading Model..."):
-    model, tokenizer = load_model()
+    model, base_model, tokenizer = load_model()
+    data = pd.read_csv(data_checkpoint)
 
 user_input = input_column.text_input("Article url")
 submit = input_column.button("submit")
@@ -43,13 +48,13 @@ if submit:
     text = re.sub(r'\n', ' ', text)
 
     with st.spinner("Computing..."):
-        text = text.split()
-        text_len = len(text)
+        token = text.split()
+        text_len = len(token)
 
         sequences = []
         for i in range(text_len // 512):
-            sequences.append(" ".join(text[i * 512: (i + 1) * 512]))
-        sequences.append(" ".join(text[text_len - (text_len % 512) : text_len]))
+            sequences.append(" ".join(token[i * 512: (i + 1) * 512]))
+        sequences.append(" ".join(token[text_len - (text_len % 512) : text_len]))
         sequences = tokenizer(sequences, max_length=512, truncation=True, padding="max_length", return_tensors='pt')
 
         predictions = model(**sequences)[0].detach().numpy()
@@ -63,4 +68,4 @@ if submit:
         prediction = np.argmax(result, axis=-1)
         input_column.success(f"This news is {label[prediction]}.")
         st.text(f"{int(result[prediction]*100)}% confidence")
-        input_column.progress(result[prediction])
+        input_column.progress(result[prediction])
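The third hunk renames text to token and slices the word list into 512-word windows before tokenization. Isolated as a standalone helper (a sketch for illustration; chunk_words is a hypothetical name, not part of the app), the windowing logic is:

# Sketch of app.py's windowing step, extracted into a function.
def chunk_words(text, window=512):
    token = text.split()                     # whitespace word split, as in app.py
    text_len = len(token)
    sequences = []
    for i in range(text_len // window):      # full windows of `window` words
        sequences.append(" ".join(token[i * window: (i + 1) * window]))
    # remainder of text_len % window words; like the app, this appends an
    # empty string when the length is an exact multiple of `window`
    sequences.append(" ".join(token[text_len - (text_len % window):]))
    return sequences

Note that 512 words is only a rough proxy for the model's 512-token limit: subword tokenization usually produces more than one token per word, which is why the tokenizer call still sets max_length=512 with truncation=True to drop any overflow.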
data/turnbackhoax.json DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9467d1df1c6b0fe8d927933bf753dd8ca0bbf43ab24c37d216f6c59ca3ca02ff
-size 34821564
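The deleted file was a Git LFS pointer rather than the dataset itself: its three lines record the LFS spec version, the SHA-256 object id, and the object size (34821564 bytes, roughly 35 MB). Removing it is consistent with the commit message and with app.py switching to data/turnbackhoax.csv.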