Rifky committed
Commit 72eedb2
1 Parent(s): a04244f

Initial Commit

Files changed (3):
  1. Scraper.py +30 -0
  2. app.py +77 -0
  3. requirements.txt +5 -0
Scraper.py ADDED
@@ -0,0 +1,30 @@
+ from newspaper import Article
+
+ """
+ This script can be used to scrape an article from a given link
+
+ Author: Rifky Bujana Bisri
+ email : rifkybujanabisri@gmail.com
+ """
+
+ def Scrap(url):
+     """
+     Scrape the article at the given url
+
+     ### Parameter
+     url : article url (dtype: `string`)
+
+     ### Result
+     return the article text (dtype: `string`), or `None` if nothing could be extracted
+     """
+
+     # Download and parse the page with newspaper3k (Indonesian language)
+     article = Article(url, language='id')
+     article.download()
+     article.parse()
+
+     # newspaper3k leaves `text` empty when it can't find an article body
+     if not article.text:
+         print("Can't scrape this article link")
+         return None
+
+     return article.text
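For reference, `Scrap` can be exercised on its own before wiring it into the app. A minimal sketch, assuming `newspaper3k` is installed and network access is available; the URL is a placeholder for illustration only:

    from Scraper import Scrap

    # Placeholder link; substitute a real Indonesian news article URL
    text = Scrap("https://example.com/some-article")
    if text:
        print(text[:200])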
app.py ADDED
@@ -0,0 +1,77 @@
+ import streamlit as st
+ import numpy as np
+ import re
+ import time
+
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
+ from Scraper import Scrap
+
+
+ model_checkpoint = "Rifky/FND"
+ label = {0: "Fakta", 1: "Hoax"}  # class index -> display label
+
+
+ # Cache the model across Streamlit reruns so it is only loaded once
+ @st.cache(show_spinner=False, allow_output_mutation=True)
+ def load_model():
+     model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
+     tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, fast=True)
+     return Trainer(model=model), tokenizer
+
+
+ st.write('# Fake News Detection AI')
+
+ with st.spinner("Loading Model..."):
+     model, tokenizer = load_model()
+
+ user_input = st.text_area("Put article url or the full text", help="the text you want to analyze", height=200)
+ submit = st.button("submit")
+
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
+
+ if submit:
+     last_time = time.time()
+
+     text = ""
+
+     with st.spinner("Reading Article..."):
+         # Treat input that looks like a URL as a link to scrape,
+         # otherwise use it directly as the article text
+         if user_input:
+             if user_input.startswith('http'):
+                 text = Scrap(user_input)
+             else:
+                 text = user_input
+
+     if text:
+         text = re.sub(r'\n', ' ', text)
+
+         with st.spinner("Computing..."):
+             words = text.split(" ")
+             text_len = len(words)
+             if text_len > 512:
+                 # Split long articles into 512-word chunks, classify each
+                 # chunk, and average the per-class probabilities
+                 texts = []
+                 for i in range(text_len // 512):
+                     texts.append(" ".join(words[i * 512:(i + 1) * 512]))
+
+                 # Remaining words after the last full 512-word chunk
+                 texts.append(" ".join(words[(text_len // 512) * 512:]))
+
+                 for i in range(len(texts)):
+                     texts[i] = tokenizer(texts[i], max_length=512, truncation=True, padding="max_length")
+
+                 results = model.predict(texts)[0]
+                 result = [0, 0]
+                 for i in range(len(results)):
+                     result[0] += sigmoid(results[i][0])
+                     result[1] += sigmoid(results[i][1])
+
+                 result[0] /= len(results)
+                 result[1] /= len(results)
+
+             else:
+                 text = tokenizer(text, max_length=512, truncation=True, padding="max_length")
+                 result = model.predict([text])[0][0]
+
+             print(f'\nresult: {result}')
+
+             st.markdown(f"<small>Compute Finished in {int(time.time() - last_time)} seconds</small>", unsafe_allow_html=True)
+
+             prediction = np.argmax(result, axis=-1)
+             st.success(f"Prediction: {label[prediction]}")
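The long-text branch above averages per-chunk class probabilities before taking the argmax. A self-contained sketch of just that aggregation step, with made-up logits standing in for the `model.predict` output:

    import numpy as np

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Made-up [Fakta, Hoax] logits for a two-chunk article
    chunk_logits = [np.array([2.0, -1.0]), np.array([-0.5, 1.5])]

    result = [0.0, 0.0]
    for logits in chunk_logits:
        result[0] += sigmoid(logits[0])
        result[1] += sigmoid(logits[1])
    result = [p / len(chunk_logits) for p in result]

    print({0: "Fakta", 1: "Hoax"}[int(np.argmax(result))])

Averaging sigmoid probabilities rather than raw logits keeps each chunk's vote bounded, so a single extreme chunk cannot dominate the final label.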
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ newspaper3k==0.2.8
+ numpy==1.23.1
+ streamlit==1.11.1
+ transformers==4.21.0
+ torch
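To try the app locally, the standard Streamlit workflow should work: install the pinned dependencies with `pip install -r requirements.txt`, then launch the UI with `streamlit run app.py`.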