AVAIYA
/

python-test

Model card Files Files and versions Community

python-test / app.py

AVAIYA

Create app.py

e228e17 over 3 years ago

raw

history blame contribute delete

4.66 kB

	#pip install GoogleNews
	#pip install --upgrade GoogleNews

	import streamlit as st
	from GoogleNews import GoogleNews
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import pandas as pd
	import numpy as np
	import string
	import re
	from nltk.corpus import stopwords
	from sklearn.metrics.pairwise import cosine_similarity
	import sklearn
	import time


	# Google News client used for every search in this app, configured for
	# Arabic-language results. It is built once (a default-language client
	# that was immediately overwritten is no longer created) and then cleared
	# via clear() before any search is issued.
	googlenews = GoogleNews(lang='ar')
	googlenews.clear()



	# Page header shown at the top of the app.
	st.write("""
	Arabic Fake News Detection System
	A system designed as a part of master project
	done by Reem AlFouzan
	Supervised by : Dr, Abdulla al mutairi
	""")
	#df = pd.read_csv('News.csv')
	# ---------------------------------------------------------------------
	# Text-normalisation helpers.
	# They are defined ahead of the input check so they are ordinary
	# module-level definitions that do not depend on the user having typed
	# anything yet.
	# ---------------------------------------------------------------------

	# Raw string: the backslash before "|" is itself one of the characters to
	# strip, not the start of an escape sequence.
	arabic_punctuations = r'''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+\|!”…“–ـ'''
	english_punctuations = string.punctuation
	punctuations_list = arabic_punctuations + english_punctuations

	# Built once: maps every punctuation character to None for str.translate.
	_PUNCTUATION_TABLE = str.maketrans('', '', punctuations_list)

	# Characters deleted from the headlines and from the query before any
	# other clean-up step runs.
	_STRIPPED_CHARS = '#.][!XR'

	# Result pages requested after the first one.
	_EXTRA_PAGES = range(2, 70)

	# Cosine-similarity cut-offs used to label the claim.
	_TRUE_THRESHOLD = .85
	_MISLEADING_THRESHOLD = .6


	def remove_punctuations(text):
	    """Return *text* with every character of ``punctuations_list`` removed."""
	    return text.translate(_PUNCTUATION_TABLE)


	def normalize_arabic(text):
	    """Fold Arabic letter variants onto one canonical letter each.

	    Alef forms with hamza/madda become bare alef, alef maqsura becomes
	    ya, ta marbuta becomes ha, and gaf becomes kaf.
	    """
	    text = re.sub("[إأآا]", "ا", text)
	    text = re.sub("ى", "ي", text)
	    text = re.sub("ة", "ه", text)
	    text = re.sub("گ", "ك", text)
	    return text


	def remove_repeating_char(text):
	    """Collapse each run of one repeated character to a single occurrence.

	    The pattern matches any character, so legitimately doubled letters and
	    repeated digits are shortened as well (e.g. "100" becomes "10").
	    """
	    return re.sub(r'(.)\1+', r'\1', text)


	def processPost(text):
	    """Clean one piece of text so it can be compared with TF-IDF.

	    Steps, in order: replace @mentions and URLs with a space, turn
	    ``#word`` into ``word``, strip punctuation, normalise Arabic letter
	    variants, and collapse repeated characters.

	    Args:
	        text: the raw string to clean.

	    Returns:
	        The cleaned string.
	    """
	    # Replace @username with a space.
	    text = re.sub(r'@[^\s]+', ' ', text)

	    # Replace www.* or http(s)://* with a space. The "|" must be an
	    # unescaped alternation; escaped, it only matched a literal pipe and
	    # no URL was ever removed.
	    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)

	    # Replace #word with word.
	    text = re.sub(r'#([^\s]+)', r'\1', text)

	    text = remove_punctuations(text)
	    text = normalize_arabic(text)
	    text = remove_repeating_char(text)

	    return text


	# ---------------------------------------------------------------------
	# Main flow: read the claim, fetch Google News results for it, and score
	# the claim against the result texts with TF-IDF cosine similarity.
	# ---------------------------------------------------------------------
	text_input = st.text_input (''' Enter the text ''')
	if text_input:
	    # One-cell frame holding the query; column 0 is cleaned in place below.
	    inputt = pd.DataFrame([text_input])
	    query = inputt.iloc[0, 0]

	    googlenews.search(query)
	    googlenews.get_news(query)

	    result_0 = googlenews.page_at(1)
	    print("Data")
	    print(result_0, "data 2")
	    if not result_0:
	        # Placeholder row so the comparison below still has one document.
	        desc_1 = ['لا يوجد نتائج للخبر ']
	        link_1 = ['لا يوجد مصدر']
	    else:
	        desc_1 = googlenews.get_texts()
	        link_1 = googlenews.get_links()
	        # NOTE(review): page_at() appears to return a page without
	        # updating the client's stored texts/links, in which case every
	        # pass appends the same lists again. Confirm against the
	        # GoogleNews documentation before changing this loop.
	        for page_number in _EXTRA_PAGES:
	            googlenews.page_at(page_number)
	            desc_1 = desc_1 + googlenews.get_texts()
	            link_1 = link_1 + googlenews.get_links()

	    df = pd.DataFrame({'text': desc_1, 'link': link_1})

	    # Literal (non-regex) removal: several of these characters are regex
	    # metacharacters, and the default of ``regex`` differs between pandas
	    # versions, so it is stated explicitly.
	    df['text'] = df['text'].astype(str)
	    inputt[0] = inputt[0].astype(str)
	    for letter in _STRIPPED_CHARS:
	        df['text'] = df['text'].str.replace(letter, '', regex=False)
	        inputt[0] = inputt[0].str.replace(letter, '', regex=False)

	    df['text'] = df['text'].apply(processPost)
	    inputt[0] = inputt[0].apply(processPost)

	    st.markdown(f"my input is : {inputt.iloc[0, 0]}")

	    # Fit the vocabulary on the fetched texts only, then project the query
	    # into the same space. The sparse matrix is passed straight to
	    # cosine_similarity, so no dense copy of the corpus is built.
	    vectorizer = TfidfVectorizer()
	    vectors = vectorizer.fit_transform(df['text'])
	    traninput = vectorizer.transform(inputt[0])
	    cosine_sim = cosine_similarity(traninput, vectors)

	    top = np.max(cosine_sim)
	    best_row = np.argmax(np.array(cosine_sim[0]))

	    if top >= _TRUE_THRESHOLD:
	        prediction = 'الخبر صحيح'
	    elif top >= _MISLEADING_THRESHOLD:
	        prediction = 'الخبر مظلل '
	    else:
	        # Plain else so ``prediction`` is always bound.
	        prediction = 'الخبر كاذب '

	    st.markdown(f"most similar news is: {df['text'].iloc[best_row]}")
	    st.markdown(f"Source url : {df['link'].iloc[best_row]}")
	    st.markdown(f"Credibility rate : {top}")
	    st.markdown(f"system prediction: {prediction}")

	    # Dump the cleaned result set (tab-separated) for inspection.
	    df.to_csv('Students.csv', sep='\t')


	# Sidebar content, rendered top to bottom: a heading line followed by
	# markdown links to the listed news sites.
	for sidebar_entry in (
	    'مواقع اخباريه معتمده ',
	    "[العربية](https://www.alarabiya.net/)",
	    "[الجزيرة نت](https://www.aljazeera.net/news/)",
	    "[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)",
	):
	    st.sidebar.markdown(sidebar_entry)

	#st.markdown('test')