Update app.py
Browse files
app.py
CHANGED
@@ -42,76 +42,7 @@ if len(text_input) != 0:
|
|
42 |
|
43 |
st.markdown(f"my input is : { result_0 }")
|
44 |
# time.sleep(100)
|
45 |
-
|
46 |
-
def remove_punctuations(text):
|
47 |
-
translator = str.maketrans('', '', punctuations_list)
|
48 |
-
return text.translate(translator)
|
49 |
-
|
50 |
-
def normalize_arabic(text):
|
51 |
-
text = re.sub("[إأآا]", "ا", text)
|
52 |
-
text = re.sub("ى", "ي", text)
|
53 |
-
text = re.sub("ة", "ه", text)
|
54 |
-
text = re.sub("گ", "ك", text)
|
55 |
-
return text
|
56 |
-
|
57 |
-
|
58 |
-
def remove_repeating_char(text):
|
59 |
-
return re.sub(r'(.)\1+', r'\1', text)
|
60 |
-
|
61 |
-
def processPost(text):
|
62 |
-
|
63 |
-
#Replace @username with empty string
|
64 |
-
text = re.sub('@[^\s]+', ' ', text)
|
65 |
-
|
66 |
-
#Convert www.* or https?://* to " "
|
67 |
-
text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))',' ',text)
|
68 |
-
|
69 |
-
#Replace #word with word
|
70 |
-
text = re.sub(r'#([^\s]+)', r'\1', text)
|
71 |
-
|
72 |
-
# remove punctuations
|
73 |
-
text= remove_punctuations(text)
|
74 |
-
|
75 |
-
# normalize the text
|
76 |
-
text= normalize_arabic(text)
|
77 |
-
|
78 |
-
# remove repeated letters
|
79 |
-
text=remove_repeating_char(text)
|
80 |
-
|
81 |
-
return text
|
82 |
-
|
83 |
-
|
84 |
-
df['text'] = df['text'].apply(lambda x: processPost(x))
|
85 |
-
inputt[0] = inputt[0].apply(lambda x: processPost(x))
|
86 |
-
|
87 |
-
st.markdown(f"my input is : { inputt.iloc[0,0] }")
|
88 |
-
#input=input.apply(lambda x: processPost(x))
|
89 |
-
|
90 |
-
|
91 |
-
vectorizer = TfidfVectorizer()
|
92 |
-
vectors = vectorizer.fit_transform(df['text'])
|
93 |
-
|
94 |
-
text_tfidf = pd.DataFrame(vectors.toarray())
|
95 |
-
|
96 |
-
traninput = vectorizer.transform(inputt[0])
|
97 |
-
traninput = traninput.toarray()
|
98 |
-
cosine_sim = cosine_similarity(traninput,text_tfidf)
|
99 |
-
top = np.max(cosine_sim)
|
100 |
-
|
101 |
-
|
102 |
-
if top >= .85 :
|
103 |
-
prediction = 'الخبر صحيح'
|
104 |
-
elif (top < .85) and (top >= .6) :
|
105 |
-
prediction = 'الخبر مظلل '
|
106 |
-
elif top < .6 :
|
107 |
-
prediction = 'الخبر كاذب '
|
108 |
-
|
109 |
-
|
110 |
-
st.markdown(f"most similar news is: { df['text'].iloc[np.argmax(np.array(cosine_sim[0]))] }")
|
111 |
-
st.markdown(f"Source url : {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
|
112 |
-
st.markdown(f"Credibility rate : { np.max(cosine_sim)}")
|
113 |
-
st.markdown(f"system prediction: { prediction}")
|
114 |
-
df.to_csv('Students.csv', sep ='\t')
|
115 |
|
116 |
|
117 |
st.sidebar.markdown('مواقع اخباريه معتمده ')
|
|
|
42 |
|
43 |
st.markdown(f"my input is : { result_0 }")
|
44 |
# time.sleep(100)
|
45 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
st.sidebar.markdown('مواقع اخباريه معتمده ')
|