AVAIYA committed on
Commit
7a66ae9
1 Parent(s): a94690c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -70
app.py CHANGED
@@ -42,76 +42,7 @@ if len(text_input) != 0:
42
 
43
  st.markdown(f"my input is : { result_0 }")
44
  # time.sleep(100)
45
-
46
- def remove_punctuations(text):
47
- translator = str.maketrans('', '', punctuations_list)
48
- return text.translate(translator)
49
-
50
- def normalize_arabic(text):
51
- text = re.sub("[إأآا]", "ا", text)
52
- text = re.sub("ى", "ي", text)
53
- text = re.sub("ة", "ه", text)
54
- text = re.sub("گ", "ك", text)
55
- return text
56
-
57
-
58
- def remove_repeating_char(text):
59
- return re.sub(r'(.)\1+', r'\1', text)
60
-
61
- def processPost(text):
62
-
63
- #Replace @username with empty string
64
- text = re.sub('@[^\s]+', ' ', text)
65
-
66
- #Convert www.* or https?://* to " "
67
- text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))',' ',text)
68
-
69
- #Replace #word with word
70
- text = re.sub(r'#([^\s]+)', r'\1', text)
71
-
72
- # remove punctuations
73
- text= remove_punctuations(text)
74
-
75
- # normalize the text
76
- text= normalize_arabic(text)
77
-
78
- # remove repeated letters
79
- text=remove_repeating_char(text)
80
-
81
- return text
82
-
83
-
84
- df['text'] = df['text'].apply(lambda x: processPost(x))
85
- inputt[0] = inputt[0].apply(lambda x: processPost(x))
86
-
87
- st.markdown(f"my input is : { inputt.iloc[0,0] }")
88
- #input=input.apply(lambda x: processPost(x))
89
-
90
-
91
- vectorizer = TfidfVectorizer()
92
- vectors = vectorizer.fit_transform(df['text'])
93
-
94
- text_tfidf = pd.DataFrame(vectors.toarray())
95
-
96
- traninput = vectorizer.transform(inputt[0])
97
- traninput = traninput.toarray()
98
- cosine_sim = cosine_similarity(traninput,text_tfidf)
99
- top = np.max(cosine_sim)
100
-
101
-
102
- if top >= .85 :
103
- prediction = 'الخبر صحيح'
104
- elif (top < .85) and (top >= .6) :
105
- prediction = 'الخبر مظلل '
106
- elif top < .6 :
107
- prediction = 'الخبر كاذب '
108
-
109
-
110
- st.markdown(f"most similar news is: { df['text'].iloc[np.argmax(np.array(cosine_sim[0]))] }")
111
- st.markdown(f"Source url : {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
112
- st.markdown(f"Credibility rate : { np.max(cosine_sim)}")
113
- st.markdown(f"system prediction: { prediction}")
114
- df.to_csv('Students.csv', sep ='\t')
115
 
116
 
117
  st.sidebar.markdown('مواقع اخباريه معتمده ')
 
42
 
43
  st.markdown(f"my input is : { result_0 }")
44
  # time.sleep(100)
45
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  st.sidebar.markdown('مواقع اخباريه معتمده ')