Sushovan committed on
Commit
3a8f387
1 Parent(s): 4080b15

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ import nltk
5
+ import pickle
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.pipeline import make_pipeline
8
+ import streamlit.components.v1 as components
9
+ from transformers import pipeline
10
+ from sklearn.svm import SVC
11
+ from sklearn.preprocessing import LabelEncoder
12
+ import pickle
13
+ import streamlit as st
14
+
15
+
# Function to load the pre-trained model
@st.cache(allow_output_mutation=True)
def load_pretrained_model():
    """Load the pickled classification artifacts and the Arabic NER pipeline.

    Returns:
        A 4-tuple ``(feature_extractor, encoder, model, pipe)``: the objects
        unpickled from ``tfidf_scorer.pkl``, ``encoder.pkl`` and
        ``classifier.pkl`` (in that order), followed by the transformers
        token-classification pipeline built for ``hatmimoha/arabic-ner``.

    If a ``FileNotFoundError`` is raised while loading, an error naming the
    missing file (when known) is shown in the app and the script run is
    halted with ``st.stop()``.
    """
    artifact_files = ('tfidf_scorer.pkl', 'encoder.pkl', 'classifier.pkl')
    try:
        artifacts = []
        for artifact_file in artifact_files:
            # NOTE(security): pickle.load can execute arbitrary code; these
            # files must only ever come from a trusted source.
            # The ``with`` block closes the file; no explicit close() needed.
            with open(artifact_file, 'rb') as f:
                artifacts.append(pickle.load(f))
        feature_extractor, encoder, model = artifacts

        pipe = pipeline(
            "token-classification",
            model="hatmimoha/arabic-ner",
            aggregation_strategy='max',
        )
        return feature_extractor, encoder, model, pipe
    except FileNotFoundError as exc:
        # Three separate files are required, so say which one is missing.
        missing = f" (missing: {exc.filename})" if exc.filename else ""
        st.error(
            "Pre-trained model not found. Please make sure the model file exists."
            + missing
        )
        st.stop()
39
+
# Streamlit App
st.title("Text Classification App")
st.write("This app demonstrates text classification using a pre-trained scikit-learn-based machine learning model.")
# Information about the app
st.sidebar.title("App Information")
st.sidebar.info(
    """This Streamlit app showcases text classification using a pre-trained scikit-learn-based
machine learning model on Arabic texts. The data is sourced is from
Arabic news articles organized into 3 balanced categories from www.alkhaleej.ae
Labels are categorized in: Medical,Sports,Tech.
Enter text in the provided area, and the model will predict the label."""
)
# Load the pre-trained model (TF-IDF vectorizer, label encoder, classifier, NER pipeline)
tfidf, encode, trained_model, pipeline_obj = load_pretrained_model()

# User input for text classification
user_text = st.text_area("Enter text for classification:")

# Classify user input; whitespace-only input is treated the same as no input
if user_text and user_text.strip():
    # Keep only tokens longer than one character before vectorizing
    tokens = [tok for tok in nltk.wordpunct_tokenize(user_text) if len(tok) > 1]

    if tokens:
        x_test = tfidf.transform([' '.join(tokens)])
        predicted = trained_model.predict(x_test)
        # Map the predicted class back to its label via the fitted encoder
        predicted_class = encode.inverse_transform(predicted)[0]
        st.write(f"Predicted Label: {predicted_class}")
    else:
        # Every token was filtered out; classifying an empty string would
        # produce a label that does not reflect the input.
        st.warning("No usable words found in the input; please enter a longer text.")

    if st.button("Extract entities"):
        with st.spinner('Calculating...'):
            entities = pipeline_obj(user_text)
        if entities:
            entity_df = pd.DataFrame(entities)
            st.table(entity_df[["entity_group", "word"]])
        else:
            st.write("No entities found")
79
+
80
+
81
+
82
+
# TODO: the explainability analysis below is disabled.
# It used to be kept as a bare triple-quoted string. Streamlit's "magic"
# feature (enabled by default) writes bare literal expressions to the page,
# so the snippet could end up displayed in the app; real comments avoid that.
# NOTE(review): the sketch is unfinished -- it contains a stray ":" line and
# LimeTextExplainer is not imported in the visible part of the file. The
# class name "Tech,Others" also looks like a typo; confirm before enabling.
#
# if st.button("Perform explainability analysis"):
#     :
#     c=make_pipeline(tfidf,trained_model)
#     explainer = LimeTextExplainer(class_names=np.array(["Medical","Sports","Tech,Others"]),random_state=42)
#     exp = explainer.explain_instance(user_text, c.predict_proba, num_features=20, top_labels=3)
#     components.html(exp.as_html(), height=800)
#     #top_labels=exp.available_labels()
90
+
91
+
92
+
93
+
94
+
95
+