tonwuaso commited on
Commit
9e456db
·
1 Parent(s): ec4769b

Upload folder using huggingface_hub

Browse files
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Python script
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.9'
20
+
21
+ - name: Install Gradio
22
+ run: python -m pip install gradio
23
+
24
+ - name: Log in to Hugging Face
25
+ run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
+
27
+ - name: Deploy to Spaces
28
+ run: gradio deploy
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/ML_Model.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (AiMouse)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ML_Model.iml" filepath="$PROJECT_DIR$/.idea/ML_Model.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="FileTemplateManagerImpl">
11
+ <option name="RECENT_TEMPLATES">
12
+ <list>
13
+ <option value="Python Script" />
14
+ </list>
15
+ </option>
16
+ </component>
17
+ <component name="MarkdownSettingsMigration">
18
+ <option name="stateVersion" value="1" />
19
+ </component>
20
+ <component name="ProjectId" id="2QfIgCdHt3byO0m9tB94ER54vy3" />
21
+ <component name="ProjectViewState">
22
+ <option name="hideEmptyMiddlePackages" value="true" />
23
+ <option name="showLibraryContents" value="true" />
24
+ </component>
25
+ <component name="RunManager">
26
+ <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
27
+ <module name="ML_Model" />
28
+ <option name="INTERPRETER_OPTIONS" value="" />
29
+ <option name="PARENT_ENVS" value="true" />
30
+ <envs>
31
+ <env name="PYTHONUNBUFFERED" value="1" />
32
+ </envs>
33
+ <option name="SDK_HOME" value="" />
34
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
35
+ <option name="IS_MODULE_SDK" value="true" />
36
+ <option name="ADD_CONTENT_ROOTS" value="true" />
37
+ <option name="ADD_SOURCE_ROOTS" value="true" />
38
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
39
+ <option name="PARAMETERS" value="" />
40
+ <option name="SHOW_COMMAND_LINE" value="false" />
41
+ <option name="EMULATE_TERMINAL" value="false" />
42
+ <option name="MODULE_MODE" value="false" />
43
+ <option name="REDIRECT_INPUT" value="false" />
44
+ <option name="INPUT_FILE" value="" />
45
+ <method v="2" />
46
+ </configuration>
47
+ </component>
48
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
49
+ <component name="TaskManager">
50
+ <task active="true" id="Default" summary="Default task">
51
+ <changelist id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
52
+ <created>1685738317859</created>
53
+ <option name="number" value="Default" />
54
+ <option name="presentableId" value="Default" />
55
+ <updated>1685738317859</updated>
56
+ </task>
57
+ <servers />
58
+ </component>
59
+ </project>
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Sentiment Analysis Model
3
- emoji:
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.33.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Sentiment_Analysis_Model
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 3.33.1
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import tensorflow as tf
4
+ import numpy as np
5
+ import gradio as gr
6
+ from matplotlib import pyplot as plt
7
+ from tensorflow import keras
8
+ from keras.models import Sequential
9
+ from keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
10
+ from keras.metrics import Precision, Recall, CategoricalAccuracy
11
+ from gradio import components
12
+ from gradio import Interface
13
+ from keras.layers import TextVectorization
14
+
15
+ base_path = r"C:\Users\tochi\SentimentAnalysisData"
16
+ df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
17
+
18
+
19
+ df.head() # displays first couple of comments from csv file
20
+
21
+ X = df['comment_text']
22
+ y = df[df.columns[2:]].values
23
+
24
+
25
+ MAX_FEATURES = 200000 # number of words in the vocab
26
+
27
+
28
+ vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
29
+ output_sequence_length=1800,
30
+ output_mode='int')
31
+
32
+ vectorizer.adapt(X.values)
33
+
34
+ vectorized_text = vectorizer(X.values)
35
+
36
+ #MCSHBAP - map, cache, shuffle, batch, prefetch from_tensor_slices, list_file
37
+ dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
38
+ dataset = dataset.cache()
39
+ dataset = dataset.shuffle(160000)
40
+ dataset = dataset.batch(16)
41
+ dataset = dataset.prefetch(8) # helps bottlenecks
42
+
43
+
44
+ train = dataset.take(int(len(dataset)*.7))
45
+ val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
46
+ test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
47
+
48
+ model = Sequential() # Instantiate sequential api
49
+ # Create the embedding layer
50
+ model.add(Embedding(MAX_FEATURES+1, 32))
51
+ # Bidirectional LSTM Layer
52
+ model.add(Bidirectional(LSTM(32, activation='tanh')))
53
+ # Feature extractor Fully connected layers
54
+ model.add(Dense(128, activation='relu'))
55
+ model.add(Dense(256, activation='relu'))
56
+ model.add(Dense(128, activation='relu'))
57
+ # Final layer
58
+ model.add(Dense(6, activation='sigmoid'))
59
+
60
+
61
+ model.compile(loss='BinaryCrossentropy', optimizer='Adam')
62
+
63
+
64
+ model.summary()
65
+
66
+
67
+ history = model.fit(train, epochs=10, validation_data=val)
68
+
69
+
70
+ plt.figure(figsize=(8,5))
71
+ pd.DataFrame(history.history).plot()
72
+ plt.show()
73
+
74
+
75
+
76
+ input_text = ['You freaking suck! I am going to hit you!']
77
+
78
+
79
+ input_text = vectorizer(input_text)
80
+
81
+
82
+ res = model.predict(input_text)
83
+
84
+ (res > 0.5).astype(int)
85
+
86
+ batch_X, batch_y = test.as_numpy_iterator().next()
87
+
88
+
89
+ (model.predict(batch_X) > 0.5).astype(int)
90
+
91
+
92
+ res.shape
93
+
94
+
95
+ pre = Precision()
96
+ re = Recall()
97
+ acc = CategoricalAccuracy()
98
+
99
+ for batch in test.as_numpy_iterator():
100
+ # Unpack the batch
101
+ X_true, y_true = batch
102
+ # Make a prediction
103
+ yhat = model.predict(X_true)
104
+
105
+ # Flatten the predictions
106
+ y_true = y_true.flatten()
107
+ yhat = yhat.flatten()
108
+
109
+ pre.update_state(y_true, yhat)
110
+ re.update_state(y_true, yhat)
111
+ acc.update_state(y_true, yhat)
112
+
113
+
114
+ print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
115
+
116
+ model.save('sentimentAnalysis.h5')
117
+
118
+ model = tf.keras.models.load_model('sentimentAnalysis.h5')
119
+
120
+ input_str = vectorizer('I hate you!')
121
+
122
+
123
+ res = model.predict(np.expand_dims(input_str,0))
124
+
125
+ res
126
+
127
+ def score_comment(comment):  # Gradio callback: return one "<label column>: <True/False>" line per label column for a single comment string
128
+ vectorized_comment = vectorizer([comment])  # wrap in a list so the vectorizer receives a batch of one
129
+ results = model.predict(vectorized_comment)  # results[0] holds the scores for that single comment
130
+
131
+ text = ''
132
+ for idx, col in enumerate(df.columns[2:]):  # same column slice that was used to build y
133
+ text += '{}: {}\n'.format(col, results[0][idx]>0.5)  # a score above 0.5 is reported as True
134
+
135
+ return text
136
+
137
+
138
+ interface = components.Interface(
139
+ fn=score_comment,
140
+ inputs=components.Textbox(lines=2, placeholder='Comment to score'),
141
+ outputs='text')
142
+
143
+
144
+ interface.launch(share=True)
145
+
main.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ import os
3
+ import pandas as pd
4
+ import tensorflow as tf
5
+ import numpy as np
6
+
7
+ !pip list
8
+
9
+ from tensorflow.keras.models import Sequential
10
+ from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
11
+
12
+ from matplotlib import pyplot as plt
13
+
14
+ from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
15
+
16
+
17
+ !pip install gradio jinja2
18
+
19
+ from gradio import components
20
+ from gradio import Interface
21
+
22
+
23
+
24
+
25
+
26
+ base_path = r"C:\Users\tochi\SentimentAnalysisData"
27
+ df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
28
+
29
+ df.head() # displays first couple of comments from csv file
30
+
31
+ from tensorflow.keras.layers import TextVectorization
32
+
33
+ X = df['comment_text']
34
+ y = df[df.columns[2:]].values
35
+
36
+
37
+ MAX_FEATURES = 200000 # number of words in the vocab
38
+
39
+ vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
40
+ output_sequence_length=1800,
41
+ output_mode='int')
42
+
43
+ vectorizer.adapt(X.values)
44
+
45
+ vectorized_text = vectorizer(X.values)
46
+
47
+ #MCSHBAP - map, cache, shuffle, batch, prefetch from_tensor_slices, list_file
48
+ dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
49
+ dataset = dataset.cache()
50
+ dataset = dataset.shuffle(160000)
51
+ dataset = dataset.batch(16)
52
+ dataset = dataset.prefetch(8) # helps bottlenecks
53
+
54
+ train = dataset.take(int(len(dataset)*.7))
55
+ val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
56
+ test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
57
+
58
+
59
+ from tensorflow.keras.models import Sequential
60
+ from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
61
+
62
+
63
+ model = Sequential() # Instantiate sequential api
64
+ # Create the embedding layer
65
+ model.add(Embedding(MAX_FEATURES+1, 32))
66
+ # Bidirectional LSTM Layer
67
+ model.add(Bidirectional(LSTM(32, activation='tanh')))
68
+ # Feature extractor Fully connected layers
69
+ model.add(Dense(128, activation='relu'))
70
+ model.add(Dense(256, activation='relu'))
71
+ model.add(Dense(128, activation='relu'))
72
+ # Final layer
73
+ model.add(Dense(6, activation='sigmoid'))
74
+
75
+
76
+ model.compile(loss='BinaryCrossentropy', optimizer='Adam')
77
+
78
+
79
+ model.summary()
80
+
81
+
82
+ history = model.fit(train, epochs=1, validation_data=val)
83
+
84
+
85
+ from matplotlib import pyplot as plt
86
+
87
+
88
+ plt.figure(figsize=(8,5))
89
+ pd.DataFrame(history.history).plot()
90
+ plt.show()
91
+
92
+
93
+ input_text = ['You freaking suck! I am going to hit you!']
94
+
95
+
96
+ input_text = vectorizer(input_text)
97
+
98
+
99
+ res = model.predict(input_text)
100
+
101
+ (res > 0.5).astype(int)
102
+
103
+ batch_X, batch_y = test.as_numpy_iterator().next()
104
+
105
+
106
+ (model.predict(batch_X) > 0.5).astype(int)
107
+
108
+
109
+ res.shape
110
+
111
+ from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
112
+
113
+
114
+ pre = Precision()
115
+ re = Recall()
116
+ acc = CategoricalAccuracy()
117
+
118
+ for batch in test.as_numpy_iterator():
119
+ # Unpack the batch
120
+ X_true, y_true = batch
121
+ # Make a prediction
122
+ yhat = model.predict(X_true)
123
+
124
+ # Flatten the predictions
125
+ y_true = y_true.flatten()
126
+ yhat = yhat.flatten()
127
+
128
+ pre.update_state(y_true, yhat)
129
+ re.update_state(y_true, yhat)
130
+ acc.update_state(y_true, yhat)
131
+
132
+
133
+ print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
134
+
135
+ model.save('sentimentAnalysis.h5')
136
+
137
+ model = tf.keras.models.load_model('sentimentAnalysis.h5')
138
+
139
+ input_str = vectorizer('I hate you!')
140
+
141
+
142
+ res = model.predict(np.expand_dims(input_str,0))
143
+
144
+ res
145
+
146
+ def score_comment(comment):
147
+ vectorized_comment = vectorizer([comment])
148
+ results = model.predict(vectorized_comment)
149
+
150
+ text = ''
151
+ for idx, col in enumerate(df.columns[2:]):
152
+ text += '{}: {}\n'.format(col, results[0][idx]>0.5)
153
+
154
+ return text
155
+
156
+
157
+ interface = components.Interface(
158
+ fn=score_comment,
159
+ inputs=components.Textbox(lines=2, placeholder='Comment to score'),
160
+ outputs='text')
161
+
162
+
163
+ interface.launch(share=True)
164
+
165
+
166
+ '''
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ tensorflow
2
+ # os  (standard-library module, not a pip package; listing it makes `pip install -r requirements.txt` fail)
3
+ pandas
4
+ numpy
5
+ gradio
6
+ matplotlib
7
+ keras