jitesh committed on
Commit
86f2d3a
1 Parent(s): d93b279

adds function to calc. stats

Browse files
Files changed (7) hide show
  1. .gitignore +132 -0
  2. app.py +51 -39
  3. requirements.txt +7 -1
  4. run.sh +1 -0
  5. setup.py +22 -0
  6. story_gen.py +161 -0
  7. story_gen_test.py +31 -0
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ test/*
2
+ results/*
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
app.py CHANGED
@@ -1,46 +1,58 @@
1
  import streamlit as st
 
2
 
3
- from transformers import pipeline, set_seed
 
4
 
5
- import time, sys
 
 
 
 
 
 
 
 
6
 
7
- from transformers import pipeline, set_seed
8
- import printj
9
- start = time.time()
10
- generator = pipeline('text-generation', model='gpt2')
11
- classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
12
- process_time = time.time()-start
13
- print(f'Process Time: {process_time}')
14
- # set_seed(42)
15
- # sys.exit()
16
- st.set_page_config(layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- def story(story_till_now, num_generation, length):
19
- last_length = 0
20
 
21
- # story_till_now = "Hello, I'm a language model,"
22
- for i in range(num_generation):
23
- # start = time.time()
24
- results = generator(story_till_now, max_length=30+length*i, num_return_sequences=1)
25
- # process_time = time.time()-start
26
- # print(f'Process Time: {process_time}, avg. time: {process_time/num_return_sequences}')
27
- story_till_now = results[0]['generated_text']
28
- new_sentence = story_till_now[last_length:]
29
- emotion = classifier(new_sentence)
30
- printj.yellow(f'Sentence {i}:')
31
- story_to_print = f'{printj.ColorText.cyan(story_till_now[:last_length])}{printj.ColorText.green(story_till_now[last_length:])}\n'
32
- print(story_to_print)
33
- printj.purple(f'Emotion: {emotion}')
34
- last_length = len(story_till_now)
35
- return story_till_now, emotion
36
- story_till_now=st.text_input(label='First Sentence', value='Hello, I\'m a language model,') # , placeholder="Start writing your story...")
37
 
38
- num_generation= st.sidebar.slider(label='Number of generation', min_value=1, max_value=100, value=10, step=1)
39
- length= st.sidebar.slider(label='Length of the generated sentence', min_value=1, max_value=100, value=20, step=1)
40
- if st.button('Run'):
41
- story_till_now, emotion =story(story_till_now, num_generation, length)
42
- st.write('Story:')
43
- st.text(story_till_now)
44
- st.text(f'Emotion: {emotion}')
45
- else:
46
- st.write('Write the first sentence and then hit the Run button')
 
 
 
1
  import streamlit as st
2
+ from story_gen import StoryGenerator
3
 
4
st.set_page_config(page_title='Storytelling ' + u'\U0001F5BC',
                   page_icon=u'\U0001F5BC', layout="wide")

# The two modes offered in the sidebar; named once so the radio options and
# the branch conditions below cannot drift apart.
MODE_STATS = 'Create Statistics'
MODE_PLAY = 'Play Storytelling'

# NOTE(review): Streamlit re-executes this script on each interaction, so the
# models are presumably re-loaded every time -- consider caching the generator.
gen = StoryGenerator()

# Sidebar layout: mode selector on top, parameters next, action button last.
container_mode = st.sidebar.container()
container_param = st.sidebar.container()
container_button = st.sidebar.container()

mode = container_mode.radio(
    "Select your mode", (MODE_STATS, MODE_PLAY), index=0)

# Parameters shared by both modes.
story_till_now = container_param.text_input(
    label='First Sentence', value='Hello, I\'m a language model,')
num_generation = container_param.slider(
    label='Number of generation', min_value=1, max_value=100, value=10, step=1)
length = container_param.slider(
    label='Length of the generated sentence',
    min_value=1, max_value=100, value=20, step=1)

if mode == MODE_STATS:
    container_mode.write('You selected statistics.')
    num_tests = container_param.slider(
        label='Number of tests', min_value=1, max_value=1000, value=3, step=1)
    reaction_weight_mode = container_param.select_slider(
        "Reaction Weight w:", ["Random", "Fixed"])
    if reaction_weight_mode == "Fixed":
        # All numeric arguments are floats and the default lies inside the
        # [0, 1] range; the previous int/float mix with value=-1 was invalid.
        reaction_weight = container_param.slider(
            label='reaction_weight w',
            min_value=0.0, max_value=1.0, value=0.5, step=0.001)
    else:
        # -1 tells StoryGenerator.get_stats to draw a random weight per test.
        reaction_weight = -1

    if container_button.button('Analyse'):
        # Use the sentence typed by the user rather than a hard-coded one.
        gen.get_stats(story_till_now=story_till_now,
                      num_generation=num_generation,
                      length=length,
                      reaction_weight=reaction_weight,
                      num_tests=num_tests)
        # gen.stories is only populated by the get_stats call above.
        for si, story in enumerate(gen.stories):
            st.markdown(f'Story no. {si}:', unsafe_allow_html=False)
            st.markdown(story, unsafe_allow_html=False)

elif mode == MODE_PLAY:
    # This branch previously re-tested 'Create Statistics' and was unreachable.
    container_mode.write('Let\'s play storytelling.')
    if container_button.button('Run'):
        story_till_now, emotion = gen.story(
            story_till_now, num_generation, length)
        st.write('Story:')
        st.text(story_till_now)
        st.text(f'Emotion: {emotion}')
    else:
        st.write('Write the first sentence and then hit the Run button')
requirements.txt CHANGED
@@ -1,4 +1,10 @@
1
  transformers==4.16.2
2
  printj==0.1.0
3
  torch==1.10.2
4
- torchvision==0.11.3
 
 
 
 
 
 
1
  transformers==4.16.2
2
  printj==0.1.0
3
  torch==1.10.2
4
+ torchvision==0.11.3
5
+ pandas==1.4.0
6
+ numpy==1.22.2
7
+ nltk==3.7
8
+ xlsxwriter==3.0.2
9
+ plotly
10
+ seaborn
run.sh ADDED
@@ -0,0 +1 @@
 
1
+ streamlit run app.py
setup.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import setuptools
2
+
3
# The README doubles as the package's long description. Read it with an
# explicit encoding so the result does not depend on the machine's locale.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="ist",
    version="0.0.1",
    author="Jitesh Gosar",
    author_email="gosar95@gmail.com",
    description="Analyse IST",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://huggingface.co/spaces/jitesh/ist",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
)
story_gen.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import sys
3
+ import time
4
+
5
+ import printj
6
+ from transformers import pipeline # , set_seed
7
+ import numpy as np
8
+ import pandas as pd
9
+ import plotly.figure_factory as ff
10
+ import nltk
11
+
12
+
13
class StoryGenerator:
    """Generate stories with a text-generation model and gather statistics.

    The instance owns two pipelines (a text generator and an emotion
    classifier), the list of stories produced by the last ``get_stats`` call
    (``stories``) and the accumulated per-sentence statistics (``stats_df``).
    """

    def __init__(self):
        self.initialise_models()
        self.stats_df = pd.DataFrame(data=[], columns=[])
        self.stories = []

    def initialise_models(self):
        """Load both pipelines and print how long loading took."""
        start = time.time()
        self.generator = pipeline('text-generation', model='gpt2')
        self.classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True)
        initialising_time = time.time() - start
        print(f'Initialising Time: {initialising_time}')

    def reset(self):
        """Discard the stored stories and the accumulated statistics."""
        # ``self`` was missing from the signature, so calling this method on
        # an instance raised TypeError.
        self.clear_stories()
        self.clear_stats()

    def clear_stories(self):
        """Empty the list of generated stories."""
        self.stories = []

    def clear_stats(self):
        """Replace the statistics table with an empty DataFrame."""
        self.stats_df = pd.DataFrame(data=[], columns=[])

    @staticmethod
    def get_num_token(text):
        """Return the number of NLTK word tokens in ``text``.

        NOTE(review): this count is used as a stand-in for the generator's
        own token count, which presumably differs -- confirm the resulting
        ``max_length`` budget is acceptable. ``nltk.word_tokenize`` also
        needs NLTK's tokenizer data to be installed.
        """
        return len(nltk.word_tokenize(text))

    @staticmethod
    def check_show_emotion(confidence_score, frequency, w):
        """Randomly decide whether an emotion should be shown.

        The probability of showing is
        ``w * confidence_score + (1 - w) * (1 - frequency)``; the result is
        True when that probability exceeds a uniform sample from [0, 1).
        """
        frequency_penalty = 1 - frequency
        probability_emote = w * confidence_score + (1 - w) * frequency_penalty
        return probability_emote > np.random.random_sample()

    def _extend_story(self, story_till_now, length):
        """Generate a continuation; return ``(full_story, new_text)``.

        ``new_text`` is whatever follows the first ``len(story_till_now)``
        characters of the generated text (assumes the generator echoes the
        prompt at the start of its output -- TODO confirm).
        """
        max_length = self.get_num_token(story_till_now) + length
        generated = self.generator(
            story_till_now, max_length=max_length, num_return_sequences=1)
        extended = generated[0]['generated_text']
        return extended, extended[len(story_till_now):]

    def _top_emotion(self, text):
        """Return the classifier entry with the highest score for ``text``."""
        emotions = self.classifier(text)
        return max(emotions[0], key=lambda x: x['score'])

    def story(self,
              story_till_now="Hello, I'm a language model,",
              num_generation=4,
              length=10):
        """Extend the story ``num_generation`` times.

        Returns:
            ``(story, emotion)`` where ``emotion`` is the top classifier
            entry for the last generated piece, or ``None`` when
            ``num_generation`` is 0.
        """
        # Bound up front so zero generations no longer raises
        # UnboundLocalError at the return statement.
        emotion = None
        for _ in range(num_generation):
            # Every round gets the same budget of ``length`` extra tokens,
            # matching auto_ist. The previous ``length * i`` left the first
            # round with no room to generate anything.
            story_till_now, new_sentence = self._extend_story(
                story_till_now, length)
            emotion = self._top_emotion(new_sentence)
        return story_till_now, emotion

    def auto_ist(self,
                 story_till_now="Hello, I'm a language model,",
                 num_generation=4,
                 length=20, reaction_weight=0.5):
        """Simulate alternating user/robot turns and record reactions.

        Each round generates a "user" sentence, classifies its emotion,
        decides whether to react, then generates a "robot" sentence.

        Returns:
            ``(stats_df, story)``: one row per round (indexed ``idx_<i>``)
            and the final story text.
        """
        rows = []
        num_reactions = 0
        for i in range(num_generation):
            # Text generation for User
            last_length = len(story_till_now)
            printj.cyan(story_till_now)
            printj.red.bold_on_white(
                f'loop: {i}; generate user text; length: {last_length}')
            story_till_now, new_sentence = self._extend_story(
                story_till_now, length)

            # Emotion classification for User
            printj.red.bold_on_white(f'loop: {i}; check emotion')
            emotion = self._top_emotion(new_sentence)
            # Computed every round so the recorded value is never a stale
            # leftover from an earlier, non-neutral round.
            reaction_frequency = num_reactions / (i + 1)
            if emotion['label'] == 'neutral':
                show_emotion = False
            else:
                show_emotion = self.check_show_emotion(
                    confidence_score=emotion['score'],
                    frequency=reaction_frequency,
                    w=reaction_weight)
            if show_emotion:
                num_reactions += 1

            # Text generation for Robot
            last_length = len(story_till_now)
            printj.cyan(story_till_now)
            printj.red.bold_on_white(
                f'loop: {i}; generate robot text; length: {last_length}')
            story_till_now, _ = self._extend_story(story_till_now, length)

            rows.append({
                'sentence_no': i,
                'show_emotion': show_emotion,
                'emotion_label': emotion['label'],
                'emotion_score': emotion['score'],
                'num_reactions': num_reactions,
                'reaction_frequency': reaction_frequency,
                'reaction_weight': reaction_weight,
            })

        # Build the table once instead of concatenating inside the loop.
        index = [f'idx_{i}' for i in range(len(rows))]
        return pd.DataFrame(rows, index=index), story_till_now

    def get_stats(self,
                  story_till_now="Hello, I'm a language model,",
                  num_generation=4,
                  length=20, reaction_weight=-1, num_tests=2):
        """Run ``auto_ist`` ``num_tests`` times and accumulate the results.

        ``reaction_weight == -1`` means a fresh random weight (rounded to
        one decimal) is drawn for every test. Rows are appended to
        ``self.stats_df`` with a ``test_id`` column that continues after the
        largest id already stored; ``self.stories`` is replaced by the
        stories of this call.
        """
        use_random_w = reaction_weight == -1
        self.stories = []

        # Continue numbering after existing rows; checked explicitly instead
        # of swallowing every exception.
        if 'test_id' in self.stats_df.columns and not self.stats_df.empty:
            first_test_id = int(self.stats_df['test_id'].max()) + 1
        else:
            first_test_id = 0

        for test_id in range(num_tests):
            if use_random_w:
                reaction_weight = np.round(np.random.random_sample(), 1)
            # Forward the caller's settings; they used to be overridden by
            # hard-coded num_generation=4 and length=20.
            stats_df0, _story_till_now = self.auto_ist(
                story_till_now=story_till_now,
                num_generation=num_generation,
                length=length, reaction_weight=reaction_weight)
            stats_df0.insert(
                loc=0, column='test_id', value=test_id + first_test_id)

            self.stats_df = pd.concat([self.stats_df, stats_df0])
            printj.yellow(f'test_id: {test_id}')
            printj.green(stats_df0)
            self.stories.append(_story_till_now)

        self.stats_df = self.stats_df.reset_index(drop=True)
        print(self.stats_df)

    def save_stats(self, path='pandas_simple.xlsx'):
        """Write ``self.stats_df`` to sheet ``IST`` of an Excel file."""
        # The context manager saves and closes the workbook even if writing
        # fails; it replaces the deprecated explicit ``writer.save()``.
        with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
            self.stats_df.to_excel(writer, sheet_name='IST')
story_gen_test.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ import printj
3
+ from story_gen import StoryGenerator
4
+
5
gen = StoryGenerator()

# %%
# Run a batch of simulated storytelling sessions and collect statistics.
gen.get_stats(story_till_now="For myriad of eons i’ve forgotten who I really was, harvesting the essence of all existence.",
              length=10, num_generation=3, num_tests=50)

# %%
# Save next to the project instead of into one developer's home directory.
from pathlib import Path

results_dir = Path('results')
results_dir.mkdir(parents=True, exist_ok=True)
gen.save_stats(str(results_dir / 'a.xlsx'))

# %%
# Plot the reaction count reached by the end of each test, per weight.
# The last sentence is looked up from the data rather than hard-coded, so the
# selection is never empty when the number of generations changes.
import seaborn as sns

last_sentence_no = gen.stats_df.sentence_no.max()
data = gen.stats_df[gen.stats_df.sentence_no == last_sentence_no]
sns.set_theme(style="whitegrid")
ax = sns.violinplot(x="reaction_weight", y="num_reactions", data=data).set_title(
    f'Analysing ProbabilityEmote (Max reactions={last_sentence_no + 1})')
# %%

data
# %%