Spaces:

jitesh
/

storytelling

Runtime error

App Files Files Community

jitesh committed on Feb 16, 2022

Commit

86f2d3a

•

1 Parent(s): d93b279

adds function to calc. stats

Browse files

Files changed (7) hide show

.gitignore +132 -0
app.py +51 -39
requirements.txt +7 -1
run.sh +1 -0
setup.py +22 -0
story_gen.py +161 -0
story_gen_test.py +31 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,132 @@

+test/*
+results/*
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

app.py CHANGED Viewed

@@ -1,46 +1,58 @@
 import streamlit as st
-from transformers import pipeline, set_seed
-import time, sys
-from transformers import pipeline, set_seed
-import printj
-start = time.time()
-generator = pipeline('text-generation', model='gpt2')
-classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
-process_time = time.time()-start
-print(f'Process Time: {process_time}')
-# set_seed(42)
-# sys.exit()
-st.set_page_config(layout="wide")
-def story(story_till_now, num_generation, length):
-    last_length = 0
-    # story_till_now = "Hello, I'm a language model,"
-    for  i in range(num_generation):
-        # start = time.time()
-        results = generator(story_till_now, max_length=30+length*i, num_return_sequences=1)
-        # process_time = time.time()-start
-        # print(f'Process Time: {process_time}, avg. time: {process_time/num_return_sequences}')
-        story_till_now = results[0]['generated_text']
-        new_sentence = story_till_now[last_length:]
-        emotion = classifier(new_sentence)
-        printj.yellow(f'Sentence {i}:')
-        story_to_print = f'{printj.ColorText.cyan(story_till_now[:last_length])}{printj.ColorText.green(story_till_now[last_length:])}\n'
-        print(story_to_print)
-        printj.purple(f'Emotion: {emotion}')
-        last_length = len(story_till_now)
-    return story_till_now, emotion
-story_till_now=st.text_input(label='First Sentence', value='Hello, I\'m a language model,')  # , placeholder="Start writing your story...")
-num_generation= st.sidebar.slider(label='Number of generation', min_value=1, max_value=100, value=10, step=1)
-length= st.sidebar.slider(label='Length of the generated sentence', min_value=1, max_value=100, value=20, step=1)
-if st.button('Run'):
-    story_till_now, emotion =story(story_till_now, num_generation, length)
-    st.write('Story:')
-    st.text(story_till_now)
-    st.text(f'Emotion: {emotion}')
-else:
-    st.write('Write the first sentence and then hit the Run button')

 import streamlit as st
+from story_gen import StoryGenerator
+# Streamlit front end with two modes: batch statistics over simulated
+# storytelling sessions, and interactive story generation.
+st.set_page_config(page_title='Storytelling ' +
+                   u'\U0001F5BC', page_icon=u'\U0001F5BC', layout="wide")
+# NOTE(review): Streamlit re-executes this script on every interaction, so the
+# generator (whose constructor loads both pipelines) is presumably rebuilt each
+# time - consider caching it; TODO confirm for the Streamlit version in use.
+gen = StoryGenerator()
+# Sidebar layout: mode selector, then parameters, then the action button.
+container_mode = st.sidebar.container()
+container_param = st.sidebar.container()
+container_button = st.sidebar.container()
+mode = container_mode.radio(
+    "Select your mode",
+    ('Create Statistics', 'Play Storytelling'), index=0)
+# Parameters shared by both modes.
+story_till_now = container_param.text_input(
+    label='First Sentence', value='Hello, I\'m a language model,')
+num_generation = container_param.slider(
+    label='Number of generation', min_value=1, max_value=100, value=10, step=1)
+length = container_param.slider(label='Length of the generated sentence',
+                                min_value=1, max_value=100, value=20, step=1)
+if mode == 'Create Statistics':
+    container_mode.write('You selected statistics.')
+    num_tests = container_param.slider(
+        label='Number of tests', min_value=1, max_value=1000, value=3, step=1)
+    reaction_weight_mode = container_param.select_slider(
+        "Reaction Weight w:", ["Random", "Fixed"])
+    if reaction_weight_mode == "Fixed":
+        # All numeric slider arguments must share one type and the default
+        # must lie inside [min_value, max_value]; the previous int bounds with
+        # value=-1 and a float step were rejected by Streamlit.
+        reaction_weight = container_param.slider(
+            label='reaction_weight w', min_value=0.0, max_value=1.0, value=0.5, step=.001)
+    else:
+        # -1 is the sentinel that get_stats() treats as "draw a random weight
+        # for every test".
+        reaction_weight = -1
+    if container_button.button('Analyse'):
+        # Use the sentence typed in the sidebar instead of a hard-coded one.
+        gen.get_stats(story_till_now=story_till_now,
+                      num_generation=num_generation, length=length, reaction_weight=reaction_weight, num_tests=num_tests)
+        for si, story in enumerate(gen.stories):
+            st.markdown(f'Story no. {si}:', unsafe_allow_html=False)
+            st.markdown(story, unsafe_allow_html=False)
+elif mode == 'Play Storytelling':
+    # This branch previously re-tested 'Create Statistics' and was unreachable.
+    container_mode.write('Let\'s play storytelling.')
+    if container_button.button('Run'):
+        story_till_now, emotion = gen.story(
+            story_till_now, num_generation, length)
+        st.write('Story:')
+        st.text(story_till_now)
+        st.text(f'Emotion: {emotion}')
+    else:
+        st.write('Write the first sentence and then hit the Run button')

requirements.txt CHANGED Viewed

@@ -1,4 +1,10 @@
 transformers==4.16.2
 printj==0.1.0
 torch==1.10.2
-torchvision==0.11.3

 transformers==4.16.2
 printj==0.1.0
 torch==1.10.2
+torchvision==0.11.3
+pandas==1.4.0
+numpy==1.22.2
+nltk==3.7
+xlsxwriter==3.0.2
+plotly
+seaborn

run.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ streamlit run app.py

setup.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import setuptools
+# The README doubles as the long description of the package.  The encoding is
+# given explicitly so the read does not depend on the platform default.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+setuptools.setup(
+    name="ist",
+    version="0.0.1",
+    author="Jitesh Gosar",
+    author_email="gosar95@gmail.com",
+    description="Analyse IST",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://huggingface.co/spaces/jitesh/ist",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.6",
+)

story_gen.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import sys
+import time
+import printj
+from transformers import pipeline  # , set_seed
+import numpy as np
+import pandas as pd
+import plotly.figure_factory as ff
+import nltk
+class StoryGenerator:
+    """Extend stories with a text-generation pipeline and record emotion statistics.
+    A 'gpt2' text-generation pipeline continues the story and a
+    text-classification pipeline scores each newly generated piece.  Statistics
+    of simulated sessions accumulate in ``stats_df``; the stories produced by
+    the most recent ``get_stats`` call are kept in ``stories``.
+    """
+    def __init__(self):
+        self.initialise_models()
+        self.stats_df = pd.DataFrame(data=[], columns=[])
+        self.stories = []
+    def initialise_models(self):
+        """Create the generator and classifier pipelines and print the load time."""
+        start = time.time()
+        self.generator = pipeline('text-generation', model='gpt2')
+        self.classifier = pipeline("text-classification",
+                                   model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+        initialising_time = time.time()-start
+        print(f'Initialising Time: {initialising_time}')
+    def reset(self):
+        """Drop all stored stories and statistics."""
+        # `self` was missing from the signature, so every call raised TypeError.
+        self.clear_stories()
+        self.clear_stats()
+    def clear_stories(self):
+        """Forget the stories collected so far."""
+        self.stories = []
+    def clear_stats(self):
+        """Replace the accumulated statistics with an empty frame."""
+        self.stats_df = pd.DataFrame(data=[], columns=[])
+    @staticmethod
+    def get_num_token(text):
+        """Return the number of tokens NLTK's ``word_tokenize`` finds in *text*.
+        NOTE(review): this is an NLTK count and need not equal the generator's
+        own token count, so ``max_length`` values derived from it are
+        presumably approximate - TODO confirm.
+        """
+        return len(nltk.word_tokenize(text))
+    @staticmethod
+    def check_show_emotion(confidence_score, frequency, w):
+        """Decide at random whether an emotion should be shown.
+        The probability is ``w * confidence_score + (1 - w) * (1 - frequency)``;
+        the result is whether it exceeds one draw of
+        ``np.random.random_sample()``.
+        """
+        frequency_penalty = 1 - frequency
+        probability_emote = w * confidence_score + (1-w) * frequency_penalty
+        return probability_emote > np.random.random_sample()
+    def story(self,
+              story_till_now="Hello, I'm a language model,",
+              num_generation=4,
+              length=10):
+        """Extend *story_till_now* ``num_generation`` times.
+        Returns:
+            ``(story, emotion)`` where ``emotion`` is the highest-scoring
+            classifier entry for the last generated piece, or ``None`` when no
+            generation step ran.
+        """
+        # Defined up front so num_generation <= 0 no longer raises
+        # UnboundLocalError at the return statement.
+        emotion = None
+        for _ in range(num_generation):
+            last_length = len(story_till_now)
+            # Allow `length` extra tokens on every step, as auto_ist() does.
+            # The previous `length*i` left no headroom on the first pass.
+            generated = self.generator(
+                story_till_now,
+                max_length=self.get_num_token(story_till_now) + length,
+                num_return_sequences=1)
+            story_till_now = generated[0]['generated_text']
+            new_sentence = story_till_now[last_length:]
+            emotions = self.classifier(new_sentence)
+            emotion = max(emotions[0], key=lambda x: x['score'])
+        return story_till_now, emotion
+    def auto_ist(self,
+                 story_till_now="Hello, I'm a language model,",
+                 num_generation=4,
+                 length=20, reaction_weight=0.5):
+        """Simulate one session in which user and robot alternate sentences.
+        Each iteration generates a "user" continuation, classifies it, decides
+        whether an emotion is shown, then generates a "robot" continuation.
+        Returns:
+            ``(stats_df, story)``: one row per iteration (index ``idx_<i>``)
+            with columns ``sentence_no``, ``show_emotion``, ``emotion_label``,
+            ``emotion_score``, ``num_reactions``, ``reaction_frequency`` and
+            ``reaction_weight``, plus the final story text.
+        """
+        stats_df = pd.DataFrame(data=[], columns=[])
+        stats_dict = dict()
+        num_reactions = 0
+        reaction_frequency = 0
+        for i in range(num_generation):
+            # Text generation for User
+            last_length = len(story_till_now)
+            printj.cyan(story_till_now)
+            printj.red.bold_on_white(
+                f'loop: {i}; generate user text; length: {last_length}')
+            generated_user_sentence = self.generator(story_till_now, max_length=self.get_num_token(
+                story_till_now)+length, num_return_sequences=1)
+            story_till_now = generated_user_sentence[0]['generated_text']
+            new_sentence = story_till_now[last_length:]
+            printj.red.bold_on_white(f'loop: {i}; check emotion')
+            # Emotion classification of the user's new text
+            emotions = self.classifier(new_sentence)
+            emotion = max(emotions[0], key=lambda x: x['score'])
+            if emotion['label'] == 'neutral':
+                # Neutral text never triggers a reaction; the stored
+                # frequency keeps its previous value in that case.
+                show_emotion = False
+            else:
+                reaction_frequency = num_reactions/(i+1)
+                show_emotion = self.check_show_emotion(
+                    confidence_score=emotion['score'], frequency=reaction_frequency, w=reaction_weight)
+            if show_emotion:
+                num_reactions += 1
+            # Text generation for Robot
+            last_length = len(story_till_now)
+            printj.cyan(story_till_now)
+            printj.red.bold_on_white(
+                f'loop: {i}; generate robot text; length: {last_length}')
+            generated_robot_sentence = self.generator(story_till_now, max_length=self.get_num_token(
+                story_till_now)+length, num_return_sequences=1)
+            story_till_now = generated_robot_sentence[0]['generated_text']
+            stats_dict['sentence_no'] = i
+            stats_dict['show_emotion'] = show_emotion
+            stats_dict['emotion_label'] = emotion['label']
+            stats_dict['emotion_score'] = emotion['score']
+            stats_dict['num_reactions'] = num_reactions
+            stats_dict['reaction_frequency'] = reaction_frequency
+            stats_dict['reaction_weight'] = reaction_weight
+            stats_df = pd.concat(
+                [stats_df, pd.DataFrame(stats_dict, index=[f'idx_{i}'])])
+        return stats_df, story_till_now
+    def get_stats(self,
+                  story_till_now="Hello, I'm a language model,",
+                  num_generation=4,
+                  length=20, reaction_weight=-1, num_tests=2):
+        """Run ``num_tests`` simulated sessions and append their statistics.
+        Args:
+            story_till_now: opening text used for every session.
+            num_generation: iterations per session, forwarded to ``auto_ist``.
+            length: extra tokens allowed per generation, forwarded likewise.
+            reaction_weight: weight ``w`` for ``check_show_emotion``; ``-1``
+                means a fresh random weight (rounded to one decimal) per test.
+            num_tests: number of sessions to run.
+        Side effects: replaces ``self.stories`` with the stories of this call,
+        appends rows tagged with a running ``test_id`` to ``self.stats_df``
+        and prints progress.
+        """
+        use_random_w = reaction_weight == -1
+        self.stories = []
+        # Continue numbering after the highest test_id already stored; an
+        # explicit check replaces the former blanket `except Exception`.
+        if 'test_id' in self.stats_df.columns and len(self.stats_df) > 0:
+            num_rows = int(self.stats_df['test_id'].max()) + 1
+        else:
+            num_rows = 0
+        for test_id in range(num_tests):
+            if use_random_w:
+                reaction_weight = np.round(np.random.random_sample(), 1)
+            # Forward the caller's settings; they used to be overridden by the
+            # hard-coded values 4 and 20.
+            stats_df0, _story_till_now = self.auto_ist(
+                story_till_now=story_till_now,
+                num_generation=num_generation,
+                length=length, reaction_weight=reaction_weight)
+            stats_df0.insert(loc=0, column='test_id', value=test_id+num_rows)
+            self.stats_df = pd.concat([self.stats_df, stats_df0])
+            printj.yellow(f'test_id: {test_id}')
+            printj.green(stats_df0)
+            self.stories.append(_story_till_now)
+        self.stats_df = self.stats_df.reset_index(drop=True)
+        print(self.stats_df)
+    def save_stats(self, path='pandas_simple.xlsx'):
+        """Write ``stats_df`` to sheet 'IST' of an Excel workbook at *path*."""
+        # The context manager closes the workbook even if writing fails and
+        # avoids ExcelWriter.save(), which newer pandas releases removed.
+        with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
+            self.stats_df.to_excel(writer, sheet_name='IST')

story_gen_test.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# %%
+import printj
+from story_gen import StoryGenerator
+gen = StoryGenerator()
+# # %%
+# story_till_now, emotion = gen.story(story_till_now='Hello, I\'m a language model,', num_generation=3, length=10)
+# printj.purple(story_till_now)
+# printj.yellow(emotion)
+# %%
+gen.get_stats(story_till_now="For myriad of eons i’ve forgotten who I really was, harvesting the essence of all existence.",
+             length=10, num_generation=3, num_tests=50)
+# %%
+gen.save_stats('/home/jitesh/haru/ist/results/a.xlsx')
+# %%
+# Keep the row of the last sentence of every test: num_reactions there is the
+# total for the session.  The index is taken from the data rather than being
+# hard-coded, so it follows however many generations were actually run.
+last_sentence_no = gen.stats_df.sentence_no.max()
+data = gen.stats_df[gen.stats_df.sentence_no == last_sentence_no]
+import seaborn as sns
+sns.set_theme(style="whitegrid")
+# At most one reaction is counted per sentence, so the maximum possible count
+# is the number of sentences (last zero-based index + 1).
+ax = sns.violinplot(x="reaction_weight", y="num_reactions", data=data).set_title(
+    f'Analysing ProbabilityEmote (Max reactions={last_sentence_no + 1})')
+# %%
+data
+# %%