Spaces:

jitesh
/

storytelling

Runtime error

App Files Files Community

storytelling / story_gen.py

jitesh

fixes weight slider and storytelling mode

c1fa828 about 3 years ago

raw

history blame

6.64 kB


	import sys
	import time

	import printj
	from transformers import pipeline # , set_seed
	import numpy as np
	import pandas as pd
	# import nltk
	import re

	class StoryGenerator:
	    """Generate story continuations with GPT-2 and log emotion reactions.

	    Two Hugging Face pipelines are held on the instance:

	    * ``self.generator``  -- ``text-generation`` pipeline using ``gpt2``.
	    * ``self.classifier`` -- ``text-classification`` pipeline using
	      ``j-hartmann/emotion-english-distilroberta-base``; called with a
	      string, its result is read as ``result[0]`` = list of
	      ``{'label': ..., 'score': ...}`` dicts.

	    State kept between calls:

	    * ``self.stats_df`` -- DataFrame accumulating one row per generated
	      sentence across all ``get_stats`` runs.
	    * ``self.stories``  -- final story text of each test of the most recent
	      ``get_stats`` run.
	    """

	    def __init__(self):
	        """Load both pipelines and start with empty stories and statistics."""
	        self.initialise_models()
	        self.stats_df = pd.DataFrame(data=[], columns=[])
	        self.stories = []

	    def initialise_models(self):
	        """Create the generator and classifier pipelines and print load time."""
	        start = time.time()
	        self.generator = pipeline('text-generation', model='gpt2')
	        # return_all_scores=True is kept on purpose: the rest of the class
	        # indexes the classifier output as emotions[0] (list of score dicts).
	        self.classifier = pipeline(
	            "text-classification",
	            model="j-hartmann/emotion-english-distilroberta-base",
	            return_all_scores=True)
	        initialising_time = time.time() - start
	        print(f'Initialising Time: {initialising_time}')

	    def reset(self):
	        """Drop all collected stories and statistics."""
	        self.clear_stories()
	        self.clear_stats()

	    def clear_stories(self):
	        """Forget the stories collected by the last ``get_stats`` run."""
	        self.stories = []

	    def clear_stats(self):
	        """Replace the accumulated statistics with an empty DataFrame."""
	        self.stats_df = pd.DataFrame(data=[], columns=[])

	    @staticmethod
	    def get_num_token(text):
	        """Return the number of ``\\w+`` runs (regex "words") in *text*."""
	        return len(re.findall(r'\w+', text))

	    @staticmethod
	    def check_show_emotion(confidence_score, frequency, w):
	        """Randomly decide whether an emotion should be shown.

	        The show-probability is a weighted mix of the classifier confidence
	        and a penalty that shrinks as reactions become more frequent:
	        ``w * confidence_score + (1 - w) * (1 - frequency)``.

	        Returns:
	            True when that probability exceeds a fresh uniform sample from
	            ``np.random.random_sample()``.
	        """
	        frequency_penalty = 1 - frequency
	        probability_emote = w * confidence_score + (1 - w) * frequency_penalty
	        return probability_emote > np.random.random_sample()

	    def _continue_story(self, story_till_now, length):
	        """Extend *story_till_now* once; return ``(full_text, appended_text)``.

	        ``max_length`` passed to the generator is the regex word count of
	        the prompt plus *length*.
	        """
	        prompt_length = len(story_till_now)
	        generated = self.generator(
	            story_till_now,
	            max_length=self.get_num_token(story_till_now) + length,
	            num_return_sequences=1)
	        full_text = generated[0]['generated_text']
	        return full_text, full_text[prompt_length:]

	    def _top_emotion(self, sentence):
	        """Return the highest-scoring ``{'label', 'score'}`` dict for *sentence*."""
	        emotions = self.classifier(sentence)
	        return max(emotions[0], key=lambda x: x['score'])

	    def story(self,
	              story_till_now="Hello, I'm a language model,",
	              num_generation=4,
	              length=10):
	        """Extend the story *num_generation* times and classify each addition.

	        Args:
	            story_till_now: Prompt / story so far.
	            num_generation: Number of generation rounds.
	            length: Added to the prompt's word count to form ``max_length``.

	        Returns:
	            ``(story, emotion)`` where *emotion* is the top-scoring
	            classifier dict for the last generated piece, or ``None`` when
	            ``num_generation`` is 0 (no text was generated).
	        """
	        # Pre-bind so a zero-round call returns cleanly instead of raising
	        # UnboundLocalError at the return statement.
	        emotion = None
	        for _ in range(num_generation):
	            story_till_now, new_sentence = self._continue_story(
	                story_till_now, length)
	            emotion = self._top_emotion(new_sentence)
	        return story_till_now, emotion

	    def auto_ist(self,
	                 story_till_now="Hello, I'm a language model,",
	                 num_generation=4,
	                 length=20, reaction_weight=0.5):
	        """Simulate alternating user/robot turns and log reaction decisions.

	        Each round generates a "user" continuation, classifies its emotion,
	        decides via ``check_show_emotion`` whether to react (never for the
	        ``'neutral'`` label), then generates a "robot" continuation.

	        Args:
	            story_till_now: Prompt / story so far.
	            num_generation: Number of user+robot rounds.
	            length: Added to the prompt's word count to form ``max_length``.
	            reaction_weight: Weight ``w`` handed to ``check_show_emotion``.

	        Returns:
	            ``(stats_df, story)``: one row per round indexed ``idx_<i>``
	            with columns ``sentence_no``, ``show_emotion``,
	            ``emotion_label``, ``emotion_score``, ``num_reactions``,
	            ``reaction_frequency``, ``reaction_weight``; and the final text.
	        """
	        rows = []
	        num_reactions = 0
	        reaction_frequency = 0
	        for i in range(num_generation):
	            # Text generation for User
	            printj.cyan(story_till_now)
	            printj.red.bold_on_white(
	                f'loop: {i}; generate user text; length: {len(story_till_now)}')
	            story_till_now, new_sentence = self._continue_story(
	                story_till_now, length)

	            printj.red.bold_on_white(f'loop: {i}; check emotion')
	            # Emotion classification for User
	            emotion = self._top_emotion(new_sentence)
	            if emotion['label'] == 'neutral':
	                # reaction_frequency is deliberately left untouched here, so
	                # the logged value is the one from the last non-neutral round.
	                show_emotion = False
	            else:
	                # Frequency is measured before this round's decision.
	                reaction_frequency = num_reactions / (i + 1)
	                show_emotion = self.check_show_emotion(
	                    confidence_score=emotion['score'],
	                    frequency=reaction_frequency,
	                    w=reaction_weight)
	            if show_emotion:
	                num_reactions += 1

	            # Text generation for Robot
	            printj.cyan(story_till_now)
	            printj.red.bold_on_white(
	                f'loop: {i}; generate robot text; length: {len(story_till_now)}')
	            story_till_now, _ = self._continue_story(story_till_now, length)

	            rows.append({
	                'sentence_no': i,
	                'show_emotion': show_emotion,
	                'emotion_label': emotion['label'],
	                'emotion_score': emotion['score'],
	                'num_reactions': num_reactions,
	                'reaction_frequency': reaction_frequency,
	                'reaction_weight': reaction_weight,
	            })

	        # Build the frame once instead of concatenating onto an empty
	        # DataFrame every round (deprecated in pandas, and quadratic).
	        stats_df = pd.DataFrame(
	            rows, index=[f'idx_{i}' for i in range(len(rows))])
	        return stats_df, story_till_now

	    def get_stats(self,
	                  story_till_now="Hello, I'm a language model,",
	                  num_generation=4,
	                  length=20, reaction_weight=-1, num_tests=2):
	        """Run ``auto_ist`` *num_tests* times and accumulate the statistics.

	        Args:
	            story_till_now: Prompt used for every test.
	            num_generation: Rounds per test, forwarded to ``auto_ist``.
	            length: Forwarded to ``auto_ist``.
	            reaction_weight: Weight forwarded to ``auto_ist``; the sentinel
	                ``-1`` means "draw a fresh random weight, rounded to one
	                decimal, for each test".
	            num_tests: Number of independent tests to run.

	        Side effects:
	            Replaces ``self.stories`` with this run's final stories, appends
	            the new rows (with a ``test_id`` column) to ``self.stats_df``,
	            re-indexes it from 0 and prints it.
	        """
	        use_random_w = reaction_weight == -1
	        self.stories = []

	        # Continue numbering after the highest test_id already recorded.
	        if 'test_id' in self.stats_df.columns and not self.stats_df.empty:
	            first_test_id = int(self.stats_df['test_id'].max()) + 1
	        else:
	            first_test_id = 0

	        for test_id in range(num_tests):
	            if use_random_w:
	                reaction_weight = np.round(np.random.random_sample(), 1)
	            # Forward the caller's settings (previously hard-coded to 4 / 20).
	            stats_df0, _story_till_now = self.auto_ist(
	                story_till_now=story_till_now,
	                num_generation=num_generation,
	                length=length, reaction_weight=reaction_weight)
	            stats_df0.insert(loc=0, column='test_id',
	                             value=test_id + first_test_id)

	            # Skip the concat while nothing has been accumulated yet; pandas
	            # deprecates concatenating with empty frames.
	            if self.stats_df.empty:
	                self.stats_df = stats_df0
	            else:
	                self.stats_df = pd.concat([self.stats_df, stats_df0])
	            printj.yellow(f'test_id: {test_id}')
	            printj.green(stats_df0)
	            self.stories.append(_story_till_now)
	        self.stats_df = self.stats_df.reset_index(drop=True)
	        print(self.stats_df)

	    def save_stats(self, path='pandas_simple.xlsx'):
	        """Write ``self.stats_df`` to sheet ``IST`` of an Excel file at *path*.

	        The writer is used as a context manager so the file is saved and
	        closed even if ``to_excel`` raises; ``ExcelWriter.save()`` no longer
	        exists in pandas 2.x.
	        """
	        with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
	            self.stats_df.to_excel(writer, sheet_name='IST')