Spaces:

oscars47
/

Thinking_Parrot_Reading_Club

Runtime error

App Files Files Community

Thinking_Parrot_Reading_Club / app.py

oscars47

Update app.py

c727b7c over 2 years ago

raw

history blame contribute delete

6.98 kB

	import command
	# need to update pip
	# Runs at import time, before the third-party imports further down.
	# NOTE(review): the whole command line is passed as ONE list element. If
	# `command.run` hands the list to the OS without a shell, that string would
	# be looked up as a single executable name -- confirm against the `command`
	# package documentation. `res` is not read anywhere in the visible file.
	res = command.run(['/usr/local/bin/python -m pip install --upgrade pip'])

	import gradio as gr
	import numpy as np
	import keras

	# helper files----
	# 1. clean data
	# convert all text to lower case for ease of use and strip unwanted characters
	# takes in the raw text as a string (not a file path)
	def clean_data(text):
	    """Lower-case *text* and strip the characters listed in ``forbidden_char``.

	    Rules applied while scanning left to right:

	    * a forbidden character is dropped;
	    * a kept character directly followed by a newline gets a single space
	      appended (unless it already is a space), so line breaks turn into
	      word separators;
	    * a kept character followed by '¬' and then a newline within the next
	      two positions is kept and the scan jumps four positions ahead; if no
	      newline follows the '¬', the character is dropped together with
	      the '¬';
	    * the characters left over at the end of the scan (normally the last
	      three) are only filtered against ``forbidden_char``.

	    Args:
	        text: the raw string to clean.

	    Returns:
	        The cleaned, lower-cased string. Inputs of any length, including the
	        empty string, are accepted.
	    """
	    text = text.lower()

	    # NOTE: r'\|' is the two-character string backslash + pipe -- the same
	    # value the original invalid escape '\|' evaluated to. It can never equal
	    # a single character, so '|' is NOT filtered. Kept as-is on purpose:
	    # changing it would change the alphabet built from the cleaned corpus,
	    # which presumably has to match the one the saved model was trained with.
	    forbidden_char = frozenset([
	        '\n', '\\', '^', '{', r'\|', '}', '~', '£',
	        '¥', '§', '©', '«', '¬', '®', '°', '»', '„', '•', '™', '■', '□', '►',
	    ])

	    temp = []
	    i = 0
	    # each step looks up to three characters ahead, so stop three short of the end
	    while i < len(text) - 3:
	        char = text[i]
	        char_next = text[i + 1]

	        if char not in forbidden_char:
	            if char_next == '¬':
	                # '¬' with a newline one or two positions after it: keep
	                # char and jump over the marker (3 here + 1 below)
	                if '\n' in (text[i + 2], text[i + 3]):
	                    temp.append(char)
	                    i += 3
	            else:
	                temp.append(char)
	                # replace the upcoming line break by a single space
	                if char_next == '\n' and char != ' ':
	                    temp.append(' ')
	        i += 1

	    # Whatever the scan did not reach: filter it directly. Slicing from i
	    # (instead of reusing the loop's look-ahead variables) stays correct for
	    # inputs shorter than four characters and after a '¬' jump near the end.
	    temp.extend(ch for ch in text[i:] if ch not in forbidden_char)

	    return ''.join(temp)


	# get nasrudin text cleaned
	# The encoding is given explicitly so the corpus decodes the same way on
	# every platform; the cleaner filters non-ASCII characters such as '¬'.
	# NOTE(review): assumes sufis_full.txt is UTF-8 -- confirm against the file.
	with open('sufis_full.txt', 'r', encoding='utf-8') as file:
	    text = file.read()
	nasrudin = clean_data(text)

	# 2. helper function to parse string into alphabet and get mapping dictionaries from char to int and int to char
	def parse_text(text):
	    """Build the character alphabet of *text* and its two lookup tables.

	    Args:
	        text: string whose distinct characters form the alphabet.

	    Returns:
	        A tuple ``(alphabet, char_to_int, int_to_char)`` where ``alphabet``
	        is the sorted list of unique characters, ``char_to_int`` maps each
	        character to its position in that list and ``int_to_char`` is the
	        inverse mapping.
	    """
	    # unique characters in sorted order, so the indices are deterministic
	    alphabet = sorted(set(text))

	    # 1-1 maps in both directions so we can convert seamlessly
	    char_to_int = {c: i for i, c in enumerate(alphabet)}
	    int_to_char = dict(enumerate(alphabet))

	    return alphabet, char_to_int, int_to_char

	alphabet, char_to_int, int_to_char = parse_text(nasrudin)

	# maxChar is the length of the sentence window which we train on -- do not
	# change this. (A `global` statement at module level has no effect, so a
	# plain assignment is all that is needed.)
	maxChar = 40

	# helper functions from Keras

	# interpret probabilities
	def sample(preds, temperature=1.0):
	    """Sample an index from a probability array, reshaped by *temperature*.

	    The probabilities are raised to the power ``1 / temperature`` and
	    renormalised, so temperatures below 1 sharpen the distribution and
	    temperatures above 1 flatten it. Ten draws are then taken from the
	    resulting multinomial distribution and the most frequently drawn index
	    is returned.

	    Args:
	        preds: 1-D sequence of non-negative probabilities.
	        temperature: positive scaling factor; 1.0 leaves ``preds`` unchanged.

	    Returns:
	        The sampled index (a NumPy integer).

	    Raises:
	        ValueError: if ``temperature`` is not positive.
	    """
	    if temperature <= 0:
	        raise ValueError('temperature must be positive')

	    preds = np.asarray(preds).astype('float64')
	    # p ** (1/T) is exp(log(p) / T) written without the log, so zero
	    # probabilities stay zero instead of going through log(0). Multiplying by
	    # the constant exp(1/T), as before, cancelled out in the normalisation
	    # below and left the temperature without any effect.
	    scaled = np.power(preds, 1.0 / temperature)
	    preds = scaled / np.sum(scaled)
	    # create multinomial distribution; run experiment 10 times, select most frequent outcome
	    probas = np.random.multinomial(10, preds, 1)
	    return np.argmax(probas)

	# helper function that we call to generate text
	# takes in an input string, hdf5 trained model, and desired output length of text
	# NOTE(review): nothing in the visible file reads this list, and
	# generate_text always loads the Nasrudin model -- presumably kept for a
	# future model selector; confirm before removing.
	model_types = ['Nasrudin', 'Shakespeare', 'Hemingway']

	# function takes in the input string and the number of characters to generate, as provided by the huggingface inputs
	def generate_text(input, text_len):
	    """Generate ``text_len`` characters that continue the user's ``input``.

	    The last ``maxChar`` characters of the cleaned input are one-hot encoded
	    over ``alphabet`` and fed to the saved Keras model; each predicted
	    character is appended to the output and shifted into the window for the
	    next prediction.

	    Args:
	        input: seed text; needs at least ``maxChar`` characters, both before
	            and after cleaning.
	        text_len: number of characters to generate; an int, or a float with
	            an integral value (e.g. 40.0).

	    Returns:
	        The generated text (the seed itself is not included).

	    Raises:
	        gr.Error: if the input is too short, contains characters outside the
	            alphabet, or ``text_len`` is not a whole number.
	    """
	    # make sure at least maxChar characters are supplied
	    if len(input) < maxChar:
	        raise gr.Error('Input must have >= %i characters. You have %i.' %(maxChar, len(input)))

	    # make sure output num characters is integer; a numeric widget may hand
	    # over a whole number as a float (e.g. 40.0), which range() cannot use
	    if isinstance(text_len, float) and text_len.is_integer():
	        text_len = int(text_len)
	    if isinstance(text_len, bool) or not isinstance(text_len, int):
	        raise gr.Error('Number of generated characters must be an integer!')

	    # clean input data
	    input = clean_data(input)

	    # cleaning can remove characters, so the window must be re-checked;
	    # otherwise the model would receive fewer than maxChar time steps
	    if len(input) < maxChar:
	        raise gr.Error('Input must have >= %i usable characters after cleaning. You have %i.' % (maxChar, len(input)))

	    # grab last maxChar characters
	    sentence = input[-maxChar:]

	    # every window character needs a slot in the one-hot encoding; report
	    # unknown ones clearly instead of failing with a KeyError further down
	    unknown = sorted(set(sentence) - set(char_to_int))
	    if unknown:
	        raise gr.Error('Input contains characters I have not learned: %s' % ' '.join(unknown))

	    # load desired model -- change this as we generate new models!
	    # NOTE(review): the model is re-loaded from disk on every call.
	    model = keras.models.load_model('nasrudin_v1.0.0.hdf5')

	    # randomly pick diversity parameter (passed to sample() as its temperature)
	    diversities = [0.2, 0.5, 1.0, 1.2]
	    diversity = diversities[int(np.random.random() * len(diversities))]

	    # generate text_len characters worth of text
	    generated = []
	    for _ in range(text_len):
	        # one-hot encode the current window: (1, window length, alphabet size)
	        x_pred = np.zeros((1, len(sentence), len(alphabet)))
	        for t, char in enumerate(sentence):
	            x_pred[0, t, char_to_int[char]] = 1.0

	        # use the current model to predict what outputs are
	        preds = model.predict(x_pred, verbose=0)[0]
	        # interpret the probabilities and convert the chosen index to a character
	        next_char = int_to_char[sample(preds, diversity)]

	        generated.append(next_char)
	        # slide the window: drop the oldest character, add the prediction
	        sentence = sentence[1:] + next_char

	    return ''.join(generated)

	# call hugging space interactive interface; use Blocks

	with gr.Blocks() as think:
	    # intro blurb
	    gr.Markdown("Hi! I'm Thinking Parrot, a text generating AI! 🦜" )

	    # accordion with usage instructions
	    with gr.Accordion("Click for more details!"):
	        gr.Markdown("Simply type at least 40 characters into the box labeled 'Your Input Text' below and then select the number of output characters you want (note: try lower values for a faster response). Then click 'Think'! My response will appear in the box labeled 'My Response'.")

	    # set up user interface; the names avoid shadowing the builtin `input`
	    think_inputs = [
	        gr.Textbox(label='Your Input Text'),
	        gr.Slider(minimum=40, maximum=500, label='Number of output characters', step=10),
	    ]
	    think_output = gr.Textbox(label='My Response')
	    think_btn = gr.Button('Think!')
	    # the two input components are passed to generate_text positionally
	    think_btn.click(fn=generate_text, inputs=think_inputs, outputs=think_output)

	# enable queueing if heavy traffic
	# NOTE(review): `concurrency_count` depends on the installed Gradio version --
	# confirm it is still accepted by the version this Space pins.
	think.queue(concurrency_count=3)
	think.launch()