# Streamlit horoscope generator app.
# (Removed non-Python page-scrape residue: Spaces header, file size, commit
# hash, and line-number gutter that preceded the actual source.)
import numpy as np
import pandas as pd
import string
import time
#import re
#import torch
#import tensorflow as tf
#import matplotlib.pyplot as plt
import pickle as pkl
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
from deepmultilingualpunctuation import PunctuationModel
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group
@st.cache(allow_output_mutation=True)
def load_horoscope_model():
    """Load and cache the trained Keras horoscope text-generation model.

    Returns:
        The Keras model deserialized from 'horoscopeModel.h5'.
    """
    # allow_output_mutation: Keras models mutate internal state on predict(),
    # which would otherwise trip Streamlit's cached-object mutation warning.
    return load_model('horoscopeModel.h5')

model = load_horoscope_model()
@st.cache(allow_output_mutation=True)
def load_punc_model():
    """Load and cache the multilingual punctuation-restoration model."""
    return PunctuationModel()

punctuation_model = load_punc_model()
@st.cache
def load_get_word():
    """Load and cache the token-index -> word lookup table.

    Returns:
        The object unpickled from 'get_word.pkl' — presumably the inverse of
        the tokenizer's word_index (see the commented-out build code below);
        verify against how the pickle was produced.
    """
    # Context manager closes the file even on error; the original opened the
    # handle and never closed it.
    with open('get_word.pkl', 'rb') as fileo:
        return pkl.load(fileo)

get_word = load_get_word()
@st.cache
def load_tokenizer():
    """Load and cache the fitted Keras tokenizer.

    Returns:
        The object unpickled from 'horoscope_tokenizer.pkl'.
    """
    # Context manager closes the file even on error; the original opened the
    # handle and never closed it.
    with open('horoscope_tokenizer.pkl', 'rb') as fileo:
        return pkl.load(fileo)

tokenizer = load_tokenizer()
# with st.spinner("Loading the cosmos..."):
# # #load models
# # punctuation_model = PunctuationModel()
# # # open the model file
# # model=load_model('horoscopeModel.h5')
# # # model.summary()
# # open the get_word file
# fileo = open('get_word.pkl' , "rb")
# # loading data
# get_word = pkl.load(fileo)
# # open the horoscope_tokenizer file
# fileo = open('horoscope_tokenizer.pkl' , "rb")
# # loading data
# tokenizer = pkl.load(fileo)
# #load data
# url = 'https://raw.githubusercontent.com/nicsusuki/horoscope-streamlit-app/main/horoscopes.csv'
# data = pd.read_csv(url,
# error_bad_lines=False,
# sep = "|", header = None,
# names = ["text", "date", "sign"], index_col = 0)
st.title("Horoscope Generator")

# Zodiac sign picker; the last option switches to a free-text seed.
# NOTE(review): 'Sagitarius' is misspelled but kept byte-for-byte — the
# model/tokenizer may have been trained on this exact spelling.
sign_options = (
    'Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra',
    'Scorpio', 'Sagitarius', 'Capricorn', 'Aquarius', 'Pisces',
    'Generate my own',
)
query = st.selectbox('What is your sign?', sign_options)
if query == 'Generate my own':
    query = st.text_input("Type horoscope seed text here")
search_button = st.button('Search the cosmos!')
# words = ""
# stopwords = set(STOPWORDS)
# for review in data.text.values:
# text = str(review)
# text = text.split()
# words += " ".join([(i.lower() + " ") for i in text])
# #cleaning function - lowercase, remove punc
# def clean_text(text):
# words = str(text).split()
# words = [i.lower() + " " for i in words]
# words = " ".join(words)
# words = words.translate(words.maketrans('', '', string.punctuation))
# return words
# data['text'] = data['text'].apply(clean_text)
# #tokenize the data
# Generation hyperparameters. The commented-out code below shows how they
# were derived offline from the training corpus; the live values are the
# precomputed results, hard-coded so the app avoids reprocessing the data.
vocab_size = 15000
# max_length = 50
# oov_tok = "<OOV>"
# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)
# tokenizer.fit_on_texts(data.text.values)
# word_index = tokenizer.word_index
# get_word = {v: k for k, v in word_index.items()}
# #create n-grams
# sequences = tokenizer.texts_to_sequences(data.text.values[::100])
# n_gram_sequences = []
# for sequence in sequences:
# for i,j in enumerate(sequence):
# if i < (len(sequence) - 10):
# s = sequence[i:i + 10]
# for k, l in enumerate(s):
# n_gram_sequences.append(s[:k + 1])
# np.array(n_gram_sequences).shape
# n_gram_sequences = np.array(n_gram_sequences)
# Longest n-gram used during training (so inputs are padded to max_len-1).
max_len = 10 #max([len(i) for i in n_gram_sequences]) ##max len = 10
#predict horoscopes
# Number of words to generate per horoscope (corpus average).
avg_length = 44 #int(len(words.split())/len(data)) ## average length of horoscope 44
#takes seed text and generates horoscopes using closest matching words
#uses random choice element to change horoscopes returned
#@st.cache
def write_horoscope(seed_text):
    """Generate a horoscope by repeatedly sampling next words from the model.

    Args:
        seed_text (str): starting text (a zodiac sign or user-typed seed).

    Returns:
        str: the seed text extended with `avg_length` sampled words.
    """
    # Candidate token ids for sampling, hoisted out of the loop — the
    # original rebuilt np.linspace(0, vocab_size-1, vocab_size) (the same
    # integer grid, as floats) on every iteration.
    token_ids = np.arange(vocab_size)
    for _ in range(avg_length):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_len - 1, padding='pre')
        pred_probs = model.predict(token_list)
        # Sample the next token from the predicted distribution so repeated
        # generations vary instead of always taking the argmax.
        predicted = int(np.random.choice(token_ids, p=pred_probs[0]))
        if predicted == 1:
            # Index 1 is the OOV token: suppress it and fall back to the
            # most likely real word.
            pred_probs[0][1] = 0
            predicted = int(np.argmax(pred_probs))
        output_word = get_word[predicted]
        seed_text += " " + output_word
    return seed_text
if search_button:
    # Echo the chosen sign / seed, then generate and punctuate the text.
    st.markdown("**Searching the cosmos for your horoscope:** " + query)
    with st.spinner("Consulting the oracle..."):
        time.sleep(2)  # brief dramatic pause before the reveal
        raw_horoscope = write_horoscope(query)
        horoscope = punctuation_model.restore_punctuation(raw_horoscope)
        st.success(horoscope)