File size: 4,965 Bytes
a05e4a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
import pandas as pd
import string
import time
#import re
#import torch
#import tensorflow as tf
#import matplotlib.pyplot as plt
import pickle as pkl
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
from deepmultilingualpunctuation import PunctuationModel
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group


@st.cache(allow_output_mutation=True)
def load_horoscope_model():
    """Load and cache the trained Keras horoscope-generation model.

    Streamlit's cache ensures 'horoscopeModel.h5' is read from disk only
    once per session rather than on every script rerun.
    """
    return load_model('horoscopeModel.h5')


model = load_horoscope_model()

@st.cache(allow_output_mutation=True)
def load_punc_model():
    """Build and cache the punctuation-restoration model.

    PunctuationModel loads transformer weights, which is expensive, so the
    result is cached across Streamlit reruns.
    """
    return PunctuationModel()


punctuation_model = load_punc_model()


@st.cache
def load_get_word():
    """Load and cache the index-to-word lookup from 'get_word.pkl'.

    Returns:
        The unpickled lookup object (a dict keyed by token index; it is
        the inverse of the tokenizer's word_index — see the commented-out
        build code below).
    """
    # Use a context manager so the file handle is always closed;
    # the original opened the file and never closed it.
    with open('get_word.pkl', 'rb') as fileo:
        return pkl.load(fileo)


get_word = load_get_word()

@st.cache
def load_tokenizer():
    """Load and cache the fitted Keras tokenizer from 'horoscope_tokenizer.pkl'.

    Returns:
        The unpickled tokenizer used to convert seed text into token
        sequences for the model.
    """
    # Use a context manager so the file handle is always closed;
    # the original opened the file and never closed it.
    with open('horoscope_tokenizer.pkl', 'rb') as fileo:
        return pkl.load(fileo)


tokenizer = load_tokenizer()


# with st.spinner("Loading the cosmos..."):
#     # #load models
#     # punctuation_model = PunctuationModel()

#     # # open the model file
#     # model=load_model('horoscopeModel.h5')
#     # # model.summary()
    
#     # open the get_word file
#     fileo = open('get_word.pkl' , "rb")
#     # loading data
#     get_word = pkl.load(fileo)

#     # open the horoscope_tokenizer file
#     fileo = open('horoscope_tokenizer.pkl' , "rb")
#     # loading data
#     tokenizer = pkl.load(fileo)


# #load data
# url = 'https://raw.githubusercontent.com/nicsusuki/horoscope-streamlit-app/main/horoscopes.csv'
# data = pd.read_csv(url,
#                   error_bad_lines=False, 
#                   sep = "|", header = None, 
#                   names = ["text", "date", "sign"], index_col = 0)


st.title("Horoscope Generator")

# Zodiac-sign picker; the chosen sign (or free-typed text) becomes the
# seed text fed to write_horoscope() below.
# Fix: "Sagitarius" was misspelled in the user-facing options — corrected
# to "Sagittarius".
query = st.selectbox(
    'What is your sign?',
    ('Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra', 'Scorpio',
     'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces', 'Generate my own'))

if query == 'Generate my own':
    query = st.text_input("Type horoscope seed text here")

search_button = st.button('Search the cosmos!')

# words = ""
# stopwords = set(STOPWORDS)
# for review in data.text.values:
#     text = str(review)
#     text = text.split()
#     words += " ".join([(i.lower() + " ") for i in text])
    
# #cleaning function - lowercase, remove punc
# def clean_text(text):
#     words = str(text).split()
#     words = [i.lower() + " " for i in words]
#     words = " ".join(words)
#     words = words.translate(words.maketrans('', '', string.punctuation))
#     return words

# data['text'] = data['text'].apply(clean_text)

# #tokenize the data
# Vocabulary size the tokenizer/model were trained with; also the length of
# the probability vector returned by model.predict in write_horoscope().
vocab_size = 15000
# max_length = 50
# oov_tok = "<OOV>"

# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)
# tokenizer.fit_on_texts(data.text.values)
# word_index = tokenizer.word_index

# get_word = {v: k for k, v in word_index.items()}

# #create n-grams
# sequences = tokenizer.texts_to_sequences(data.text.values[::100])

# n_gram_sequences = []
# for sequence in sequences:
#     for i,j in enumerate(sequence):
#         if i < (len(sequence) - 10):
#             s = sequence[i:i + 10]
#             for k, l in enumerate(s):
#                 n_gram_sequences.append(s[:k + 1])
        
# np.array(n_gram_sequences).shape

# n_gram_sequences = np.array(n_gram_sequences)
# Model input window: write_horoscope() pads each sequence to max_len - 1.
max_len = 10 #max([len(i) for i in n_gram_sequences]) ##max len = 10

#predict horoscopes
# Number of words to generate per horoscope (average length of the
# training horoscopes, per the commented-out computation).
avg_length = 44 #int(len(words.split())/len(data))  ## average length of horoscope 44

#takes seed text and generates horoscopes using closest matching words
#uses random choice element to change horoscopes returned
#@st.cache
def write_horoscope(seed_text):
    """Generate a horoscope by repeatedly sampling the next word from the model.

    Starting from `seed_text`, appends `avg_length` words. Each step
    tokenizes the running text, pads it to the model's input window
    (`max_len - 1`), and samples the next token index from the predicted
    distribution so repeated runs produce different horoscopes. If the
    sampled index is the OOV token (index 1), the most likely non-OOV
    token is used instead.

    Args:
        seed_text: initial text, e.g. the user's star sign.

    Returns:
        The seed text extended with the generated words (unpunctuated).
    """
    for _ in range(avg_length):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_len - 1, padding='pre')
        pred_probs = model.predict(token_list)
        # Renormalize in float64: the model's float32 softmax output can
        # miss summing to exactly 1, which makes np.random.choice raise
        # "probabilities do not sum to 1".
        probs = pred_probs[0].astype(np.float64)
        probs /= probs.sum()
        # Sample an integer index directly instead of rebuilding a
        # 15000-element float linspace on every iteration.
        predicted = int(np.random.choice(vocab_size, p=probs))
        if predicted == 1:  # OOV token: fall back to the most likely real word.
            pred_probs[0][1] = 0
            predicted = int(np.argmax(pred_probs))
        output_word = get_word[predicted]
        seed_text += " " + output_word
    return seed_text


# When the user clicks the button, generate a horoscope from the chosen
# seed text, restore punctuation, and display the result.
if search_button:
    st.markdown(f"**Searching the cosmos for your horoscope:** {query}")
    with st.spinner("Consulting the oracle..."):
        time.sleep(2)  # brief dramatic pause before generating
        horoscope_text = write_horoscope(query)
        horoscope = punctuation_model.restore_punctuation(horoscope_text)
        st.success(horoscope)