File size: 4,965 Bytes
a05e4a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
import pandas as pd
import string
import time
#import re
#import torch
#import tensorflow as tf
#import matplotlib.pyplot as plt
import pickle as pkl
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
from deepmultilingualpunctuation import PunctuationModel
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group


@st.cache(allow_output_mutation=True)
def load_horoscope_model():
    """Load and cache the trained Keras horoscope-generation model.

    Streamlit's cache ensures 'horoscopeModel.h5' is read from disk only
    once per session rather than on every script rerun.
    """
    return load_model('horoscopeModel.h5')


model = load_horoscope_model()

@st.cache(allow_output_mutation=True)
def load_punc_model():
    """Build and cache the punctuation-restoration model.

    PunctuationModel loads transformer weights, which is expensive, so the
    result is cached across Streamlit reruns.
    """
    return PunctuationModel()


punctuation_model = load_punc_model()


@st.cache
def load_get_word():
    """Load and cache the index-to-word lookup from 'get_word.pkl'.

    Returns:
        The unpickled lookup object (a dict keyed by token index; it is
        the inverse of the tokenizer's word_index — see the commented-out
        build code below).
    """
    # Use a context manager so the file handle is always closed;
    # the original opened the file and never closed it.
    with open('get_word.pkl', 'rb') as fileo:
        return pkl.load(fileo)


get_word = load_get_word()

@st.cache
def load_tokenizer():
    """Load and cache the fitted Keras tokenizer from 'horoscope_tokenizer.pkl'.

    Returns:
        The unpickled tokenizer used to convert seed text into token
        sequences for the model.
    """
    # Use a context manager so the file handle is always closed;
    # the original opened the file and never closed it.
    with open('horoscope_tokenizer.pkl', 'rb') as fileo:
        return pkl.load(fileo)


tokenizer = load_tokenizer()


# with st.spinner("Loading the cosmos..."):
#     # #load models
#     # punctuation_model = PunctuationModel()

#     # # open the model file
#     # model=load_model('horoscopeModel.h5')
#     # # model.summary()
    
#     # open the get_word file
#     fileo = open('get_word.pkl' , "rb")
#     # loading data
#     get_word = pkl.load(fileo)

#     # open the horoscope_tokenizer file
#     fileo = open('horoscope_tokenizer.pkl' , "rb")
#     # loading data
#     tokenizer = pkl.load(fileo)


# #load data
# url = 'https://raw.githubusercontent.com/nicsusuki/horoscope-streamlit-app/main/horoscopes.csv'
# data = pd.read_csv(url,
#                   error_bad_lines=False, 
#                   sep = "|", header = None, 
#                   names = ["text", "date", "sign"], index_col = 0)


st.title("Horoscope Generator")

# Zodiac-sign picker; the chosen sign (or free-typed text) becomes the
# seed text fed to write_horoscope() below.
# Fix: "Sagitarius" was misspelled in the user-facing options — corrected
# to "Sagittarius".
query = st.selectbox(
    'What is your sign?',
    ('Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra', 'Scorpio',
     'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces', 'Generate my own'))

if query == 'Generate my own':
    query = st.text_input("Type horoscope seed text here")

search_button = st.button('Search the cosmos!')

# words = ""
# stopwords = set(STOPWORDS)
# for review in data.text.values:
#     text = str(review)
#     text = text.split()
#     words += " ".join([(i.lower() + " ") for i in text])
    
# #cleaning function - lowercase, remove punc
# def clean_text(text):
#     words = str(text).split()
#     words = [i.lower() + " " for i in words]
#     words = " ".join(words)
#     words = words.translate(words.maketrans('', '', string.punctuation))
#     return words

# data['text'] = data['text'].apply(clean_text)

# #tokenize the data
# Vocabulary size the tokenizer/model were trained with; also the length of
# the probability vector returned by model.predict in write_horoscope().
vocab_size = 15000
# max_length = 50
# oov_tok = "<OOV>"

# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)
# tokenizer.fit_on_texts(data.text.values)
# word_index = tokenizer.word_index

# get_word = {v: k for k, v in word_index.items()}

# #create n-grams
# sequences = tokenizer.texts_to_sequences(data.text.values[::100])

# n_gram_sequences = []
# for sequence in sequences:
#     for i,j in enumerate(sequence):
#         if i < (len(sequence) - 10):
#             s = sequence[i:i + 10]
#             for k, l in enumerate(s):
#                 n_gram_sequences.append(s[:k + 1])
        
# np.array(n_gram_sequences).shape

# n_gram_sequences = np.array(n_gram_sequences)
# Model input window: write_horoscope() pads each sequence to max_len - 1.
max_len = 10 #max([len(i) for i in n_gram_sequences]) ##max len = 10

#predict horoscopes
# Number of words to generate per horoscope (average length of the
# training horoscopes, per the commented-out computation).
avg_length = 44 #int(len(words.split())/len(data))  ## average length of horoscope 44

#takes seed text and generates horoscopes using closest matching words
#uses random choice element to change horoscopes returned
#@st.cache
def write_horoscope(seed_text):
    """Generate a horoscope by repeatedly sampling the next word from the model.

    Starting from `seed_text`, appends `avg_length` words. Each step
    tokenizes the running text, pads it to the model's input window
    (`max_len - 1`), and samples the next token index from the predicted
    distribution so repeated runs produce different horoscopes. If the
    sampled index is the OOV token (index 1), the most likely non-OOV
    token is used instead.

    Args:
        seed_text: initial text, e.g. the user's star sign.

    Returns:
        The seed text extended with the generated words (unpunctuated).
    """
    for _ in range(avg_length):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_len - 1, padding='pre')
        pred_probs = model.predict(token_list)
        # Renormalize in float64: the model's float32 softmax output can
        # miss summing to exactly 1, which makes np.random.choice raise
        # "probabilities do not sum to 1".
        probs = pred_probs[0].astype(np.float64)
        probs /= probs.sum()
        # Sample an integer index directly instead of rebuilding a
        # 15000-element float linspace on every iteration.
        predicted = int(np.random.choice(vocab_size, p=probs))
        if predicted == 1:  # OOV token: fall back to the most likely real word.
            pred_probs[0][1] = 0
            predicted = int(np.argmax(pred_probs))
        output_word = get_word[predicted]
        seed_text += " " + output_word
    return seed_text


# When the user clicks the button, generate a horoscope from the chosen
# seed text, restore punctuation, and display the result.
if search_button:
    st.markdown(f"**Searching the cosmos for your horoscope:** {query}")
    with st.spinner("Consulting the oracle..."):
        time.sleep(2)  # brief dramatic pause before generating
        horoscope_text = write_horoscope(query)
        horoscope = punctuation_model.restore_punctuation(horoscope_text)
        st.success(horoscope)