# Jokes_Generator / app.py
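"""Gradio app that answers a text prompt with up to three responses: the closest
sentence retrieved from corpus.txt via TF-IDF, a WordNet-based word substitution,
and the pyjokes joke whose sentence embedding best matches the input."""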
import string

import gradio as gr
import nltk
import numpy as np
import pyjokes
from nltk.corpus import wordnet as wn
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Downloads every NLTK corpus; only the tokenizers (punkt) and WordNet data
# are strictly needed by this app.
nltk.download('all')

# import fastai
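
# Thin wrapper around sklearn's cosine_similarity (expects 2D arrays; rows are vectors).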
def similarity(input, joke):
    return cosine_similarity(input, joke)

def get_best(input):
    # Return the pyjokes joke whose sentence embedding is most similar to the input.
    model = SentenceTransformer('bert-base-nli-mean-tokens')
    max_similarity = -1
    max_idx = 0
    jokes = pyjokes.get_jokes(language='en', category='all')
    jokes_embedding = model.encode(jokes)
    input_embedding = model.encode(input)
    for idx, joke_embedding in enumerate(jokes_embedding):
        # cosine_similarity expects 2D arrays shaped (n_samples, n_features),
        # so each embedding becomes a single row vector.
        sim = similarity(joke_embedding.reshape(1, -1),
                         input_embedding.reshape(1, -1))
        if np.sum(sim) > np.sum(max_similarity):
            max_idx = idx
            max_similarity = sim
    if np.sum(max_similarity) != -1:
        return jokes[max_idx] + '😁🀣'
    else:
        return None

def generate_list(input):
    # Enumerate every contiguous substring of the input with length >= 2.
    result = []
    n = len(input)
    for length in range(2, n + 1):
        for i in range(n - length + 1):
            result.append(input[i:i + length])
    return result

def pattern(input):
    # Swap one substring of the input for a loosely related WordNet term
    # (found via hypernym/hyponym hops) to produce a playful rewrite.
    response = input
    for substr in generate_list(input):
        try:
            syn = wn.synsets(substr)[1].hypernyms()[0].hyponyms()[
                0].hyponyms()[0].lemmas()[0].name()
        except IndexError:
            continue
        response = response.replace(substr, syn.upper())
        break
    if input == response:
        return None
    return response + '??😁🀣'
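
# Helpers for the TF-IDF vectorizer in bot(): lowercase, strip punctuation,
# tokenize, and lemmatize each sentence before vectorizing.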
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

def NLTK(input):
    # Retrieve the corpus sentence most relevant to the user's input.
    with open('corpus.txt', errors='strict') as f:
        data = f.read()
    data = data.lower()
    sent_tokens = nltk.sent_tokenize(data)
    # Append the input last so that tfidf[-1] in bot() is the input's vector.
    sent_tokens.append(input.lower())
    return bot(sent_tokens)

def bot(sent_tokens):
    # TF-IDF retrieval: the last entry of sent_tokens is the user's input;
    # return the closest corpus sentence (index -2, since -1 is the input itself).
    robo1_response = ''
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if req_tfidf == 0:
        robo1_response = robo1_response + "I could not answer this right now but you can contact the head of our dept (PUSPHA RAJ)."  # add the dept recommendation engine and contact details
        return robo1_response
    else:
        robo1_response = robo1_response + sent_tokens[idx]
        return robo1_response

def generator(input=None):
    response = []
    if input:
        out1 = NLTK(input)
        if out1:
            response.append(out1)
        out2 = pattern(input)
        if out2:
            response.append(out2)
        out3 = get_best(input)
        if out3:
            response.append(out3)
    else:
        out1 = NLTK("Hi, what's the matter")
        if out1:
            response.append(out1)
        out2 = pyjokes.get_joke(language='en', category='all')
        if out2:
            response.append(out2)
    return '\n'.join(response)
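
# Expose generator() as a simple text-in / text-out Gradio demo.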
iface = gr.Interface(fn=generator, inputs="text", outputs="text")
iface.launch()
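
# Quick local check without the web UI (hypothetical example input):
#   print(generator("tell me a programming joke"))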