Spaces:
Sleeping
Sleeping
File size: 3,047 Bytes
01f68bf 79ede05 c540671 5b95f85 36e43d9 c540671 9aef249 0e2d485 05e5422 36e43d9 8445ccb 280e02c c540671 36e43d9 c540671 36e43d9 c540671 36e43d9 c540671 36e43d9 187169b 030d561 36e43d9 c540671 802898a 01f68bf c540671 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
# Load the pretrained GPT-2 tokenizer and model once at import time.
# NOTE(review): from_pretrained downloads weights on first run — this is
# network I/O at module import, which Streamlit re-executes on each rerun
# unless cached; consider st.cache_resource (left unchanged here).
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
# Path to the canned question/answer dataset consulted before falling
# back to GPT-2 generation (expected schema: {"questions": [{"response": ...}]}).
json_file_path = 'Q_and_A_Lagos.json'
def compare_sentences(sentence1, sentence2):
    """Return the cosine similarity between two sentences' token-count vectors.

    Args:
        sentence1: First sentence (str).
        sentence2: Second sentence (str).

    Returns:
        float in [0.0, 1.0]; 1.0 for identical token distributions, 0.0 when
        the sentences share no tokens or when neither sentence contains any
        countable token at all.
    """
    try:
        vectors = CountVectorizer().fit_transform([sentence1, sentence2])
    except ValueError:
        # CountVectorizer raises "empty vocabulary" when both inputs lack
        # tokens matching its default pattern (e.g. empty strings or
        # punctuation-only text). Treat that as "no similarity" instead of
        # crashing on blank user input.
        return 0.0
    similarity = cosine_similarity(vectors)
    # Off-diagonal entry is the similarity between the two input sentences.
    return similarity[0, 1]
def generate_gpt2_response(question):
    """Generate a GPT-2 continuation of *question* via beam search.

    Args:
        question: Prompt text (str). An empty/untokenizable prompt yields a
            fixed placeholder message instead of calling the model.

    Returns:
        str: the decoded generation (which includes the prompt itself), or
        the placeholder message for empty input.
    """
    input_ids = tokenizer.encode(question, return_tensors='pt').to(model.device)
    if input_ids.size(1) == 0:
        # Nothing to feed the model (e.g. a blank text area).
        return "Generated response is empty OR Input your question"
    # Beam search over up to 100 new tokens. The original call also passed
    # top_k/top_p/temperature, but those sampling knobs are ignored when
    # do_sample is False (the default), so they are dropped here — the
    # generation behavior is unchanged and the config no longer misleads.
    generated_output = model.generate(
        input_ids,
        max_length=input_ids.size(1) + 100,
        num_beams=5,
        no_repeat_ngram_size=2,
        # GPT-2 has no pad token; reuse EOS so generate() can pad.
        pad_token_id=model.config.eos_token_id,
    )
    return tokenizer.decode(generated_output[0], skip_special_tokens=True)
def find_question_and_answer(json_file, question):
    """Answer *question* from the canned Q&A dataset, falling back to GPT-2.

    Scans every entry in the dataset, keeps the response with the highest
    cosine similarity to the question, and switches to GPT-2 generation when
    the best match is below a fixed threshold.

    Args:
        json_file: Path to a JSON file shaped like
            {"questions": [{"response": <str>, ...}, ...]}.
        question: Raw user question (str); lowercased before comparison.

    Returns:
        str: the selected canned response, a GPT-2 generation, or a default
        apology message if nothing else was produced.
    """
    # Don't shadow the `json_file` path parameter with the file handle,
    # and read with an explicit encoding.
    with open(json_file, "r", encoding="utf-8") as fh:
        data = json.load(fh)

    question = question.lower()
    max_similarity = 0.0
    selected_response = None

    for q_and_a in data["questions"]:
        # NOTE(review): this scores the user's question against each stored
        # *response* text, not a stored question field — confirm that is the
        # intended matching strategy for this dataset.
        response_message = q_and_a["response"].lower()
        similarity_score = compare_sentences(question, response_message)
        if similarity_score > max_similarity:
            max_similarity = similarity_score
            selected_response = q_and_a["response"]

    # Below this threshold the canned answers are considered a poor match
    # and GPT-2 generation takes over. Adjust as needed.
    similarity_threshold = 0.4
    if max_similarity < similarity_threshold:
        selected_response = generate_gpt2_response(question)

    # Defensive fallback if no response was selected or generated.
    if selected_response is None:
        selected_response = "CHAT BOT --> I'm sorry, I don't have data about that.\n"
    return selected_response
if __name__ == '__main__':
    # Streamlit entry point: collect a question, resolve it against the
    # dataset (or GPT-2), and render the answer.
    question = st.text_area("Enter your question: ")
    answer = find_question_and_answer(json_file_path, question)
    st.write(answer)
# dataset_path = 'EbubeJohnEnyi/Q_and_A'
# import streamlit as st
# from transformers import pipeline
# from flask import Flask, render_template, request
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import json
# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter your text here: ')
# if text:
# out = pipe(text)
# print(out) |