# Spaces:
# Sleeping
# Sleeping
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json

# Path to the Q&A dataset: a JSON file with a top-level "questions" list
# whose entries carry a "response" field (see find_question_and_answer).
dataset_path = '../EbubeJohnEnyi/Q_and_A'

# Load the pretrained GPT-2 tokenizer and model once at import time so the
# Streamlit app does not reload them on every interaction.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
def compare_sentences(sentence1, sentence2):
    """Return the cosine similarity between two sentences.

    The sentences are vectorized with a bag-of-words CountVectorizer fitted
    on just this pair, so the score reflects word overlap only (no word
    order, no semantics). Score is in [0.0, 1.0].

    Args:
        sentence1: First sentence (str).
        sentence2: Second sentence (str).

    Returns:
        float: cosine similarity of the two bag-of-words vectors.
    """
    # fit_transform returns the 2x|vocab| document-term matrix,
    # not the vectorizer object itself.
    term_matrix = CountVectorizer().fit_transform([sentence1, sentence2])
    similarity = cosine_similarity(term_matrix)
    # Off-diagonal entry [0, 1] is the similarity between the two inputs.
    return similarity[0, 1]
def generate_gpt2_response(question):
    """Generate a free-form continuation of *question* with GPT-2.

    Uses the module-level ``tokenizer`` and ``model``. Beam search with
    5 beams and a bigram no-repeat constraint; output length is capped at
    roughly 100 tokens beyond the prompt.

    Args:
        question: The user's question (str).

    Returns:
        str: The decoded model output (includes the prompt text, since
        GPT-2 generation continues from the encoded input).
    """
    input_ids = tokenizer.encode(question, return_tensors='pt')
    generated_output = model.generate(
        input_ids,
        max_length=len(input_ids[0]) + 100,  # prompt length + ~100 new tokens
        num_beams=5,
        no_repeat_ngram_size=2,
        top_k=10,
        top_p=1,
        temperature=0.9,
    )
    generated_response = tokenizer.decode(generated_output[0],
                                          skip_special_tokens=True)
    return generated_response
def find_question_and_answer(dataset_file, question):
    """Pick the best canned response for *question*, or fall back to GPT-2.

    Scans every entry under the dataset's "questions" key, scoring the
    (lower-cased) user question against each entry's "response" text with
    bag-of-words cosine similarity. If the best score falls below the
    threshold, a GPT-2 generation replaces the canned answer.

    Args:
        dataset_file: Path to the JSON dataset file.
        question: The user's question (str).

    Returns:
        str: The selected canned response, a GPT-2 generation, or a
        default apology message.
    """
    with open(dataset_file, "r") as json_file:
        data = json.load(json_file)

    question = question.lower()
    max_similarity = 0
    selected_response = None
    for q_and_a in data.get("questions", []):
        # NOTE(review): this scores the user's question against the stored
        # *response* text, not a stored question field — confirm that is
        # intended given the dataset schema.
        response_message = q_and_a.get("response", "").lower()
        similarity_score = compare_sentences(question, response_message)
        if similarity_score > max_similarity:
            max_similarity = similarity_score
            selected_response = q_and_a.get("response", "")

    # Below this similarity the canned answer is considered too weak and a
    # fresh GPT-2 generation is used instead.
    similarity_threshold = 0.4  # Adjust this threshold as needed
    if max_similarity < similarity_threshold:
        selected_response = generate_gpt2_response(question)

    # Fallback to a default message if no suitable response is found.
    if selected_response is None:
        selected_response = "CHAT BOT --> I'm sorry, I don't have data about that.\n"
    return selected_response
# --- Streamlit UI ---
st.title("Chatbot App")
user_input = st.text_input("Ask a question:")
# Only query the backend once the user has typed something; this avoids
# running the similarity search (and a pointless GPT-2 generation) on the
# empty string that text_input yields on the initial render.
if user_input:
    response = find_question_and_answer(dataset_path, user_input)
    st.text(response)
# dataset_path = 'EbubeJohnEnyi/Q_and_A'
# import streamlit as st
# from transformers import pipeline
# from flask import Flask, render_template, request
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import json
# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter your text here: ')
# if text:
#     out = pipe(text)
#     print(out)