# Streamlit question-answering demo.
#
# A question typed into the page is matched against the canned responses stored
# in a local JSON file; when nothing is similar enough, GPT-2 generates a reply.

import json

import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Loaded once at import time and shared by every call below.
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# loads may repeat per rerun -- consider caching them; confirm against the
# Streamlit version in use.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Path of the JSON corpus read by find_question_and_answer().
json_file_path = 'Q_and_A_Lagos.json'


def compare_sentences(sentence1, sentence2):
    """Return the cosine similarity between two sentences.

    Both sentences are turned into token-count vectors with a CountVectorizer
    fitted on just these two texts, and the similarity of those two vectors is
    returned.

    Args:
        sentence1: First text.
        sentence2: Second text.

    Returns:
        The similarity score, or 0.0 when neither sentence yields any token.
    """
    try:
        count_matrix = CountVectorizer().fit_transform([sentence1, sentence2])
    except ValueError:
        # CountVectorizer refuses to fit an empty vocabulary (e.g. both texts
        # are blank or contain only tokens its default pattern drops). Such a
        # pair has nothing in common, so report "no similarity" instead of
        # crashing the caller's loop.
        return 0.0
    return cosine_similarity(count_matrix)[0, 1]


def generate_gpt2_response(question):
    """Generate a free-form reply to ``question`` with the module-level GPT-2.

    Args:
        question: Prompt text handed to the tokenizer.

    Returns:
        The decoded generation (special tokens stripped), or a fixed notice
        when the question is blank or tokenizes to nothing.
    """
    empty_notice = "Generated response is empty OR Input your question"

    # Reject blank input up front so the tokenizer/model never see an empty
    # or whitespace-only prompt.
    if not question or not question.strip():
        return empty_notice

    input_ids = tokenizer.encode(question, return_tensors='pt').to(model.device)
    if input_ids.size(1) == 0:
        return empty_notice

    # Allow up to 100 tokens beyond the prompt length. The EOS id is reused as
    # the pad id.
    # NOTE(review): top_k / top_p / temperature are sampling settings but
    # do_sample is not enabled here, so they may have no effect on this
    # beam-search call -- confirm against the transformers generate() docs.
    generated_output = model.generate(
        input_ids,
        max_length=len(input_ids[0]) + 100,
        num_beams=5,
        no_repeat_ngram_size=2,
        top_k=10,
        top_p=1,
        temperature=0.9,
        pad_token_id=model.config.eos_token_id,
    )
    return tokenizer.decode(generated_output[0], skip_special_tokens=True)


def find_question_and_answer(json_file, question, *, similarity_threshold=0.4):
    """Pick the best stored response for ``question``, else ask GPT-2.

    The JSON document must contain a ``"questions"`` list whose items each have
    a ``"response"`` string. Every response is scored against the lowercased
    question with compare_sentences(); the first highest-scoring response is
    kept. If the best score is below ``similarity_threshold`` the answer is
    generated by generate_gpt2_response() instead.

    Args:
        json_file: Path of the JSON corpus.
        question: The user's question.
        similarity_threshold: Minimum score for a stored response to be used
            (keyword-only; defaults to the previously hard-coded 0.4).

    Returns:
        The chosen or generated response text, or a fixed apology when neither
        path produced a response.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the expected ``"questions"`` / ``"response"`` keys are
            missing.
    """
    # Use a separate name for the handle so the path parameter is not
    # overwritten, and read as UTF-8 rather than the platform default encoding.
    with open(json_file, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    # NOTE(review): the lowercased question is also what gets sent to GPT-2
    # below -- confirm that dropping the original casing is intended.
    question = question.lower()

    max_similarity = 0
    selected_response = None
    # NOTE(review): the score is computed against each entry's "response"
    # text, not against a stored question -- verify against the corpus schema.
    for q_and_a in data["questions"]:
        similarity_score = compare_sentences(question, q_and_a["response"].lower())
        if similarity_score > max_similarity:
            max_similarity = similarity_score
            selected_response = q_and_a["response"]

    # Below the threshold the stored match is not trusted: switch to GPT-2.
    if max_similarity < similarity_threshold:
        selected_response = generate_gpt2_response(question)

    # Fallback to a default message if no suitable response is found
    # (reachable only with an empty corpus and a non-positive threshold).
    if selected_response is None:
        selected_response = "CHAT BOT --> I'm sorry, I don't have data about that.\n"

    return selected_response


def main():
    """Render the question box and display the answer for its content."""
    user_input = st.text_area("Enter your question: ")
    response = find_question_and_answer(json_file_path, user_input)
    st.write(response)


if __name__ == '__main__':
    main()


# Earlier sentiment-analysis experiment, kept commented out for reference.
# dataset_path = 'EbubeJohnEnyi/Q_and_A'
# import streamlit as st
# from transformers import pipeline
# from flask import Flask, render_template, request
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import json
# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter your text here: ')
# if text:
#     out = pipe(text)
#     print(out)