import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json

# Load the pretrained GPT-2 tokenizer and language model.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
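# Note: Streamlit re-executes this script on every user interaction, so the
# tokenizer and model above are reloaded on each rerun. One option (not used
# here) is to move the loads into a function decorated with @st.cache_resource
# so they stay in memory across reruns.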

json_file_path = 'Q_and_A_Lagos.json'
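# The code below assumes the JSON file roughly follows this shape, inferred
# from the keys accessed in find_question_and_answer; the real dataset may
# carry extra fields (e.g. the original question text):
# {
#   "questions": [
#     {"response": "..."},
#     ...
#   ]
# }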

def compare_sentences(sentence1, sentence2):
    # Vectorize both sentences with bag-of-words counts and return their cosine similarity.
    vectorizer = CountVectorizer().fit_transform([sentence1, sentence2])
    similarity = cosine_similarity(vectorizer)
    similarity_score = similarity[0, 1]
    return similarity_score

def generate_gpt2_response(question):
    input_ids = tokenizer.encode(question, return_tensors='pt').to(model.device)
    if input_ids.size(1) == 0:
        return "Please enter a question."
    # Beam search over at most 100 new tokens; note that top_k, top_p and
    # temperature only influence generation when sampling (do_sample=True) is enabled.
    generated_output = model.generate(input_ids,
                                      max_length=input_ids.size(1) + 100,
                                      num_beams=5,
                                      no_repeat_ngram_size=2,
                                      top_k=10,
                                      top_p=1,
                                      temperature=0.9,
                                      pad_token_id=model.config.eos_token_id)
    generated_response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
    return generated_response

def find_question_and_answer(json_file, question):
    # Load the question/answer pairs from the JSON file.
    with open(json_file, "r") as f:
        data = json.load(f)

    question = question.lower()

    max_similarity = 0
    selected_response = None

    # Pick the stored response whose text is most similar to the user's question.
    for q_and_a in data["questions"]:
        response_message = q_and_a["response"].lower()
        similarity_score = compare_sentences(question, response_message)

        if similarity_score > max_similarity:
            max_similarity = similarity_score
            selected_response = q_and_a["response"]

    # If no stored response is similar enough, fall back to GPT-2 generation.
    similarity_threshold = 0.4  # Adjust this threshold as needed
    if max_similarity < similarity_threshold:
        selected_response = generate_gpt2_response(question)

    # Final fallback if neither lookup nor generation produced a response.
    if selected_response is None:
        selected_response = "CHAT BOT --> I'm sorry, I don't have data about that.\n"

    return selected_response

if __name__ == '__main__':
    user_input = st.text_area("Enter your question: ")
    # Only look up or generate an answer once the user has typed something.
    if user_input.strip():
        response = find_question_and_answer(json_file_path, user_input)
        st.write(response)
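# To try the app locally (assuming this file is saved as app.py; adjust the
# filename to match your checkout):
#   streamlit run app.py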











# dataset_path = 'EbubeJohnEnyi/Q_and_A'
# import streamlit as st
# from transformers import pipeline
# from flask import Flask, render_template, request
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import json


# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter your text here: ')

# if text:
#     out = pipe(text)
#     print(out)