EbubeJohnEnyi committed on
Commit c540671
1 Parent(s): 79ede05

Update app.py

Files changed (1)
  1. app.py +79 -8
app.py CHANGED
@@ -1,15 +1,86 @@
-import streamlit as st
-from transformers import pipeline
-from flask import Flask, render_template, request
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import json

+# Set the path to your dataset file
+dataset_path = 'path/to/your/dataset.json'

-pipe = pipeline('sentiment-analysis')
-text = st.text_area('Enter your text here: ')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2')

-if text:
-    out = pipe(text)
-    print(out)
+def compare_sentences(sentence1, sentence2):
+    vectorizer = CountVectorizer().fit_transform([sentence1, sentence2])
+    similarity = cosine_similarity(vectorizer)
+    similarity_score = similarity[0, 1]
+    return similarity_score
+
+def generate_gpt2_response(question):
+    input_ids = tokenizer.encode(question, return_tensors='pt')
+    generated_output = model.generate(input_ids, max_length=len(input_ids[0]) + 100,
+                                      num_beams=5,
+                                      no_repeat_ngram_size=2,
+                                      top_k=10,
+                                      top_p=1,
+                                      temperature=0.9)
+    generated_response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
+    return generated_response
+
+def find_question_and_answer(dataset_file, question):
+    with open(dataset_file, "r") as json_file:
+        data = json.load(json_file)
+
+    question = question.lower()
+
+    max_similarity = 0
+    selected_response = None
+
+    for q_and_a in data["questions"]:
+        response_message = q_and_a["response"].lower()
+        similarity_score = compare_sentences(question, response_message)
+
+        if similarity_score > max_similarity:
+            max_similarity = similarity_score
+            selected_response = q_and_a["response"]
+
+    # Set a threshold for similarity score to switch to GPT-2
+    similarity_threshold = 0.4  # Adjust this threshold as needed
+    if max_similarity < similarity_threshold:
+        generated_response = generate_gpt2_response(question)
+        selected_response = generated_response
+
+    # Fallback to a default message if no suitable response is found
+    if selected_response is None:
+        selected_response = "CHAT BOT --> I'm sorry, I don't have data about that.\n"
+
+    return selected_response
+
+# Example usage
+user_input = input("Ask a question: ")
+response = find_question_and_answer(dataset_path, user_input)
+print(response)
+
+
+
+
+
+
+
+
+
+
+# import streamlit as st
+# from transformers import pipeline
+# from flask import Flask, render_template, request
+# from transformers import GPT2LMHeadModel, GPT2Tokenizer
+# from sklearn.feature_extraction.text import CountVectorizer
+# from sklearn.metrics.pairwise import cosine_similarity
+# import json
+
+
+# pipe = pipeline('sentiment-analysis')
+# text = st.text_area('Enter your text here: ')
+
+# if text:
+#     out = pipe(text)
+#     print(out)
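
Note on the data file: the new find_question_and_answer reads the JSON at dataset_path and loops over a top-level "questions" list, taking each entry's "response" string. The commit does not include a dataset, so the sketch below only illustrates a file with the shape the code expects; the dataset.json filename and the question/response text are invented for this example.

# Illustrative sketch only: writes a JSON file shaped the way the updated
# app.py reads it (a top-level "questions" list whose entries carry a
# "response" string). The filename and strings here are made up.
import json

example_dataset = {
    "questions": [
        {
            "question": "What are your opening hours?",  # extra keys are ignored by the loader
            "response": "CHAT BOT --> We are open from 9am to 5pm, Monday to Friday.\n",
        },
        {
            "question": "Where are you located?",
            "response": "CHAT BOT --> You can find us at the main campus library.\n",
        },
    ]
}

with open("dataset.json", "w") as f:  # point dataset_path in app.py at this file
    json.dump(example_dataset, f, indent=2)

With a file like this in place, the user's question is compared against each stored response text; if the best cosine-similarity score stays below the 0.4 similarity_threshold, app.py falls back to the GPT-2 generation path.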