EbubeJohnEnyi committed on
Commit
36e43d9
1 Parent(s): b7c7571

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -29
app.py CHANGED
@@ -4,12 +4,11 @@ from sklearn.feature_extraction.text import CountVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import json
6
 
7
- # Set the path to your dataset file
8
- dataset_path = 'Q_and_A_Lagos.json'
9
-
10
  tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
11
  model = GPT2LMHeadModel.from_pretrained('gpt2')
12
 
 
 
13
  def compare_sentences(sentence1, sentence2):
14
  vectorizer = CountVectorizer().fit_transform([sentence1, sentence2])
15
  similarity = cosine_similarity(vectorizer)
@@ -18,25 +17,17 @@ def compare_sentences(sentence1, sentence2):
18
 
19
  def generate_gpt2_response(question):
20
  input_ids = tokenizer.encode(question, return_tensors='pt')
21
-
22
- # Remove padding tokens if present
23
- input_ids = input_ids[:, :model.config.max_position_embeddings]
24
-
25
- generated_output = model.generate(
26
- input_ids,
27
- max_length=len(input_ids[0]) + 100,
28
- num_beams=5,
29
- no_repeat_ngram_size=2,
30
- top_k=10,
31
- top_p=1,
32
- temperature=0.9
33
- )
34
-
35
  generated_response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
36
  return generated_response
37
 
38
- def find_question_and_answer(dataset_file, question):
39
- with open(dataset_file, "r") as json_file:
40
  data = json.load(json_file)
41
 
42
  question = question.lower()
@@ -44,13 +35,13 @@ def find_question_and_answer(dataset_file, question):
44
  max_similarity = 0
45
  selected_response = None
46
 
47
- for q_and_a in data.get("questions", []):
48
- response_message = q_and_a.get("response", "").lower()
49
  similarity_score = compare_sentences(question, response_message)
50
 
51
  if similarity_score > max_similarity:
52
  max_similarity = similarity_score
53
- selected_response = q_and_a.get("response", "")
54
 
55
  # Set a threshold for similarity score to switch to GPT-2
56
  similarity_threshold = 0.4 # Adjust this threshold as needed
@@ -64,13 +55,14 @@ def find_question_and_answer(dataset_file, question):
64
 
65
  return selected_response
66
 
67
- # Streamlit UI
68
- st.title("Chatbot App")
69
- user_input = st.text_input("Ask a question:")
70
- response = find_question_and_answer(dataset_path, user_input)
71
- st.text(response)
72
- estion_and_answer(dataset_path, user_input)
73
- st.text(response)
 
74
 
75
 
76
 
 
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import json
6
 
 
 
 
7
  tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
8
  model = GPT2LMHeadModel.from_pretrained('gpt2')
9
 
10
+ json_file_path = 'Q_and_A_Lagos.json'
11
+
12
  def compare_sentences(sentence1, sentence2):
13
  vectorizer = CountVectorizer().fit_transform([sentence1, sentence2])
14
  similarity = cosine_similarity(vectorizer)
 
17
 
18
  def generate_gpt2_response(question):
19
  input_ids = tokenizer.encode(question, return_tensors='pt')
20
+ generated_output = model.generate(input_ids, max_length=len(input_ids[0]) + 100,
21
+ num_beams=5,
22
+ no_repeat_ngram_size=2,
23
+ top_k=10,
24
+ top_p=1,
25
+ temperature=0.9)
 
 
 
 
 
 
 
 
26
  generated_response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
27
  return generated_response
28
 
29
+ def find_question_and_answer(json_file, question):
30
+ with open(json_file, "r") as json_file:
31
  data = json.load(json_file)
32
 
33
  question = question.lower()
 
35
  max_similarity = 0
36
  selected_response = None
37
 
38
+ for q_and_a in data["questions"]:
39
+ response_message = q_and_a["response"].lower()
40
  similarity_score = compare_sentences(question, response_message)
41
 
42
  if similarity_score > max_similarity:
43
  max_similarity = similarity_score
44
+ selected_response = q_and_a["response"]
45
 
46
  # Set a threshold for similarity score to switch to GPT-2
47
  similarity_threshold = 0.4 # Adjust this threshold as needed
 
55
 
56
  return selected_response
57
 
58
+ if __name__ == '__main__':
59
+ while True:
60
+ user_input = input("Enter your question: ")
61
+ if user_input.lower() == 'exit':
62
+ break
63
+ response = find_question_and_answer(json_file_path, user_input)
64
+ print(response)
65
+
66
 
67
 
68