File size: 9,723 Bytes
a1cc3b7
a17798e
a1cc3b7
d00a5c1
 
 
 
 
 
a938842
d00a5c1
a938842
 
92310f5
a938842
 
 
 
 
 
d00a5c1
b184e6e
 
 
92310f5
c754608
d00a5c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a938842
92310f5
 
d00a5c1
 
 
 
 
 
 
3dfdcc9
 
d00a5c1
 
 
 
a938842
92310f5
 
d00a5c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf5db68
d00a5c1
 
 
 
 
 
a938842
 
 
d00a5c1
ef44b58
ca410f4
 
dbecab0
 
38312e6
 
 
 
f3b273c
 
0d15756
1165c6e
 
 
 
 
f3b273c
0d15756
1165c6e
 
 
 
 
f3b273c
0d15756
1165c6e
92310f5
 
0b1603b
92310f5
ce57906
 
f819f31
ce57906
 
 
6ed14c5
ce57906
3a831cd
 
 
 
 
 
 
 
f3b273c
 
478511d
6dbf487
9fe1f40
 
 
 
 
07b6a39
ed497ee
07b6a39
6dbf487
07b6a39
809a544
 
27f9b67
9cde542
 
 
 
 
809a544
 
 
 
9d5fd12
8d1c37c
 
6de432e
8d1c37c
 
 
9d5fd12
a9c6987
7f454e6
dc1b177
afd0487
 
 
 
4c76d64
a9c6987
 
3e01d2c
 
 
b291e45
3e01d2c
 
 
 
 
 
b291e45
3e01d2c
 
b291e45
3e01d2c
b291e45
3e01d2c
 
 
7aa9944
3e01d2c
b291e45
3e01d2c
 
ccb89eb
b291e45
3e01d2c
 
 
 
 
 
b291e45
3e01d2c
 
 
 
b291e45
3e01d2c
 
 
 
 
b291e45
410fe98
9dd927a
809a544
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import gradio as gr
from huggingface_hub import InferenceClient

# STEP 1 FROM SEMANTIC SEARCH
from sentence_transformers import SentenceTransformer
import torch

# STEP 2 FROM SEMANTIC SEARCH
def _read_text_file(path):
    """Return the entire contents of the text file at ``path``, decoded as UTF-8."""
    with open(path, "r", encoding="utf-8") as file:
        return file.read()


# Load each source file into its own module-level string.
cool_mom_text = _read_text_file("cool_mom_phrases.txt")
tutor_mom_text = _read_text_file("tutor_mom_phrases.txt")
strict_mom_text = _read_text_file("strict_mom_phrases.txt")
# NOTE(review): study_techniques_text is loaded here but not referenced anywhere
# else in the visible part of this file.
study_techniques_text = _read_text_file("study_techniques.txt")


# STEP 3 FROM SEMANTIC SEARCH
def preprocess_text(text):
    """Split ``text`` into a list of non-empty, whitespace-trimmed lines.

    The text is stripped as a whole, split on newline characters, and every
    resulting piece is stripped again; pieces that end up empty are dropped.
    """
    # Trim each line lazily, then keep only the ones with content left
    trimmed_lines = (line.strip() for line in text.strip().split("\n"))
    return [line for line in trimmed_lines if line != ""]

# Run each persona's raw text through preprocess_text to get its list of chunks
cleaned_cool_chunks, cleaned_tutor_chunks, cleaned_strict_chunks = [
    preprocess_text(raw_text)
    for raw_text in (cool_mom_text, tutor_mom_text, strict_mom_text)
]

#STEP 4 FROM SEMANTIC SEARCH
# Name of the pre-trained embedding model handed to SentenceTransformer
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def create_embeddings(text_chunks):
    """Encode ``text_chunks`` with the module-level ``model``.

    Returns whatever ``model.encode`` produces when called with
    ``convert_to_tensor=True``.
    """
    return model.encode(text_chunks, convert_to_tensor=True)

# Build the embeddings for each persona's chunk list with create_embeddings
cool_chunk_embeddings, tutor_chunk_embeddings, strict_chunk_embeddings = [
    create_embeddings(persona_chunks)
    for persona_chunks in (cleaned_cool_chunks, cleaned_tutor_chunks, cleaned_strict_chunks)
]

#STEP 5 FROM SEMANTIC SEARCH
# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
def get_top_chunks(query, chunk_embeddings, text_chunks):
    """Return the text chunks most similar to ``query``, best match first.

    Args:
        query: Text to search for; it is encoded with the module-level ``model``.
        chunk_embeddings: 2-D tensor of embeddings (it is normalized along
            ``dim=1``); assumed to hold one row per entry of ``text_chunks``.
        text_chunks: Sequence of chunk strings, index-aligned with the rows of
            ``chunk_embeddings``.

    Returns:
        A list of up to 3 chunks ordered by descending cosine similarity.
        Fewer are returned when fewer chunks exist, and an empty list is
        returned when ``text_chunks`` is empty.
    """
    # Nothing to rank; also avoids normalizing an empty embedding tensor
    if not text_chunks:
        return []

    # Convert the query text into a vector embedding
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Scale both sides to unit length so the dot product below is cosine similarity
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # One similarity score per chunk
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)

    # Print the similarities
    print(similarities)

    # torch.topk raises when k exceeds the number of scores, so clamp k to the
    # number of available chunks
    top_k = min(3, similarities.shape[0])
    top_indices = torch.topk(similarities, k=top_k).indices

    # Print the top indices
    print(top_indices)

    # Map the winning indices back to their text, converting to plain ints first
    return [text_chunks[index] for index in top_indices.tolist()]

# STEP 6 FROM SEMANTIC SEARCH
# Call the get_top_chunks function with the original query
#top_cool_results = get_top_chunks(message, cool_chunk_embeddings, cleaned_cool_chunks) # Complete this line
#top_tutor_results = get_top_chunks(message, tutor_chunk_embeddings, cleaned_tutor_chunks)
#top_tiger_results = get_top_chunks(message, tiger_chunk_embeddings, cleaned_tiger_chunks)

# Model id handed to InferenceClient; respond() uses this client for chat completions
CHAT_MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
client = InferenceClient(CHAT_MODEL_ID)
# NN commented out both
# mom_type = []
def respond(message, history, mom_type):
    """Generate the selected mom persona's reply to ``message``.

    Args:
        message: The user's latest chat message.
        history: Earlier turns of the conversation. Both ``(user, assistant)``
            pairs and ``{"role": ..., "content": ...}`` dicts are accepted.
        mom_type: Selected persona names (or a single name as a string); only
            the first one is used.

    Returns:
        The stripped text of the model's reply, or a prompt asking the user to
        pick a mom when no recognized persona is selected.
    """
    if not mom_type:
        return "Please choose atleast one mom"
    # Only the first selection drives the persona
    selected = mom_type if isinstance(mom_type, str) else mom_type[0]

    if selected == "Cool Mom":
        persona = (
            "You are a chatbot that plays the role of the user's cool, friendly, "
            "extremely nice and supportive mom. Respond in full sentences and use "
            "really nice and sweet language, don't cut yourself off."
        )
        embeddings, chunks = cool_chunk_embeddings, cleaned_cool_chunks
    elif selected == "Tutor Mom":
        persona = (
            "You are a chatbot that plays the role of the user's tutor-like mom "
            "who knows how to help and teaches her kid everything. Respond in full "
            "sentences, speak very knowledgeable and don't cut yourself off."
        )
        embeddings, chunks = tutor_chunk_embeddings, cleaned_tutor_chunks
    elif selected == "Strict Mom":
        persona = (
            "You are a chatbot that plays the role of the user's extremely strict "
            "mom who is focused on doing well in school, studies, and academics. "
            "Respond in action-oriented and stern full sentences, don't cut "
            "yourself off."
        )
        embeddings, chunks = strict_chunk_embeddings, cleaned_strict_chunks
    else:
        # An unrecognized persona used to fall through with ``messages`` unbound
        # and crash; treat it the same as having no selection
        return "Please choose atleast one mom"

    top_results = get_top_chunks(message, embeddings, chunks)
    messages = [{
        "role": "system",
        "content": f"{persona} Base your response on the provided context: {top_results}",
    }]

    # Replay earlier turns BEFORE the new question so the model sees the
    # conversation in chronological order and the latest question comes last
    for turn in history or []:
        if isinstance(turn, dict):
            # Keep only the fields the chat API needs
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            # Either half of a pair can be missing; skip it instead of sending None
            if user_msg is not None:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})

    # The label previously ran straight into the text ("Question<text>")
    messages.append({"role": "user", "content": f"Question: {message}"})

    response = client.chat_completion(messages, temperature=0.2)
    return response['choices'][0]['message']['content'].strip()

# NN commented out
# chatbot = gr.ChatInterface(respond, type="messages")

'''custom_theme = gr.themes.Soft().set(
    primary_hue="purple",
    secondary_hue="fuchsia",
    neutral_hue="gray",
    spacing_size="lg",
    radius_size="lg",
    text_size="lg",
    font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
    font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"]
)'''


# Font stacks: IBM Plex via gr.themes.GoogleFont first, then a generic family name
_body_fonts = [gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"]
_mono_fonts = [gr.themes.GoogleFont("IBM Plex Mono"), "monospace"]

# Soft base theme with the app's hues and medium spacing, radius, and text sizes
custom_theme = gr.themes.Soft(
    primary_hue="yellow",
    secondary_hue="violet",
    neutral_hue="purple",
    spacing_size="md",
    radius_size="md",
    text_size="md",
    font=_body_fonts,
    font_mono=_mono_fonts,
)

# Persona names offered in the selector; respond() compares against these exact strings
MOM_CHOICES = ["Cool Mom", "Tutor Mom", "Strict Mom"]

with gr.Blocks(theme=custom_theme) as chatbot:
    # Persona selector; it is handed to the chat interface as an additional input
    with gr.Row():
        mom_type = gr.CheckboxGroup(MOM_CHOICES, label="Choose Your Mom")

    # Chat UI that calls respond() for each message
    gr.ChatInterface(
        fn=respond,
        additional_inputs=[mom_type],
        title="StudyMama",
    )
    
    
# with gr.Blocks() as chatbot:
#     gr.Image(value="ezgif.com-webp-to-gif-converter (1).gif")
#     gr.ChatInterface(respond, type="messages")
#     gr.ChatInterface(respond, type="messages")



#def respond_tutor(message, history, mom_type):
#    top_tutor_results = get_top_chunks(message, tutor_chunk_embeddings, cleaned_tutor_chunks)
#     #str_chunks = "\n".join(best_chunks)

#    messages = [{"role": "system", "content": f"You are chatbot that plays the role of the user's extremely studious, tutor-like mom. Respond in full sentences, don't cut yourself off. Base your response on the provided context: {mom_type}"}, 
#                {"role": "user", 
#                "content": (
#                    f"Context:\n{top_tutor_results}\n\n"
#                    f"Question{message}"
#                )}]

#    if history:
#        messages.extend(history)

#    messages.append({"role": "user", "content": message})

#    response = client.chat_completion(
#        messages, 
#        temperature = 0.2
#     )
#    return response['choices'][0]['message']['content'].strip()

#def respond_strict(message, history):
#    top_strict_results = get_top_chunks(message, strict_chunk_embeddings, cleaned_strict_chunks)
    #str_chunks = "\n".join(best_chunks)

#    messages = [{"role": "system", "content": f"You are chatbot that plays the role of the user's extremely strict mom. Respond in full sentences, don't cut yourself off. Base your response on the provided context: {top_strict_results}"}, 
#            {"role": "user", 
#                "content": (
#                    f"Context:\n{top_strict_results}\n\n"
#                    f"Question{message}"
#                )}]

#    if history:
 #       messages.extend(history)
#
#    messages.append({"role": "user", "content": message})

#    response = client.chat_completion(
#        messages, 
#        temperature = 0.2
#     )
#    return response['choices'][0]['message']['content'].strip()



# Start the Gradio app defined by the ``chatbot`` Blocks object. This runs at
# import time because it is not wrapped in an ``if __name__ == "__main__"`` guard.
# NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
# rendering — confirm against the Gradio version in use.
chatbot.launch(ssr_mode=False)