Prajwal Paudyal committed on
Commit
2cfdc8a
·
1 Parent(s): 322cd61

merged researcher and interviewer functionalities

Browse files
Interviewer.py DELETED
@@ -1,105 +0,0 @@
1
- import gradio as gr
2
- import time
3
- import openai
4
- from dotenv import find_dotenv
5
- from dev.study_processor import StudyProcessor
6
- import json
7
- from dotenv import load_dotenv
8
- import os
9
- from pathlib import Path
10
- from utils.interviewer import Interviewer
11
- import whisper
12
-
13
-
14
- load_dotenv()
15
- whisper_model = whisper.load_model("base")
16
- project_dir = Path(os.environ['PROJECT_DIR'])
17
- assert project_dir.exists()
18
- PARTICIPANT_CODES = ["letmetrythis"]
19
- with open(project_dir / 'study_prompts.json') as sp:
20
- DEFINITION_OBJECTIVE_MAP = json.load(sp)
21
-
22
- # todo refactor: move this to study processor
23
- def_objective_keys = DEFINITION_OBJECTIVE_MAP.keys() # iterate it in whatever order
24
- DEFINITION_OBJECTIVE_CHOICES_DICT = {k: DEFINITION_OBJECTIVE_MAP[k]['descriptive_name'] for k in def_objective_keys}
25
- DEFINITION_CHOICES_OBJECTIVE_DICT = {v: k for k, v in DEFINITION_OBJECTIVE_CHOICES_DICT.items()}
26
-
27
-
28
- def add_text(history, text):
29
- """
30
- Takes in the history and text and updates the history and sets the text input as disabled so it gives
31
- the AI time to think
32
- :param history:
33
- :param text:
34
- :return:
35
- """
36
- history = history + [(text, None)]
37
- return history, gr.update(value="")
38
-
39
-
40
-
41
-
42
- def bot(history, m_interviewer, to_speak_out=False):
43
- m_recent_human_msg = history[-1][0]
44
- bot_message = m_interviewer.llm_chain.predict(human_input=m_recent_human_msg)
45
- history[-1][1] = ""
46
- if not to_speak_out:
47
- for character in bot_message:
48
- history[-1][1] += character
49
- time.sleep(0.005)
50
- yield history, m_interviewer, None
51
-
52
-
53
-
54
-
55
- def transcribe_audio(m_audio):
56
- model_src="openai"
57
-
58
-
59
- if model_src == "openai":
60
- m_audio_f = open(m_audio, 'rb')
61
- transcript = openai.Audio.transcribe("whisper-1", m_audio_f, language="en")
62
- else:
63
- global whisper_model
64
- transcript = whisper_model.transcribe(m_audio)
65
- text = transcript.get("text", "")
66
- return text
67
-
68
- with gr.Blocks() as demo:
69
- gr.Markdown(f"Welcome to QualZ. I am your friendly A.I. assistant for research. "
70
- f"Go through the various tabs to select a topic and start researching! ")
71
- # create a study processor class and keep in memory
72
- # accordig to gradio documentation this is specific to a user
73
- study_processor = gr.State(value=StudyProcessor()) # todo this should refresh each time a new 'study' is selected
74
-
75
-
76
- gr.Markdown("Welcome to the study titled - Needs and routines for hair-care grooming. "
77
- "My name is Carlie and I am an AI agent who will be conducting this interview. "
78
- "If relevant feel free to use audio to talk to me or send me pictures that are relevant. "
79
- "Let me know when you are ready to begin by sending saying something below"
80
- "If you need to step away or close this sessions, simply click your unique link again.")
81
-
82
- interviewer = gr.State(value=Interviewer())
83
- chatbot = gr.Chatbot(label="Ideation Session", elem_id="chatbot", height=750)
84
- audio_in = gr.Audio(source="microphone", label="Speak",
85
- type="filepath")
86
- txt = gr.Textbox(
87
- show_label=False,
88
- placeholder="Enter text and press enter, or upload an image",
89
- container=False)
90
- btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])
91
-
92
-
93
-
94
-
95
-
96
- txt_msg = txt.submit(add_text, [chatbot, txt],
97
- [chatbot, txt], queue=False).then(bot, [chatbot, interviewer], [chatbot, interviewer])
98
-
99
-
100
-
101
-
102
- if __name__ == "__main__":
103
- _ = load_dotenv(find_dotenv())
104
- openai.api_key = os.getenv('OPENAI_API_KEY')
105
- demo.queue(concurrency_count=10).launch(server_port=8850, share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README_interviewer.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
  sdk: gradio
3
  emoji: 📚
4
- app_file: Interviewer.py
5
  ---
 
1
  ---
2
  sdk: gradio
3
  emoji: 📚
4
+ app_file: app_interviewer.py
5
  ---
README_researcher.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
  sdk: gradio
3
  emoji: 📚
4
- app_file: Researcher.py
5
  ---
 
1
  ---
2
  sdk: gradio
3
  emoji: 📚
4
+ app_file: app_researcher.py
5
  ---
app_interviewer.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import time
4
+ import openai
5
+ from utils.app_common import *
6
+ from dotenv import load_dotenv, find_dotenv
7
+ from interviewer_utils.interviewer import Interviewer
8
+ from interviewer_utils import voice_controller as vc
9
+ from utils import eleven_voice_cache
10
+ import interviewer_utils.global_variables as global_variables
11
+ from bson import ObjectId
12
+
13
+ from utils.study_repository import StudyRepository
14
+ from utils.study import Study
15
+
16
+ from utils.database_helper import DatabaseIO
17
+
18
+
19
+ def add_text(history, text):
20
+ """
21
+ Takes in the history and text and updates the history and sets the text input as disabled so it gives
22
+ the AI time to think
23
+ :param history:
24
+ :param text:
25
+ :return:
26
+ """
27
+ history = history + [[text, None]]
28
+ return history, gr.update(value=""), history[-1][-2]
29
+
30
+
31
+ # chat_bot reply
32
+ def bot(this_active_study, this_user_id, history, m_interviewer, this_study_repository):
33
+ """
34
+
35
+ :param this_active_study:
36
+ :param this_user_id:
37
+ :param history:
38
+ :param m_interviewer:
39
+ :param this_study_repository:
40
+ :return:
41
+ """
42
+ this_study_id = this_active_study._id
43
+ this_human_message = history[-1][0]
44
+ this_bot_message = m_interviewer.chat_handler.converse(this_human_message)
45
+ history[-1][1] = this_bot_message
46
+ this_study_repository.append_to_transcript(study_id=this_study_id,
47
+ user_id=this_user_id,
48
+ human_text=this_human_message,
49
+ ai_response=this_bot_message)
50
+
51
+ return history, m_interviewer, history[-1][1]
52
+
53
+
54
+ def on_select(evt: gr.SelectData):
55
+ return evt.value
56
+
57
+
58
+ # function to choose subject from dropdown
59
+ def change_study_details(study_id, m_interviewer):
60
+ """
61
+
62
+ Args:
63
+ study_id:
64
+ m_interviewer:
65
+
66
+ Returns:
67
+
68
+ """
69
+ m_study_dropdown_value = global_variables.study_topic_id[0]
70
+ global_variables.study_id = ObjectId(study_id)
71
+ print("Hi", m_study_dropdown_value)
72
+ m_interviewer.update_llm_prompt(study_subject=m_study_dropdown_value)
73
+ return m_interviewer
74
+
75
+
76
+ def enable_chat():
77
+ return
78
+
79
+
80
+ with gr.Blocks() as demo:
81
+ gr.Markdown(
82
+ f"Welcome to your interview session. To Begin, enter your registered email address"
83
+ f" and the code the study facilitator provided. "
84
+ )
85
+
86
+ gr.Markdown(
87
+ "If you need to step away or close this sessions, simply come back here "
88
+ "and enter the details again to continue"
89
+ )
90
+
91
+ # initializing the Interviewer object and creating a state variable for the session
92
+
93
+ view_study_repository = gr.State(value=StudyRepository(collection_name_studies=STUDIES_COLLECTION))
94
+ # existing_studies_dropdown_choices = get_dropdown_choices()
95
+ view_active_study = gr.State(value=None)
96
+ # field to hold the user_id, will refactor this for a user object with a factory pattern todo
97
+ view_user_id = gr.Text(value="", interactive=False, visible=False)
98
+
99
+ view_interviewer = gr.State(value=None) # will set the interviewer along with details
100
+
101
+ # initializing transcripts for rendering in 2d list form
102
+ rendering_transcript = gr.State()
103
+
104
+ # subject id and user email validation form field
105
+ status = gr.Textbox(interactive=False, label="System messages will appear here")
106
+
107
+ with gr.Row():
108
+ user_email = gr.Textbox(label="Enter email", min_width=50)
109
+ study_code = gr.Textbox(label="Enter Code", min_width=50)
110
+
111
+ study_and_user_select_btn = gr.Button(value="Submit", min_width=50)
112
+
113
+ # interface for the chatbot
114
+ chatbot = gr.Chatbot(label="Ideation Session", elem_id="chatbot", height=750)
115
+
116
+ txt = gr.Textbox(
117
+ show_label=False,
118
+ placeholder="Enter text and press enter, or upload an image",
119
+ container=False,
120
+ interactive=False, # when button is clicked it should be true
121
+ )
122
+
123
+ with gr.Row():
124
+ # integrating the feature upload document
125
+ upload_files_btn = gr.UploadButton("📁", file_types=["image", "video", "audio"], interactive=False)
126
+
127
+ # adjusting the row columns
128
+ with gr.Row():
129
+ # the audio features
130
+ audio = gr.Audio(source="microphone", type="filepath", label="Audio input", interactive=False)
131
+
132
+ # radio indicator to control the AI voice response
133
+ voice_response_radio = gr.Radio(
134
+ ["On", "Off"],
135
+ label="Voice Setting",
136
+ interactive=False,
137
+ value="Off",
138
+ info="Turn on or off voice system",
139
+ )
140
+
141
+ voice_persona_dropdown = gr.Dropdown(
142
+ choices=[
143
+ eleven_voice_cache.VOICES_CACHE[i].name
144
+ for i in range(len(eleven_voice_cache.VOICES_CACHE))
145
+ ],
146
+ label="choose the voice over",
147
+ interactive=False,
148
+ value="Rachel",
149
+ )
150
+
151
+ study_and_user_select_btn.click(set_user_and_study,
152
+ inputs=[view_study_repository,
153
+ study_code,
154
+ user_email,
155
+ ],
156
+ outputs=[view_active_study, view_user_id, status,
157
+ txt, upload_files_btn,
158
+ audio, voice_response_radio, voice_persona_dropdown],
159
+ queue=False).then(
160
+ setup_conversation_context,
161
+ inputs=[
162
+ view_active_study,
163
+ view_study_repository,
164
+ view_user_id,
165
+ ],
166
+ outputs=[
167
+ chatbot,
168
+ view_interviewer,
169
+ status
170
+ ],
171
+ queue=False
172
+ )
173
+
174
+ # call dropdown from function
175
+ voice_response_radio.select(on_select, None, voice_response_radio)
176
+ voice_persona_dropdown.select(on_select, None, voice_persona_dropdown)
177
+
178
+ # audio output with transcript initialization
179
+ audio_response_output = gr.Audio(label="Audio Output", autoplay=True)
180
+
181
+ audio_message_transcription = gr.Textbox(visible=False)
182
+ msg_response_from_AI = gr.Textbox(visible=False)
183
+
184
+ # human transcript to sent to database used in db function
185
+ human_transcript_to_db = gr.State(value="")
186
+
187
+ # on stop recording functionality
188
+ audio.stop_recording(
189
+ fn=vc.speech_to_text,
190
+ inputs=[audio],
191
+ outputs=[audio_message_transcription],
192
+ ).then(
193
+ add_text,
194
+ [chatbot, audio_message_transcription],
195
+ [chatbot, audio_message_transcription, human_transcript_to_db],
196
+ ).then(
197
+ bot,
198
+ inputs=[view_active_study, view_user_id, chatbot, view_interviewer, view_study_repository],
199
+ # try to add interview instant
200
+ outputs=[chatbot, view_interviewer, msg_response_from_AI],
201
+ ).then(
202
+ fn=vc.text_to_speech,
203
+ inputs=[
204
+ msg_response_from_AI,
205
+ voice_response_radio,
206
+ voice_persona_dropdown,
207
+ ],
208
+ outputs=[audio_response_output],
209
+ )
210
+
211
+ # on pressing enter functionalities while sending chat
212
+ txt.submit(
213
+ add_text, [chatbot, txt], [chatbot, txt, human_transcript_to_db], queue=True
214
+ ).then(
215
+ bot,
216
+ inputs=[view_active_study, view_user_id, chatbot, view_interviewer, view_study_repository],
217
+ outputs=[chatbot, view_interviewer, msg_response_from_AI],
218
+ queue=True,
219
+ )
220
+
221
+ # running the main file
222
+ if __name__ == "__main__":
223
+ _ = load_dotenv(find_dotenv())
224
+ openai.api_key = os.getenv("OPENAI_API_KEY")
225
+ demo.queue(concurrency_count=10).launch(server_port=8850, share=False, debug=True)
Researcher.py → app_researcher.py RENAMED
@@ -2,19 +2,19 @@ import gradio as gr
2
  import openai
3
  from dotenv import find_dotenv
4
  # from utils.study_processor import StudyProcessor
5
- from typing import List
6
  import json
7
  from dotenv import load_dotenv
8
  import os
9
  from pathlib import Path
10
  from utils.study_repository import StudyRepository
11
  from utils.study import Study
 
12
  import hashlib
13
- from utils.eleven_voice_cache import VOICES_CACHE
14
- from utils.interviewer_persona import AI_PERSONALITIES
15
  from utils.database_helper import DatabaseIO
16
  import bcrypt
17
  import pymongo
 
18
  load_dotenv()
19
 
20
  USER_ID = os.environ["DEV_USER_ID"]
@@ -73,7 +73,7 @@ FRAMEWORK_CHOICES = {
73
  def check_credentials(username, password):
74
 
75
  try:
76
- with DatabaseIO(collection_name="Users") as db_io:
77
  user_collection = db_io.collection
78
  user = user_collection.find_one({"username": username})
79
  if user is not None:
@@ -183,17 +183,7 @@ def guess_study_objective(study_access_obj,
183
  return study_access_obj, study_objective, study_reasoning
184
 
185
 
186
- def set_active_study(this_study_repository,
187
- active_study_obj,
188
- study_id):
189
- study_id = study_id.split(':')[0]
190
- this_study = this_study_repository.get_studies(study_id=study_id)
191
- if type(this_study) == list and len(this_study):
192
- this_study = this_study[0]
193
- # initialize a new study with this study in memory!
194
- active_study_obj = Study(**this_study)
195
 
196
- return active_study_obj, ""
197
 
198
  def start_synthetic_interviews(this_study_obj):
199
  if not this_study_obj:
@@ -339,8 +329,10 @@ def show_existing_study_fields(this_study_obj):
339
  'Reason': "",
340
  'Discussion': ""})
341
 
342
- return this_study_obj, this_study_obj._id, this_study_obj.study_descriptive_name, this_study_obj.study_topic, this_study_obj.study_objective, \
343
- this_study_obj.study_framework, this_study_obj.research_questions, this_study_obj.interview_guidelines, this_synthetic_panel_markdown, ""
 
 
344
 
345
 
346
  def get_dropdown_choices(dropdown_choices_study_repository_obj=None):
@@ -585,7 +577,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
585
  outputs=[existing_status_update])
586
  study_choice_dropdown.select(set_active_study,
587
  inputs=[view_study_repository,
588
- view_active_study,
589
  study_choice_dropdown],
590
  outputs=[view_active_study, existing_status_update], queue=False).then(
591
  show_existing_study_fields,
 
2
  import openai
3
  from dotenv import find_dotenv
4
  # from utils.study_processor import StudyProcessor
 
5
  import json
6
  from dotenv import load_dotenv
7
  import os
8
  from pathlib import Path
9
  from utils.study_repository import StudyRepository
10
  from utils.study import Study
11
+ from utils.app_common import * # common functions for both apps
12
  import hashlib
13
+ from interviewer_utils.interviewer_persona import AI_PERSONALITIES
 
14
  from utils.database_helper import DatabaseIO
15
  import bcrypt
16
  import pymongo
17
+ import pymongo.errors
18
  load_dotenv()
19
 
20
  USER_ID = os.environ["DEV_USER_ID"]
 
73
  def check_credentials(username, password):
74
 
75
  try:
76
+ with DatabaseIO(collection_name=INTERVIEW_USERS_COLLECTION) as db_io:
77
  user_collection = db_io.collection
78
  user = user_collection.find_one({"username": username})
79
  if user is not None:
 
183
  return study_access_obj, study_objective, study_reasoning
184
 
185
 
 
 
 
 
 
 
 
 
 
186
 
 
187
 
188
  def start_synthetic_interviews(this_study_obj):
189
  if not this_study_obj:
 
329
  'Reason': "",
330
  'Discussion': ""})
331
 
332
+ return this_study_obj, this_study_obj._id, this_study_obj.study_descriptive_name, this_study_obj.study_topic,\
333
+ this_study_obj.study_objective, \
334
+ this_study_obj.study_framework, this_study_obj.research_questions, this_study_obj.interview_guidelines, \
335
+ this_synthetic_panel_markdown, ""
336
 
337
 
338
  def get_dropdown_choices(dropdown_choices_study_repository_obj=None):
 
577
  outputs=[existing_status_update])
578
  study_choice_dropdown.select(set_active_study,
579
  inputs=[view_study_repository,
 
580
  study_choice_dropdown],
581
  outputs=[view_active_study, existing_status_update], queue=False).then(
582
  show_existing_study_fields,
buddy.mp3 ADDED
Binary file (127 kB). View file
 
dev/voiceover.ipynb DELETED
@@ -1,34 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "outputs": [],
7
- "source": [],
8
- "metadata": {
9
- "collapsed": false
10
- }
11
- }
12
- ],
13
- "metadata": {
14
- "kernelspec": {
15
- "display_name": "Python 3",
16
- "language": "python",
17
- "name": "python3"
18
- },
19
- "language_info": {
20
- "codemirror_mode": {
21
- "name": "ipython",
22
- "version": 2
23
- },
24
- "file_extension": ".py",
25
- "mimetype": "text/x-python",
26
- "name": "python",
27
- "nbconvert_exporter": "python",
28
- "pygments_lexer": "ipython2",
29
- "version": "2.7.6"
30
- }
31
- },
32
- "nbformat": 4,
33
- "nbformat_minor": 0
34
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
interviewer_utils/data_process.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.database_helper import DatabaseIO
2
+ import os
3
+ import dotenv
4
+ from bson import ObjectId
5
+ import interviewer_utils.global_variables as global_variables
6
+ from interviewer_utils.interviewer import Interviewer
7
+
8
+
9
+ dotenv.load_dotenv()
10
+ # dev_user_id = os.environ["DEV_USER_ID"]
11
+
12
+
13
+ class DataProcess:
14
+ # validate whether the data exists or not
15
+ def get_existing_data(must_have_keys: []):
16
+ # create the query
17
+ query = (
18
+ {key: {"$exists": True} for key in must_have_keys} if must_have_keys else {}
19
+ )
20
+ print(query)
21
+ results = []
22
+
23
+ # query the collection
24
+ results = DatabaseIO.read_documents(DatabaseIO(), query=query)
25
+ results = list(results)
26
+ # convert results to a list (if the result set is too large, consider returning a cursor instead)
27
+ return results
28
+
29
+ # for getting study topic from base to dropdown
30
+ # def get_dropdown_choices():
31
+ # existing_studies = DataProcess.get_existing_data(
32
+ # must_have_keys=["_id", "study_name"]
33
+ # )
34
+ # _ids = [st["_id"] for st in existing_studies]
35
+ # _descriptions = [st["study_name"] for st in existing_studies]
36
+ # global_variables.study_topic_id.append(_ids)
37
+ # global_variables.study_topic_id.append(_descriptions)
38
+ # return [desc for desc in _descriptions]
39
+
40
+ # fetch user_id information
41
+ def get_set_user_information(object_id, AI_interviewer):
42
+ existing_users = DatabaseIO.read_documents(
43
+ DatabaseIO(collection_name="Users"), {"_id": global_variables.User_id}
44
+ )
45
+ existing_users = list(existing_users)
46
+
47
+ User_info = [
48
+ {
49
+ "AI_interviewer": AI_interviewer,
50
+ "Human_Respondent": {
51
+ "participant_id": global_variables.User_id,
52
+ "email": existing_users[0]["email"],
53
+ "age": 22,
54
+ "gender": "Male",
55
+ "Profession": "Retail Worker",
56
+ },
57
+ "transcript": [],
58
+ }
59
+ ]
60
+
61
+ user = DatabaseIO.insert_document(
62
+ DatabaseIO(collection_name="Post_Get_Transmission"),
63
+ article=User_info[0],
64
+ embedded_table_validation="interviews.human_interviews.Human_Respondent.participant_id",
65
+ unique_field=global_variables.User_id,
66
+ unique_id=global_variables.study_id,
67
+ target_table_insertion=User_info,
68
+ embedded_table_insertion="interviews.human_interviews",
69
+ loop_times=1,
70
+ )
71
+ return user
72
+
73
+ # a function to insert transcript data from interviewer
74
+ def insert_conversation_transcripts(text, ai_response):
75
+ transcript_json = [
76
+ {
77
+ "turn": "Human",
78
+ "text": text,
79
+ },
80
+ {
81
+ "turn": "AI",
82
+ "text": ai_response,
83
+ },
84
+ ]
85
+ conversational_transcripts = DatabaseIO.insert_transcript_document(
86
+ DatabaseIO(collection_name="Post_Get_Transmission"),
87
+ article=transcript_json,
88
+ loop_times=2,
89
+ embedded_table_validation="interviews.human_interviews.Human_Respondent.participant_id",
90
+ unique_field=global_variables.User_id,
91
+ unique_id=global_variables.study_id,
92
+ target_table_insertion=transcript_json,
93
+ embedded_table_insertion="interviews.human_interviews.$.transcript",
94
+ )
95
+ return conversational_transcripts
96
+
97
+ # get transcripts of a specific study for a specific user
98
+ def get_transcripts_of_user():
99
+ existing_document = DatabaseIO.read_transcripts(
100
+ DatabaseIO(),
101
+ query={
102
+ "_id": global_variables.study_id,
103
+ "interviews.human_interviews.Human_Respondent.participant_id": global_variables.User_id,
104
+ },
105
+ )
106
+ existing_document = list(existing_document)
107
+ exisitng_transcripts = existing_document[0]["interviews"]["human_interviews"][
108
+ 0
109
+ ]["transcript"]
110
+ return exisitng_transcripts
111
+
112
+ # changing json format into list
113
+ def changing_json_list(self):
114
+ exisitng_transcripts = DataProcess.get_transcripts_of_user()
115
+ combined_messages = []
116
+ text_list = []
117
+
118
+ try:
119
+ for item in exisitng_transcripts:
120
+ if item["turn"] == "Human":
121
+ if text_list:
122
+ combined_messages.append(text_list)
123
+ text_list = [item["text"]]
124
+ elif item["turn"] == "AI":
125
+ text_list.append(item["text"])
126
+
127
+ if text_list:
128
+ combined_messages.append(text_list)
129
+ return combined_messages
130
+ except:
131
+ print("No interview conversation initiated")
132
+
133
+ # function to convert into memory storage format
134
+ def convert_to_memory_chain_context(m_interviewer):
135
+ transcripts = DataProcess.get_transcripts_of_user()
136
+ result = []
137
+ conversation = []
138
+
139
+ for index, entry in enumerate(transcripts):
140
+ turn = entry["turn"]
141
+ text = entry["text"]
142
+
143
+ if index == 0 or turn == "Human":
144
+ if conversation:
145
+ result.append(conversation)
146
+ conversation = []
147
+ conversation.append({turn: text})
148
+
149
+ if conversation:
150
+ result.append(conversation)
151
+
152
+ # loading the context by calling memory function
153
+ m_interviewer.set_memory_chain(scripts=result, m_interviewer=m_interviewer)
154
+
155
+ return "done"
156
+
157
+ # function to read user information
158
+ def read_useremail(user_email):
159
+ existing_user = DatabaseIO.read_documents(
160
+ DatabaseIO(collection_name="Users"),
161
+ query={"email": user_email},
162
+ )
163
+ existing_user = list(existing_user)
164
+ global_variables.User_id = existing_user[0]["_id"]
165
+ print(global_variables.User_id, type(global_variables.User_id))
166
+ return existing_user
167
+
168
+ # function to read study information
169
+ def read_study(study_code):
170
+ existing_study = DatabaseIO.read_documents(
171
+ DatabaseIO(),
172
+ query={"_id": study_code},
173
+ )
174
+ existing_study = list(existing_study)
175
+ global_variables.study_topic_id.append(existing_study[0]["study_name"])
176
+ print(global_variables.study_topic_id[0])
177
+ return existing_study
178
+
179
+ # function to validate both study and user
180
+ def validating_studycode_user(study_code, user_email):
181
+ if study_code == None or user_email == None:
182
+ return False
183
+ else:
184
+ existing_user = DataProcess.read_useremail(user_email)
185
+ existing_study = DataProcess.read_study(study_code)
186
+ if existing_study == [] or existing_user == []:
187
+ return False
188
+ else:
189
+ return True
interviewer_utils/global_variables.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ study_id = None
2
+ study_topic_id = []
3
+ User_id = None
4
+ Count = "a"
interviewer_utils/interview_llm_helper.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.schema import SystemMessage, HumanMessage
2
+ from langchain.chat_models import ChatOpenAI
3
+ import ast
4
+ import warnings
5
+ from pathlib import Path
6
+ import os, json
7
+ from ra_utils.synthetic_panel_datamodel import parse_synthetic_panel_output
8
+ from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
9
+ from langchain import LLMChain
10
+ from langchain.chains import ConversationChain
11
+ from langchain.schema import AIMessage, HumanMessage
12
+ from langchain.memory.chat_memory import ChatMessageHistory
13
+ from langchain.prompts import PromptTemplate
14
+
15
+
16
+ class InterviewLLMHandler:
17
+ def __init__(self,
18
+ llm_model_name: str = 'gpt-3.5-turbo-16k',
19
+ llm_temperature: float = 0.2,
20
+ expertise_message: str = "",
21
+ research_topic: str = "",
22
+ research_objectives: str = "",
23
+ research_questions: str = "",
24
+ interview_guidelines: str = "",
25
+ verbose: bool = False,
26
+ existing_conversation_history=None
27
+ ):
28
+
29
+ self.llm = ChatOpenAI(temperature=llm_temperature, model_name=llm_model_name)
30
+
31
+ project_dir = Path(os.environ['PROJECT_DIR'])
32
+ assert project_dir.exists()
33
+ with open(project_dir / 'study_prompts.json') as sp:
34
+ self.definition_objective_maps = json.load(sp)
35
+
36
+ if not len(expertise_message):
37
+ expertise_message = f"Assistant is a world renowned expert in conducting qualitative research."
38
+
39
+ self.expertise_message = expertise_message
40
+
41
+ self.setup_message = f"Assistant is conducting a qualitative interview, for which the details will " \
42
+ f"be given below. It is very important that assistant assume the role of an impartial" \
43
+ f"qualitative researcher. IF any questions from the user does not pertain to the study" \
44
+ f"assistant will move on the next question. Assistant will not ask leading questions " \
45
+ f"and will never respond in a toxic way. If any response is needed about large language" \
46
+ f"model capabilities , assistant will say, that is beyond the scope of this conversation" \
47
+ f"lets move on to the next question and continue the conversation."
48
+ self.study_specific_message = f"""The description and the details of the study are below. The actual details
49
+ of the study is below within triple backticks after the description of the details.
50
+ The topic of research is: ```{research_topic}```.
51
+ The RESEARCH OBJECTIVES i.e. The overarching goal of the study: ```{research_objectives}```.
52
+ The RESEARCH QUESTIONS i.e. A few questions the researcher wants answered are: ```{research_questions}```.
53
+ The INTERVIEW GUIDELINES i.e. a set of reference questions to get answers for are :```{interview_guidelines}```.
54
+ Here are some things to focus on:
55
+ Rapport Building: Begin by explaining the interview's purpose and ensuring confidentiality.
56
+ Encourage candidness with no right or wrong answers.
57
+ Guided Conversation: Use the as anchor points, but don't read verbatim.
58
+ Initiate with broad questions and narrow down as the conversation evolves.
59
+ Active Listening: Be attentive. Seek deeper understanding through probing and spontaneous follow-up queries.
60
+ Always align with the study's objectives.
61
+ Concluding: Summarize the main insights, allow for participant's added input, and thank them.
62
+ """
63
+ self.interview_system_template = self.expertise_message + " " + self.setup_message \
64
+ + self.study_specific_message + """
65
+ When responding, asking questions or followups, you should remember the current conversation where you
66
+ are the AI and the participant is the human.
67
+ Current conversation:
68
+ {history}
69
+ Human: {input}
70
+ AI Assistant:
71
+ """
72
+ self.interview_system_prompt = PromptTemplate(
73
+ input_variables=["history", "input"], template=self.interview_system_template
74
+ )
75
+ self.interview_memory = ConversationBufferWindowMemory(ai_prefix="AI Assistant")
76
+
77
+ if existing_conversation_history and len(existing_conversation_history):
78
+ self.set_memory_with_history(existing_conversation_history)
79
+ else:
80
+ self.interview_memory = ConversationBufferWindowMemory(ai_prefix="AI Assistant")
81
+
82
+ self.interview_chain = ConversationChain(
83
+ llm=self.llm,
84
+ verbose=verbose,
85
+ memory=self.interview_memory,
86
+ prompt=self.interview_system_prompt
87
+ )
88
+
89
+ def converse(self, message: str = ""):
90
+ return self.interview_chain.predict(input=message)
91
+
92
+ def set_memory_with_history(self, history):
93
+ """
94
+ Takes a history object with a list of conversation and resets the memory
95
+ :param history:
96
+ :return:
97
+ """
98
+ retrieved_messages = \
99
+ [AIMessage(content=entry['text']) if entry['turn'] == 'AI'
100
+ else HumanMessage(content=entry['text']) for entry in
101
+ history]
102
+
103
+ retrieved_chat_history = ChatMessageHistory(messages=retrieved_messages)
104
+ self.interview_memory = ConversationBufferWindowMemory(ai_prefix="AI Assistant",
105
+ chat_memory=retrieved_chat_history)
106
+
interviewer_utils/interviewer.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from langchain import LLMChain, PromptTemplate
4
+ from langchain.chat_models import ChatOpenAI
5
+ from interviewer_utils import interviewer_persona
6
+ from interviewer_utils.interview_llm_helper import InterviewLLMHandler
7
+
8
+
9
+ class Interviewer:
10
+ def __init__(
11
+ self,
12
+ llm_temperature: float = 0.4, # going for some amount of creativity
13
+ llm_model_name="gpt-3.5-turbo-16k",
14
+ persona: str = "Rachel",
15
+ research_topic: str = "",
16
+ research_objectives: str = "",
17
+ research_questions: str = "",
18
+ interview_guidelines: str = "",
19
+ existing_conversation_history=List[List[str]]
20
+
21
+ ):
22
+ interviewer_personas = interviewer_persona.AI_PERSONALITIES
23
+ self.interviewer_persona = interviewer_personas.get(persona, "Rachel")
24
+ self.chat_handler = InterviewLLMHandler(
25
+ llm_model_name=llm_model_name,
26
+ llm_temperature=llm_temperature,
27
+ research_topic=research_topic,
28
+ research_objectives=research_objectives,
29
+ research_questions=research_questions,
30
+ interview_guidelines=interview_guidelines,
31
+ existing_conversation_history=existing_conversation_history
32
+ )
33
+
34
+ def update_llm_prompt(self, study_subject):
35
+ print("After : ", study_subject)
36
+ # update prompt on dropdown selection
37
+ self.study_subject = study_subject
38
+ self.system_template = """ You are an expert in %s. You are a qualitative researcher.
39
+ You are conducting an interview in this topic %s. Please create rapport and ask me some questions
40
+ related to the topic. You should ask follow-up questions if necessary. Questions should be open ended.
41
+
42
+ {history}
43
+ {human_input}
44
+
45
+ AI: """ % (
46
+ self.study_subject,
47
+ self.study_subject,
48
+ )
49
+
50
+ self.system_prompt = PromptTemplate(
51
+ input_variables=["history", "human_input"], template=self.system_template
52
+ )
53
+ self.llm_chain = LLMChain(
54
+ llm=self.llm,
55
+ prompt=self.system_prompt,
56
+ verbose=False,
57
+ memory=self.memory,
58
+ )
59
+
60
+ # feeding stored scripts in memory so that AI can remember the previous conversation
61
+ def set_memory_chain(self, scripts, m_interviewer):
62
+ # not sure why this is happening here
63
+ # need to load the history parse it and then load as history object
64
+ # https://stackoverflow.com/questions/75965605/how-to-persist-langchain-conversation-memory-save-and-load
65
+ for i in scripts:
66
+ self.memory.save_context(i[0], i[1])
67
+ response = m_interviewer.llm_chain.predict(human_input="what was my name?")
68
+ print(response)
69
+ return response
utils/interviewer.py → interviewer_utils/interviewer_arc.py RENAMED
File without changes
{utils → interviewer_utils}/interviewer_persona.py RENAMED
File without changes
interviewer_utils/voice_controller.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import os, requests
4
+ import dotenv
5
+ from utils import eleven_voice_cache
6
+
7
+
8
+ dotenv.load_dotenv()
9
+
10
+ eleven_labs_API = os.getenv("ELEVENLABS_API")
11
+
12
+ messages = []
13
+
14
+
15
+ def speech_to_text(audio):
16
+ """
17
+ takes an audio file and returns a transcription
18
+ Args:
19
+ audio:
20
+ Return
21
+ """
22
+
23
+ # reading audio file
24
+ audio_file = open(audio, "rb")
25
+
26
+ # converting audio into text through "whisper"
27
+ transcript = openai.Audio.transcribe("whisper-1", audio_file)
28
+
29
+ # append the messages
30
+ messages.append({"role": "user", "content": transcript["text"]})
31
+
32
+ return transcript["text"]
33
+
34
+
35
+ # function to convert speech to text
36
+ def text_to_speech(audio_transcription, radio_value, audio_voice_persona):
37
+ """
38
+ take converted transcription from the function speech to text,
39
+ checks the radio_value to control the voice response
40
+ select the voice over persona
41
+ use elvenlabs to convert to audio
42
+ send the response to"""
43
+ # voice over selection id
44
+ for voice in eleven_voice_cache.VOICES_CACHE:
45
+ if voice.name == audio_voice_persona:
46
+ voice_over_id = voice.voice_id
47
+
48
+ # using chatGpt response system
49
+ # response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
50
+ # r = Interviewer(llm_model_name="gpt-3.5-turbo-16k")
51
+ # response = r.llm_chain.predict(human_input=audio_transcription)
52
+ response = audio_transcription
53
+ print(response)
54
+ # storing system message
55
+ # system_message = response["choices"][0]["message"]["content"]
56
+
57
+ # append the message
58
+ messages.append({"role": "assistant", "content": response})
59
+
60
+ if radio_value == "On":
61
+ # text to speech using elevenlabs
62
+ url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_over_id}/stream"
63
+ data = {
64
+ "text": response,
65
+ "voice_settings": {"stability": 0.1, "similarity_boost": 0.8},
66
+ }
67
+
68
+ elevenlabs_response = requests.post(
69
+ url, headers={"xi-api-key": eleven_labs_API}, json=data, stream=True
70
+ )
71
+
72
+ output_filename = "buddy.mp3"
73
+ with open(output_filename, "wb") as output:
74
+ output.write(elevenlabs_response.content)
75
+
76
+ return output_filename
77
+ else:
78
+ return None
notebooks/modify_schema.ipynb ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2023-08-27T23:03:35.706464200Z",
10
+ "start_time": "2023-08-27T23:03:35.692464300Z"
11
+ }
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "import os\n",
16
+ "from utils.database_helper import DatabaseIO\n",
17
+ "from pymongo import MongoClient\n",
18
+ "from bson import ObjectId"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 9,
24
+ "outputs": [
25
+ {
26
+ "name": "stdout",
27
+ "output_type": "stream",
28
+ "text": [
29
+ "Updates completed!\n"
30
+ ]
31
+ }
32
+ ],
33
+ "source": [
34
+ "with DatabaseIO(collection_name=\"Studies_v02\") as db_io:\n",
35
+ " for document in db_io.collection.find():\n",
36
+ " new_human_interviews = {}\n",
37
+ " new_synthetic_interviews = {}\n",
38
+ "\n",
39
+ " # Process the 'human_interviews' list\n",
40
+ " for interview in document['interviews'].get('human_interviews', []):\n",
41
+ " participant_id = str(interview['Human_Respondent']['participant_id'])\n",
42
+ " new_human_interviews[participant_id] = interview\n",
43
+ "\n",
44
+ " # Process the 'synthetic_interviews' list\n",
45
+ " for interview in document['interviews'].get('synthetic_interviews', []):\n",
46
+ " new_id = str(ObjectId()) # Generate a new unique ObjectId\n",
47
+ " new_synthetic_interviews[new_id] = interview\n",
48
+ "\n",
49
+ " # Update the fields with the new structures\n",
50
+ " updates = {}\n",
51
+ " if new_human_interviews:\n",
52
+ " updates['interviews.human_interviews'] = new_human_interviews\n",
53
+ " if new_synthetic_interviews:\n",
54
+ " updates['interviews.synthetic_interviews'] = new_synthetic_interviews\n",
55
+ "\n",
56
+ " if updates:\n",
57
+ " db_io.collection.update_one(\n",
58
+ " {'_id': document['_id']},\n",
59
+ " {'$set': updates}\n",
60
+ " )\n",
61
+ "\n",
62
+ "print(\"Updates completed!\")\n",
63
+ "\n"
64
+ ],
65
+ "metadata": {
66
+ "collapsed": false,
67
+ "ExecuteTime": {
68
+ "end_time": "2023-08-28T00:06:10.976700800Z",
69
+ "start_time": "2023-08-28T00:06:10.425108Z"
70
+ }
71
+ }
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "outputs": [],
77
+ "source": [],
78
+ "metadata": {
79
+ "collapsed": false
80
+ }
81
+ }
82
+ ],
83
+ "metadata": {
84
+ "kernelspec": {
85
+ "display_name": "Python 3",
86
+ "language": "python",
87
+ "name": "python3"
88
+ },
89
+ "language_info": {
90
+ "codemirror_mode": {
91
+ "name": "ipython",
92
+ "version": 2
93
+ },
94
+ "file_extension": ".py",
95
+ "mimetype": "text/x-python",
96
+ "name": "python",
97
+ "nbconvert_exporter": "python",
98
+ "pygments_lexer": "ipython2",
99
+ "version": "2.7.6"
100
+ }
101
+ },
102
+ "nbformat": 4,
103
+ "nbformat_minor": 0
104
+ }
{utils → ra_utils}/coding_and_analysis.py RENAMED
File without changes
{utils → ra_utils}/synthetic_panel_datamodel.py RENAMED
File without changes
utils/app_common.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from bson import ObjectId
3
+ from dotenv import load_dotenv
4
+ import os
5
+ from interviewer_utils.interviewer import Interviewer
6
+ from utils.study import Study
7
+ from utils.study_repository import StudyRepository
8
+ from typing import Union, List
9
+ from utils.database_helper import DatabaseIO
10
+ import gradio as gr
11
+
12
+ load_dotenv()
13
+ INTERVIEW_USERS_COLLECTION = os.environ['MONGO_COLLECTION_USERS']
14
+ RESEARCHER_USERS_COLLECTION = os.environ['MONGO_COLLECTION_USERS']
15
+ STUDIES_COLLECTION = os.environ['MONGO_COLLECTION_STUDIES']
16
+
17
+
18
+ def is_valid_email(email):
19
+ # Simple regex pattern for email validation
20
+ pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
21
+ return bool(re.match(pattern, email))
22
+
23
+
24
+ def set_active_study(this_study_repository,
25
+ study_id):
26
+ """
27
+ Using a repository object creates a study object for the given id
28
+ returns the study object and a status message
29
+ """
30
+
31
+ study_id = study_id.split(':')[0]
32
+ this_study = this_study_repository.get_studies(study_id=study_id)
33
+
34
+ if not this_study:
35
+ return {}
36
+
37
+ if type(this_study) == list and len(this_study):
38
+ this_study = this_study[0]
39
+
40
+ # initialize a new study with this study in memory!
41
+ try:
42
+ # study validation is here! if db schema is older, will be notified during dev
43
+ active_study_obj = Study(**this_study)
44
+ except Exception as e:
45
+ return None, f"Study not found due to {e}"
46
+
47
+ return active_study_obj, ""
48
+
49
+
50
+ def configure_interviewer(this_interviewer: Interviewer,
51
+ this_study_obj: Study,
52
+ history: List[List[str]]):
53
+ """
54
+ takes an existing interviewer object and initializes it with details from the provided study object
55
+
56
+ :param history:
57
+ :param this_interviewer:
58
+ :param this_study_obj:
59
+ :return: interviewer object with the right initializations
60
+ """
61
+ this_status = "Successfully configured interviewer object"
62
+
63
+ if this_study_obj is None or type(this_study_obj) != Study:
64
+ this_status = "The research study you are participating in does not" \
65
+ " exist or is not ready, please contact the researcher"
66
+
67
+ # if this study has
68
+
69
+ return this_interviewer, this_status
70
+
71
+
72
+ def lookup_userid_by_email(email):
73
+ """
74
+ Looks up email address and finds the right user and returns the user_id
75
+ :param email:
76
+ :return:
77
+ """
78
+ if email is None or not is_valid_email(email):
79
+ return None, "Try with a valid email please!"
80
+
81
+ with DatabaseIO(collection_name=INTERVIEW_USERS_COLLECTION) as db_io:
82
+ user = db_io.collection.find_one({"email": email}, {"_id": 1})
83
+ if user:
84
+ return str(user["_id"]), "Found correct user"
85
+ else:
86
+ return None, "Error finding user"
87
+
88
+
89
+ def process_transcript_to_chatbot_history(this_transcript):
90
+ messages_in_chatbot_format = []
91
+ for i in range(0, len(this_transcript), 2):
92
+ human_turn = this_transcript[i]['text']
93
+ ai_turn = this_transcript[i + 1]['text']
94
+ messages_in_chatbot_format.append([human_turn, ai_turn])
95
+ return messages_in_chatbot_format
96
+
97
+
98
+ def set_user_and_study(
99
+ this_study_repository,
100
+ this_study_code,
101
+ this_user_email,
102
+
103
+ ):
104
+ this_user_id, user_exists_message = lookup_userid_by_email(this_user_email)
105
+
106
+ interaction_disabled = gr.update(interactive=False)
107
+ interaction_enabled = gr.update(interactive=True)
108
+
109
+ if not this_user_id or not len(this_user_id):
110
+ this_return_message = user_exists_message
111
+ return None, "", this_return_message, *([interaction_disabled] * 5)
112
+
113
+ this_active_study, study_exists_message = set_active_study(this_study_repository, study_id=str(this_study_code))
114
+ if not this_active_study:
115
+ this_return_message = study_exists_message
116
+ return None, this_user_id, this_return_message, *([interaction_disabled] * 5)
117
+ this_return_message = user_exists_message + " " + study_exists_message
118
+
119
+ return this_active_study, this_user_id, this_return_message, *([interaction_enabled] * 5)
120
+
121
+
122
+ def setup_conversation_context(this_active_study, this_study_repository, this_user_id):
123
+ """
124
+ Populates the interview object
125
+ :param this_active_study:
126
+ :param this_user_id:
127
+ :return:
128
+ """
129
+ this_transcript = load_previous_transcript(this_active_study,
130
+ this_user_id,
131
+ this_study_repository)
132
+ this_user_id_str = str(this_user_id)
133
+ this_interviewer = Interviewer(
134
+ research_topic=this_active_study.study_topic,
135
+ research_objectives=this_active_study.study_objective,
136
+ research_questions=this_active_study.research_questions,
137
+ interview_guidelines=this_active_study.interview_guidelines,
138
+ existing_conversation_history=this_transcript
139
+ )
140
+
141
+ if not this_transcript or not len(this_transcript):
142
+ # no previous conversation
143
+ ai_response = this_interviewer.chat_handler.converse('Hello')
144
+ this_constructed_transcript = [{'turn': 'Human', 'text': 'Hello'}, {'turn': 'AI', 'text': ai_response}]
145
+ # transcript did not exist, so must create the user info in database
146
+
147
+ this_view_chat_history = process_transcript_to_chatbot_history(this_constructed_transcript)
148
+ #
149
+ if 'human_interviews' not in this_active_study.interviews or not this_active_study.interviews['human_interviews']:
150
+ this_active_study.interviews['human_interviews'] = {}
151
+ this_previous_interview = this_active_study.interviews['human_interviews'].get(this_user_id)
152
+ # If not found, initialize an empty one
153
+ if not this_previous_interview:
154
+ this_previous_interview = {
155
+ "AI_interviewer": "Sophia (AI Researcher)",
156
+ "Human_Respondent": {
157
+ "participant_id": this_user_id_str
158
+ },
159
+ "transcript": []
160
+ }
161
+ this_active_study.interviews['human_interviews'][this_user_id_str] = this_previous_interview
162
+ this_active_study.interviews['human_interviews'][str(this_user_id)]['transcript'] = this_constructed_transcript
163
+ this_study_repository.update_study(study_id=this_active_study._id, updated_data=this_active_study.to_dict())
164
+ this_message = "First time talking to you ... "
165
+ else:
166
+ this_view_chat_history = process_transcript_to_chatbot_history(this_transcript)
167
+ this_message = "Welcome back, lets start where we left off!"
168
+
169
+ return this_view_chat_history, this_interviewer, this_message
170
+
171
+
172
+ def load_previous_transcript(this_study: Study,
173
+ this_user_id: Union[ObjectId | str],
174
+ this_study_repository: StudyRepository):
175
+ """
176
+ loads existing interview
177
+ :param this_study:
178
+ :param this_user_id:
179
+ :param this_study_repository
180
+ :return:
181
+ """
182
+ this_user_id_str = str(this_user_id)
183
+
184
+ # Ensure 'human_interviews' exists and is initialized properly
185
+
186
+ try:
187
+ # Try to get the interview of the participant
188
+ human_interviews = this_study.interviews['human_interviews']
189
+ this_previous_interview = human_interviews.get(this_user_id_str, None)
190
+ return this_previous_interview['transcript']
191
+
192
+ except (AttributeError, KeyError, Exception) as ae:
193
+ return []
194
+
195
+
196
+ if __name__ == "__main__":
197
+ # unit tests
198
+ study_repository = StudyRepository(collection_name_studies=STUDIES_COLLECTION)
199
+ sample_study_id = "64dc833a12e518bb5d4ea2b4"
200
+ sample_email = "paudyalprajwal@qualz.net"
201
+ interviewer = Interviewer(existing_conversation_history=['a', 'b'])
utils/llm_helper.py CHANGED
@@ -1,13 +1,12 @@
 
1
  from langchain.chat_models import ChatOpenAI
2
- from langchain.prompts.chat import ChatMessage
3
- from langchain.schema import SystemMessage, HumanMessage, AIMessage
4
- from langchain.chat_models import ChatOpenAI
5
- from langchain.prompts.chat import ChatMessage
6
  import ast
7
  import warnings
8
  from pathlib import Path
9
  import os, json
10
- from utils.synthetic_panel_datamodel import SyntheticPanelOutput, parse_synthetic_panel_output
 
 
11
 
12
  class LLMChatHandler:
13
  def __init__(self,
@@ -16,6 +15,7 @@ class LLMChatHandler:
16
  expertise_message: str = ""):
17
  self.llm = ChatOpenAI(temperature=temperature, model_name=llm_model_name)
18
 
 
19
  project_dir = Path(os.environ['PROJECT_DIR'])
20
  assert project_dir.exists()
21
  with open(project_dir / 'study_prompts.json') as sp:
@@ -24,13 +24,16 @@ class LLMChatHandler:
24
  if not len(expertise_message):
25
  expertise_message = f"Assistant is a world renowned expert in qualitative research." \
26
  f" Assistant is well known in your field worldwide and has published several books " \
27
- f"and whitepapers. Assistant is also an expert qualitative research analyst " \
28
  f"who is thorough and methodical."
29
  self.expertise_message = expertise_message
30
 
 
 
31
  def predict(self, messages):
32
  return self.llm.predict_messages(messages)
33
 
 
34
  def guess_research_questions(self,
35
  research_name: str,
36
  research_topic: str,
 
1
+ from langchain.schema import SystemMessage, HumanMessage
2
  from langchain.chat_models import ChatOpenAI
 
 
 
 
3
  import ast
4
  import warnings
5
  from pathlib import Path
6
  import os, json
7
+ from ra_utils.synthetic_panel_datamodel import parse_synthetic_panel_output
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain import LLMChain
10
 
11
  class LLMChatHandler:
12
  def __init__(self,
 
15
  expertise_message: str = ""):
16
  self.llm = ChatOpenAI(temperature=temperature, model_name=llm_model_name)
17
 
18
+
19
  project_dir = Path(os.environ['PROJECT_DIR'])
20
  assert project_dir.exists()
21
  with open(project_dir / 'study_prompts.json') as sp:
 
24
  if not len(expertise_message):
25
  expertise_message = f"Assistant is a world renowned expert in qualitative research." \
26
  f" Assistant is well known in your field worldwide and has published several books " \
27
+ f"and white-papers. Assistant is also an expert qualitative research analyst " \
28
  f"who is thorough and methodical."
29
  self.expertise_message = expertise_message
30
 
31
+
32
+
33
  def predict(self, messages):
34
  return self.llm.predict_messages(messages)
35
 
36
+
37
  def guess_research_questions(self,
38
  research_name: str,
39
  research_topic: str,
utils/study.py CHANGED
@@ -45,7 +45,8 @@ class Study:
45
  self.additional_fields = kwargs
46
  self.chat_handler = LLMChatHandler()
47
 
48
- self.synthetic_panel_pending_interviews = synthetic_panel_pending_interviews if synthetic_panel_pending_interviews else []
 
49
  # Check if synthetic_interviews exists and is not empty
50
  # if 'synthetic_interviews' in self.interviews and len(self.interviews.get('synthetic_interviews', "")):
51
  # # Extract AI_Respondent from each item in synthetic_interviews and append to self.synthetic_panelists
 
45
  self.additional_fields = kwargs
46
  self.chat_handler = LLMChatHandler()
47
 
48
+ self.synthetic_panel_pending_interviews = synthetic_panel_pending_interviews if\
49
+ synthetic_panel_pending_interviews else []
50
  # Check if synthetic_interviews exists and is not empty
51
  # if 'synthetic_interviews' in self.interviews and len(self.interviews.get('synthetic_interviews', "")):
52
  # # Extract AI_Respondent from each item in synthetic_interviews and append to self.synthetic_panelists
utils/study_repository.py CHANGED
@@ -1,19 +1,14 @@
1
  import dotenv
2
 
3
- import os, sys
4
 
5
- from typing import Dict, Optional, List, Any, Tuple, Mapping
6
- import pymongo
7
- from pymongo.errors import PyMongoError
8
- from pymongo.server_api import ServerApi
9
  from typing import Dict, Any
10
  from utils.study import Study
11
  from utils.database_helper import DatabaseIO
12
  from bson import ObjectId
13
  from typing import Union
14
- from utils.synthetic_panel_datamodel import SyntheticPanelOutput
15
 
16
- import ast
17
  from utils.llm_helper import LLMChatHandler
18
 
19
 
@@ -65,6 +60,44 @@ class StudyRepository:
65
  study_id = db_io.collection.insert_one(study_data).inserted_id
66
  return study_id
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def get_studies(self,
69
  study_id: Optional[str] = None,
70
  user_id: Optional[str] = None,
@@ -79,13 +112,12 @@ class StudyRepository:
79
  :return: The studies or a specific study data, or a tuple with a message if an error occurs.
80
  """
81
 
82
- # If study_id is provided, return just that study.
83
  if study_id:
84
  study_id = ObjectId(study_id)
85
  with DatabaseIO(db_name=self.db_name, collection_name=self.collection_name_studies) as db_io:
86
  study = db_io.collection.find_one({"_id": study_id})
87
  if not study:
88
- return {}, "Study not found."
89
  return study
90
 
91
  # If user_id is provided and filter is desired, return studies the user is authorized to view.
@@ -154,6 +186,7 @@ class StudyRepository:
154
 
155
  return return_msg
156
 
 
157
  def delete_study(self, study_id: str):
158
  """
159
  Delete a study by its ID.
 
1
  import dotenv
2
 
3
+ import os
4
 
5
+ from typing import Optional, List, Tuple
 
 
 
6
  from typing import Dict, Any
7
  from utils.study import Study
8
  from utils.database_helper import DatabaseIO
9
  from bson import ObjectId
10
  from typing import Union
 
11
 
 
12
  from utils.llm_helper import LLMChatHandler
13
 
14
 
 
60
  study_id = db_io.collection.insert_one(study_data).inserted_id
61
  return study_id
62
 
63
+ def append_to_transcript(self, study_id, user_id, human_text, ai_response):
64
+ """
65
+ Append the recent conversation to the transcript in the database.
66
+
67
+ :param db_url: MongoDB database URL.
68
+ :param db_name: Name of the MongoDB database.
69
+ :param collection_name: Name of the collection storing the transcripts.
70
+ :param participant_id: ID of the participant for whom the transcript is being updated.
71
+ :param human_text: The text/message from the human.
72
+ :param ai_response: The response from the AI.
73
+ """
74
+
75
+ # Initialize the MongoDB client and select the database and collection
76
+ user_id = str(user_id)
77
+ # Define the conversation entries to be appended
78
+ new_entries = [
79
+ {
80
+ "turn": "Human",
81
+ "text": human_text,
82
+ },
83
+ {
84
+ "turn": "AI",
85
+ "text": ai_response,
86
+ }
87
+ ]
88
+
89
+ # Append new entries to the transcript
90
+ with DatabaseIO(collection_name=self.collection_name_studies) as db_io:
91
+ query = {"_id": ObjectId(study_id)}
92
+
93
+ # Append new entries to the transcript using a dynamic field update
94
+ update_field = f"interviews.human_interviews.{user_id}.transcript"
95
+ update_action = {"$push": {update_field: {"$each": new_entries}}}
96
+
97
+ # Execute the update
98
+ db_io.collection.update_one(query, update_action)
99
+
100
+
101
  def get_studies(self,
102
  study_id: Optional[str] = None,
103
  user_id: Optional[str] = None,
 
112
  :return: The studies or a specific study data, or a tuple with a message if an error occurs.
113
  """
114
 
 
115
  if study_id:
116
  study_id = ObjectId(study_id)
117
  with DatabaseIO(db_name=self.db_name, collection_name=self.collection_name_studies) as db_io:
118
  study = db_io.collection.find_one({"_id": study_id})
119
  if not study:
120
+ return {}
121
  return study
122
 
123
  # If user_id is provided and filter is desired, return studies the user is authorized to view.
 
186
 
187
  return return_msg
188
 
189
+
190
  def delete_study(self, study_id: str):
191
  """
192
  Delete a study by its ID.