Spaces:
Runtime error
Runtime error
Prajwal Paudyal commited on
Commit ·
2cfdc8a
1
Parent(s): 322cd61
merged researcher and interviewer functionalities
Browse files- Interviewer.py +0 -105
- README_interviewer.md +1 -1
- README_researcher.md +1 -1
- app_interviewer.py +225 -0
- Researcher.py → app_researcher.py +8 -17
- buddy.mp3 +0 -0
- dev/voiceover.ipynb +0 -34
- interviewer_utils/data_process.py +189 -0
- interviewer_utils/global_variables.py +4 -0
- interviewer_utils/interview_llm_helper.py +106 -0
- interviewer_utils/interviewer.py +69 -0
- utils/interviewer.py → interviewer_utils/interviewer_arc.py +0 -0
- {utils → interviewer_utils}/interviewer_persona.py +0 -0
- interviewer_utils/voice_controller.py +78 -0
- notebooks/modify_schema.ipynb +104 -0
- {utils → ra_utils}/coding_and_analysis.py +0 -0
- {utils → ra_utils}/synthetic_panel_datamodel.py +0 -0
- utils/app_common.py +201 -0
- utils/llm_helper.py +9 -6
- utils/study.py +2 -1
- utils/study_repository.py +42 -9
Interviewer.py
DELETED
|
@@ -1,105 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import time
|
| 3 |
-
import openai
|
| 4 |
-
from dotenv import find_dotenv
|
| 5 |
-
from dev.study_processor import StudyProcessor
|
| 6 |
-
import json
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
-
import os
|
| 9 |
-
from pathlib import Path
|
| 10 |
-
from utils.interviewer import Interviewer
|
| 11 |
-
import whisper
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
load_dotenv()
|
| 15 |
-
whisper_model = whisper.load_model("base")
|
| 16 |
-
project_dir = Path(os.environ['PROJECT_DIR'])
|
| 17 |
-
assert project_dir.exists()
|
| 18 |
-
PARTICIPANT_CODES = ["letmetrythis"]
|
| 19 |
-
with open(project_dir / 'study_prompts.json') as sp:
|
| 20 |
-
DEFINITION_OBJECTIVE_MAP = json.load(sp)
|
| 21 |
-
|
| 22 |
-
# todo refactor: move this to study processor
|
| 23 |
-
def_objective_keys = DEFINITION_OBJECTIVE_MAP.keys() # iterate it in whatever order
|
| 24 |
-
DEFINITION_OBJECTIVE_CHOICES_DICT = {k: DEFINITION_OBJECTIVE_MAP[k]['descriptive_name'] for k in def_objective_keys}
|
| 25 |
-
DEFINITION_CHOICES_OBJECTIVE_DICT = {v: k for k, v in DEFINITION_OBJECTIVE_CHOICES_DICT.items()}
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def add_text(history, text):
|
| 29 |
-
"""
|
| 30 |
-
Takes in the history and text and updates the history and sets the text input as disabled so it gives
|
| 31 |
-
the AI time to think
|
| 32 |
-
:param history:
|
| 33 |
-
:param text:
|
| 34 |
-
:return:
|
| 35 |
-
"""
|
| 36 |
-
history = history + [(text, None)]
|
| 37 |
-
return history, gr.update(value="")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
def bot(history, m_interviewer, to_speak_out=False):
|
| 43 |
-
m_recent_human_msg = history[-1][0]
|
| 44 |
-
bot_message = m_interviewer.llm_chain.predict(human_input=m_recent_human_msg)
|
| 45 |
-
history[-1][1] = ""
|
| 46 |
-
if not to_speak_out:
|
| 47 |
-
for character in bot_message:
|
| 48 |
-
history[-1][1] += character
|
| 49 |
-
time.sleep(0.005)
|
| 50 |
-
yield history, m_interviewer, None
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def transcribe_audio(m_audio):
|
| 56 |
-
model_src="openai"
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
if model_src == "openai":
|
| 60 |
-
m_audio_f = open(m_audio, 'rb')
|
| 61 |
-
transcript = openai.Audio.transcribe("whisper-1", m_audio_f, language="en")
|
| 62 |
-
else:
|
| 63 |
-
global whisper_model
|
| 64 |
-
transcript = whisper_model.transcribe(m_audio)
|
| 65 |
-
text = transcript.get("text", "")
|
| 66 |
-
return text
|
| 67 |
-
|
| 68 |
-
with gr.Blocks() as demo:
|
| 69 |
-
gr.Markdown(f"Welcome to QualZ. I am your friendly A.I. assistant for research. "
|
| 70 |
-
f"Go through the various tabs to select a topic and start researching! ")
|
| 71 |
-
# create a study processor class and keep in memory
|
| 72 |
-
# accordig to gradio documentation this is specific to a user
|
| 73 |
-
study_processor = gr.State(value=StudyProcessor()) # todo this should refresh each time a new 'study' is selected
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
gr.Markdown("Welcome to the study titled - Needs and routines for hair-care grooming. "
|
| 77 |
-
"My name is Carlie and I am an AI agent who will be conducting this interview. "
|
| 78 |
-
"If relevant feel free to use audio to talk to me or send me pictures that are relevant. "
|
| 79 |
-
"Let me know when you are ready to begin by sending saying something below"
|
| 80 |
-
"If you need to step away or close this sessions, simply click your unique link again.")
|
| 81 |
-
|
| 82 |
-
interviewer = gr.State(value=Interviewer())
|
| 83 |
-
chatbot = gr.Chatbot(label="Ideation Session", elem_id="chatbot", height=750)
|
| 84 |
-
audio_in = gr.Audio(source="microphone", label="Speak",
|
| 85 |
-
type="filepath")
|
| 86 |
-
txt = gr.Textbox(
|
| 87 |
-
show_label=False,
|
| 88 |
-
placeholder="Enter text and press enter, or upload an image",
|
| 89 |
-
container=False)
|
| 90 |
-
btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
txt_msg = txt.submit(add_text, [chatbot, txt],
|
| 97 |
-
[chatbot, txt], queue=False).then(bot, [chatbot, interviewer], [chatbot, interviewer])
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
if __name__ == "__main__":
|
| 103 |
-
_ = load_dotenv(find_dotenv())
|
| 104 |
-
openai.api_key = os.getenv('OPENAI_API_KEY')
|
| 105 |
-
demo.queue(concurrency_count=10).launch(server_port=8850, share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README_interviewer.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
sdk: gradio
|
| 3 |
emoji: 📚
|
| 4 |
-
app_file:
|
| 5 |
---
|
|
|
|
| 1 |
---
|
| 2 |
sdk: gradio
|
| 3 |
emoji: 📚
|
| 4 |
+
app_file: app_interviewer.py
|
| 5 |
---
|
README_researcher.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
sdk: gradio
|
| 3 |
emoji: 📚
|
| 4 |
-
app_file:
|
| 5 |
---
|
|
|
|
| 1 |
---
|
| 2 |
sdk: gradio
|
| 3 |
emoji: 📚
|
| 4 |
+
app_file: app_researcher.py
|
| 5 |
---
|
app_interviewer.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import time
|
| 4 |
+
import openai
|
| 5 |
+
from utils.app_common import *
|
| 6 |
+
from dotenv import load_dotenv, find_dotenv
|
| 7 |
+
from interviewer_utils.interviewer import Interviewer
|
| 8 |
+
from interviewer_utils import voice_controller as vc
|
| 9 |
+
from utils import eleven_voice_cache
|
| 10 |
+
import interviewer_utils.global_variables as global_variables
|
| 11 |
+
from bson import ObjectId
|
| 12 |
+
|
| 13 |
+
from utils.study_repository import StudyRepository
|
| 14 |
+
from utils.study import Study
|
| 15 |
+
|
| 16 |
+
from utils.database_helper import DatabaseIO
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def add_text(history, text):
|
| 20 |
+
"""
|
| 21 |
+
Takes in the history and text and updates the history and sets the text input as disabled so it gives
|
| 22 |
+
the AI time to think
|
| 23 |
+
:param history:
|
| 24 |
+
:param text:
|
| 25 |
+
:return:
|
| 26 |
+
"""
|
| 27 |
+
history = history + [[text, None]]
|
| 28 |
+
return history, gr.update(value=""), history[-1][-2]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# chat_bot reply
|
| 32 |
+
def bot(this_active_study, this_user_id, history, m_interviewer, this_study_repository):
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
:param this_active_study:
|
| 36 |
+
:param this_user_id:
|
| 37 |
+
:param history:
|
| 38 |
+
:param m_interviewer:
|
| 39 |
+
:param this_study_repository:
|
| 40 |
+
:return:
|
| 41 |
+
"""
|
| 42 |
+
this_study_id = this_active_study._id
|
| 43 |
+
this_human_message = history[-1][0]
|
| 44 |
+
this_bot_message = m_interviewer.chat_handler.converse(this_human_message)
|
| 45 |
+
history[-1][1] = this_bot_message
|
| 46 |
+
this_study_repository.append_to_transcript(study_id=this_study_id,
|
| 47 |
+
user_id=this_user_id,
|
| 48 |
+
human_text=this_human_message,
|
| 49 |
+
ai_response=this_bot_message)
|
| 50 |
+
|
| 51 |
+
return history, m_interviewer, history[-1][1]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def on_select(evt: gr.SelectData):
|
| 55 |
+
return evt.value
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# function to choose subject from dropdown
|
| 59 |
+
def change_study_details(study_id, m_interviewer):
|
| 60 |
+
"""
|
| 61 |
+
|
| 62 |
+
Args:
|
| 63 |
+
study_id:
|
| 64 |
+
m_interviewer:
|
| 65 |
+
|
| 66 |
+
Returns:
|
| 67 |
+
|
| 68 |
+
"""
|
| 69 |
+
m_study_dropdown_value = global_variables.study_topic_id[0]
|
| 70 |
+
global_variables.study_id = ObjectId(study_id)
|
| 71 |
+
print("Hi", m_study_dropdown_value)
|
| 72 |
+
m_interviewer.update_llm_prompt(study_subject=m_study_dropdown_value)
|
| 73 |
+
return m_interviewer
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def enable_chat():
|
| 77 |
+
return
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
with gr.Blocks() as demo:
|
| 81 |
+
gr.Markdown(
|
| 82 |
+
f"Welcome to your interview session. To Begin, enter your registered email address"
|
| 83 |
+
f" and the code the study facilitator provided. "
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
gr.Markdown(
|
| 87 |
+
"If you need to step away or close this sessions, simply come back here "
|
| 88 |
+
"and enter the details again to continue"
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
# initializing the Interviewer object and creating a state variable for the session
|
| 92 |
+
|
| 93 |
+
view_study_repository = gr.State(value=StudyRepository(collection_name_studies=STUDIES_COLLECTION))
|
| 94 |
+
# existing_studies_dropdown_choices = get_dropdown_choices()
|
| 95 |
+
view_active_study = gr.State(value=None)
|
| 96 |
+
# field to hold the user_id, will refactor this for a user object with a factory pattern todo
|
| 97 |
+
view_user_id = gr.Text(value="", interactive=False, visible=False)
|
| 98 |
+
|
| 99 |
+
view_interviewer = gr.State(value=None) # will set the interviewer along with details
|
| 100 |
+
|
| 101 |
+
# initializing transcripts for rendering in 2d list form
|
| 102 |
+
rendering_transcript = gr.State()
|
| 103 |
+
|
| 104 |
+
# subject id and user email validation form field
|
| 105 |
+
status = gr.Textbox(interactive=False, label="System messages will appear here")
|
| 106 |
+
|
| 107 |
+
with gr.Row():
|
| 108 |
+
user_email = gr.Textbox(label="Enter email", min_width=50)
|
| 109 |
+
study_code = gr.Textbox(label="Enter Code", min_width=50)
|
| 110 |
+
|
| 111 |
+
study_and_user_select_btn = gr.Button(value="Submit", min_width=50)
|
| 112 |
+
|
| 113 |
+
# interface for the chatbot
|
| 114 |
+
chatbot = gr.Chatbot(label="Ideation Session", elem_id="chatbot", height=750)
|
| 115 |
+
|
| 116 |
+
txt = gr.Textbox(
|
| 117 |
+
show_label=False,
|
| 118 |
+
placeholder="Enter text and press enter, or upload an image",
|
| 119 |
+
container=False,
|
| 120 |
+
interactive=False, # when button is clicke it should be true
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
with gr.Row():
|
| 124 |
+
# integrating the feature upload document
|
| 125 |
+
upload_files_btn = gr.UploadButton("📁", file_types=["image", "video", "audio"], interactive=False)
|
| 126 |
+
|
| 127 |
+
# adjusting the row columns
|
| 128 |
+
with gr.Row():
|
| 129 |
+
# the audio features
|
| 130 |
+
audio = gr.Audio(source="microphone", type="filepath", label="Audio input", interactive=False)
|
| 131 |
+
|
| 132 |
+
# radio indicator to control the AI voice response
|
| 133 |
+
voice_response_radio = gr.Radio(
|
| 134 |
+
["On", "Off"],
|
| 135 |
+
label="Voice Setting",
|
| 136 |
+
interactive=False,
|
| 137 |
+
value="Off",
|
| 138 |
+
info="Turn on or off voice system",
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
voice_persona_dropdown = gr.Dropdown(
|
| 142 |
+
choices=[
|
| 143 |
+
eleven_voice_cache.VOICES_CACHE[i].name
|
| 144 |
+
for i in range(len(eleven_voice_cache.VOICES_CACHE))
|
| 145 |
+
],
|
| 146 |
+
label="choose the voice over",
|
| 147 |
+
interactive=False,
|
| 148 |
+
value="Rachel",
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
study_and_user_select_btn.click(set_user_and_study,
|
| 152 |
+
inputs=[view_study_repository,
|
| 153 |
+
study_code,
|
| 154 |
+
user_email,
|
| 155 |
+
],
|
| 156 |
+
outputs=[view_active_study, view_user_id, status,
|
| 157 |
+
txt, upload_files_btn,
|
| 158 |
+
audio, voice_response_radio, voice_persona_dropdown],
|
| 159 |
+
queue=False).then(
|
| 160 |
+
setup_conversation_context,
|
| 161 |
+
inputs=[
|
| 162 |
+
view_active_study,
|
| 163 |
+
view_study_repository,
|
| 164 |
+
view_user_id,
|
| 165 |
+
],
|
| 166 |
+
outputs=[
|
| 167 |
+
chatbot,
|
| 168 |
+
view_interviewer,
|
| 169 |
+
status
|
| 170 |
+
],
|
| 171 |
+
queue=False
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# call dropdown from function
|
| 175 |
+
voice_response_radio.select(on_select, None, voice_response_radio)
|
| 176 |
+
voice_persona_dropdown.select(on_select, None, voice_persona_dropdown)
|
| 177 |
+
|
| 178 |
+
# audio output with transcript initialization
|
| 179 |
+
audio_response_output = gr.Audio(label="Audio Output", autoplay=True)
|
| 180 |
+
|
| 181 |
+
audio_message_transcription = gr.Textbox(visible=False)
|
| 182 |
+
msg_response_from_AI = gr.Textbox(visible=False)
|
| 183 |
+
|
| 184 |
+
# human transcript to sent to database used in db function
|
| 185 |
+
human_transcript_to_db = gr.State(value="")
|
| 186 |
+
|
| 187 |
+
# on stop recording functionality
|
| 188 |
+
audio.stop_recording(
|
| 189 |
+
fn=vc.speech_to_text,
|
| 190 |
+
inputs=[audio],
|
| 191 |
+
outputs=[audio_message_transcription],
|
| 192 |
+
).then(
|
| 193 |
+
add_text,
|
| 194 |
+
[chatbot, audio_message_transcription],
|
| 195 |
+
[chatbot, audio_message_transcription, human_transcript_to_db],
|
| 196 |
+
).then(
|
| 197 |
+
bot,
|
| 198 |
+
inputs=[view_active_study, view_user_id, chatbot, view_interviewer, view_study_repository],
|
| 199 |
+
# try to add interview instant
|
| 200 |
+
outputs=[chatbot, view_interviewer, msg_response_from_AI],
|
| 201 |
+
).then(
|
| 202 |
+
fn=vc.text_to_speech,
|
| 203 |
+
inputs=[
|
| 204 |
+
msg_response_from_AI,
|
| 205 |
+
voice_response_radio,
|
| 206 |
+
voice_persona_dropdown,
|
| 207 |
+
],
|
| 208 |
+
outputs=[audio_response_output],
|
| 209 |
+
)
|
| 210 |
+
|
| 211 |
+
# on pressing enter functionalities while sending chat
|
| 212 |
+
txt.submit(
|
| 213 |
+
add_text, [chatbot, txt], [chatbot, txt, human_transcript_to_db], queue=True
|
| 214 |
+
).then(
|
| 215 |
+
bot,
|
| 216 |
+
inputs=[view_active_study, view_user_id, chatbot, view_interviewer, view_study_repository],
|
| 217 |
+
outputs=[chatbot, view_interviewer, msg_response_from_AI],
|
| 218 |
+
queue=True,
|
| 219 |
+
)
|
| 220 |
+
|
| 221 |
+
# running the main file
|
| 222 |
+
if __name__ == "__main__":
|
| 223 |
+
_ = load_dotenv(find_dotenv())
|
| 224 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 225 |
+
demo.queue(concurrency_count=10).launch(server_port=8850, share=False, debug=True)
|
Researcher.py → app_researcher.py
RENAMED
|
@@ -2,19 +2,19 @@ import gradio as gr
|
|
| 2 |
import openai
|
| 3 |
from dotenv import find_dotenv
|
| 4 |
# from utils.study_processor import StudyProcessor
|
| 5 |
-
from typing import List
|
| 6 |
import json
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
import os
|
| 9 |
from pathlib import Path
|
| 10 |
from utils.study_repository import StudyRepository
|
| 11 |
from utils.study import Study
|
|
|
|
| 12 |
import hashlib
|
| 13 |
-
from
|
| 14 |
-
from utils.interviewer_persona import AI_PERSONALITIES
|
| 15 |
from utils.database_helper import DatabaseIO
|
| 16 |
import bcrypt
|
| 17 |
import pymongo
|
|
|
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
USER_ID = os.environ["DEV_USER_ID"]
|
|
@@ -73,7 +73,7 @@ FRAMEWORK_CHOICES = {
|
|
| 73 |
def check_credentials(username, password):
|
| 74 |
|
| 75 |
try:
|
| 76 |
-
with DatabaseIO(collection_name=
|
| 77 |
user_collection = db_io.collection
|
| 78 |
user = user_collection.find_one({"username": username})
|
| 79 |
if user is not None:
|
|
@@ -183,17 +183,7 @@ def guess_study_objective(study_access_obj,
|
|
| 183 |
return study_access_obj, study_objective, study_reasoning
|
| 184 |
|
| 185 |
|
| 186 |
-
def set_active_study(this_study_repository,
|
| 187 |
-
active_study_obj,
|
| 188 |
-
study_id):
|
| 189 |
-
study_id = study_id.split(':')[0]
|
| 190 |
-
this_study = this_study_repository.get_studies(study_id=study_id)
|
| 191 |
-
if type(this_study) == list and len(this_study):
|
| 192 |
-
this_study = this_study[0]
|
| 193 |
-
# initialize a new study with this study in memory!
|
| 194 |
-
active_study_obj = Study(**this_study)
|
| 195 |
|
| 196 |
-
return active_study_obj, ""
|
| 197 |
|
| 198 |
def start_synthetic_interviews(this_study_obj):
|
| 199 |
if not this_study_obj:
|
|
@@ -339,8 +329,10 @@ def show_existing_study_fields(this_study_obj):
|
|
| 339 |
'Reason': "",
|
| 340 |
'Discussion': ""})
|
| 341 |
|
| 342 |
-
return this_study_obj, this_study_obj._id, this_study_obj.study_descriptive_name, this_study_obj.study_topic,
|
| 343 |
-
this_study_obj.
|
|
|
|
|
|
|
| 344 |
|
| 345 |
|
| 346 |
def get_dropdown_choices(dropdown_choices_study_repository_obj=None):
|
|
@@ -585,7 +577,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 585 |
outputs=[existing_status_update])
|
| 586 |
study_choice_dropdown.select(set_active_study,
|
| 587 |
inputs=[view_study_repository,
|
| 588 |
-
view_active_study,
|
| 589 |
study_choice_dropdown],
|
| 590 |
outputs=[view_active_study, existing_status_update], queue=False).then(
|
| 591 |
show_existing_study_fields,
|
|
|
|
| 2 |
import openai
|
| 3 |
from dotenv import find_dotenv
|
| 4 |
# from utils.study_processor import StudyProcessor
|
|
|
|
| 5 |
import json
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
import os
|
| 8 |
from pathlib import Path
|
| 9 |
from utils.study_repository import StudyRepository
|
| 10 |
from utils.study import Study
|
| 11 |
+
from utils.app_common import * # common functions for both apps
|
| 12 |
import hashlib
|
| 13 |
+
from interviewer_utils.interviewer_persona import AI_PERSONALITIES
|
|
|
|
| 14 |
from utils.database_helper import DatabaseIO
|
| 15 |
import bcrypt
|
| 16 |
import pymongo
|
| 17 |
+
import pymongo.errors
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
USER_ID = os.environ["DEV_USER_ID"]
|
|
|
|
| 73 |
def check_credentials(username, password):
|
| 74 |
|
| 75 |
try:
|
| 76 |
+
with DatabaseIO(collection_name=INTERVIEW_USERS_COLLECTION) as db_io:
|
| 77 |
user_collection = db_io.collection
|
| 78 |
user = user_collection.find_one({"username": username})
|
| 79 |
if user is not None:
|
|
|
|
| 183 |
return study_access_obj, study_objective, study_reasoning
|
| 184 |
|
| 185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
|
|
|
| 187 |
|
| 188 |
def start_synthetic_interviews(this_study_obj):
|
| 189 |
if not this_study_obj:
|
|
|
|
| 329 |
'Reason': "",
|
| 330 |
'Discussion': ""})
|
| 331 |
|
| 332 |
+
return this_study_obj, this_study_obj._id, this_study_obj.study_descriptive_name, this_study_obj.study_topic,\
|
| 333 |
+
this_study_obj.study_objective, \
|
| 334 |
+
this_study_obj.study_framework, this_study_obj.research_questions, this_study_obj.interview_guidelines, \
|
| 335 |
+
this_synthetic_panel_markdown, ""
|
| 336 |
|
| 337 |
|
| 338 |
def get_dropdown_choices(dropdown_choices_study_repository_obj=None):
|
|
|
|
| 577 |
outputs=[existing_status_update])
|
| 578 |
study_choice_dropdown.select(set_active_study,
|
| 579 |
inputs=[view_study_repository,
|
|
|
|
| 580 |
study_choice_dropdown],
|
| 581 |
outputs=[view_active_study, existing_status_update], queue=False).then(
|
| 582 |
show_existing_study_fields,
|
buddy.mp3
ADDED
|
Binary file (127 kB). View file
|
|
|
dev/voiceover.ipynb
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": null,
|
| 6 |
-
"outputs": [],
|
| 7 |
-
"source": [],
|
| 8 |
-
"metadata": {
|
| 9 |
-
"collapsed": false
|
| 10 |
-
}
|
| 11 |
-
}
|
| 12 |
-
],
|
| 13 |
-
"metadata": {
|
| 14 |
-
"kernelspec": {
|
| 15 |
-
"display_name": "Python 3",
|
| 16 |
-
"language": "python",
|
| 17 |
-
"name": "python3"
|
| 18 |
-
},
|
| 19 |
-
"language_info": {
|
| 20 |
-
"codemirror_mode": {
|
| 21 |
-
"name": "ipython",
|
| 22 |
-
"version": 2
|
| 23 |
-
},
|
| 24 |
-
"file_extension": ".py",
|
| 25 |
-
"mimetype": "text/x-python",
|
| 26 |
-
"name": "python",
|
| 27 |
-
"nbconvert_exporter": "python",
|
| 28 |
-
"pygments_lexer": "ipython2",
|
| 29 |
-
"version": "2.7.6"
|
| 30 |
-
}
|
| 31 |
-
},
|
| 32 |
-
"nbformat": 4,
|
| 33 |
-
"nbformat_minor": 0
|
| 34 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
interviewer_utils/data_process.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from utils.database_helper import DatabaseIO
|
| 2 |
+
import os
|
| 3 |
+
import dotenv
|
| 4 |
+
from bson import ObjectId
|
| 5 |
+
import interviewer_utils.global_variables as global_variables
|
| 6 |
+
from interviewer_utils.interviewer import Interviewer
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
dotenv.load_dotenv()
|
| 10 |
+
# dev_user_id = os.environ["DEV_USER_ID"]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DataProcess:
|
| 14 |
+
# validate weather the data exist or not
|
| 15 |
+
def get_existing_data(must_have_keys: []):
|
| 16 |
+
# create the query
|
| 17 |
+
query = (
|
| 18 |
+
{key: {"$exists": True} for key in must_have_keys} if must_have_keys else {}
|
| 19 |
+
)
|
| 20 |
+
print(query)
|
| 21 |
+
results = []
|
| 22 |
+
|
| 23 |
+
# query the collection
|
| 24 |
+
results = DatabaseIO.read_documents(DatabaseIO(), query=query)
|
| 25 |
+
results = list(results)
|
| 26 |
+
# convert results to a list (if the result set is too large, consider returning a cursor instead)
|
| 27 |
+
return results
|
| 28 |
+
|
| 29 |
+
# for getting study topic from base to dropdown
|
| 30 |
+
# def get_dropdown_choices():
|
| 31 |
+
# existing_studies = DataProcess.get_existing_data(
|
| 32 |
+
# must_have_keys=["_id", "study_name"]
|
| 33 |
+
# )
|
| 34 |
+
# _ids = [st["_id"] for st in existing_studies]
|
| 35 |
+
# _descriptions = [st["study_name"] for st in existing_studies]
|
| 36 |
+
# global_variables.study_topic_id.append(_ids)
|
| 37 |
+
# global_variables.study_topic_id.append(_descriptions)
|
| 38 |
+
# return [desc for desc in _descriptions]
|
| 39 |
+
|
| 40 |
+
# fetch user_id information
|
| 41 |
+
def get_set_user_information(object_id, AI_interviewer):
|
| 42 |
+
existing_users = DatabaseIO.read_documents(
|
| 43 |
+
DatabaseIO(collection_name="Users"), {"_id": global_variables.User_id}
|
| 44 |
+
)
|
| 45 |
+
existing_users = list(existing_users)
|
| 46 |
+
|
| 47 |
+
User_info = [
|
| 48 |
+
{
|
| 49 |
+
"AI_interviewer": AI_interviewer,
|
| 50 |
+
"Human_Respondent": {
|
| 51 |
+
"participant_id": global_variables.User_id,
|
| 52 |
+
"email": existing_users[0]["email"],
|
| 53 |
+
"age": 22,
|
| 54 |
+
"gender": "Male",
|
| 55 |
+
"Profession": "Retail Worker",
|
| 56 |
+
},
|
| 57 |
+
"transcript": [],
|
| 58 |
+
}
|
| 59 |
+
]
|
| 60 |
+
|
| 61 |
+
user = DatabaseIO.insert_document(
|
| 62 |
+
DatabaseIO(collection_name="Post_Get_Transmission"),
|
| 63 |
+
article=User_info[0],
|
| 64 |
+
embedded_table_validation="interviews.human_interviews.Human_Respondent.participant_id",
|
| 65 |
+
unique_field=global_variables.User_id,
|
| 66 |
+
unique_id=global_variables.study_id,
|
| 67 |
+
target_table_insertion=User_info,
|
| 68 |
+
embedded_table_insertion="interviews.human_interviews",
|
| 69 |
+
loop_times=1,
|
| 70 |
+
)
|
| 71 |
+
return user
|
| 72 |
+
|
| 73 |
+
# a function to insert transcript data from interviewer
|
| 74 |
+
def insert_conversation_transcripts(text, ai_response):
|
| 75 |
+
transcript_json = [
|
| 76 |
+
{
|
| 77 |
+
"turn": "Human",
|
| 78 |
+
"text": text,
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"turn": "AI",
|
| 82 |
+
"text": ai_response,
|
| 83 |
+
},
|
| 84 |
+
]
|
| 85 |
+
conversational_transcripts = DatabaseIO.insert_transcript_document(
|
| 86 |
+
DatabaseIO(collection_name="Post_Get_Transmission"),
|
| 87 |
+
article=transcript_json,
|
| 88 |
+
loop_times=2,
|
| 89 |
+
embedded_table_validation="interviews.human_interviews.Human_Respondent.participant_id",
|
| 90 |
+
unique_field=global_variables.User_id,
|
| 91 |
+
unique_id=global_variables.study_id,
|
| 92 |
+
target_table_insertion=transcript_json,
|
| 93 |
+
embedded_table_insertion="interviews.human_interviews.$.transcript",
|
| 94 |
+
)
|
| 95 |
+
return conversational_transcripts
|
| 96 |
+
|
| 97 |
+
# get transcripts of specfici study of specific user
|
| 98 |
+
def get_transcripts_of_user():
|
| 99 |
+
existing_document = DatabaseIO.read_transcripts(
|
| 100 |
+
DatabaseIO(),
|
| 101 |
+
query={
|
| 102 |
+
"_id": global_variables.study_id,
|
| 103 |
+
"interviews.human_interviews.Human_Respondent.participant_id": global_variables.User_id,
|
| 104 |
+
},
|
| 105 |
+
)
|
| 106 |
+
existing_document = list(existing_document)
|
| 107 |
+
exisitng_transcripts = existing_document[0]["interviews"]["human_interviews"][
|
| 108 |
+
0
|
| 109 |
+
]["transcript"]
|
| 110 |
+
return exisitng_transcripts
|
| 111 |
+
|
| 112 |
+
# chaning json format into list
|
| 113 |
+
def changing_json_list(self):
|
| 114 |
+
exisitng_transcripts = DataProcess.get_transcripts_of_user()
|
| 115 |
+
combined_messages = []
|
| 116 |
+
text_list = []
|
| 117 |
+
|
| 118 |
+
try:
|
| 119 |
+
for item in exisitng_transcripts:
|
| 120 |
+
if item["turn"] == "Human":
|
| 121 |
+
if text_list:
|
| 122 |
+
combined_messages.append(text_list)
|
| 123 |
+
text_list = [item["text"]]
|
| 124 |
+
elif item["turn"] == "AI":
|
| 125 |
+
text_list.append(item["text"])
|
| 126 |
+
|
| 127 |
+
if text_list:
|
| 128 |
+
combined_messages.append(text_list)
|
| 129 |
+
return combined_messages
|
| 130 |
+
except:
|
| 131 |
+
print("No interview conversation initiated")
|
| 132 |
+
|
| 133 |
+
# function to convert into memory storage format
|
| 134 |
+
def convert_to_memory_chain_context(m_interviewer):
|
| 135 |
+
transcripts = DataProcess.get_transcripts_of_user()
|
| 136 |
+
result = []
|
| 137 |
+
conversation = []
|
| 138 |
+
|
| 139 |
+
for index, entry in enumerate(transcripts):
|
| 140 |
+
turn = entry["turn"]
|
| 141 |
+
text = entry["text"]
|
| 142 |
+
|
| 143 |
+
if index == 0 or turn == "Human":
|
| 144 |
+
if conversation:
|
| 145 |
+
result.append(conversation)
|
| 146 |
+
conversation = []
|
| 147 |
+
conversation.append({turn: text})
|
| 148 |
+
|
| 149 |
+
if conversation:
|
| 150 |
+
result.append(conversation)
|
| 151 |
+
|
| 152 |
+
# loading the context by calling memory function
|
| 153 |
+
m_interviewer.set_memory_chain(scripts=result, m_interviewer=m_interviewer)
|
| 154 |
+
|
| 155 |
+
return "done"
|
| 156 |
+
|
| 157 |
+
# function to read user information
|
| 158 |
+
def read_useremail(user_email):
|
| 159 |
+
existing_user = DatabaseIO.read_documents(
|
| 160 |
+
DatabaseIO(collection_name="Users"),
|
| 161 |
+
query={"email": user_email},
|
| 162 |
+
)
|
| 163 |
+
existing_user = list(existing_user)
|
| 164 |
+
global_variables.User_id = existing_user[0]["_id"]
|
| 165 |
+
print(global_variables.User_id, type(global_variables.User_id))
|
| 166 |
+
return existing_user
|
| 167 |
+
|
| 168 |
+
# function to read study information
|
| 169 |
+
def read_study(study_code):
|
| 170 |
+
existing_study = DatabaseIO.read_documents(
|
| 171 |
+
DatabaseIO(),
|
| 172 |
+
query={"_id": study_code},
|
| 173 |
+
)
|
| 174 |
+
existing_study = list(existing_study)
|
| 175 |
+
global_variables.study_topic_id.append(existing_study[0]["study_name"])
|
| 176 |
+
print(global_variables.study_topic_id[0])
|
| 177 |
+
return existing_study
|
| 178 |
+
|
| 179 |
+
# function to validate both study and user
|
| 180 |
+
def validating_studycode_user(study_code, user_email):
|
| 181 |
+
if study_code == None or user_email == None:
|
| 182 |
+
return False
|
| 183 |
+
else:
|
| 184 |
+
existing_user = DataProcess.read_useremail(user_email)
|
| 185 |
+
existing_study = DataProcess.read_study(study_code)
|
| 186 |
+
if existing_study == [] or existing_user == []:
|
| 187 |
+
return False
|
| 188 |
+
else:
|
| 189 |
+
return True
|
interviewer_utils/global_variables.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
study_id = None
|
| 2 |
+
study_topic_id = []
|
| 3 |
+
User_id = None
|
| 4 |
+
Count = "a"
|
interviewer_utils/interview_llm_helper.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.schema import SystemMessage, HumanMessage
|
| 2 |
+
from langchain.chat_models import ChatOpenAI
|
| 3 |
+
import ast
|
| 4 |
+
import warnings
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import os, json
|
| 7 |
+
from ra_utils.synthetic_panel_datamodel import parse_synthetic_panel_output
|
| 8 |
+
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
|
| 9 |
+
from langchain import LLMChain
|
| 10 |
+
from langchain.chains import ConversationChain
|
| 11 |
+
from langchain.schema import AIMessage, HumanMessage
|
| 12 |
+
from langchain.memory.chat_memory import ChatMessageHistory
|
| 13 |
+
from langchain.prompts import PromptTemplate
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class InterviewLLMHandler:
    """Runs a study-specific qualitative interview over a chat LLM.

    Builds the interviewing system prompt from the study's topic, objectives,
    research questions and guidelines, keeps a windowed conversation memory
    (optionally pre-seeded from a stored transcript) and exposes
    ``converse()`` for single request/response turns.
    """

    def __init__(self,
                 llm_model_name: str = 'gpt-3.5-turbo-16k',
                 llm_temperature: float = 0.2,
                 expertise_message: str = "",
                 research_topic: str = "",
                 research_objectives: str = "",
                 research_questions: str = "",
                 interview_guidelines: str = "",
                 verbose: bool = False,
                 existing_conversation_history=None
                 ):
        """
        :param llm_model_name: chat model to use
        :param llm_temperature: sampling temperature
        :param expertise_message: optional override for the expertise preamble
        :param research_topic: topic of the study
        :param research_objectives: overarching goals of the study
        :param research_questions: questions the researcher wants answered
        :param interview_guidelines: reference questions for the interview
        :param verbose: passed through to the conversation chain
        :param existing_conversation_history: list of {'turn', 'text'} dicts
            used to pre-seed the conversation memory
        """
        self.llm = ChatOpenAI(temperature=llm_temperature, model_name=llm_model_name)

        project_dir = Path(os.environ['PROJECT_DIR'])
        assert project_dir.exists()
        with open(project_dir / 'study_prompts.json') as sp:
            self.definition_objective_maps = json.load(sp)

        if not len(expertise_message):
            expertise_message = f"Assistant is a world renowned expert in conducting qualitative research."

        self.expertise_message = expertise_message

        self.setup_message = f"Assistant is conducting a qualitative interview, for which the details will " \
                             f"be given below. It is very important that assistant assume the role of an impartial" \
                             f"qualitative researcher. IF any questions from the user does not pertain to the study" \
                             f"assistant will move on the next question. Assistant will not ask leading questions " \
                             f"and will never respond in a toxic way. If any response is needed about large language" \
                             f"model capabilities , assistant will say, that is beyond the scope of this conversation" \
                             f"lets move on to the next question and continue the conversation."
        self.study_specific_message = f"""The description and the details of the study are below. The actual details
        of the study is below within triple backticks after the description of the details.
        The topic of research is: ```{research_topic}```.
        The RESEARCH OBJECTIVES i.e. The overarching goal of the study: ```{research_objectives}```.
        The RESEARCH QUESTIONS i.e. A few questions the researcher wants answered are: ```{research_questions}```.
        The INTERVIEW GUIDELINES i.e. a set of reference questions to get answers for are :```{interview_guidelines}```.
        Here are some things to focus on:
        Rapport Building: Begin by explaining the interview's purpose and ensuring confidentiality.
        Encourage candidness with no right or wrong answers.
        Guided Conversation: Use the as anchor points, but don't read verbatim.
        Initiate with broad questions and narrow down as the conversation evolves.
        Active Listening: Be attentive. Seek deeper understanding through probing and spontaneous follow-up queries.
        Always align with the study's objectives.
        Concluding: Summarize the main insights, allow for participant's added input, and thank them.
        """
        self.interview_system_template = self.expertise_message + " " + self.setup_message \
            + self.study_specific_message + """
        When responding, asking questions or followups, you should remember the current conversation where you
        are the AI and the participant is the human.
        Current conversation:
        {history}
        Human: {input}
        AI Assistant:
        """
        self.interview_system_prompt = PromptTemplate(
            input_variables=["history", "input"], template=self.interview_system_template
        )

        # Single memory initialization: pre-seed from a stored transcript when
        # one is supplied.  (The original built a fresh memory unconditionally
        # and then built an identical one again in the else branch.)
        if existing_conversation_history and len(existing_conversation_history):
            self.set_memory_with_history(existing_conversation_history)
        else:
            self.interview_memory = ConversationBufferWindowMemory(ai_prefix="AI Assistant")

        self.interview_chain = ConversationChain(
            llm=self.llm,
            verbose=verbose,
            memory=self.interview_memory,
            prompt=self.interview_system_prompt
        )

    def converse(self, message: str = ""):
        """Send one participant message and return the assistant's reply."""
        return self.interview_chain.predict(input=message)

    def set_memory_with_history(self, history):
        """
        Rebuild the conversation memory from a stored transcript.

        :param history: list of {'turn': 'AI'|'Human', 'text': str} dicts
        :return: None
        """
        retrieved_messages = \
            [AIMessage(content=entry['text']) if entry['turn'] == 'AI'
             else HumanMessage(content=entry['text']) for entry in
             history]

        retrieved_chat_history = ChatMessageHistory(messages=retrieved_messages)
        self.interview_memory = ConversationBufferWindowMemory(ai_prefix="AI Assistant",
                                                               chat_memory=retrieved_chat_history)
        # Keep an already-built chain pointing at the new memory; without this
        # a post-construction call silently had no effect on the live
        # conversation (the chain kept its old memory object).
        if hasattr(self, 'interview_chain'):
            self.interview_chain.memory = self.interview_memory
|
| 106 |
+
|
interviewer_utils/interviewer.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
|
| 3 |
+
from langchain import LLMChain, PromptTemplate
|
| 4 |
+
from langchain.chat_models import ChatOpenAI
|
| 5 |
+
from interviewer_utils import interviewer_persona
|
| 6 |
+
from interviewer_utils.interview_llm_helper import InterviewLLMHandler
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Interviewer:
    """Facade that wires an interviewer persona together with the study's
    research framing into an InterviewLLMHandler."""

    def __init__(
            self,
            llm_temperature: float = 0.4,  # going for some amount of creativity
            llm_model_name="gpt-3.5-turbo-16k",
            persona: str = "Rachel",
            research_topic: str = "",
            research_objectives: str = "",
            research_questions: str = "",
            interview_guidelines: str = "",
            existing_conversation_history: List[List[str]] = None
    ):
        """
        :param existing_conversation_history: previously stored transcript used
            to pre-seed the chat memory.  The original declared the *type*
            ``List[List[str]]`` as the default value (a bug: truthiness and
            ``len()`` on a typing object misbehave); ``None`` is the correct
            "no history" default and is backward compatible, since any caller
            hitting the old default would have crashed.
        """
        interviewer_personas = interviewer_persona.AI_PERSONALITIES
        # NOTE(review): on an unknown persona this falls back to the literal
        # string "Rachel", not the Rachel persona *entry* -- confirm intended.
        self.interviewer_persona = interviewer_personas.get(persona, "Rachel")
        self.chat_handler = InterviewLLMHandler(
            llm_model_name=llm_model_name,
            llm_temperature=llm_temperature,
            research_topic=research_topic,
            research_objectives=research_objectives,
            research_questions=research_questions,
            interview_guidelines=interview_guidelines,
            existing_conversation_history=existing_conversation_history
        )

    def update_llm_prompt(self, study_subject):
        """Rebuild the system prompt for a newly selected study subject.

        NOTE(review): this method references ``self.llm`` and ``self.memory``,
        which are never set on this class (the LLM lives on
        ``self.chat_handler``) -- it raises AttributeError as written.  Kept
        byte-compatible for interface stability; TODO rewire against
        chat_handler before use.
        """
        print("After : ", study_subject)
        # update prompt on dropdown selection
        self.study_subject = study_subject
        self.system_template = """ You are an expert in %s. You are a qualitative researcher.
        You are conducting an interview in this topic %s. Please create rapport and ask me some questions
        related to the topic. You should ask follow-up questions if necessary. Questions should be open ended.

        {history}
        {human_input}

        AI: """ % (
            self.study_subject,
            self.study_subject,
        )

        self.system_prompt = PromptTemplate(
            input_variables=["history", "human_input"], template=self.system_template
        )
        self.llm_chain = LLMChain(
            llm=self.llm,
            prompt=self.system_prompt,
            verbose=False,
            memory=self.memory,
        )

    # feeding stored scripts in memory so that AI can remember the previous conversation
    def set_memory_chain(self, scripts, m_interviewer):
        """Replay stored (input, output) pairs into memory and smoke-test recall.

        NOTE(review): references ``self.memory``, which is never initialized
        on this class -- raises AttributeError; see the author's original
        stackoverflow pointer about persisting langchain memory.
        """
        for i in scripts:
            self.memory.save_context(i[0], i[1])
        response = m_interviewer.llm_chain.predict(human_input="what was my name?")
        print(response)
        return response
|
utils/interviewer.py → interviewer_utils/interviewer_arc.py
RENAMED
|
File without changes
|
{utils → interviewer_utils}/interviewer_persona.py
RENAMED
|
File without changes
|
interviewer_utils/voice_controller.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import openai
|
| 3 |
+
import os, requests
|
| 4 |
+
import dotenv
|
| 5 |
+
from utils import eleven_voice_cache
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
dotenv.load_dotenv()
|
| 9 |
+
|
| 10 |
+
eleven_labs_API = os.getenv("ELEVENLABS_API")
|
| 11 |
+
|
| 12 |
+
messages = []
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def speech_to_text(audio):
    """
    Transcribe an audio file with OpenAI Whisper and record the user turn.

    Args:
        audio: path to the recorded audio file.

    Returns:
        The transcribed text.
    """
    # 'with' guarantees the file handle is closed even if transcription
    # raises (the original opened the file and never closed it).
    with open(audio, "rb") as audio_file:
        # converting audio into text through "whisper"
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    # append the user turn to the running ChatCompletion-style message log
    messages.append({"role": "user", "content": transcript["text"]})

    return transcript["text"]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# function to convert text to speech using the selected voice persona
def text_to_speech(audio_transcription, radio_value, audio_voice_persona):
    """
    Turn the assistant's text into audio with ElevenLabs.

    :param audio_transcription: text to speak (currently echoed as the response)
    :param radio_value: "On" enables voice-over, anything else disables it
    :param audio_voice_persona: display name of the ElevenLabs voice to use
    :return: path to the generated mp3, or None when voice-over is off or the
        persona name is unknown
    """
    # Resolve the ElevenLabs voice id.  The original left voice_over_id
    # unbound (UnboundLocalError) when no cached voice matched the persona.
    voice_over_id = None
    for voice in eleven_voice_cache.VOICES_CACHE:
        if voice.name == audio_voice_persona:
            voice_over_id = voice.voice_id
            break

    # using chatGpt response system
    # response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    # r = Interviewer(llm_model_name="gpt-3.5-turbo-16k")
    # response = r.llm_chain.predict(human_input=audio_transcription)
    response = audio_transcription
    print(response)

    # append the assistant turn to the running message log
    messages.append({"role": "assistant", "content": response})

    if radio_value == "On" and voice_over_id is not None:
        # text to speech using elevenlabs (streamed)
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_over_id}/stream"
        data = {
            "text": response,
            "voice_settings": {"stability": 0.1, "similarity_boost": 0.8},
        }

        elevenlabs_response = requests.post(
            url, headers={"xi-api-key": eleven_labs_API}, json=data, stream=True
        )

        output_filename = "buddy.mp3"
        # 'with' closes the output file even if the write fails
        with open(output_filename, "wb") as output:
            output.write(elevenlabs_response.content)

        return output_filename
    return None
|
notebooks/modify_schema.ipynb
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
+
"metadata": {
|
| 7 |
+
"collapsed": true,
|
| 8 |
+
"ExecuteTime": {
|
| 9 |
+
"end_time": "2023-08-27T23:03:35.706464200Z",
|
| 10 |
+
"start_time": "2023-08-27T23:03:35.692464300Z"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"outputs": [],
|
| 14 |
+
"source": [
|
| 15 |
+
"import os\n",
|
| 16 |
+
"from utils.database_helper import DatabaseIO\n",
|
| 17 |
+
"from pymongo import MongoClient\n",
|
| 18 |
+
"from bson import ObjectId"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "code",
|
| 23 |
+
"execution_count": 9,
|
| 24 |
+
"outputs": [
|
| 25 |
+
{
|
| 26 |
+
"name": "stdout",
|
| 27 |
+
"output_type": "stream",
|
| 28 |
+
"text": [
|
| 29 |
+
"Updates completed!\n"
|
| 30 |
+
]
|
| 31 |
+
}
|
| 32 |
+
],
|
| 33 |
+
"source": [
|
| 34 |
+
"with DatabaseIO(collection_name=\"Studies_v02\") as db_io:\n",
|
| 35 |
+
" for document in db_io.collection.find():\n",
|
| 36 |
+
" new_human_interviews = {}\n",
|
| 37 |
+
" new_synthetic_interviews = {}\n",
|
| 38 |
+
"\n",
|
| 39 |
+
" # Process the 'human_interviews' list\n",
|
| 40 |
+
" for interview in document['interviews'].get('human_interviews', []):\n",
|
| 41 |
+
" participant_id = str(interview['Human_Respondent']['participant_id'])\n",
|
| 42 |
+
" new_human_interviews[participant_id] = interview\n",
|
| 43 |
+
"\n",
|
| 44 |
+
" # Process the 'synthetic_interviews' list\n",
|
| 45 |
+
" for interview in document['interviews'].get('synthetic_interviews', []):\n",
|
| 46 |
+
" new_id = str(ObjectId()) # Generate a new unique ObjectId\n",
|
| 47 |
+
" new_synthetic_interviews[new_id] = interview\n",
|
| 48 |
+
"\n",
|
| 49 |
+
" # Update the fields with the new structures\n",
|
| 50 |
+
" updates = {}\n",
|
| 51 |
+
" if new_human_interviews:\n",
|
| 52 |
+
" updates['interviews.human_interviews'] = new_human_interviews\n",
|
| 53 |
+
" if new_synthetic_interviews:\n",
|
| 54 |
+
" updates['interviews.synthetic_interviews'] = new_synthetic_interviews\n",
|
| 55 |
+
"\n",
|
| 56 |
+
" if updates:\n",
|
| 57 |
+
" db_io.collection.update_one(\n",
|
| 58 |
+
" {'_id': document['_id']},\n",
|
| 59 |
+
" {'$set': updates}\n",
|
| 60 |
+
" )\n",
|
| 61 |
+
"\n",
|
| 62 |
+
"print(\"Updates completed!\")\n",
|
| 63 |
+
"\n"
|
| 64 |
+
],
|
| 65 |
+
"metadata": {
|
| 66 |
+
"collapsed": false,
|
| 67 |
+
"ExecuteTime": {
|
| 68 |
+
"end_time": "2023-08-28T00:06:10.976700800Z",
|
| 69 |
+
"start_time": "2023-08-28T00:06:10.425108Z"
|
| 70 |
+
}
|
| 71 |
+
}
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "code",
|
| 75 |
+
"execution_count": null,
|
| 76 |
+
"outputs": [],
|
| 77 |
+
"source": [],
|
| 78 |
+
"metadata": {
|
| 79 |
+
"collapsed": false
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
],
|
| 83 |
+
"metadata": {
|
| 84 |
+
"kernelspec": {
|
| 85 |
+
"display_name": "Python 3",
|
| 86 |
+
"language": "python",
|
| 87 |
+
"name": "python3"
|
| 88 |
+
},
|
| 89 |
+
"language_info": {
|
| 90 |
+
"codemirror_mode": {
|
| 91 |
+
"name": "ipython",
|
| 92 |
+
"version": 2
|
| 93 |
+
},
|
| 94 |
+
"file_extension": ".py",
|
| 95 |
+
"mimetype": "text/x-python",
|
| 96 |
+
"name": "python",
|
| 97 |
+
"nbconvert_exporter": "python",
|
| 98 |
+
"pygments_lexer": "ipython2",
|
| 99 |
+
"version": "2.7.6"
|
| 100 |
+
}
|
| 101 |
+
},
|
| 102 |
+
"nbformat": 4,
|
| 103 |
+
"nbformat_minor": 0
|
| 104 |
+
}
|
{utils → ra_utils}/coding_and_analysis.py
RENAMED
|
File without changes
|
{utils → ra_utils}/synthetic_panel_datamodel.py
RENAMED
|
File without changes
|
utils/app_common.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from bson import ObjectId
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
import os
|
| 5 |
+
from interviewer_utils.interviewer import Interviewer
|
| 6 |
+
from utils.study import Study
|
| 7 |
+
from utils.study_repository import StudyRepository
|
| 8 |
+
from typing import Union, List
|
| 9 |
+
from utils.database_helper import DatabaseIO
|
| 10 |
+
import gradio as gr
|
| 11 |
+
|
| 12 |
+
load_dotenv()
|
| 13 |
+
INTERVIEW_USERS_COLLECTION = os.environ['MONGO_COLLECTION_USERS']
|
| 14 |
+
RESEARCHER_USERS_COLLECTION = os.environ['MONGO_COLLECTION_USERS']
|
| 15 |
+
STUDIES_COLLECTION = os.environ['MONGO_COLLECTION_STUDIES']
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def is_valid_email(email):
    """Return True when *email* looks like a plausible address (basic check)."""
    # local@domain.tld with a conservative character set
    email_pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
    return re.match(email_pattern, email) is not None
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def set_active_study(this_study_repository,
                     study_id):
    """
    Resolve *study_id* through the repository and wrap it in a Study object.

    :param this_study_repository: repository exposing get_studies(study_id=...)
    :param study_id: study id, optionally suffixed with ":label" by the UI
    :return: (Study or None, status message)
    """
    # ids may arrive as "<id>:<label>" from the UI dropdown
    study_id = study_id.split(':')[0]
    this_study = this_study_repository.get_studies(study_id=study_id)

    if not this_study:
        # The original returned a bare {} here, which broke every caller that
        # unpacks a (study, message) pair.
        return None, "Study not found"

    if isinstance(this_study, list) and len(this_study):
        this_study = this_study[0]

    # initialize a new study with this study in memory!
    try:
        # study validation is here! if db schema is older, will be notified during dev
        active_study_obj = Study(**this_study)
    except Exception as e:
        return None, f"Study not found due to {e}"

    return active_study_obj, ""
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def configure_interviewer(this_interviewer: Interviewer,
                          this_study_obj: Study,
                          history: List[List[str]]):
    """
    Validate the study object before an interview session starts.

    :param this_interviewer: interviewer to (eventually) configure
    :param this_study_obj: Study the participant is joining
    :param history: prior chat history (currently unused -- TODO wire in)
    :return: (interviewer, status message)
    """
    this_status = "Successfully configured interviewer object"

    # isinstance instead of a type() comparison so Study subclasses also pass
    if this_study_obj is None or not isinstance(this_study_obj, Study):
        this_status = "The research study you are participating in does not" \
                      " exist or is not ready, please contact the researcher"

    return this_interviewer, this_status
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def lookup_userid_by_email(email):
    """
    Resolve an e-mail address to the corresponding user's _id.

    :param email: address to look up
    :return: (user_id as str or None, status message)
    """
    if email is None or not is_valid_email(email):
        return None, "Try with a valid email please!"

    with DatabaseIO(collection_name=INTERVIEW_USERS_COLLECTION) as db_io:
        matched_user = db_io.collection.find_one({"email": email}, {"_id": 1})
        if matched_user:
            return str(matched_user["_id"]), "Found correct user"
        return None, "Error finding user"
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def process_transcript_to_chatbot_history(this_transcript):
    """
    Convert a stored transcript (alternating Human/AI turn dicts) into the
    [[human_text, ai_text], ...] pair format the gradio Chatbot widget expects.

    A trailing human turn without an AI reply is paired with None instead of
    raising IndexError (the original crashed on odd-length transcripts).

    :param this_transcript: list of {'turn': ..., 'text': str} dicts
    :return: list of [human_text, ai_text_or_None] pairs
    """
    messages_in_chatbot_format = []
    for i in range(0, len(this_transcript), 2):
        human_turn = this_transcript[i]['text']
        ai_turn = this_transcript[i + 1]['text'] if i + 1 < len(this_transcript) else None
        messages_in_chatbot_format.append([human_turn, ai_turn])
    return messages_in_chatbot_format
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def set_user_and_study(
        this_study_repository,
        this_study_code,
        this_user_email,

):
    """
    Resolve the user and the study for a session start.

    :return: (Study or None, user_id str, status message, then five gradio
        component updates enabling/disabling the chat controls)
    """
    resolved_user_id, user_msg = lookup_userid_by_email(this_user_email)

    locked = gr.update(interactive=False)
    unlocked = gr.update(interactive=True)

    # `not resolved_user_id` covers both None and the empty string
    if not resolved_user_id:
        return None, "", user_msg, *([locked] * 5)

    resolved_study, study_msg = set_active_study(this_study_repository, study_id=str(this_study_code))
    if not resolved_study:
        return None, resolved_user_id, study_msg, *([locked] * 5)

    combined_msg = user_msg + " " + study_msg
    return resolved_study, resolved_user_id, combined_msg, *([unlocked] * 5)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def setup_conversation_context(this_active_study, this_study_repository, this_user_id):
    """
    Prepare the interview session for a (study, user) pair.

    Loads any stored transcript, builds an Interviewer seeded with it, and --
    for a first-time participant -- opens the conversation and persists the
    initial Human/AI exchange back to the study document.

    :param this_active_study: Study object for the selected study
    :param this_study_repository: repository used to persist transcript updates
    :param this_user_id: participant id (ObjectId or str)
    :return: (chatbot-format history, Interviewer, status message)
    """
    this_transcript = load_previous_transcript(this_active_study,
                                               this_user_id,
                                               this_study_repository)
    this_user_id_str = str(this_user_id)
    this_interviewer = Interviewer(
        research_topic=this_active_study.study_topic,
        research_objectives=this_active_study.study_objective,
        research_questions=this_active_study.research_questions,
        interview_guidelines=this_active_study.interview_guidelines,
        existing_conversation_history=this_transcript
    )

    if not this_transcript:
        # No previous conversation: open the interview and persist the first
        # Human/AI exchange in the study document.
        ai_response = this_interviewer.chat_handler.converse('Hello')
        this_constructed_transcript = [{'turn': 'Human', 'text': 'Hello'},
                                       {'turn': 'AI', 'text': ai_response}]

        this_view_chat_history = process_transcript_to_chatbot_history(this_constructed_transcript)

        if 'human_interviews' not in this_active_study.interviews \
                or not this_active_study.interviews['human_interviews']:
            this_active_study.interviews['human_interviews'] = {}
        # Keys are ALWAYS the string form of the user id.  The original mixed
        # an ObjectId lookup (.get(this_user_id)) with a str store, so an
        # existing interview could be missed and silently overwritten.
        this_previous_interview = this_active_study.interviews['human_interviews'].get(this_user_id_str)
        # If not found, initialize an empty one
        if not this_previous_interview:
            this_previous_interview = {
                "AI_interviewer": "Sophia (AI Researcher)",
                "Human_Respondent": {
                    "participant_id": this_user_id_str
                },
                "transcript": []
            }
            this_active_study.interviews['human_interviews'][this_user_id_str] = this_previous_interview
        this_active_study.interviews['human_interviews'][this_user_id_str]['transcript'] = this_constructed_transcript
        this_study_repository.update_study(study_id=this_active_study._id, updated_data=this_active_study.to_dict())
        this_message = "First time talking to you ... "
    else:
        this_view_chat_history = process_transcript_to_chatbot_history(this_transcript)
        this_message = "Welcome back, lets start where we left off!"

    return this_view_chat_history, this_interviewer, this_message
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def load_previous_transcript(this_study: Study,
                             this_user_id: Union[ObjectId, str],
                             this_study_repository: StudyRepository):
    """
    Load the stored interview transcript for a participant, if any.

    :param this_study: Study whose interviews are inspected
    :param this_user_id: participant id (ObjectId or str); keys are stored as str
    :param this_study_repository: repository (unused here; kept for interface
        compatibility with callers)
    :return: list of transcript turn dicts, or [] when none exists

    Note: the original annotation ``Union[ObjectId | str]`` mixed the two
    union syntaxes and fails outright on Python < 3.10; fixed to
    ``Union[ObjectId, str]``.
    """
    this_user_id_str = str(this_user_id)

    try:
        # Try to get the interview of the participant
        human_interviews = this_study.interviews['human_interviews']
        this_previous_interview = human_interviews.get(this_user_id_str, None)
        # .get() may return None; the subscript below then raises TypeError,
        # which -- like any other malformed structure -- means "no transcript".
        return this_previous_interview['transcript']

    except Exception:
        # (AttributeError, KeyError, Exception) in the original reduced to
        # just Exception; behavior is unchanged.
        return []
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
if __name__ == "__main__":
    # ad-hoc smoke checks for local development
    sample_study_id = "64dc833a12e518bb5d4ea2b4"
    sample_email = "paudyalprajwal@qualz.net"
    study_repository = StudyRepository(collection_name_studies=STUDIES_COLLECTION)
    interviewer = Interviewer(existing_conversation_history=['a', 'b'])
|
utils/llm_helper.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
|
|
| 1 |
from langchain.chat_models import ChatOpenAI
|
| 2 |
-
from langchain.prompts.chat import ChatMessage
|
| 3 |
-
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
| 4 |
-
from langchain.chat_models import ChatOpenAI
|
| 5 |
-
from langchain.prompts.chat import ChatMessage
|
| 6 |
import ast
|
| 7 |
import warnings
|
| 8 |
from pathlib import Path
|
| 9 |
import os, json
|
| 10 |
-
from
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class LLMChatHandler:
|
| 13 |
def __init__(self,
|
|
@@ -16,6 +15,7 @@ class LLMChatHandler:
|
|
| 16 |
expertise_message: str = ""):
|
| 17 |
self.llm = ChatOpenAI(temperature=temperature, model_name=llm_model_name)
|
| 18 |
|
|
|
|
| 19 |
project_dir = Path(os.environ['PROJECT_DIR'])
|
| 20 |
assert project_dir.exists()
|
| 21 |
with open(project_dir / 'study_prompts.json') as sp:
|
|
@@ -24,13 +24,16 @@ class LLMChatHandler:
|
|
| 24 |
if not len(expertise_message):
|
| 25 |
expertise_message = f"Assistant is a world renowned expert in qualitative research." \
|
| 26 |
f" Assistant is well known in your field worldwide and has published several books " \
|
| 27 |
-
f"and
|
| 28 |
f"who is thorough and methodical."
|
| 29 |
self.expertise_message = expertise_message
|
| 30 |
|
|
|
|
|
|
|
| 31 |
def predict(self, messages):
|
| 32 |
return self.llm.predict_messages(messages)
|
| 33 |
|
|
|
|
| 34 |
def guess_research_questions(self,
|
| 35 |
research_name: str,
|
| 36 |
research_topic: str,
|
|
|
|
| 1 |
+
from langchain.schema import SystemMessage, HumanMessage
|
| 2 |
from langchain.chat_models import ChatOpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import ast
|
| 4 |
import warnings
|
| 5 |
from pathlib import Path
|
| 6 |
import os, json
|
| 7 |
+
from ra_utils.synthetic_panel_datamodel import parse_synthetic_panel_output
|
| 8 |
+
from langchain.memory import ConversationBufferMemory
|
| 9 |
+
from langchain import LLMChain
|
| 10 |
|
| 11 |
class LLMChatHandler:
|
| 12 |
def __init__(self,
|
|
|
|
| 15 |
expertise_message: str = ""):
|
| 16 |
self.llm = ChatOpenAI(temperature=temperature, model_name=llm_model_name)
|
| 17 |
|
| 18 |
+
|
| 19 |
project_dir = Path(os.environ['PROJECT_DIR'])
|
| 20 |
assert project_dir.exists()
|
| 21 |
with open(project_dir / 'study_prompts.json') as sp:
|
|
|
|
| 24 |
if not len(expertise_message):
|
| 25 |
expertise_message = f"Assistant is a world renowned expert in qualitative research." \
|
| 26 |
f" Assistant is well known in your field worldwide and has published several books " \
|
| 27 |
+
f"and white-papers. Assistant is also an expert qualitative research analyst " \
|
| 28 |
f"who is thorough and methodical."
|
| 29 |
self.expertise_message = expertise_message
|
| 30 |
|
| 31 |
+
|
| 32 |
+
|
| 33 |
def predict(self, messages):
|
| 34 |
return self.llm.predict_messages(messages)
|
| 35 |
|
| 36 |
+
|
| 37 |
def guess_research_questions(self,
|
| 38 |
research_name: str,
|
| 39 |
research_topic: str,
|
utils/study.py
CHANGED
|
@@ -45,7 +45,8 @@ class Study:
|
|
| 45 |
self.additional_fields = kwargs
|
| 46 |
self.chat_handler = LLMChatHandler()
|
| 47 |
|
| 48 |
-
self.synthetic_panel_pending_interviews = synthetic_panel_pending_interviews if
|
|
|
|
| 49 |
# Check if synthetic_interviews exists and is not empty
|
| 50 |
# if 'synthetic_interviews' in self.interviews and len(self.interviews.get('synthetic_interviews', "")):
|
| 51 |
# # Extract AI_Respondent from each item in synthetic_interviews and append to self.synthetic_panelists
|
|
|
|
| 45 |
self.additional_fields = kwargs
|
| 46 |
self.chat_handler = LLMChatHandler()
|
| 47 |
|
| 48 |
+
self.synthetic_panel_pending_interviews = synthetic_panel_pending_interviews if\
|
| 49 |
+
synthetic_panel_pending_interviews else []
|
| 50 |
# Check if synthetic_interviews exists and is not empty
|
| 51 |
# if 'synthetic_interviews' in self.interviews and len(self.interviews.get('synthetic_interviews', "")):
|
| 52 |
# # Extract AI_Respondent from each item in synthetic_interviews and append to self.synthetic_panelists
|
utils/study_repository.py
CHANGED
|
@@ -1,19 +1,14 @@
|
|
| 1 |
import dotenv
|
| 2 |
|
| 3 |
-
import os
|
| 4 |
|
| 5 |
-
from typing import
|
| 6 |
-
import pymongo
|
| 7 |
-
from pymongo.errors import PyMongoError
|
| 8 |
-
from pymongo.server_api import ServerApi
|
| 9 |
from typing import Dict, Any
|
| 10 |
from utils.study import Study
|
| 11 |
from utils.database_helper import DatabaseIO
|
| 12 |
from bson import ObjectId
|
| 13 |
from typing import Union
|
| 14 |
-
from utils.synthetic_panel_datamodel import SyntheticPanelOutput
|
| 15 |
|
| 16 |
-
import ast
|
| 17 |
from utils.llm_helper import LLMChatHandler
|
| 18 |
|
| 19 |
|
|
@@ -65,6 +60,44 @@ class StudyRepository:
|
|
| 65 |
study_id = db_io.collection.insert_one(study_data).inserted_id
|
| 66 |
return study_id
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
def get_studies(self,
|
| 69 |
study_id: Optional[str] = None,
|
| 70 |
user_id: Optional[str] = None,
|
|
@@ -79,13 +112,12 @@ class StudyRepository:
|
|
| 79 |
:return: The studies or a specific study data, or a tuple with a message if an error occurs.
|
| 80 |
"""
|
| 81 |
|
| 82 |
-
# If study_id is provided, return just that study.
|
| 83 |
if study_id:
|
| 84 |
study_id = ObjectId(study_id)
|
| 85 |
with DatabaseIO(db_name=self.db_name, collection_name=self.collection_name_studies) as db_io:
|
| 86 |
study = db_io.collection.find_one({"_id": study_id})
|
| 87 |
if not study:
|
| 88 |
-
return {}
|
| 89 |
return study
|
| 90 |
|
| 91 |
# If user_id is provided and filter is desired, return studies the user is authorized to view.
|
|
@@ -154,6 +186,7 @@ class StudyRepository:
|
|
| 154 |
|
| 155 |
return return_msg
|
| 156 |
|
|
|
|
| 157 |
def delete_study(self, study_id: str):
|
| 158 |
"""
|
| 159 |
Delete a study by its ID.
|
|
|
|
| 1 |
import dotenv
|
| 2 |
|
| 3 |
+
import os
|
| 4 |
|
| 5 |
+
from typing import Optional, List, Tuple
|
|
|
|
|
|
|
|
|
|
| 6 |
from typing import Dict, Any
|
| 7 |
from utils.study import Study
|
| 8 |
from utils.database_helper import DatabaseIO
|
| 9 |
from bson import ObjectId
|
| 10 |
from typing import Union
|
|
|
|
| 11 |
|
|
|
|
| 12 |
from utils.llm_helper import LLMChatHandler
|
| 13 |
|
| 14 |
|
|
|
|
| 60 |
study_id = db_io.collection.insert_one(study_data).inserted_id
|
| 61 |
return study_id
|
| 62 |
|
| 63 |
+
def append_to_transcript(self, study_id, user_id, human_text, ai_response):
|
| 64 |
+
"""
|
| 65 |
+
Append the recent conversation to the transcript in the database.
|
| 66 |
+
|
| 67 |
+
:param db_url: MongoDB database URL.
|
| 68 |
+
:param db_name: Name of the MongoDB database.
|
| 69 |
+
:param collection_name: Name of the collection storing the transcripts.
|
| 70 |
+
:param participant_id: ID of the participant for whom the transcript is being updated.
|
| 71 |
+
:param human_text: The text/message from the human.
|
| 72 |
+
:param ai_response: The response from the AI.
|
| 73 |
+
"""
|
| 74 |
+
|
| 75 |
+
# Initialize the MongoDB client and select the database and collection
|
| 76 |
+
user_id = str(user_id)
|
| 77 |
+
# Define the conversation entries to be appended
|
| 78 |
+
new_entries = [
|
| 79 |
+
{
|
| 80 |
+
"turn": "Human",
|
| 81 |
+
"text": human_text,
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"turn": "AI",
|
| 85 |
+
"text": ai_response,
|
| 86 |
+
}
|
| 87 |
+
]
|
| 88 |
+
|
| 89 |
+
# Append new entries to the transcript
|
| 90 |
+
with DatabaseIO(collection_name=self.collection_name_studies) as db_io:
|
| 91 |
+
query = {"_id": ObjectId(study_id)}
|
| 92 |
+
|
| 93 |
+
# Append new entries to the transcript using a dynamic field update
|
| 94 |
+
update_field = f"interviews.human_interviews.{user_id}.transcript"
|
| 95 |
+
update_action = {"$push": {update_field: {"$each": new_entries}}}
|
| 96 |
+
|
| 97 |
+
# Execute the update
|
| 98 |
+
db_io.collection.update_one(query, update_action)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
def get_studies(self,
|
| 102 |
study_id: Optional[str] = None,
|
| 103 |
user_id: Optional[str] = None,
|
|
|
|
| 112 |
:return: The studies or a specific study data, or a tuple with a message if an error occurs.
|
| 113 |
"""
|
| 114 |
|
|
|
|
| 115 |
if study_id:
|
| 116 |
study_id = ObjectId(study_id)
|
| 117 |
with DatabaseIO(db_name=self.db_name, collection_name=self.collection_name_studies) as db_io:
|
| 118 |
study = db_io.collection.find_one({"_id": study_id})
|
| 119 |
if not study:
|
| 120 |
+
return {}
|
| 121 |
return study
|
| 122 |
|
| 123 |
# If user_id is provided and filter is desired, return studies the user is authorized to view.
|
|
|
|
| 186 |
|
| 187 |
return return_msg
|
| 188 |
|
| 189 |
+
|
| 190 |
def delete_study(self, study_id: str):
|
| 191 |
"""
|
| 192 |
Delete a study by its ID.
|