Spaces:

Omdena-MentalHealth-team-consolidated
/

MindfulMedia_Mentor

Sleeping

App Files Files Community

MindfulMedia_Mentor / Chromadb_storage_JyotiNigam.py

jaelin215

Upload 14 files

bd9870c verified 4 months ago

raw

history blame contribute delete

No virus

2.89 kB

	#---
	#- Author: Jaelin Lee, Jyoti Nigam
	#- Date: Mar 16, 2024
	#- Description: Stores knowledge-base text in a vector database (Chroma DB) and, based on the user input, retrieves the most relevant entry from the knowledge base.
	#- How it works: Load the question file for the predicted category, split it into chunks on newlines, embed the chunks with the `all-MiniLM-L6-v2` sentence-transformer model, and store them in Chroma. Then run a similarity search with the user input and return the first line of the closest chunk. Note: NLTK is imported and its tokenizer data is downloaded, but the retrieval in this file does not use BM25/TF-IDF scoring or `argmax()`.
	#---

	import nltk
	from nltk.tokenize import word_tokenize
	from langchain_community.document_loaders import TextLoader
	from langchain_community.embeddings.sentence_transformer import (
	SentenceTransformerEmbeddings,
	)
	from langchain_community.vectorstores import Chroma
	from langchain_text_splitters import CharacterTextSplitter

	# Fetch the NLTK "punkt" tokenizer data. This runs every time the module is
	# imported, so importing this file can trigger a download.
	# NOTE(review): nothing visible in this file calls word_tokenize -- confirm
	# whether the download is still required.
	nltk.download('punkt')
	import os
	# The former module-level `global db` statement was removed: `global` has no
	# effect outside a function body, and no code in this file reads a
	# module-level `db`.
	class QuestionRetriever:
	    """Retrieve the stored question most similar to a user's message.

	    Each supported category has its own question file under
	    ``<current working directory>/data``. The file is split into chunks,
	    embedded with a sentence-transformer model, stored in a Chroma vector
	    store, and searched for the entry closest to the user's query.

	    The vector store for a category is built on first use and then reused by
	    the same instance, so keep one ``QuestionRetriever`` around rather than
	    creating a new one per query.
	    """

	    # Supported categories and the knowledge-base file that backs each one.
	    _CATEGORY_FILES = {
	        "depression": "depression_questions.txt",
	        "adhd": "adhd_questions.txt",
	        "anxiety": "anxiety_questions.txt",
	    }

	    def load_documents(self, file_name):
	        """Load ``data/<file_name>`` relative to the current working directory.

	        Args:
	            file_name: Name of a text file inside the ``data`` directory.

	        Returns:
	            The documents produced by ``TextLoader(...).load()``.
	        """
	        file_path = os.path.join(os.getcwd(), "data", file_name)
	        return TextLoader(file_path).load()

	    def store_data_in_vector_db(self, documents, collection_name=None):
	        """Split ``documents`` into chunks, embed them and index them in Chroma.

	        Args:
	            documents: Documents as returned by ``load_documents``.
	            collection_name: Optional Chroma collection to write into. When
	                omitted, Chroma's default collection is used, exactly as
	                before this parameter existed.

	        Returns:
	            The ``Chroma`` vector store holding the embedded chunks.
	        """
	        # Split on newlines; chunk_size=100 keeps the pieces short.
	        splitter = CharacterTextSplitter(
	            chunk_size=100, chunk_overlap=0, separator="\n"
	        )
	        chunks = splitter.split_documents(documents)
	        # Open-source sentence-transformer embedding function.
	        embedding_function = SentenceTransformerEmbeddings(
	            model_name="all-MiniLM-L6-v2"
	        )
	        if collection_name is None:
	            return Chroma.from_documents(chunks, embedding_function)
	        return Chroma.from_documents(
	            chunks, embedding_function, collection_name=collection_name
	        )

	    def _get_vector_db(self, category):
	        """Return the vector store for ``category``, building it on first use."""
	        # Created lazily so the class still works without an __init__.
	        cache = getattr(self, "_db_cache", None)
	        if cache is None:
	            cache = self._db_cache = {}
	        if category not in cache:
	            documents = self.load_documents(self._CATEGORY_FILES[category])
	            # NOTE(review): Chroma's in-memory client appears to share the
	            # default collection across from_documents() calls within one
	            # process, which would mix categories and pile up duplicates --
	            # confirm. A per-category collection name avoids relying on that.
	            cache[category] = self.store_data_in_vector_db(
	                documents, collection_name=f"{category}_questions"
	            )
	        return cache[category]

	    def get_response(self, user_query, predicted_mental_category):
	        """Return the knowledge-base question closest to ``user_query``.

	        Args:
	            user_query: The user's message.
	            predicted_mental_category: One of ``"depression"``, ``"adhd"`` or
	                ``"anxiety"``.

	        Returns:
	            The first line of the best-matching chunk that is not identical to
	            ``user_query``. If every match equals the query, the top match is
	            returned. ``None`` is returned for an unsupported category (after
	            printing a message) or when the search yields no results.
	        """
	        if (
	            not isinstance(predicted_mental_category, str)
	            or predicted_mental_category not in self._CATEGORY_FILES
	        ):
	            print("Sorry, allowed predicted_mental_category is ['depression', 'adhd', 'anxiety'].")
	            return None

	        db = self._get_vector_db(predicted_mental_category)
	        matches = db.similarity_search(user_query)
	        if not matches:
	            # Nothing indexed or nothing returned: no question to suggest.
	            return None

	        # Only the first line of each chunk is treated as the question.
	        questions = [match.page_content.split("\n")[0] for match in matches]
	        # Avoid echoing the user's own question back when an alternative
	        # exists; fall back to the top match when there is none.
	        most_similar_question = next(
	            (question for question in questions if question != user_query),
	            questions[0],
	        )

	        print(most_similar_question)
	        return most_similar_question

	if __name__ == "__main__":
	    # Manual smoke test: read one message from the terminal and print the
	    # closest stored question for a fixed category.
	    retriever = QuestionRetriever()
	    query = input("User: ")

	    # The category is hard-coded for this demo run.
	    category = "depression"
	    answer = retriever.get_response(query, category)
	    print("Chatbot:", answer)