File size: 2,365 Bytes
db8f917
e8e6bd3
 
 
 
 
 
 
 
 
0afa1d3
e8e6bd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09bdd6d
 
 
 
 
 
 
 
e8e6bd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db8f917
 
 
 
e8e6bd3
 
db8f917
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import json
from pathlib import Path

from sklearn.metrics.pairwise import cosine_similarity

from InstructorEmbedding import INSTRUCTOR

model = INSTRUCTOR("hkunlp/instructor-large")

EMBED_FILE_PATH = Path(__file__).parent / "top15_disorders_emb.json"
case_note_instruction = (
    "Represent the case note for a possible DSM-5 mental health diagnosis:"
)


def read_json_file(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data


def get_case_note_embedding(case_note):
    case_note_emb = model.encode([[case_note_instruction, case_note]])
    return case_note_emb


def get_top_n_diagnoses(case_note):
    top_n = 3

    case_note_emb = get_case_note_embedding(case_note)
    diagnoses_embed = read_json_file(EMBED_FILE_PATH)["diagnoses"]

    diagnosis_list = []
    for diagnosis in diagnoses_embed:
        diagnosis_name = diagnosis["name"]
        criteria_scores = (
            []
        )  # Store criterion descriptions and scores for each diagnosis

        count = 0
        sum_score = 0
        for criterion in diagnosis["criteria"]:
            count += 1
            score = cosine_similarity(criterion["embedding"], case_note_emb)
            sum_score += score

            criterion_data = {
                "description": criterion["description"],
                "score": float(score),
            }
            if "symptom_list" in criterion:
                criterion_data["symptom_list"] = criterion["symptom_list"]
            criteria_scores.append(criterion_data)

        sum_score /= count
        diagnosis_list.append(
            {
                "name": diagnosis_name,
                "sum_score": float(sum_score),
                "criteria_scores": criteria_scores,
            }
        )

    # Sort the diagnoses based on the sum_score in descending order
    sorted_diagnoses = sorted(
        diagnosis_list, key=lambda x: x["sum_score"], reverse=True
    )

    # Select the top n diagnoses
    top_n_diagnoses = sorted_diagnoses[:top_n]

    # Convert the top_n_diagnoses to JSON format
    top_n_diagnoses_json = json.dumps(top_n_diagnoses, indent=2)

    return top_n_diagnoses_json


def greet(name):
    return "Hello " + name + "!!"


iface = gr.Interface(fn=get_top_n_diagnoses, inputs="text", outputs="text")
iface.launch()