ACMCMC committed on
Commit
4ae75ec
1 Parent(s): b0c103a

Get by disease description

Browse files
Files changed (3) hide show
  1. app.py +3 -2
  2. database.ipynb +196 -30
  3. utils.py +67 -14
app.py CHANGED
@@ -3,7 +3,7 @@ from streamlit_agraph import agraph, Node, Edge, Config
3
  import os
4
  from sqlalchemy import create_engine, text
5
  import pandas as pd
6
- from utils import get_all_diseases_name, get_most_similar_diseases_from_uri, get_uri_from_name
7
  import json
8
 
9
 
@@ -16,7 +16,8 @@ CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}
16
  engine = create_engine(CONNECTION_STRING)
17
 
18
  def handle_click_on_analyze_button():
19
- # 1. Embed the textual description that the user entered using the model ()
 
20
  # 2. Get 5 diseases with the highest cosine similarity from the DB
21
  # 3. Get the similarities of the embeddings of those diseases (cosine similarity of the embeddings of the nodes of such diseases)
22
  # 4. Potentially filter out the diseases that are not similar enough (e.g. similarity < 0.8)
 
3
  import os
4
  from sqlalchemy import create_engine, text
5
  import pandas as pd
6
+ from utils import get_all_diseases_name, get_most_similar_diseases_from_uri, get_uri_from_name, get_diseases_related_to_a_textual_description
7
  import json
8
 
9
 
 
16
  engine = create_engine(CONNECTION_STRING)
17
 
18
  def handle_click_on_analyze_button():
19
+ # 1. Embed the textual description that the user entered using the model
20
+ diseases_related_to_the_user_text = get_diseases_related_to_a_textual_description(description_input)
21
  # 2. Get 5 diseases with the highest cosine similarity from the DB
22
  # 3. Get the similarities of the embeddings of those diseases (cosine similarity of the embeddings of the nodes of such diseases)
23
  # 4. Potentially filter out the diseases that are not similar enough (e.g. similarity < 0.8)
database.ipynb CHANGED
@@ -9,7 +9,7 @@
9
  },
10
  {
11
  "cell_type": "code",
12
- "execution_count": 8,
13
  "metadata": {},
14
  "outputs": [],
15
  "source": [
@@ -19,7 +19,7 @@
19
  },
20
  {
21
  "cell_type": "code",
22
- "execution_count": 9,
23
  "metadata": {},
24
  "outputs": [],
25
  "source": [
@@ -35,7 +35,7 @@
35
  },
36
  {
37
  "cell_type": "code",
38
- "execution_count": null,
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
@@ -53,8 +53,8 @@
53
  " sql = f\"\"\"\n",
54
  " CREATE TABLE Test.EntityEmbeddings (\n",
55
  " embedding VECTOR(DOUBLE, 50),\n",
56
- " label VARCHAR(143),\n",
57
- " uri VARCHAR(38)\n",
58
  " )\n",
59
  " \"\"\"\n",
60
  " result = conn.execute(text(sql))\n",
@@ -76,9 +76,158 @@
76
  },
77
  {
78
  "cell_type": "code",
79
- "execution_count": null,
80
  "metadata": {},
81
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  "source": [
83
  "# Calculate distance between entities\n",
84
  "with engine.connect() as conn:\n",
@@ -116,8 +265,8 @@
116
  " sql = f\"\"\"\n",
117
  " CREATE TABLE Test.RelationEmbeddings (\n",
118
  " embedding VECTOR(DOUBLE, 50),\n",
119
- " label VARCHAR(10),\n",
120
- " uri VARCHAR(38)\n",
121
  " )\n",
122
  " \"\"\"\n",
123
  " result = conn.execute(text(sql))\n",
@@ -184,35 +333,53 @@
184
  },
185
  {
186
  "cell_type": "code",
187
- "execution_count": null,
188
  "metadata": {},
189
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  "source": [
191
  "# %%\n",
192
  "import pandas as pd\n",
193
  "import rdflib\n",
194
  "\n",
195
  "# Load the disease descriptions from MGDEF.RRF\n",
196
- "df_disease_descriptions = pd.read_csv(\"MGDEF.RRF\", sep=\"|\", header=0)\n",
197
- "# Rename the column '#CUI' to 'CUI'\n",
198
- "df_disease_descriptions.rename(columns={\"#CUI\": \"CUI\"}, inplace=True)\n",
199
- "# Remove the last column, it's empty\n",
200
- "df_disease_descriptions = df_disease_descriptions.iloc[:, :-1]\n",
201
- "# Filter out the rows where the SUPPRESS field is equal to 'Y'\n",
202
- "df_disease_descriptions = df_disease_descriptions[df_disease_descriptions[\"SUPPRESS\"] != \"Y\"]\n",
203
- "# Some of the rows include a \\n character, so we need to remove the rows where the CUI field contains spaces or doesn't start with 'C'\n",
204
- "df_disease_descriptions = df_disease_descriptions[df_disease_descriptions[\"CUI\"].str.startswith(\"C\") & ~df_disease_descriptions[\"CUI\"].str.contains(\" \")]\n",
205
- "# Remove the rows where the DEF field is empty\n",
206
- "df_disease_descriptions = df_disease_descriptions[df_disease_descriptions[\"DEF\"].notnull()]\n",
207
- "df_disease_descriptions['uri'] = df_disease_descriptions['CUI'].apply(lambda x: f'http://identifiers.org/medgen/{x}')\n",
208
  "\n",
209
  "with engine.connect() as conn:\n",
210
  " with conn.begin(): \n",
211
  " result = conn.execute(text('DROP TABLE IF EXISTS Test.DiseaseDescriptions'))\n",
212
  " sql = f\"\"\"\n",
213
  " CREATE TABLE Test.DiseaseDescriptions (\n",
214
- " uri VARCHAR(50),\n",
215
- " description TEXT\n",
 
216
  " )\n",
217
  " \"\"\"\n",
218
  " result = conn.execute(text(sql))\n",
@@ -220,16 +387,15 @@
220
  "with engine.connect() as conn:\n",
221
  " with conn.begin():\n",
222
  " for index, row in df_disease_descriptions.iterrows():\n",
223
- " print(row['DEF'])\n",
224
- " print(row['uri'])\n",
225
  " sql = text(\"\"\"\n",
226
  " INSERT INTO Test.DiseaseDescriptions \n",
227
- " (uri, description) \n",
228
- " VALUES ( :uri, :description)\n",
229
  " \"\"\")\n",
230
  " conn.execute(sql, {\n",
231
  " 'uri': row['uri'],\n",
232
- " 'description': row['DEF'], \n",
 
233
  " })"
234
  ]
235
  },
 
9
  },
10
  {
11
  "cell_type": "code",
12
+ "execution_count": null,
13
  "metadata": {},
14
  "outputs": [],
15
  "source": [
 
19
  },
20
  {
21
  "cell_type": "code",
22
+ "execution_count": null,
23
  "metadata": {},
24
  "outputs": [],
25
  "source": [
 
35
  },
36
  {
37
  "cell_type": "code",
38
+ "execution_count": 18,
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
 
53
  " sql = f\"\"\"\n",
54
  " CREATE TABLE Test.EntityEmbeddings (\n",
55
  " embedding VECTOR(DOUBLE, 50),\n",
56
+ " label VARCHAR({len_label}),\n",
57
+ " uri VARCHAR({len_uri})\n",
58
  " )\n",
59
  " \"\"\"\n",
60
  " result = conn.execute(text(sql))\n",
 
76
  },
77
  {
78
  "cell_type": "code",
79
+ "execution_count": 19,
80
  "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "name": "stderr",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "Exception during reset or similar\n",
87
+ "Traceback (most recent call last):\n",
88
+ " File \"/var/folders/g5/qwphv0rn5tvflncj75xsxvxw0000gn/T/ipykernel_77641/1077196279.py\", line 11, in <module>\n",
89
+ " result = conn.execute(text(sql))\n",
90
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
91
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1416, in execute\n",
92
+ " return meth(\n",
93
+ " ^^^^^\n",
94
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/sql/elements.py\", line 517, in _execute_on_connection\n",
95
+ " return connection._execute_clauseelement(\n",
96
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
97
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1639, in _execute_clauseelement\n",
98
+ " ret = self._execute_context(\n",
99
+ " ^^^^^^^^^^^^^^^^^^^^^^\n",
100
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1848, in _execute_context\n",
101
+ " return self._exec_single_context(\n",
102
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
103
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1988, in _exec_single_context\n",
104
+ " self._handle_dbapi_exception(\n",
105
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2347, in _handle_dbapi_exception\n",
106
+ " raise exc_info[1].with_traceback(exc_info[2])\n",
107
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1969, in _exec_single_context\n",
108
+ " self.dialect.do_execute(\n",
109
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy_iris/base.py\", line 1060, in do_execute\n",
110
+ " cursor.execute(query, params)\n",
111
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py\", line 268, in execute\n",
112
+ " self._execute()\n",
113
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py\", line 528, in _execute\n",
114
+ " return exec_func()\n",
115
+ " ^^^^^^^^^^^\n",
116
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py\", line 574, in _execute_query\n",
117
+ " self._send_direct_query_request()\n",
118
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py\", line 1159, in _send_direct_query_request\n",
119
+ " self._in_message._read_message_sql(sequence_number, self._statement_id, _InStream.FETCH_DATA, [100])\n",
120
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 46, in _read_message_sql\n",
121
+ " is_for_gateway = self.__read_message_internal(expected_message_id, expected_statement_id, type)\n",
122
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
123
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 59, in __read_message_internal\n",
124
+ " self.__read_buffer(header.buffer, 0, _MessageHeader.HEADER_SIZE)\n",
125
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 146, in __read_buffer\n",
126
+ " raise Exception(\"Server unexpectedly closing communication device\")\n",
127
+ "Exception: Server unexpectedly closing communication device\n",
128
+ "\n",
129
+ "During handling of the above exception, another exception occurred:\n",
130
+ "\n",
131
+ "Traceback (most recent call last):\n",
132
+ " File \"/var/folders/g5/qwphv0rn5tvflncj75xsxvxw0000gn/T/ipykernel_77641/1077196279.py\", line 3, in <module>\n",
133
+ " with conn.begin():\n",
134
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/util.py\", line 161, in __exit__\n",
135
+ " self.rollback()\n",
136
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2599, in rollback\n",
137
+ " self._do_rollback()\n",
138
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2718, in _do_rollback\n",
139
+ " self._close_impl(try_deactivate=True)\n",
140
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2701, in _close_impl\n",
141
+ " self._connection_rollback_impl()\n",
142
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2693, in _connection_rollback_impl\n",
143
+ " self.connection._rollback_impl()\n",
144
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1121, in _rollback_impl\n",
145
+ " self._handle_dbapi_exception(e, None, None, None, None)\n",
146
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 2347, in _handle_dbapi_exception\n",
147
+ " raise exc_info[1].with_traceback(exc_info[2])\n",
148
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py\", line 1119, in _rollback_impl\n",
149
+ " self.engine.dialect.do_rollback(self.connection)\n",
150
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy_iris/base.py\", line 1072, in do_rollback\n",
151
+ " connection.rollback()\n",
152
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_IRISConnection.py\", line 494, in rollback\n",
153
+ " self._in_message._read_message_sql(sequence_number, -1, 0, [0])\n",
154
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 46, in _read_message_sql\n",
155
+ " is_for_gateway = self.__read_message_internal(expected_message_id, expected_statement_id, type)\n",
156
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
157
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 59, in __read_message_internal\n",
158
+ " self.__read_buffer(header.buffer, 0, _MessageHeader.HEADER_SIZE)\n",
159
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py\", line 138, in __read_buffer\n",
160
+ " data = self._device.recv(length)\n",
161
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
162
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_Device.py\", line 40, in recv\n",
163
+ " return self._socket.recv(len)\n",
164
+ " ^^^^^^^^^^^^^^^^^^^^^^\n",
165
+ "ConnectionResetError: [Errno 54] Connection reset by peer\n",
166
+ "\n",
167
+ "During handling of the above exception, another exception occurred:\n",
168
+ "\n",
169
+ "Traceback (most recent call last):\n",
170
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/pool/base.py\", line 988, in _finalize_fairy\n",
171
+ " fairy._reset(\n",
172
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/pool/base.py\", line 1438, in _reset\n",
173
+ " pool._dialect.do_rollback(self)\n",
174
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy_iris/base.py\", line 1072, in do_rollback\n",
175
+ " connection.rollback()\n",
176
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_IRISConnection.py\", line 493, in rollback\n",
177
+ " self._out_message._send(sequence_number)\n",
178
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_OutStream.py\", line 18, in _send\n",
179
+ " self._device.sendall(self.wire._get_buffer())\n",
180
+ " File \"/Users/aldan.creo/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_Device.py\", line 37, in sendall\n",
181
+ " return self._socket.sendall(buffer)\n",
182
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
183
+ "BrokenPipeError: [Errno 32] Broken pipe\n"
184
+ ]
185
+ },
186
+ {
187
+ "ename": "ConnectionResetError",
188
+ "evalue": "[Errno 54] Connection reset by peer",
189
+ "output_type": "error",
190
+ "traceback": [
191
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
192
+ "\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
193
+ "Cell \u001b[0;32mIn[19], line 11\u001b[0m\n\u001b[1;32m 4\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m SELECT TOP 10 e1.uri AS uri1, e2.uri AS uri2, e1.label AS label1, e2.label AS label2,\u001b[39m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m VECTOR_COSINE(e1.embedding, e2.embedding) AS distance\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m ORDER BY distance DESC\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m---> 11\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 12\u001b[0m data \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mfetchall()\n",
194
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1416\u001b[0m, in \u001b[0;36mConnection.execute\u001b[0;34m(self, statement, parameters, execution_options)\u001b[0m\n\u001b[1;32m 1415\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1417\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1418\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistilled_parameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1419\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_options\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mNO_OPTIONS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1420\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
195
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/sql/elements.py:517\u001b[0m, in \u001b[0;36mClauseElement._execute_on_connection\u001b[0;34m(self, connection, distilled_params, execution_options)\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, Executable)\n\u001b[0;32m--> 517\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_clauseelement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdistilled_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexecution_options\u001b[49m\n\u001b[1;32m 519\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
196
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1639\u001b[0m, in \u001b[0;36mConnection._execute_clauseelement\u001b[0;34m(self, elem, distilled_parameters, execution_options)\u001b[0m\n\u001b[1;32m 1631\u001b[0m compiled_sql, extracted_params, cache_hit \u001b[38;5;241m=\u001b[39m elem\u001b[38;5;241m.\u001b[39m_compile_w_cache(\n\u001b[1;32m 1632\u001b[0m dialect\u001b[38;5;241m=\u001b[39mdialect,\n\u001b[1;32m 1633\u001b[0m compiled_cache\u001b[38;5;241m=\u001b[39mcompiled_cache,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1637\u001b[0m linting\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdialect\u001b[38;5;241m.\u001b[39mcompiler_linting \u001b[38;5;241m|\u001b[39m compiler\u001b[38;5;241m.\u001b[39mWARN_LINTING,\n\u001b[1;32m 1638\u001b[0m )\n\u001b[0;32m-> 1639\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_context\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1640\u001b[0m \u001b[43m \u001b[49m\u001b[43mdialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1641\u001b[0m \u001b[43m \u001b[49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecution_ctx_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_init_compiled\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1642\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompiled_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1643\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistilled_parameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1644\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1645\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompiled_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1646\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistilled_parameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1647\u001b[0m \u001b[43m 
\u001b[49m\u001b[43melem\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1648\u001b[0m \u001b[43m \u001b[49m\u001b[43mextracted_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1649\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_hit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_hit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1650\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1651\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_events:\n",
197
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1848\u001b[0m, in \u001b[0;36mConnection._execute_context\u001b[0;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[1;32m 1847\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1848\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_exec_single_context\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1849\u001b[0m \u001b[43m \u001b[49m\u001b[43mdialect\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\n\u001b[1;32m 1850\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
198
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1988\u001b[0m, in \u001b[0;36mConnection._exec_single_context\u001b[0;34m(self, dialect, context, statement, parameters)\u001b[0m\n\u001b[1;32m 1987\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 1988\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_dbapi_exception\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1989\u001b[0m \u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstr_statement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meffective_parameters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 1990\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1992\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
199
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2347\u001b[0m, in \u001b[0;36mConnection._handle_dbapi_exception\u001b[0;34m(self, e, statement, parameters, cursor, context, is_sub_exec)\u001b[0m\n\u001b[1;32m 2346\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m exc_info[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 2347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc_info[\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mwith_traceback(exc_info[\u001b[38;5;241m2\u001b[39m])\n\u001b[1;32m 2348\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n",
200
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1969\u001b[0m, in \u001b[0;36mConnection._exec_single_context\u001b[0;34m(self, dialect, context, statement, parameters)\u001b[0m\n\u001b[1;32m 1968\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m evt_handled:\n\u001b[0;32m-> 1969\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdo_execute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1970\u001b[0m \u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstr_statement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meffective_parameters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 1971\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1973\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_events \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine\u001b[38;5;241m.\u001b[39m_has_events:\n",
201
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy_iris/base.py:1060\u001b[0m, in \u001b[0;36mIRISDialect.do_execute\u001b[0;34m(self, cursor, query, params, context)\u001b[0m\n\u001b[1;32m 1059\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_debug(query, params)\n\u001b[0;32m-> 1060\u001b[0m \u001b[43mcursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n",
202
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py:268\u001b[0m, in \u001b[0;36m_BaseCursor.execute\u001b[0;34m(self, operation, params)\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cursor_type \u001b[38;5;241m=\u001b[39m CursorType\u001b[38;5;241m.\u001b[39mDEFAULT\n\u001b[0;32m--> 268\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_rowcount\n",
203
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py:528\u001b[0m, in \u001b[0;36m_BaseCursor._execute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mexec_func\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
204
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py:574\u001b[0m, in \u001b[0;36m_BaseCursor._execute_query\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 573\u001b[0m \u001b[38;5;66;03m# not found in client side cache - send DQ message\u001b[39;00m\n\u001b[0;32m--> 574\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_direct_query_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 575\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
205
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/dbapi/_DBAPI.py:1159\u001b[0m, in \u001b[0;36mCursor._send_direct_query_request\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1158\u001b[0m \u001b[38;5;66;03m# retrieve data\u001b[39;00m\n\u001b[0;32m-> 1159\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_in_message\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_message_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequence_number\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_statement_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_InStream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFETCH_DATA\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1160\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sqlcode \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_in_message\u001b[38;5;241m.\u001b[39mwire\u001b[38;5;241m.\u001b[39mheader\u001b[38;5;241m.\u001b[39m_get_function_code()\n",
206
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:46\u001b[0m, in \u001b[0;36m_InStream._read_message_sql\u001b[0;34m(self, expected_message_id, expected_statement_id, type, allowedErrors)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 46\u001b[0m is_for_gateway \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__read_message_internal\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpected_message_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_statement_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_for_gateway:\n",
207
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:59\u001b[0m, in \u001b[0;36m_InStream.__read_message_internal\u001b[0;34m(self, expected_message_id, expected_statement_id, call_type)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m high_bit \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__read_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mheader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_MessageHeader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mHEADER_SIZE\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_stream \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
208
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:146\u001b[0m, in \u001b[0;36m_InStream.__read_buffer\u001b[0;34m(self, buffer, offset, length)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mServer unexpectedly closing communication device\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 147\u001b[0m buffer[offset\u001b[38;5;241m+\u001b[39mcb:offset\u001b[38;5;241m+\u001b[39mcb\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mlen\u001b[39m(data)] \u001b[38;5;241m=\u001b[39m data\n",
209
+ "\u001b[0;31mException\u001b[0m: Server unexpectedly closing communication device",
210
+ "\nDuring handling of the above exception, another exception occurred:\n",
211
+ "\u001b[0;31mConnectionResetError\u001b[0m Traceback (most recent call last)",
212
+ "Cell \u001b[0;32mIn[19], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Calculate distance between entities\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m engine\u001b[38;5;241m.\u001b[39mconnect() \u001b[38;5;28;01mas\u001b[39;00m conn:\n\u001b[0;32m----> 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m SELECT TOP 10 e1.uri AS uri1, e2.uri AS uri2, e1.label AS label1, e2.label AS label2,\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m VECTOR_COSINE(e1.embedding, e2.embedding) AS distance\u001b[39;49m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124;43m ORDER BY distance DESC\u001b[39;49m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
213
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/util.py:161\u001b[0m, in \u001b[0;36mTransactionalContext.__exit__\u001b[0;34m(self, type_, value, traceback)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_rollback_can_be_called():\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrollback\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m out_of_band_exit:\n",
214
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2599\u001b[0m, in \u001b[0;36mTransaction.rollback\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2582\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Roll back this :class:`.Transaction`.\u001b[39;00m\n\u001b[1;32m 2583\u001b[0m \n\u001b[1;32m 2584\u001b[0m \u001b[38;5;124;03mThe implementation of this may vary based on the type of transaction in\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2596\u001b[0m \n\u001b[1;32m 2597\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2598\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2599\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_rollback\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2600\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2601\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_active\n",
215
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2718\u001b[0m, in \u001b[0;36mRootTransaction._do_rollback\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2717\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_do_rollback\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 2718\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_close_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtry_deactivate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
216
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2701\u001b[0m, in \u001b[0;36mRootTransaction._close_impl\u001b[0;34m(self, try_deactivate)\u001b[0m\n\u001b[1;32m 2699\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 2700\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_active:\n\u001b[0;32m-> 2701\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connection_rollback_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2703\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnection\u001b[38;5;241m.\u001b[39m_nested_transaction:\n\u001b[1;32m 2704\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnection\u001b[38;5;241m.\u001b[39m_nested_transaction\u001b[38;5;241m.\u001b[39m_cancel()\n",
217
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2693\u001b[0m, in \u001b[0;36mRootTransaction._connection_rollback_impl\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2692\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_connection_rollback_impl\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 2693\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_rollback_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
218
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1121\u001b[0m, in \u001b[0;36mConnection._rollback_impl\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1119\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine\u001b[38;5;241m.\u001b[39mdialect\u001b[38;5;241m.\u001b[39mdo_rollback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnection)\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_dbapi_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
219
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2347\u001b[0m, in \u001b[0;36mConnection._handle_dbapi_exception\u001b[0;34m(self, e, statement, parameters, cursor, context, is_sub_exec)\u001b[0m\n\u001b[1;32m 2345\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2346\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m exc_info[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 2347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc_info[\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mwith_traceback(exc_info[\u001b[38;5;241m2\u001b[39m])\n\u001b[1;32m 2348\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2349\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reentrant_error\n",
220
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1119\u001b[0m, in \u001b[0;36mConnection._rollback_impl\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1117\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_info(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mROLLBACK\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1119\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdo_rollback\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1121\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle_dbapi_exception(e, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m)\n",
221
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/sqlalchemy_iris/base.py:1072\u001b[0m, in \u001b[0;36mIRISDialect.do_rollback\u001b[0;34m(self, connection)\u001b[0m\n\u001b[1;32m 1071\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdo_rollback\u001b[39m(\u001b[38;5;28mself\u001b[39m, connection):\n\u001b[0;32m-> 1072\u001b[0m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrollback\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
222
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_IRISConnection.py:494\u001b[0m, in \u001b[0;36m_IRISConnection.rollback\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 492\u001b[0m sequence_number \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_new_sequence_number()\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_out_message\u001b[38;5;241m.\u001b[39m_send(sequence_number)\n\u001b[0;32m--> 494\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_in_message\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_message_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequence_number\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
223
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:46\u001b[0m, in \u001b[0;36m_InStream._read_message_sql\u001b[0;34m(self, expected_message_id, expected_statement_id, type, allowedErrors)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_message_sql\u001b[39m(\u001b[38;5;28mself\u001b[39m, expected_message_id, expected_statement_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;28mtype\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, allowedErrors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m---> 46\u001b[0m is_for_gateway \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__read_message_internal\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpected_message_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_statement_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_for_gateway:\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39m_get_gateway()\u001b[38;5;241m.\u001b[39m_dispatch_reentrancy(\u001b[38;5;28mself\u001b[39m)\n",
224
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:59\u001b[0m, in \u001b[0;36m_InStream.__read_message_internal\u001b[0;34m(self, expected_message_id, expected_statement_id, call_type)\u001b[0m\n\u001b[1;32m 57\u001b[0m final_buffer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mbytearray\u001b[39m()\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m high_bit \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__read_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mheader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_MessageHeader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mHEADER_SIZE\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_stream \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_stream\u001b[38;5;241m.\u001b[39m_dump_header(\n\u001b[1;32m 62\u001b[0m header\u001b[38;5;241m.\u001b[39mbuffer, _LogFileStream\u001b[38;5;241m.\u001b[39mLOG_RECEIVED, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection)\n",
225
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_InStream.py:138\u001b[0m, in \u001b[0;36m_InStream.__read_buffer\u001b[0;34m(self, buffer, offset, length)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_device \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno longer connected to server\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 138\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_device\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlength\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m buffer[offset:offset\u001b[38;5;241m+\u001b[39m\u001b[38;5;28mlen\u001b[39m(data)] \u001b[38;5;241m=\u001b[39m data\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m==\u001b[39m length:\n",
226
+ "File \u001b[0;32m~/miniconda3/envs/hackupc/lib/python3.11/site-packages/intersystems_iris/_Device.py:40\u001b[0m, in \u001b[0;36m_Device.recv\u001b[0;34m(self, len)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrecv\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28mlen\u001b[39m):\n\u001b[0;32m---> 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_socket\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m)\u001b[49m\n",
227
+ "\u001b[0;31mConnectionResetError\u001b[0m: [Errno 54] Connection reset by peer"
228
+ ]
229
+ }
230
+ ],
231
  "source": [
232
  "# Calculate distance between entities\n",
233
  "with engine.connect() as conn:\n",
 
265
  " sql = f\"\"\"\n",
266
  " CREATE TABLE Test.RelationEmbeddings (\n",
267
  " embedding VECTOR(DOUBLE, 50),\n",
268
+ " label VARCHAR({len_label}),\n",
269
+ " uri VARCHAR({len_uri})\n",
270
  " )\n",
271
  " \"\"\"\n",
272
  " result = conn.execute(text(sql))\n",
 
333
  },
334
  {
335
  "cell_type": "code",
336
+ "execution_count": 21,
337
  "metadata": {},
338
+ "outputs": [
339
+ {
340
+ "name": "stdout",
341
+ "output_type": "stream",
342
+ "text": [
343
+ " definition \\\n",
344
+ "0 A sudden onset of abdominal pain with associat... \n",
345
+ "1 A type of abdominal pain characterized by a fe... \n",
346
+ "2 Distention of the abdomen. \n",
347
+ "3 An abnormal enlargement or swelling in the abd... \n",
348
+ "4 New abnormal growth of tissue in the ABDOMEN. \n",
349
+ "\n",
350
+ " uri \\\n",
351
+ "0 http://identifiers.org/medgen/C0000727 \n",
352
+ "1 http://identifiers.org/medgen/C0000729 \n",
353
+ "2 http://identifiers.org/medgen/C0000731 \n",
354
+ "3 http://identifiers.org/medgen/C0000734 \n",
355
+ "4 http://identifiers.org/medgen/C0000735 \n",
356
+ "\n",
357
+ " embeddings \n",
358
+ "0 0.07821787893772125, 0.9349365234375, -0.11445... \n",
359
+ "1 -0.48267558217048645, 0.8518325090408325, -0.1... \n",
360
+ "2 -0.4706612527370453, -0.23200057446956635, -0.... \n",
361
+ "3 0.03700314462184906, 0.6256464123725891, -0.47... \n",
362
+ "4 -0.5286742448806763, 0.06493321806192398, -1.0... \n"
363
+ ]
364
+ }
365
+ ],
366
  "source": [
367
  "# %%\n",
368
  "import pandas as pd\n",
369
  "import rdflib\n",
370
  "\n",
371
  "# Load the disease descriptions and their precomputed embeddings from CSV\n",
372
+ "df_disease_descriptions = pd.read_csv(\"disease_descriptions_with_embeddings.csv\")\n",
373
+ "df_disease_descriptions[\"embeddings\"] = df_disease_descriptions[\"embeddings\"].apply(lambda x: x[1:-1])\n",
 
 
 
 
 
 
 
 
 
 
374
  "\n",
375
  "with engine.connect() as conn:\n",
376
  " with conn.begin(): \n",
377
  " result = conn.execute(text('DROP TABLE IF EXISTS Test.DiseaseDescriptions'))\n",
378
  " sql = f\"\"\"\n",
379
  " CREATE TABLE Test.DiseaseDescriptions (\n",
380
+ " definition TEXT,\n",
381
+ " uri TEXT,\n",
382
+ " embedding VECTOR(DOUBLE, 768)\n",
383
  " )\n",
384
  " \"\"\"\n",
385
  " result = conn.execute(text(sql))\n",
 
387
  "with engine.connect() as conn:\n",
388
  " with conn.begin():\n",
389
  " for index, row in df_disease_descriptions.iterrows():\n",
 
 
390
  " sql = text(\"\"\"\n",
391
  " INSERT INTO Test.DiseaseDescriptions \n",
392
+ " (uri, definition, embedding)\n",
393
+ " VALUES (:uri, :definition, TO_VECTOR(:embedding))\n",
394
  " \"\"\")\n",
395
  " conn.execute(sql, {\n",
396
  " 'uri': row['uri'],\n",
397
+ " 'definition': row['definition'],\n",
398
+ " 'embedding': str(row['embeddings'])\n",
399
  " })"
400
  ]
401
  },
utils.py CHANGED
@@ -1,8 +1,10 @@
1
  # %%
2
- from typing import List, Dict, tuple, Any
3
  import os
4
  from sqlalchemy import create_engine, text
5
  import requests
 
 
6
 
7
  def get_all_diseases_name(engine) -> List[List[str]]:
8
  with engine.connect() as conn:
@@ -16,6 +18,7 @@ def get_all_diseases_name(engine) -> List[List[str]]:
16
  all_diseases = [row[1] for row in data if row[1] != "nan"]
17
  return all_diseases
18
 
 
19
  def get_uri_from_name(engine, name: str) -> str:
20
  with engine.connect() as conn:
21
  with conn.begin():
@@ -25,9 +28,12 @@ def get_uri_from_name(engine, name: str) -> str:
25
  """
26
  result = conn.execute(text(sql))
27
  data = result.fetchall()
28
- return data[0][0].split('/')[-1]
29
 
30
- def get_most_similar_diseases_from_uri(engine, original_disease_uri: str, threshold: float = 0.8) -> List[str]:
 
 
 
31
  with engine.connect() as conn:
32
  with conn.begin():
33
  sql = f"""
@@ -39,6 +45,7 @@ def get_most_similar_diseases_from_uri(engine, original_disease_uri: str, thresh
39
  all_diseases = [row[1] for row in data if row[1] != "nan"]
40
  return all_diseases
41
 
 
42
  def get_uri_from_name(engine, name: str) -> str:
43
  with engine.connect() as conn:
44
  with conn.begin():
@@ -48,9 +55,12 @@ def get_uri_from_name(engine, name: str) -> str:
48
  """
49
  result = conn.execute(text(sql))
50
  data = result.fetchall()
51
- return data[0][0].split('/')[-1]
52
 
53
- def get_most_similar_diseases_from_uri(engine, original_disease_uri: str, threshold: float = 0.8) -> List[str]:
 
 
 
54
  with engine.connect() as conn:
55
  with conn.begin():
56
  sql = f"""
@@ -65,9 +75,12 @@ def get_most_similar_diseases_from_uri(engine, original_disease_uri: str, thresh
65
  result = conn.execute(text(sql))
66
  data = result.fetchall()
67
 
68
- similar_diseases = [(row[1].split('/')[-1], row[3], row[4]) for row in data if row[3] != "nan"]
 
 
69
  return similar_diseases
70
 
 
71
  def get_clinical_record_info(clinical_record_id: str) -> Dict[str, Any]:
72
  # Request:
73
  # curl -X GET "https://clinicaltrials.gov/api/v2/studies/NCT00841061" \
@@ -76,6 +89,7 @@ def get_clinical_record_info(clinical_record_id: str) -> Dict[str, Any]:
76
  response = requests.get(request_url, headers={"accept": "application/json"})
77
  return response.json()
78
 
 
79
  def get_clinical_records_by_ids(clinical_record_ids: List[str]) -> List[Dict[str, Any]]:
80
  clinical_records = []
81
  for clinical_record_id in clinical_record_ids:
@@ -83,6 +97,7 @@ def get_clinical_records_by_ids(clinical_record_ids: List[str]) -> List[Dict[str
83
  clinical_records.append(clinical_record_info)
84
  return clinical_records
85
 
 
86
  def get_uris_of_similar_diseases(uri_list: List[str]) -> List[tuple[str, str, float]]:
87
  uri_list = tuple(uri_list)
88
  with engine.connect() as conn:
@@ -96,24 +111,62 @@ def get_uris_of_similar_diseases(uri_list: List[str]) -> List[tuple[str, str, fl
96
  data = result.fetchall()
97
  return data
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  if __name__ == "__main__":
100
- username = 'demo'
101
- password = 'demo'
102
- hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
103
- port = '1972'
104
- namespace = 'USER'
105
  CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"
106
 
107
  try:
108
  engine = create_engine(CONNECTION_STRING)
109
- diseases = get_most_similar_diseases_from_uri('C1843013')
110
  for disease in diseases:
111
  print(disease)
112
  except Exception as e:
113
  print(e)
114
 
115
- print(get_uri_from_name(engine, 'Alzheimer disease 3'))
 
 
 
116
 
117
- clinical_record_info = get_clinical_records_by_ids(['NCT00841061'])
118
  print(clinical_record_info)
119
 
 
 
 
 
 
 
 
1
  # %%
2
+ from typing import List, Dict, Any
3
  import os
4
  from sqlalchemy import create_engine, text
5
  import requests
6
+ from sentence_transformers import SentenceTransformer
7
+
8
 
9
  def get_all_diseases_name(engine) -> List[List[str]]:
10
  with engine.connect() as conn:
 
18
  all_diseases = [row[1] for row in data if row[1] != "nan"]
19
  return all_diseases
20
 
21
+
22
  def get_uri_from_name(engine, name: str) -> str:
23
  with engine.connect() as conn:
24
  with conn.begin():
 
28
  """
29
  result = conn.execute(text(sql))
30
  data = result.fetchall()
31
+ return data[0][0].split("/")[-1]
32
 
33
+
34
+ def get_most_similar_diseases_from_uri(
35
+ engine, original_disease_uri: str, threshold: float = 0.8
36
+ ) -> List[str]:
37
  with engine.connect() as conn:
38
  with conn.begin():
39
  sql = f"""
 
45
  all_diseases = [row[1] for row in data if row[1] != "nan"]
46
  return all_diseases
47
 
48
+
49
  def get_uri_from_name(engine, name: str) -> str:
50
  with engine.connect() as conn:
51
  with conn.begin():
 
55
  """
56
  result = conn.execute(text(sql))
57
  data = result.fetchall()
58
+ return data[0][0].split("/")[-1]
59
 
60
+
61
+ def get_most_similar_diseases_from_uri(
62
+ engine, original_disease_uri: str, threshold: float = 0.8
63
+ ) -> List[str]:
64
  with engine.connect() as conn:
65
  with conn.begin():
66
  sql = f"""
 
75
  result = conn.execute(text(sql))
76
  data = result.fetchall()
77
 
78
+ similar_diseases = [
79
+ (row[1].split("/")[-1], row[3], row[4]) for row in data if row[3] != "nan"
80
+ ]
81
  return similar_diseases
82
 
83
+
84
  def get_clinical_record_info(clinical_record_id: str) -> Dict[str, Any]:
85
  # Request:
86
  # curl -X GET "https://clinicaltrials.gov/api/v2/studies/NCT00841061" \
 
89
  response = requests.get(request_url, headers={"accept": "application/json"})
90
  return response.json()
91
 
92
+
93
  def get_clinical_records_by_ids(clinical_record_ids: List[str]) -> List[Dict[str, Any]]:
94
  clinical_records = []
95
  for clinical_record_id in clinical_record_ids:
 
97
  clinical_records.append(clinical_record_info)
98
  return clinical_records
99
 
100
+
101
  def get_uris_of_similar_diseases(uri_list: List[str]) -> List[tuple[str, str, float]]:
102
  uri_list = tuple(uri_list)
103
  with engine.connect() as conn:
 
111
  data = result.fetchall()
112
  return data
113
 
114
+
115
# Shared sentence-transformers model: instantiated once, when this module is
# imported, and reused by every get_embedding() call.
encoder = SentenceTransformer("allenai-specter")


def get_embedding(string: str) -> List[float]:
    """Return the embedding of *string* computed by the shared ``encoder``.

    The value is handed back exactly as ``encoder.encode`` produces it, with
    the progress bar disabled.

    NOTE(review): get_diseases_related_to_a_textual_description calls
    ``.tolist()`` on this result, so it is presumably a NumPy array rather
    than the plain list the annotation suggests -- confirm before relying on
    the annotated type.
    """
    return encoder.encode(string, show_progress_bar=False)
122
+
123
+
124
def get_diseases_related_to_a_textual_description(
    description: str, engine=None
) -> List[Any]:
    """Return the five stored disease descriptions closest to *description*.

    The text is embedded with ``get_embedding`` and compared, through
    ``VECTOR_COSINE``, against the ``embedding`` column of
    ``Test.DiseaseDescriptions``.

    Args:
        description: Free-text description to search for.
        engine: SQLAlchemy engine to run the query on. Optional for backward
            compatibility: when omitted, the module-level ``engine`` global
            (assigned when this file is run as a script) is used.

    Returns:
        Up to five ``(uri, distance)`` rows as returned by ``fetchall()``,
        ordered by descending ``VECTOR_COSINE`` value.

    Raises:
        RuntimeError: If no engine is passed and no module-level ``engine``
            has been defined.
    """
    if engine is None:
        # A module that merely imports this file never gets a global
        # ``engine`` here, so fail with an actionable message instead of a
        # bare NameError in the middle of the query.
        engine = globals().get("engine")
        if engine is None:
            raise RuntimeError(
                "No database engine available: pass engine=... to "
                "get_diseases_related_to_a_textual_description()"
            )

    # Embed the description using sentence-transformers
    description_embedding = get_embedding(description)
    print(f'Size of the embedding: {len(description_embedding)}')
    # Drop the surrounding list brackets so TO_VECTOR receives a plain
    # comma-separated list of numbers.
    string_representation = str(description_embedding.tolist())[1:-1]
    print(f'String representation: {string_representation}')

    with engine.connect() as conn:
        with conn.begin():
            # The interpolated text consists solely of floats formatted by
            # Python, never of the user's raw description.
            sql = f"""
                SELECT TOP 5 uri, VECTOR_COSINE(e.embedding, TO_VECTOR('{string_representation}', DOUBLE)) AS distance
                FROM Test.DiseaseDescriptions e
                ORDER BY distance DESC
            """
            result = conn.execute(text(sql))
            data = result.fetchall()
    return data
141
+
142
+
143
if __name__ == "__main__":
    # Manual smoke test of the helpers above against an IRIS instance, using
    # the demo credentials and the host taken from IRIS_HOSTNAME.
    username = "demo"
    password = "demo"
    hostname = os.getenv("IRIS_HOSTNAME", "localhost")
    port = "1972"
    namespace = "USER"
    CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"

    # ``engine`` has to remain a module-level name: get_uris_of_similar_diseases
    # and get_diseases_related_to_a_textual_description read it as a global.
    # It is created outside the try block so that a failure here surfaces
    # directly instead of being printed and followed by a NameError below.
    engine = create_engine(CONNECTION_STRING)

    try:
        # The engine is the first positional argument of this helper.
        diseases = get_most_similar_diseases_from_uri(engine, "C1843013")
        for disease in diseases:
            print(disease)
    except Exception as e:
        print(e)

    try:
        print(get_uri_from_name(engine, "Alzheimer disease 3"))
    except Exception as e:
        print(e)

    clinical_record_info = get_clinical_records_by_ids(["NCT00841061"])
    print(clinical_record_info)

    textual_description = "A disease that causes memory loss and other cognitive impairments."
    diseases = get_diseases_related_to_a_textual_description(textual_description)
    for disease in diseases:
        print(disease)
171
+
172
+ # %%