Spaces:
Runtime error
Runtime error
Didier Guillevic
committed on
Commit
·
8507fc0
1
Parent(s):
9a919aa
Attempting to give more information on the table columns
Browse files
- app.py +9 -4
- icij_utils.py +18 -16
app.py
CHANGED
|
@@ -18,6 +18,7 @@ Generation:
|
|
| 18 |
|
| 19 |
import gradio as gr
|
| 20 |
import icij_utils
|
|
|
|
| 21 |
import smolagents
|
| 22 |
import os
|
| 23 |
import pathlib
|
|
@@ -65,8 +66,8 @@ for table, doc in metadata.TABLE_DOCS.items():
|
|
| 65 |
if table in schema:
|
| 66 |
for col_name, col_type in schema[table].items():
|
| 67 |
col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
|
| 68 |
-
|
| 69 |
-
tool_description += f" - {col_name}: {col_type}\n"
|
| 70 |
|
| 71 |
# Add source documentation
|
| 72 |
#tool_description += "\n\nSource IDs:\n"
|
|
@@ -150,8 +151,12 @@ with gr.Blocks() as demo:
|
|
| 150 |
with gr.Accordion("Sample questions", open=False):
|
| 151 |
gr.Examples(
|
| 152 |
[
|
| 153 |
-
[
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
],
|
| 156 |
inputs=[question,],
|
| 157 |
outputs=[response,],
|
|
|
|
| 18 |
|
| 19 |
import gradio as gr
|
| 20 |
import icij_utils
|
| 21 |
+
import sqlalchemy
|
| 22 |
import smolagents
|
| 23 |
import os
|
| 24 |
import pathlib
|
|
|
|
| 66 |
if table in schema:
|
| 67 |
for col_name, col_type in schema[table].items():
|
| 68 |
col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
|
| 69 |
+
tool_description += f" - {col_name}: {col_type}: {col_doc}\n"
|
| 70 |
+
#tool_description += f" - {col_name}: {col_type}\n"
|
| 71 |
|
| 72 |
# Add source documentation
|
| 73 |
#tool_description += "\n\nSource IDs:\n"
|
|
|
|
| 151 |
with gr.Accordion("Sample questions", open=False):
|
| 152 |
gr.Examples(
|
| 153 |
[
|
| 154 |
+
[
|
| 155 |
+
(
|
| 156 |
+
"Can you list the entities with an address in Canada? "
|
| 157 |
+
"Please give the name of the entity an its address."
|
| 158 |
+
),
|
| 159 |
+
],
|
| 160 |
],
|
| 161 |
inputs=[question,],
|
| 162 |
outputs=[response,],
|
icij_utils.py
CHANGED
|
@@ -229,7 +229,7 @@ class ICIJDatabaseMetadata:
|
|
| 229 |
'name': "Legal name of the offshore entity",
|
| 230 |
'original_name': "Name in original language/character set",
|
| 231 |
'former_name': "Previous names of the entity",
|
| 232 |
-
'jurisdiction': "Country/region where the entity is registered",
|
| 233 |
'jurisdiction_description': "Detailed description of the jurisdiction",
|
| 234 |
'company_type': "Legal structure of the entity (e.g., corporation, trust)",
|
| 235 |
'address': "Primary registered address",
|
|
@@ -240,29 +240,31 @@ class ICIJDatabaseMetadata:
|
|
| 240 |
'dorm_date': "Date when entity became dormant",
|
| 241 |
'status': "Current status of the entity",
|
| 242 |
'service_provider': "Firm that provided offshore services",
|
| 243 |
-
'
|
|
|
|
|
|
|
| 244 |
},
|
| 245 |
|
| 246 |
'others': {
|
| 247 |
'name': "Name of the miscellaneous party or item",
|
| 248 |
'type': "Type of the other party (e.g., vessel, legal case)",
|
| 249 |
'incorporation_date': "Date of incorporation or creation if applicable",
|
| 250 |
-
'jurisdiction': "Jurisdiction associated with the party",
|
|
|
|
| 251 |
'countries': "Countries associated with the party",
|
| 252 |
'status': "Current status",
|
| 253 |
'internal_id': "Unique identifier within the leak data",
|
| 254 |
'address': "Associated address if available",
|
| 255 |
-
'
|
| 256 |
'valid_until': "Date until which the information is valid"
|
| 257 |
},
|
| 258 |
|
| 259 |
'officers': {
|
| 260 |
'name': "Name of the individual or organization",
|
| 261 |
-
'
|
| 262 |
-
'
|
|
|
|
| 263 |
'valid_until': "Date until which the information is valid",
|
| 264 |
-
'status': "Current status of the officer",
|
| 265 |
-
'internal_id': "Unique identifier within the leak data"
|
| 266 |
},
|
| 267 |
|
| 268 |
'intermediaries': {
|
|
@@ -270,28 +272,28 @@ class ICIJDatabaseMetadata:
|
|
| 270 |
'internal_id': "Unique identifier within the leak data",
|
| 271 |
'address': "Business address",
|
| 272 |
'status': "Current status",
|
| 273 |
-
'
|
| 274 |
-
'
|
|
|
|
| 275 |
},
|
| 276 |
|
| 277 |
'addresses': {
|
| 278 |
'address': "Full address text",
|
| 279 |
'name': "Name associated with address",
|
| 280 |
-
'country_codes': "
|
| 281 |
'countries': "Full country names",
|
| 282 |
-
'
|
| 283 |
'valid_until': "Date until which address is valid",
|
| 284 |
-
'internal_id': "Unique identifier within the leak data"
|
| 285 |
},
|
| 286 |
|
| 287 |
'relationships': {
|
| 288 |
-
'
|
| 289 |
-
'
|
| 290 |
'rel_type': "Type of relationship (e.g., shareholder, director)",
|
| 291 |
'link': "Additional details about the relationship",
|
| 292 |
'start_date': "When the relationship began",
|
| 293 |
'end_date': "When the relationship ended",
|
| 294 |
-
'
|
| 295 |
'status': "Current status of the relationship"
|
| 296 |
}
|
| 297 |
}
|
|
|
|
| 229 |
'name': "Legal name of the offshore entity",
|
| 230 |
'original_name': "Name in original language/character set",
|
| 231 |
'former_name': "Previous names of the entity",
|
| 232 |
+
#'jurisdiction': "Country/region where the entity is registered",
|
| 233 |
'jurisdiction_description': "Detailed description of the jurisdiction",
|
| 234 |
'company_type': "Legal structure of the entity (e.g., corporation, trust)",
|
| 235 |
'address': "Primary registered address",
|
|
|
|
| 240 |
'dorm_date': "Date when entity became dormant",
|
| 241 |
'status': "Current status of the entity",
|
| 242 |
'service_provider': "Firm that provided offshore services",
|
| 243 |
+
'country_codes': '3 letter abbreviations of country names',
|
| 244 |
+
'countries': 'name of country',
|
| 245 |
+
'sourceID': "Identifier for the leak source"
|
| 246 |
},
|
| 247 |
|
| 248 |
'others': {
|
| 249 |
'name': "Name of the miscellaneous party or item",
|
| 250 |
'type': "Type of the other party (e.g., vessel, legal case)",
|
| 251 |
'incorporation_date': "Date of incorporation or creation if applicable",
|
| 252 |
+
'jurisdiction': "2 letter code of the Jurisdiction associated with the party",
|
| 253 |
+
'jurisdiction-description': 'full name of the jurisdiction',
|
| 254 |
'countries': "Countries associated with the party",
|
| 255 |
'status': "Current status",
|
| 256 |
'internal_id': "Unique identifier within the leak data",
|
| 257 |
'address': "Associated address if available",
|
| 258 |
+
'sourceID': "Identifier for the leak source",
|
| 259 |
'valid_until': "Date until which the information is valid"
|
| 260 |
},
|
| 261 |
|
| 262 |
'officers': {
|
| 263 |
'name': "Name of the individual or organization",
|
| 264 |
+
'countries': 'full name of the country connected to the officer',
|
| 265 |
+
'country_codes': "3 letter code of the countries connected to the officer",
|
| 266 |
+
'sourceID': "Identifier for the leak source",
|
| 267 |
'valid_until': "Date until which the information is valid",
|
|
|
|
|
|
|
| 268 |
},
|
| 269 |
|
| 270 |
'intermediaries': {
|
|
|
|
| 272 |
'internal_id': "Unique identifier within the leak data",
|
| 273 |
'address': "Business address",
|
| 274 |
'status': "Current status",
|
| 275 |
+
'countries': "Countries where intermediary operates",
|
| 276 |
+
'country_codes': "3 letter abbreviations of the countries where intermediary operates",
|
| 277 |
+
'sourceID': "Identifier for the leak source"
|
| 278 |
},
|
| 279 |
|
| 280 |
'addresses': {
|
| 281 |
'address': "Full address text",
|
| 282 |
'name': "Name associated with address",
|
| 283 |
+
'country_codes': "3 letter country codes for the address",
|
| 284 |
'countries': "Full country names",
|
| 285 |
+
'sourceID': "Identifier for the leak source",
|
| 286 |
'valid_until': "Date until which address is valid",
|
|
|
|
| 287 |
},
|
| 288 |
|
| 289 |
'relationships': {
|
| 290 |
+
'node_id_start': "Internal ID of the source node",
|
| 291 |
+
'node_id_end': "Internal ID of the target node",
|
| 292 |
'rel_type': "Type of relationship (e.g., shareholder, director)",
|
| 293 |
'link': "Additional details about the relationship",
|
| 294 |
'start_date': "When the relationship began",
|
| 295 |
'end_date': "When the relationship ended",
|
| 296 |
+
'sourceID': "Identifier for the leak source",
|
| 297 |
'status': "Current status of the relationship"
|
| 298 |
}
|
| 299 |
}
|