andymbryant committed on
Commit
bc41f37
·
1 Parent(s): 9a1452c

brainstorm on interface execution

Browse files
app.py CHANGED
@@ -8,7 +8,7 @@ source_df, template_df = get_dataframes()
8
 
9
  with gr.Blocks() as demo:
10
  with gr.Column():
11
- gr.Markdown("## To begin, upload a Template CSV and a Source CSV file.")
12
  with gr.Row():
13
  gr.inputs.File(label="Template", type="file", file_count='single')
14
  gr.inputs.File(label="Source", type="file", file_count='single')
 
8
 
9
  with gr.Blocks() as demo:
10
  with gr.Column():
11
+ gr.Markdown("# Step 1\n### Upload a Template CSV and a Source CSV file.")
12
  with gr.Row():
13
  gr.inputs.File(label="Template", type="file", file_count='single')
14
  gr.inputs.File(label="Source", type="file", file_count='single')
src/core.py CHANGED
@@ -16,16 +16,11 @@ load_dotenv()
16
  DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
17
  SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
18
 
19
- TRANSFORM_MODEL = ChatOpenAI(
20
  model_name='gpt-4',
21
  temperature=0,
22
  )
23
 
24
- NATURAL_LANGUAGE_MODEL = ChatOpenAI(
25
- model_name='gpt-4',
26
- temperature=0.1,
27
- )
28
-
29
  def get_dataframes():
30
  source = pd.read_csv(os.path.join(SYNTHETIC_DATA_DIR_PATH, 'legal_entries_a.csv'))
31
  template = pd.read_csv(os.path.join(SYNTHETIC_DATA_DIR_PATH, 'legal_template.csv'))
@@ -41,19 +36,19 @@ def get_table_mapping(source_df, template_df) -> TableMapping:
41
  partial_variables={'format_instructions': table_mapping_parser.get_format_instructions()},
42
  )
43
 
44
- mapping_chain = analyst_prompt | TRANSFORM_MODEL | table_mapping_parser
45
  return mapping_chain.invoke({"source_1_csv_str": get_data_str_from_df_for_prompt(source_df), "target_csv_str": get_data_str_from_df_for_prompt(template_df)})
46
 
47
 
48
  def get_code_spec(table_mapping: TableMapping) -> str:
49
  writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
50
- writer_chain = writer_prompt | NATURAL_LANGUAGE_MODEL | StrOutputParser()
51
  return writer_chain.invoke({"table_mapping": str(table_mapping)})
52
 
53
 
54
  def get_mapping_code(spec_str: str) -> str:
55
  engineer_prompt = ChatPromptTemplate.from_template(ENGINEER_PROMPT_STR)
56
- engineer_chain = engineer_prompt | TRANSFORM_MODEL | StrOutputParser()
57
  return engineer_chain.invoke({"spec_str": spec_str})
58
 
59
 
 
16
  DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
17
  SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
18
 
19
+ BASE_MODEL = ChatOpenAI(
20
  model_name='gpt-4',
21
  temperature=0,
22
  )
23
 
 
 
 
 
 
24
  def get_dataframes():
25
  source = pd.read_csv(os.path.join(SYNTHETIC_DATA_DIR_PATH, 'legal_entries_a.csv'))
26
  template = pd.read_csv(os.path.join(SYNTHETIC_DATA_DIR_PATH, 'legal_template.csv'))
 
36
  partial_variables={'format_instructions': table_mapping_parser.get_format_instructions()},
37
  )
38
 
39
+ mapping_chain = analyst_prompt | BASE_MODEL | table_mapping_parser
40
  return mapping_chain.invoke({"source_1_csv_str": get_data_str_from_df_for_prompt(source_df), "target_csv_str": get_data_str_from_df_for_prompt(template_df)})
41
 
42
 
43
  def get_code_spec(table_mapping: TableMapping) -> str:
44
  writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
45
+ writer_chain = writer_prompt | BASE_MODEL | StrOutputParser()
46
  return writer_chain.invoke({"table_mapping": str(table_mapping)})
47
 
48
 
49
  def get_mapping_code(spec_str: str) -> str:
50
  engineer_prompt = ChatPromptTemplate.from_template(ENGINEER_PROMPT_STR)
51
+ engineer_chain = engineer_prompt | BASE_MODEL | StrOutputParser()
52
  return engineer_chain.invoke({"spec_str": spec_str})
53
 
54
 
src/notebooks/brainstorm4.ipynb CHANGED
@@ -2,16 +2,24 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 90,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
8
  {
9
  "data": {
10
  "text/plain": [
11
  "True"
12
  ]
13
  },
14
- "execution_count": 90,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
@@ -35,7 +43,7 @@
35
  },
36
  {
37
  "cell_type": "code",
38
- "execution_count": 8,
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
@@ -46,7 +54,7 @@
46
  },
47
  {
48
  "cell_type": "code",
49
- "execution_count": 9,
50
  "metadata": {},
51
  "outputs": [],
52
  "source": [
@@ -60,7 +68,7 @@
60
  },
61
  {
62
  "cell_type": "code",
63
- "execution_count": 10,
64
  "metadata": {},
65
  "outputs": [],
66
  "source": [
@@ -77,7 +85,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 11,
81
  "metadata": {},
82
  "outputs": [],
83
  "source": [
@@ -148,7 +156,7 @@
148
  },
149
  {
150
  "cell_type": "code",
151
- "execution_count": 12,
152
  "metadata": {},
153
  "outputs": [],
154
  "source": [
@@ -196,7 +204,7 @@
196
  },
197
  {
198
  "cell_type": "code",
199
- "execution_count": 19,
200
  "metadata": {},
201
  "outputs": [],
202
  "source": [
@@ -227,14 +235,14 @@
227
  },
228
  {
229
  "cell_type": "code",
230
- "execution_count": 17,
231
  "metadata": {},
232
  "outputs": [
233
  {
234
  "name": "stdout",
235
  "output_type": "stream",
236
  "text": [
237
- "Running on local URL: http://127.0.0.1:7874\n",
238
  "\n",
239
  "To create a public link, set `share=True` in `launch()`.\n"
240
  ]
@@ -242,7 +250,7 @@
242
  {
243
  "data": {
244
  "text/html": [
245
- "<div><iframe src=\"http://127.0.0.1:7874/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
246
  ],
247
  "text/plain": [
248
  "<IPython.core.display.HTML object>"
@@ -255,7 +263,7 @@
255
  "data": {
256
  "text/plain": []
257
  },
258
- "execution_count": 17,
259
  "metadata": {},
260
  "output_type": "execute_result"
261
  }
@@ -270,16 +278,14 @@
270
  },
271
  {
272
  "cell_type": "code",
273
- "execution_count": 34,
274
  "metadata": {},
275
  "outputs": [
276
  {
277
  "name": "stdout",
278
  "output_type": "stream",
279
  "text": [
280
- "Running on local URL: http://127.0.0.1:7885\n",
281
- "\n",
282
- "Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB\n",
283
  "\n",
284
  "To create a public link, set `share=True` in `launch()`.\n"
285
  ]
@@ -287,7 +293,7 @@
287
  {
288
  "data": {
289
  "text/html": [
290
- "<div><iframe src=\"http://127.0.0.1:7885/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
291
  ],
292
  "text/plain": [
293
  "<IPython.core.display.HTML object>"
@@ -300,7 +306,7 @@
300
  "data": {
301
  "text/plain": []
302
  },
303
- "execution_count": 34,
304
  "metadata": {},
305
  "output_type": "execute_result"
306
  }
@@ -320,7 +326,7 @@
320
  },
321
  {
322
  "cell_type": "code",
323
- "execution_count": 41,
324
  "metadata": {},
325
  "outputs": [
326
  {
@@ -335,7 +341,7 @@
335
  "name": "stdout",
336
  "output_type": "stream",
337
  "text": [
338
- "Running on local URL: http://127.0.0.1:7892\n",
339
  "\n",
340
  "To create a public link, set `share=True` in `launch()`.\n"
341
  ]
@@ -343,7 +349,7 @@
343
  {
344
  "data": {
345
  "text/html": [
346
- "<div><iframe src=\"http://127.0.0.1:7892/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
347
  ],
348
  "text/plain": [
349
  "<IPython.core.display.HTML object>"
@@ -356,7 +362,7 @@
356
  "data": {
357
  "text/plain": []
358
  },
359
- "execution_count": 41,
360
  "metadata": {},
361
  "output_type": "execute_result"
362
  }
@@ -377,40 +383,28 @@
377
  },
378
  {
379
  "cell_type": "code",
380
- "execution_count": 89,
381
  "metadata": {},
382
  "outputs": [
383
  {
384
  "name": "stderr",
385
  "output_type": "stream",
386
  "text": [
387
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
388
- " gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
389
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
390
- " gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
391
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
392
- " gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
393
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
394
- " gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
395
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
396
- " gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
397
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
398
- " gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
399
- "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:841: UserWarning: Expected 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
400
  " warnings.warn(\n",
401
- "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:845: UserWarning: Expected at least 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
402
- " warnings.warn(\n",
403
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:39: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'transform_source'}\n",
404
- " gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
405
- "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:40: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'save_code'}\n",
406
- " gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n"
407
  ]
408
  },
409
  {
410
  "name": "stdout",
411
  "output_type": "stream",
412
  "text": [
413
- "Running on local URL: http://127.0.0.1:7934\n",
414
  "\n",
415
  "To create a public link, set `share=True` in `launch()`.\n"
416
  ]
@@ -418,7 +412,7 @@
418
  {
419
  "data": {
420
  "text/html": [
421
- "<div><iframe src=\"http://127.0.0.1:7934/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
422
  ],
423
  "text/plain": [
424
  "<IPython.core.display.HTML object>"
@@ -431,62 +425,255 @@
431
  "data": {
432
  "text/plain": []
433
  },
434
- "execution_count": 89,
435
  "metadata": {},
436
  "output_type": "execute_result"
 
 
 
 
 
 
 
 
437
  }
438
  ],
439
  "source": [
440
  "def _sanitize_python_output(text: str):\n",
441
  " _, after = text.split(\"```python\")\n",
442
  " return after.split(\"```\")[0]\n",
 
 
 
 
 
443
  "\n",
444
- "def do_stuff(val):\n",
445
- " print(val)\n",
446
- "\n",
447
  "def generate_code(val):\n",
448
  " return '# check this out'\n",
449
  "\n",
450
- "def save_csv_file(df, filename):\n",
451
- " df.to_csv(os.path.join(data_dir_path, 'output', filename) + '.csv')\n",
 
 
452
  "\n",
453
- "with gr.Blocks() as demo:\n",
454
- " with gr.Column():\n",
455
- " gr.Markdown(\"## To begin, upload a Template CSV and a Source CSV file.\")\n",
456
- " with gr.Row():\n",
457
- " gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
458
- " gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
459
- "\n",
460
- " with gr.Column():\n",
461
- " gr.Markdown(\"## Mapping from Source to Template\")\n",
462
- " with gr.Row():\n",
463
- " table_mapping_df = pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
464
- " gr.DataFrame(value=table_mapping_df)\n",
465
- " save_mapping_btn = gr.Button(value=\"Save Mapping\", variant=\"secondary\")\n",
466
- " save_mapping_btn.click(fn=lambda : save_csv_file(table_mapping_df, 'table_mapping'))\n",
467
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  " with gr.Row():\n",
469
- " test = gr.Markdown()\n",
470
  " generate_code_btn = gr.Button(value=\"Generate Code from Mapping\", variant=\"primary\")\n",
471
- " generate_code_btn.click(fn=generate_code, outputs=test)\n",
472
- "\n",
473
- " with gr.Column():\n",
474
- " gr.Markdown(\"## Here is the code that will be used to transform the source file into the template schema:\")\n",
475
- " gr.Code(language=\"python\", value=_sanitize_python_output(transform_code))\n",
476
  "\n",
 
 
477
  " with gr.Row():\n",
478
- " gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
479
- " gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n",
 
 
 
 
 
 
 
 
480
  " \n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  " with gr.Row():\n",
 
 
 
482
  " with gr.Column():\n",
483
- " gr.Dataframe(label='Target (template)', type='pandas', value=template_df)\n",
484
- " with gr.Column():\n",
485
- " gr.Dataframe(label='Source (transformed)', type='pandas', value=PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code))\n",
486
- "\n",
 
 
 
487
  "demo.launch()"
488
  ]
489
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  {
491
  "cell_type": "code",
492
  "execution_count": null,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
  {
17
  "data": {
18
  "text/plain": [
19
  "True"
20
  ]
21
  },
22
+ "execution_count": 2,
23
  "metadata": {},
24
  "output_type": "execute_result"
25
  }
 
43
  },
44
  {
45
  "cell_type": "code",
46
+ "execution_count": 103,
47
  "metadata": {},
48
  "outputs": [],
49
  "source": [
 
54
  },
55
  {
56
  "cell_type": "code",
57
+ "execution_count": 104,
58
  "metadata": {},
59
  "outputs": [],
60
  "source": [
 
68
  },
69
  {
70
  "cell_type": "code",
71
+ "execution_count": 105,
72
  "metadata": {},
73
  "outputs": [],
74
  "source": [
 
85
  },
86
  {
87
  "cell_type": "code",
88
+ "execution_count": 106,
89
  "metadata": {},
90
  "outputs": [],
91
  "source": [
 
156
  },
157
  {
158
  "cell_type": "code",
159
+ "execution_count": 107,
160
  "metadata": {},
161
  "outputs": [],
162
  "source": [
 
204
  },
205
  {
206
  "cell_type": "code",
207
+ "execution_count": null,
208
  "metadata": {},
209
  "outputs": [],
210
  "source": [
 
235
  },
236
  {
237
  "cell_type": "code",
238
+ "execution_count": 108,
239
  "metadata": {},
240
  "outputs": [
241
  {
242
  "name": "stdout",
243
  "output_type": "stream",
244
  "text": [
245
+ "Running on local URL: http://127.0.0.1:7938\n",
246
  "\n",
247
  "To create a public link, set `share=True` in `launch()`.\n"
248
  ]
 
250
  {
251
  "data": {
252
  "text/html": [
253
+ "<div><iframe src=\"http://127.0.0.1:7938/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
254
  ],
255
  "text/plain": [
256
  "<IPython.core.display.HTML object>"
 
263
  "data": {
264
  "text/plain": []
265
  },
266
+ "execution_count": 108,
267
  "metadata": {},
268
  "output_type": "execute_result"
269
  }
 
278
  },
279
  {
280
  "cell_type": "code",
281
+ "execution_count": 109,
282
  "metadata": {},
283
  "outputs": [
284
  {
285
  "name": "stdout",
286
  "output_type": "stream",
287
  "text": [
288
+ "Running on local URL: http://127.0.0.1:7939\n",
 
 
289
  "\n",
290
  "To create a public link, set `share=True` in `launch()`.\n"
291
  ]
 
293
  {
294
  "data": {
295
  "text/html": [
296
+ "<div><iframe src=\"http://127.0.0.1:7939/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
297
  ],
298
  "text/plain": [
299
  "<IPython.core.display.HTML object>"
 
306
  "data": {
307
  "text/plain": []
308
  },
309
+ "execution_count": 109,
310
  "metadata": {},
311
  "output_type": "execute_result"
312
  }
 
326
  },
327
  {
328
  "cell_type": "code",
329
+ "execution_count": 110,
330
  "metadata": {},
331
  "outputs": [
332
  {
 
341
  "name": "stdout",
342
  "output_type": "stream",
343
  "text": [
344
+ "Running on local URL: http://127.0.0.1:7940\n",
345
  "\n",
346
  "To create a public link, set `share=True` in `launch()`.\n"
347
  ]
 
349
  {
350
  "data": {
351
  "text/html": [
352
+ "<div><iframe src=\"http://127.0.0.1:7940/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
353
  ],
354
  "text/plain": [
355
  "<IPython.core.display.HTML object>"
 
362
  "data": {
363
  "text/plain": []
364
  },
365
+ "execution_count": 110,
366
  "metadata": {},
367
  "output_type": "execute_result"
368
  }
 
383
  },
384
  {
385
  "cell_type": "code",
386
+ "execution_count": 41,
387
  "metadata": {},
388
  "outputs": [
389
  {
390
  "name": "stderr",
391
  "output_type": "stream",
392
  "text": [
393
+ "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_13584/2709196309.py:47: GradioUnusedKwargWarning: You have unused kwarg parameters in UploadButton, please remove them: {'live': True}\n",
394
+ " upload_template_btn = gr.UploadButton(label=\"Upload Template File\", file_types = ['.csv'], live=True, file_count = \"single\")\n",
395
+ "/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_13584/2709196309.py:51: GradioUnusedKwargWarning: You have unused kwarg parameters in UploadButton, please remove them: {'live': True}\n",
396
+ " upload_source_button = gr.UploadButton(label=\"Upload Source File\", file_types = ['.csv'], live=True, file_count = \"single\")\n",
397
+ "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:841: UserWarning: Expected 1 arguments for function <function generate_code at 0x12ba74f70>, received 0.\n",
 
 
 
 
 
 
 
 
398
  " warnings.warn(\n",
399
+ "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:845: UserWarning: Expected at least 1 arguments for function <function generate_code at 0x12ba74f70>, received 0.\n",
400
+ " warnings.warn(\n"
 
 
 
 
401
  ]
402
  },
403
  {
404
  "name": "stdout",
405
  "output_type": "stream",
406
  "text": [
407
+ "Running on local URL: http://127.0.0.1:7881\n",
408
  "\n",
409
  "To create a public link, set `share=True` in `launch()`.\n"
410
  ]
 
412
  {
413
  "data": {
414
  "text/html": [
415
+ "<div><iframe src=\"http://127.0.0.1:7881/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
416
  ],
417
  "text/plain": [
418
  "<IPython.core.display.HTML object>"
 
425
  "data": {
426
  "text/plain": []
427
  },
428
+ "execution_count": 41,
429
  "metadata": {},
430
  "output_type": "execute_result"
431
+ },
432
+ {
433
+ "name": "stderr",
434
+ "output_type": "stream",
435
+ "text": [
436
+ "/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/helpers.py:735: UserWarning: Unexpected argument. Filling with None.\n",
437
+ " warnings.warn(\"Unexpected argument. Filling with None.\")\n"
438
+ ]
439
  }
440
  ],
441
  "source": [
442
  "def _sanitize_python_output(text: str):\n",
443
  " _, after = text.split(\"```python\")\n",
444
  " return after.split(\"```\")[0]\n",
445
+ "file_val = None\n",
446
+ "def do_stuff(file, extra):\n",
447
+ " assert False, (file, extra)\n",
448
+ " return pd.read_csv(file.name)\n",
449
+ " file_val = file\n",
450
  "\n",
451
+ "import io\n",
 
 
452
  "def generate_code(val):\n",
453
  " return '# check this out'\n",
454
  "\n",
455
+ "def export_csv(d):\n",
456
+ " filepath = \"output.csv\"\n",
457
+ " d.to_csv(filepath)\n",
458
+ " return gr.File.update(value=filepath, visible=True)\n",
459
  "\n",
460
+ "def get_table_mapping(source_df, template_df):\n",
461
+ " # table_mapping_df = pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
462
+ " return pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})\n",
 
 
 
 
 
 
 
 
 
 
 
463
  "\n",
464
+ "def process_csv_text(temp_file):\n",
465
+ " if isinstance(temp_file, str):\n",
466
+ " df = pd.read_csv(io.StringIO(temp_file))\n",
467
+ " else:\n",
468
+ " df = pd.read_csv(temp_file.name)\n",
469
+ " return df\n",
470
+ "\n",
471
+ "def generate_step_markdown(step_number: int, subtitle: str):\n",
472
+ " return gr.Markdown(f\"# Step {step_number}\\n\\n ### {subtitle}\")\n",
473
+ "\n",
474
+ "def export_csv(d):\n",
475
+ " d.to_csv(\"output.csv\")\n",
476
+ " return gr.File.update(value=\"output.csv\", visible=True)\n",
477
+ "\n",
478
+ "def export_code(val):\n",
479
+ " with open(\"output.py\", \"w\") as f:\n",
480
+ " f.write(val)\n",
481
+ " return gr.File.update(value=\"output.py\", visible=True)\n",
482
+ "\n",
483
+ "with gr.Blocks() as demo:\n",
484
+ " # STEP 1\n",
485
+ " generate_step_markdown(1, \"Upload a Template CSV (target schema) and a Source CSV.\")\n",
486
+ " with gr.Row():\n",
487
+ " with gr.Column():\n",
488
+ " upload_template_btn = gr.UploadButton(label=\"Upload Template File\", file_types = ['.csv'], live=True, file_count = \"single\")\n",
489
+ " template_df = gr.Dataframe(type=\"pandas\")\n",
490
+ " upload_template_btn.upload(fn=process_csv_text, inputs=upload_template_btn, outputs=template_df)\n",
491
+ " with gr.Column():\n",
492
+ " upload_source_button = gr.UploadButton(label=\"Upload Source File\", file_types = ['.csv'], live=True, file_count = \"single\")\n",
493
+ " source_df = gr.Dataframe(type=\"pandas\")\n",
494
+ " upload_source_button.upload(fn=process_csv_text, inputs=upload_source_button, outputs=source_df)\n",
495
+ " \n",
496
+ " # STEP 2\n",
497
+ " generate_step_markdown(2, \"Generate mapping from Source to Template. Once generated, you can edit the values directly in the table below.\")\n",
498
+ " with gr.Row():\n",
499
+ " generate_mapping_btn = gr.Button(value=\"Generate Mapping\", variant=\"primary\")\n",
500
+ " with gr.Row():\n",
501
+ " table_mapping_df = gr.DataFrame(type=\"pandas\")\n",
502
+ " generate_mapping_btn.click(fn=get_table_mapping, inputs=[source_df, template_df], outputs=[table_mapping_df])\n",
503
+ " \n",
504
+ " # STEP 3\n",
505
+ " generate_step_markdown(3, \"Save mapping to CSV and download (optional).\")\n",
506
+ " with gr.Row():\n",
507
+ " save_mapping_btn = gr.Button(value=\"Save Mapping\", variant=\"secondary\")\n",
508
+ " with gr.Row():\n",
509
+ " csv = gr.File(interactive=False, visible=False)\n",
510
+ " save_mapping_btn.click(export_csv, table_mapping_df, csv)\n",
511
+ " mapping_file = gr.File(label=\"Downloaded File\", visible=False)\n",
512
+ " mapping_file.change(lambda x: x, mapping_file, table_mapping_df)\n",
513
+ " # STEP 4\n",
514
+ " generate_step_markdown(4, \"Generate python code to transform Source to Template, using the generated mapping.\")\n",
515
  " with gr.Row():\n",
 
516
  " generate_code_btn = gr.Button(value=\"Generate Code from Mapping\", variant=\"primary\")\n",
517
+ " with gr.Row():\n",
518
+ " code_block = gr.Code(language=\"python\")\n",
519
+ " generate_code_btn.click(fn=generate_code, outputs=[code_block])\n",
 
 
520
  "\n",
521
+ " # STEP 5\n",
522
+ " generate_step_markdown(5, \"Save transformation code (optional).\")\n",
523
  " with gr.Row():\n",
524
+ " save_code_btn = gr.Button(value=\"Save Code\", variant=\"secondary\")\n",
525
+ " with gr.Row():\n",
526
+ " text = gr.File(interactive=False, visible=False)\n",
527
+ " save_code_btn.click(export_code, code_block, text)\n",
528
+ " code_file = gr.File(label=\"Downloaded File\", visible=False)\n",
529
+ " code_file.change(lambda x: x, code_file, code_block)\n",
530
+ "\n",
531
+ " # with gr.Row():\n",
532
+ " # gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
533
+ " # gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n",
534
  " \n",
535
+ " # with gr.Row():\n",
536
+ " # with gr.Column():\n",
537
+ " # gr.Dataframe(label='Target (template)', type='pandas', value=template_df)\n",
538
+ " # with gr.Column():\n",
539
+ " # gr.Dataframe(label='Source (transformed)', type='pandas', value=PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code))\n",
540
+ "\n",
541
+ " \n",
542
+ " \n",
543
+ "\n",
544
+ "\n",
545
+ "\n",
546
+ "# def mock_ocr(f):\n",
547
+ "# return [[1, 2, 3], [4, 5, 6]]\n",
548
+ "\n",
549
+ "\n",
550
+ "\n",
551
+ "# with gr.Blocks() as demo:\n",
552
+ "# with gr.Row():\n",
553
+ "# file = gr.File(label=\"PDF file\", file_types=[\".pdf\"])\n",
554
+ "# dataframe = gr.Dataframe()\n",
555
+ " \n",
556
+ "# with gr.Column():\n",
557
+ "# button = gr.Button(\"Export\")\n",
558
+ "# csv = gr.File(interactive=False, visible=False)\n",
559
+ " \n",
560
+ " \n",
561
+ "# file.change(mock_ocr, file, dataframe)\n",
562
+ "# button.click(export_csv, dataframe, csv)\n",
563
+ " \n",
564
+ "# demo.launch()\n",
565
+ "\n",
566
+ "\n",
567
+ "\n",
568
+ "\n",
569
+ " # with gr.Column():\n",
570
+ " # gr.Markdown(\"## Mapping from Source to Template\")\n",
571
+ " # with gr.Row():\n",
572
+ " # table_mapping_df = pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
573
+ " # gr.DataFrame(value=table_mapping_df)\n",
574
+ " # save_mapping_btn = gr.Button(value=\"Save Mapping\", variant=\"secondary\")\n",
575
+ " # save_mapping_btn.click(fn=lambda : save_csv_file(table_mapping_df, 'table_mapping'))\n",
576
+ "\n",
577
+ " # with gr.Row():\n",
578
+ " # test = gr.Markdown()\n",
579
+ " # generate_code_btn = gr.Button(value=\"Generate Code from Mapping\", variant=\"primary\")\n",
580
+ " # generate_code_btn.click(fn=generate_code, outputs=test)\n",
581
+ "\n",
582
+ " # with gr.Column():\n",
583
+ " # gr.Markdown(\"## Here is the code that will be used to transform the source file into the template schema:\")\n",
584
+ " # gr.Code(language=\"python\", value=_sanitize_python_output(transform_code))\n",
585
+ "\n",
586
+ " # with gr.Row():\n",
587
+ " # gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
588
+ " # gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n",
589
+ " \n",
590
+ " # with gr.Row():\n",
591
+ " # with gr.Column():\n",
592
+ " # gr.Dataframe(label='Target (template)', type='pandas', value=template_df)\n",
593
+ " # with gr.Column():\n",
594
+ " # gr.Dataframe(label='Source (transformed)', type='pandas', value=PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code))\n",
595
+ "\n",
596
+ "demo.launch()"
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "code",
601
+ "execution_count": 22,
602
+ "metadata": {},
603
+ "outputs": [
604
+ {
605
+ "name": "stdout",
606
+ "output_type": "stream",
607
+ "text": [
608
+ "Running on local URL: http://127.0.0.1:7866\n",
609
+ "\n",
610
+ "To create a public link, set `share=True` in `launch()`.\n"
611
+ ]
612
+ },
613
+ {
614
+ "data": {
615
+ "text/html": [
616
+ "<div><iframe src=\"http://127.0.0.1:7866/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
617
+ ],
618
+ "text/plain": [
619
+ "<IPython.core.display.HTML object>"
620
+ ]
621
+ },
622
+ "metadata": {},
623
+ "output_type": "display_data"
624
+ },
625
+ {
626
+ "data": {
627
+ "text/plain": []
628
+ },
629
+ "execution_count": 22,
630
+ "metadata": {},
631
+ "output_type": "execute_result"
632
+ }
633
+ ],
634
+ "source": [
635
+ "import gradio as gr\n",
636
+ "\n",
637
+ "def mock_ocr(f):\n",
638
+ " return [[1, 2, 3], [4, 5, 6]]\n",
639
+ "\n",
640
+ "def export_csv(d):\n",
641
+ " d.to_csv(\"output.csv\")\n",
642
+ " return gr.File.update(value=\"output.csv\", visible=True)\n",
643
+ "\n",
644
+ "with gr.Blocks() as demo:\n",
645
  " with gr.Row():\n",
646
+ " file = gr.File(label=\"PDF file\", file_types=[\".pdf\"])\n",
647
+ " dataframe = gr.Dataframe()\n",
648
+ " \n",
649
  " with gr.Column():\n",
650
+ " button = gr.Button(\"Export\")\n",
651
+ " csv = gr.File(interactive=False, visible=False)\n",
652
+ " \n",
653
+ " \n",
654
+ " file.change(mock_ocr, file, dataframe)\n",
655
+ " button.click(export_csv, dataframe, csv)\n",
656
+ " \n",
657
  "demo.launch()"
658
  ]
659
  },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 176,
663
+ "metadata": {},
664
+ "outputs": [
665
+ {
666
+ "name": "stdout",
667
+ "output_type": "stream",
668
+ "text": [
669
+ "dataframe\n"
670
+ ]
671
+ }
672
+ ],
673
+ "source": [
674
+ "source_df"
675
+ ]
676
+ },
677
  {
678
  "cell_type": "code",
679
  "execution_count": null,
src/notebooks/output.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ,1,2,3
2
+ 0,,,
src/notebooks/output.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # check this out