Spaces:
Runtime error
Runtime error
Commit
·
d197237
1
Parent(s):
0e660e8
remove test
Browse files- test_notebook.ipynb +0 -509
test_notebook.ipynb
DELETED
@@ -1,509 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 4,
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"import os\n",
|
10 |
-
"import pandas as pd\n",
|
11 |
-
"import gradio as gr\n",
|
12 |
-
"from pydantic import BaseModel, Field\n",
|
13 |
-
"\n",
|
14 |
-
"import langchain\n",
|
15 |
-
"from langchain.output_parsers import PydanticOutputParser\n",
|
16 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
17 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
18 |
-
"from langchain.tools import PythonAstREPLTool\n",
|
19 |
-
"from langchain.chat_models import ChatOpenAI\n",
|
20 |
-
"from langchain.schema.output_parser import StrOutputParser"
|
21 |
-
]
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"cell_type": "code",
|
25 |
-
"execution_count": 8,
|
26 |
-
"metadata": {},
|
27 |
-
"outputs": [],
|
28 |
-
"source": [
|
29 |
-
"langchain.debug = False\n",
|
30 |
-
"# Throwaway key with strict usage limit\n",
|
31 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY\"\n",
|
32 |
-
"pd.set_option('display.max_columns', 20)\n",
|
33 |
-
"pd.set_option('display.max_rows', 20)"
|
34 |
-
]
|
35 |
-
},
|
36 |
-
{
|
37 |
-
"cell_type": "code",
|
38 |
-
"execution_count": 9,
|
39 |
-
"metadata": {},
|
40 |
-
"outputs": [],
|
41 |
-
"source": [
|
42 |
-
"data_dir_path = os.path.join(os.getcwd(), 'data')\n",
|
43 |
-
"NUM_ROWS_TO_RETURN = 5\n",
|
44 |
-
"\n",
|
45 |
-
"table_1_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_a.csv'))\n",
|
46 |
-
"table_2_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_b.csv'))\n",
|
47 |
-
"template_df = pd.read_csv(os.path.join(data_dir_path, 'legal_template.csv'))"
|
48 |
-
]
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"cell_type": "code",
|
52 |
-
"execution_count": 10,
|
53 |
-
"metadata": {},
|
54 |
-
"outputs": [],
|
55 |
-
"source": [
|
56 |
-
"transform_model = ChatOpenAI(\n",
|
57 |
-
" model_name='gpt-4',\n",
|
58 |
-
" temperature=0,\n",
|
59 |
-
")\n",
|
60 |
-
"\n",
|
61 |
-
"natural_language_model = ChatOpenAI(\n",
|
62 |
-
" model_name='gpt-4',\n",
|
63 |
-
" temperature=0.1,\n",
|
64 |
-
")"
|
65 |
-
]
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"cell_type": "code",
|
69 |
-
"execution_count": 11,
|
70 |
-
"metadata": {},
|
71 |
-
"outputs": [],
|
72 |
-
"source": [
|
73 |
-
"# TODO: add validation to models, coupled with retry mechanism in chain\n",
|
74 |
-
"class TableMappingEntry(BaseModel):\n",
|
75 |
-
" '''A single row in a table mapping. Describes how a single column in a source table maps to a single column in a target table, including any necessary transformations, and their explanations.'''\n",
|
76 |
-
" source_column_name: str = Field(..., description=\"Name of the column in the source table.\")\n",
|
77 |
-
" target_column_name: str = Field(..., description=\"Name of the column in the target table, to which the source column maps.\")\n",
|
78 |
-
" value_transformations: str = Field(..., description=\"Transformations needed to make the source values match the target values. If unnecessary, write 'NO_TRANSFORM'.\")\n",
|
79 |
-
" explanation: str = Field(..., description=\"One-sentence explanation of this row (source-target mapping/transformation). Include any information that might be relevant to a software engineer building an ETL pipeline with this document.\")\n",
|
80 |
-
"\n",
|
81 |
-
"class TableMapping(BaseModel):\n",
|
82 |
-
" '''A list of table mappings that collectively describe how a source table should be transformed to match the schema of a target table.'''\n",
|
83 |
-
" table_mappings: list[TableMappingEntry] = Field(..., description=\"A list of table mappings.\")\n",
|
84 |
-
" \n",
|
85 |
-
"analyst_prompt_str = '''\n",
|
86 |
-
"You are a Data Scientist, who specializes in generating schema mappings for use by Software Engineers in ETL pipelines.\n",
|
87 |
-
"\n",
|
88 |
-
"Head of `source_csv`:\n",
|
89 |
-
"\n",
|
90 |
-
"{source_1_csv_str}\n",
|
91 |
-
"\n",
|
92 |
-
"Head of `target_csv`:\n",
|
93 |
-
"\n",
|
94 |
-
"{target_csv_str}\n",
|
95 |
-
"\n",
|
96 |
-
"Your job is to generate a thorough, precise summary of how `source_csv` should be transformed to adhere exactly to the `target_csv` schema.\n",
|
97 |
-
"\n",
|
98 |
-
"For each column in the `source_csv`, you must communicate which column in the `target_csv` it maps to, and how the values in the `source_csv` column should be transformed to match those in the `target_csv`.\n",
|
99 |
-
"You can assume the rows are aligned: that is, the first row in `source_csv` corresponds to the first row in `target_csv`, and so on.\n",
|
100 |
-
"\n",
|
101 |
-
"Remember:\n",
|
102 |
-
"1. Which column in `target_csv` it maps to. You should consider the semantic meaning of the columns, not just the character similarity. \n",
|
103 |
-
"\n",
|
104 |
-
"Example mappings:\n",
|
105 |
-
"- 'MunICipality' in `source_csv` should map to 'City' in `target_csv`.\n",
|
106 |
-
"- 'fullname' in `source_csv` should map to both 'FirstName' and 'LastName' in `target_csv`. You must explain this transformation, as well, including the target sequencing of first and last name.\n",
|
107 |
-
"\n",
|
108 |
-
"Example transformations:\n",
|
109 |
-
"- If date in `source_csv` is `2020-01-01` and date in `target_csv` is `01/01/2020`, explain exactly how this should be transformed and the reasoning behind it.\n",
|
110 |
-
"- If city in `source_csv` is `New York` and city in `target_csv` is `NEW YORK` or `NYC`, explain exactly how this should be transformed and the reasoning behind it.\n",
|
111 |
-
"\n",
|
112 |
-
"Lastly, point out any other oddities, such as duplicate columns, erroneous columns, etc.\n",
|
113 |
-
"\n",
|
114 |
-
"{format_instructions}\n",
|
115 |
-
"\n",
|
116 |
-
"Remember:\n",
|
117 |
-
"- Be concise: you are speaking to engineers, not customers.\n",
|
118 |
-
"- Be precise: all of these values are case sensitive. Consider casing for city names, exact prefixes for identifiers, ordering of people's names, etc.\n",
|
119 |
-
"- DO NOT include commas, quotes, or any other characters that might interfere with JSON serialization or CSV generation\n",
|
120 |
-
"\n",
|
121 |
-
"Your response:\n",
|
122 |
-
"'''\n",
|
123 |
-
"\n",
|
124 |
-
"def get_data_str_from_df_for_prompt(df, use_head=True, num_rows_to_return=NUM_ROWS_TO_RETURN):\n",
|
125 |
-
" data = df.head(num_rows_to_return) if use_head else df.tail(num_rows_to_return)\n",
|
126 |
-
" return f'<df>\\n{data.to_markdown()}\\n</df>'\n",
|
127 |
-
"\n",
|
128 |
-
"table_mapping_parser = PydanticOutputParser(pydantic_object=TableMapping)\n",
|
129 |
-
"analyst_prompt = ChatPromptTemplate.from_template(\n",
|
130 |
-
" template=analyst_prompt_str, \n",
|
131 |
-
" partial_variables={'format_instructions': table_mapping_parser.get_format_instructions()},\n",
|
132 |
-
")\n",
|
133 |
-
"\n",
|
134 |
-
"mapping_chain = analyst_prompt | transform_model | table_mapping_parser\n",
|
135 |
-
"table_mapping: TableMapping = mapping_chain.invoke({\"source_1_csv_str\": get_data_str_from_df_for_prompt(table_1_df), \"target_csv_str\": get_data_str_from_df_for_prompt(template_df)})"
|
136 |
-
]
|
137 |
-
},
|
138 |
-
{
|
139 |
-
"cell_type": "code",
|
140 |
-
"execution_count": 12,
|
141 |
-
"metadata": {},
|
142 |
-
"outputs": [],
|
143 |
-
"source": [
|
144 |
-
"# spec writer\n",
|
145 |
-
"spec_writer_prompt_str = '''\n",
|
146 |
-
"You are an expert product manager and technical writer for a software company, who generates clean, concise, precise specification documents for your employees.\n",
|
147 |
-
"Your job is to write a plaintext spec for a python script for a software engineer to develop a component within an ETL pipeline.\n",
|
148 |
-
"\n",
|
149 |
-
"This document must include 100% of the information your employee needs to write a successful script to transform source_df to target_df.\n",
|
150 |
-
"However, DO NOT include the original table_mapping. Your job is to translate everything into natural language.\n",
|
151 |
-
"\n",
|
152 |
-
"Here is a stringified pydantic object that describes the mapping and the transformation steps:\n",
|
153 |
-
"\n",
|
154 |
-
"{table_mapping}\n",
|
155 |
-
"\n",
|
156 |
-
"You must translate this into clean, concise, and complete instructions for your employee.\n",
|
157 |
-
"\n",
|
158 |
-
"This document should be formatted like a technical document in plaintext. Do not include code or data.\n",
|
159 |
-
"\n",
|
160 |
-
"This document must include:\n",
|
161 |
-
"- Overview\n",
|
162 |
-
"- Input (source_df), Output (target_df)\n",
|
163 |
-
"- Exact column mapping\n",
|
164 |
-
"- Exact transformation steps for each column\n",
|
165 |
-
"- Precise instructions for what this script should do\n",
|
166 |
-
"- Script input: Pandas Dataframe named `source_df`.\n",
|
167 |
-
"- Script output: Pandas Dataframe named `target_df`.\n",
|
168 |
-
"- Do not modify the source_df. Create a new dataframe named target_df.\n",
|
169 |
-
"- This script should never include the source data. It should only include the transformations required to create the target_df.\n",
|
170 |
-
"- Return the target_df.\n",
|
171 |
-
"\n",
|
172 |
-
"You will never see this employee. They cannot contact you. You will never see their code. You must include 100% of the information they need to write a successful script.\n",
|
173 |
-
"Remember:\n",
|
174 |
-
"- Clean: No extra information, no formatting aside from plaintext\n",
|
175 |
-
"- Concise: Your employees benefit from brevity\n",
|
176 |
-
"- Precise: your words must be unambiguous, exact, and fully represent a perfect translation of the table_mapping object.\n",
|
177 |
-
"\n",
|
178 |
-
"Your response:\n",
|
179 |
-
"'''\n",
|
180 |
-
"spec_writer_prompt = ChatPromptTemplate.from_template(spec_writer_prompt_str)\n",
|
181 |
-
"\n",
|
182 |
-
"spec_writer_chain = spec_writer_prompt | natural_language_model | StrOutputParser()\n",
|
183 |
-
"spec_str = spec_writer_chain.invoke({\"table_mapping\": str(table_mapping)})"
|
184 |
-
]
|
185 |
-
},
|
186 |
-
{
|
187 |
-
"cell_type": "code",
|
188 |
-
"execution_count": 19,
|
189 |
-
"metadata": {},
|
190 |
-
"outputs": [],
|
191 |
-
"source": [
|
192 |
-
"engineer_prompt_str = '''\n",
|
193 |
-
"You are a Senior Software Engineer, who specializes in writing Python code for ETL pipelines.\n",
|
194 |
-
"Your Product Manager has written a spec for a new transformation script. You must follow this document exactly, write python code that implements the spec, validate that code, and then return it.\n",
|
195 |
-
"Your output should only be python code in Markdown format, eg:\n",
|
196 |
-
" ```python\n",
|
197 |
-
" ....\n",
|
198 |
-
" ```\n",
|
199 |
-
"Do not return any additional text / explanation. This code will be executed by a robot without human intervention.\n",
|
200 |
-
"\n",
|
201 |
-
"Here is the technical specification for your code:\n",
|
202 |
-
"\n",
|
203 |
-
"{spec_str}\n",
|
204 |
-
"\n",
|
205 |
-
"Remember: return only clean python code in markdown format. The python interpreter running this code will already have `source_df` as a local variable.\n",
|
206 |
-
"\n",
|
207 |
-
"You must return `target_df` at the end.\n",
|
208 |
-
"'''\n",
|
209 |
-
"engineer_prompt = ChatPromptTemplate.from_template(engineer_prompt_str)\n",
|
210 |
-
"\n",
|
211 |
-
"# engineer_chain = engineer_prompt | transform_model | StrOutputParser() | PythonAstREPLTool(locals={'source_df': table_1_df}).run\n",
|
212 |
-
"# table_1_df_transformed = engineer_chain.invoke({\"spec_str\": spec_str})\n",
|
213 |
-
"engineer_chain = engineer_prompt | transform_model | StrOutputParser()\n",
|
214 |
-
"transform_code = engineer_chain.invoke({\"spec_str\": spec_str})"
|
215 |
-
]
|
216 |
-
},
|
217 |
-
{
|
218 |
-
"cell_type": "code",
|
219 |
-
"execution_count": 17,
|
220 |
-
"metadata": {},
|
221 |
-
"outputs": [
|
222 |
-
{
|
223 |
-
"name": "stdout",
|
224 |
-
"output_type": "stream",
|
225 |
-
"text": [
|
226 |
-
"Running on local URL: http://127.0.0.1:7874\n",
|
227 |
-
"\n",
|
228 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
229 |
-
]
|
230 |
-
},
|
231 |
-
{
|
232 |
-
"data": {
|
233 |
-
"text/html": [
|
234 |
-
"<div><iframe src=\"http://127.0.0.1:7874/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
235 |
-
],
|
236 |
-
"text/plain": [
|
237 |
-
"<IPython.core.display.HTML object>"
|
238 |
-
]
|
239 |
-
},
|
240 |
-
"metadata": {},
|
241 |
-
"output_type": "display_data"
|
242 |
-
},
|
243 |
-
{
|
244 |
-
"data": {
|
245 |
-
"text/plain": []
|
246 |
-
},
|
247 |
-
"execution_count": 17,
|
248 |
-
"metadata": {},
|
249 |
-
"output_type": "execute_result"
|
250 |
-
}
|
251 |
-
],
|
252 |
-
"source": [
|
253 |
-
"def show_mapping(file):\n",
|
254 |
-
" # TODO: add code\n",
|
255 |
-
" return pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
|
256 |
-
"demo = gr.Interface(fn=show_mapping, inputs=[\"file\"], outputs='dataframe')\n",
|
257 |
-
"demo.launch()"
|
258 |
-
]
|
259 |
-
},
|
260 |
-
{
|
261 |
-
"cell_type": "code",
|
262 |
-
"execution_count": 34,
|
263 |
-
"metadata": {},
|
264 |
-
"outputs": [
|
265 |
-
{
|
266 |
-
"name": "stdout",
|
267 |
-
"output_type": "stream",
|
268 |
-
"text": [
|
269 |
-
"Running on local URL: http://127.0.0.1:7885\n",
|
270 |
-
"\n",
|
271 |
-
"Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB\n",
|
272 |
-
"\n",
|
273 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
274 |
-
]
|
275 |
-
},
|
276 |
-
{
|
277 |
-
"data": {
|
278 |
-
"text/html": [
|
279 |
-
"<div><iframe src=\"http://127.0.0.1:7885/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
280 |
-
],
|
281 |
-
"text/plain": [
|
282 |
-
"<IPython.core.display.HTML object>"
|
283 |
-
]
|
284 |
-
},
|
285 |
-
"metadata": {},
|
286 |
-
"output_type": "display_data"
|
287 |
-
},
|
288 |
-
{
|
289 |
-
"data": {
|
290 |
-
"text/plain": []
|
291 |
-
},
|
292 |
-
"execution_count": 34,
|
293 |
-
"metadata": {},
|
294 |
-
"output_type": "execute_result"
|
295 |
-
}
|
296 |
-
],
|
297 |
-
"source": [
|
298 |
-
"def _sanitize_python_output(text: str):\n",
|
299 |
-
" _, after = text.split(\"```python\")\n",
|
300 |
-
" return after.split(\"```\")[0]\n",
|
301 |
-
"\n",
|
302 |
-
"def show_code(button):\n",
|
303 |
-
" # TODO: add code\n",
|
304 |
-
" return _sanitize_python_output(transform_code)\n",
|
305 |
-
"check_mapping_text = 'How does that mapping look? \\n\\nFeel free to update it: your changes will be incorporated! \\n\\nWhen you are ready, click the Submit button below, and the mapping code will be generated for your approval.'\n",
|
306 |
-
"demo = gr.Interface(fn=show_code, inputs=[gr.Textbox(value=check_mapping_text, interactive=False)], outputs=[gr.Code(language=\"python\")])\n",
|
307 |
-
"demo.launch()"
|
308 |
-
]
|
309 |
-
},
|
310 |
-
{
|
311 |
-
"cell_type": "code",
|
312 |
-
"execution_count": 41,
|
313 |
-
"metadata": {},
|
314 |
-
"outputs": [
|
315 |
-
{
|
316 |
-
"name": "stderr",
|
317 |
-
"output_type": "stream",
|
318 |
-
"text": [
|
319 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/4236222443.py:4: GradioDeprecationWarning: `layout` parameter is deprecated, and it has no effect\n",
|
320 |
-
" demo = gr.Interface(\n"
|
321 |
-
]
|
322 |
-
},
|
323 |
-
{
|
324 |
-
"name": "stdout",
|
325 |
-
"output_type": "stream",
|
326 |
-
"text": [
|
327 |
-
"Running on local URL: http://127.0.0.1:7892\n",
|
328 |
-
"\n",
|
329 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
330 |
-
]
|
331 |
-
},
|
332 |
-
{
|
333 |
-
"data": {
|
334 |
-
"text/html": [
|
335 |
-
"<div><iframe src=\"http://127.0.0.1:7892/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
336 |
-
],
|
337 |
-
"text/plain": [
|
338 |
-
"<IPython.core.display.HTML object>"
|
339 |
-
]
|
340 |
-
},
|
341 |
-
"metadata": {},
|
342 |
-
"output_type": "display_data"
|
343 |
-
},
|
344 |
-
{
|
345 |
-
"data": {
|
346 |
-
"text/plain": []
|
347 |
-
},
|
348 |
-
"execution_count": 41,
|
349 |
-
"metadata": {},
|
350 |
-
"output_type": "execute_result"
|
351 |
-
}
|
352 |
-
],
|
353 |
-
"source": [
|
354 |
-
"def get_transformed_table(button):\n",
|
355 |
-
" return template_df, PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code)\n",
|
356 |
-
"check_mapping_text = 'How does that code look? \\n\\nWhen you are ready, click the Submit button and the source file will be transformed.'\n",
|
357 |
-
"demo = gr.Interface(\n",
|
358 |
-
" fn=get_transformed_table,\n",
|
359 |
-
" inputs=[gr.Textbox(value=check_mapping_text, interactive=False)],\n",
|
360 |
-
" outputs=[gr.Dataframe(label='Template Table (target)'), gr.Dataframe(label='Table 1 (transformed)')],\n",
|
361 |
-
" layout=\"column\",\n",
|
362 |
-
" examples=[[1]],\n",
|
363 |
-
")\n",
|
364 |
-
"demo.launch()"
|
365 |
-
]
|
366 |
-
},
|
367 |
-
{
|
368 |
-
"cell_type": "code",
|
369 |
-
"execution_count": 89,
|
370 |
-
"metadata": {},
|
371 |
-
"outputs": [
|
372 |
-
{
|
373 |
-
"name": "stderr",
|
374 |
-
"output_type": "stream",
|
375 |
-
"text": [
|
376 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
|
377 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
378 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
|
379 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
380 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
|
381 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
382 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
|
383 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
384 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
|
385 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
386 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
|
387 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
388 |
-
"/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:841: UserWarning: Expected 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
|
389 |
-
" warnings.warn(\n",
|
390 |
-
"/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:845: UserWarning: Expected at least 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
|
391 |
-
" warnings.warn(\n",
|
392 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:39: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'transform_source'}\n",
|
393 |
-
" gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
|
394 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:40: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'save_code'}\n",
|
395 |
-
" gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n"
|
396 |
-
]
|
397 |
-
},
|
398 |
-
{
|
399 |
-
"name": "stdout",
|
400 |
-
"output_type": "stream",
|
401 |
-
"text": [
|
402 |
-
"Running on local URL: http://127.0.0.1:7934\n",
|
403 |
-
"\n",
|
404 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
405 |
-
]
|
406 |
-
},
|
407 |
-
{
|
408 |
-
"data": {
|
409 |
-
"text/html": [
|
410 |
-
"<div><iframe src=\"http://127.0.0.1:7934/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
411 |
-
],
|
412 |
-
"text/plain": [
|
413 |
-
"<IPython.core.display.HTML object>"
|
414 |
-
]
|
415 |
-
},
|
416 |
-
"metadata": {},
|
417 |
-
"output_type": "display_data"
|
418 |
-
},
|
419 |
-
{
|
420 |
-
"data": {
|
421 |
-
"text/plain": []
|
422 |
-
},
|
423 |
-
"execution_count": 89,
|
424 |
-
"metadata": {},
|
425 |
-
"output_type": "execute_result"
|
426 |
-
}
|
427 |
-
],
|
428 |
-
"source": [
|
429 |
-
"def _sanitize_python_output(text: str):\n",
|
430 |
-
" _, after = text.split(\"```python\")\n",
|
431 |
-
" return after.split(\"```\")[0]\n",
|
432 |
-
"\n",
|
433 |
-
"def do_stuff(val):\n",
|
434 |
-
" print(val)\n",
|
435 |
-
"\n",
|
436 |
-
"def generate_code(val):\n",
|
437 |
-
" return '# check this out'\n",
|
438 |
-
"\n",
|
439 |
-
"def save_csv_file(df, filename):\n",
|
440 |
-
" df.to_csv(os.path.join(data_dir_path, 'output', filename) + '.csv')\n",
|
441 |
-
"\n",
|
442 |
-
"with gr.Blocks() as demo:\n",
|
443 |
-
" with gr.Column():\n",
|
444 |
-
" gr.Markdown(\"## To begin, upload a Template CSV and a Source CSV file.\")\n",
|
445 |
-
" with gr.Row():\n",
|
446 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
447 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
448 |
-
"\n",
|
449 |
-
" with gr.Column():\n",
|
450 |
-
" gr.Markdown(\"## Mapping from Source to Template\")\n",
|
451 |
-
" with gr.Row():\n",
|
452 |
-
" table_mapping_df = pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
|
453 |
-
" gr.DataFrame(value=table_mapping_df)\n",
|
454 |
-
" save_mapping_btn = gr.Button(value=\"Save Mapping\", variant=\"secondary\")\n",
|
455 |
-
" save_mapping_btn.click(fn=lambda : save_csv_file(table_mapping_df, 'table_mapping'))\n",
|
456 |
-
"\n",
|
457 |
-
" with gr.Row():\n",
|
458 |
-
" test = gr.Markdown()\n",
|
459 |
-
" generate_code_btn = gr.Button(value=\"Generate Code from Mapping\", variant=\"primary\")\n",
|
460 |
-
" generate_code_btn.click(fn=generate_code, outputs=test)\n",
|
461 |
-
"\n",
|
462 |
-
" with gr.Column():\n",
|
463 |
-
" gr.Markdown(\"## Here is the code that will be used to transform the source file into the template schema:\")\n",
|
464 |
-
" gr.Code(language=\"python\", value=_sanitize_python_output(transform_code))\n",
|
465 |
-
"\n",
|
466 |
-
" with gr.Row():\n",
|
467 |
-
" gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
|
468 |
-
" gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n",
|
469 |
-
" \n",
|
470 |
-
" with gr.Row():\n",
|
471 |
-
" with gr.Column():\n",
|
472 |
-
" gr.Dataframe(label='Target (template)', type='pandas', value=template_df)\n",
|
473 |
-
" with gr.Column():\n",
|
474 |
-
" gr.Dataframe(label='Source (transformed)', type='pandas', value=PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code))\n",
|
475 |
-
"\n",
|
476 |
-
"demo.launch()"
|
477 |
-
]
|
478 |
-
},
|
479 |
-
{
|
480 |
-
"cell_type": "code",
|
481 |
-
"execution_count": null,
|
482 |
-
"metadata": {},
|
483 |
-
"outputs": [],
|
484 |
-
"source": []
|
485 |
-
}
|
486 |
-
],
|
487 |
-
"metadata": {
|
488 |
-
"kernelspec": {
|
489 |
-
"display_name": "venv",
|
490 |
-
"language": "python",
|
491 |
-
"name": "python3"
|
492 |
-
},
|
493 |
-
"language_info": {
|
494 |
-
"codemirror_mode": {
|
495 |
-
"name": "ipython",
|
496 |
-
"version": 3
|
497 |
-
},
|
498 |
-
"file_extension": ".py",
|
499 |
-
"mimetype": "text/x-python",
|
500 |
-
"name": "python",
|
501 |
-
"nbconvert_exporter": "python",
|
502 |
-
"pygments_lexer": "ipython3",
|
503 |
-
"version": "3.9.6"
|
504 |
-
},
|
505 |
-
"orig_nbformat": 4
|
506 |
-
},
|
507 |
-
"nbformat": 4,
|
508 |
-
"nbformat_minor": 2
|
509 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|