Spaces:
Sleeping
Sleeping
Ignore columns in input too (input preview rows now keep only the columns listed in the output format)
Browse files
app.py
CHANGED
@@ -366,9 +366,10 @@ with gr.Blocks(css=css, js=js) as demo:
|
|
366 |
|
367 |
@rewrite_preview_button.click(inputs=[dataset_search, pretty_input_preview, input_prompt, output_format_dataframe], outputs=[pretty_output_preview, rewrite_full_dataset_button, full_dataset_generation_label, full_dataset_generation_success_html, pretty_full_dataset_generation_output])
|
368 |
def rewrite_preview(dataset: str, pretty_input_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame) -> Iterator[pd.DataFrame]:
|
369 |
-
|
370 |
format = output_format_df.to_dict(orient="records")
|
371 |
format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
|
|
|
372 |
output_rows = []
|
373 |
print(f"(preview) ReWriting {dataset} with instruction '{prompt}'")
|
374 |
yield {rewrite_full_dataset_button: gr.Button(interactive=False), full_dataset_generation_label: gr.Label(visible=False)}
|
@@ -386,11 +387,11 @@ with gr.Blocks(css=css, js=js) as demo:
|
|
386 |
|
387 |
@rewrite_full_dataset_button.click(inputs=[dataset_search, subset_dropdown, split_dropdown, pretty_input_preview, pretty_output_preview, input_prompt, output_format_dataframe, dataset_info_json, select_namespace_dropdown, max_num_rows_dropdown], outputs=[full_dataset_generation_label, full_dataset_generation_success_html, pretty_output_preview, pretty_full_dataset_generation_output])
|
388 |
def rewrite_full_dataset(dataset: str, subset: str, split: str, pretty_input_preview_df: pd.DataFrame, pretty_output_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame, dataset_info: dict[str, Any], namespace: str, max_num_rows: int, oauth_token: Optional[gr.OAuthToken]) -> Iterator[pd.DataFrame]:
|
389 |
-
input_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_input_preview_df.to_dict(orient="records")]
|
390 |
-
output_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_output_preview_df.to_dict(orient="records")]
|
391 |
output_format_df = output_format_df[output_format_df["column"] != ""]
|
392 |
format = output_format_df.to_dict(orient="records")
|
393 |
format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
|
|
|
|
|
394 |
num_examples = dataset_info["splits"][split]["num_examples"]
|
395 |
total = min(num_examples, max_num_rows)
|
396 |
print(f"ReWriting {dataset} with instruction '{prompt}'")
|
|
|
366 |
|
367 |
@rewrite_preview_button.click(inputs=[dataset_search, pretty_input_preview, input_prompt, output_format_dataframe], outputs=[pretty_output_preview, rewrite_full_dataset_button, full_dataset_generation_label, full_dataset_generation_success_html, pretty_full_dataset_generation_output])
|
368 |
def rewrite_preview(dataset: str, pretty_input_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame) -> Iterator[pd.DataFrame]:
|
369 |
+
output_format_df = output_format_df[output_format_df["column"] != ""]
|
370 |
format = output_format_df.to_dict(orient="records")
|
371 |
format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
|
372 |
+
rows = [{k: json.loads(row[k]) for k in output_format_df["column"] if k in row} for row in pretty_input_preview_df.to_dict(orient="records")]
|
373 |
output_rows = []
|
374 |
print(f"(preview) ReWriting {dataset} with instruction '{prompt}'")
|
375 |
yield {rewrite_full_dataset_button: gr.Button(interactive=False), full_dataset_generation_label: gr.Label(visible=False)}
|
|
|
387 |
|
388 |
@rewrite_full_dataset_button.click(inputs=[dataset_search, subset_dropdown, split_dropdown, pretty_input_preview, pretty_output_preview, input_prompt, output_format_dataframe, dataset_info_json, select_namespace_dropdown, max_num_rows_dropdown], outputs=[full_dataset_generation_label, full_dataset_generation_success_html, pretty_output_preview, pretty_full_dataset_generation_output])
|
389 |
def rewrite_full_dataset(dataset: str, subset: str, split: str, pretty_input_preview_df: pd.DataFrame, pretty_output_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame, dataset_info: dict[str, Any], namespace: str, max_num_rows: int, oauth_token: Optional[gr.OAuthToken]) -> Iterator[pd.DataFrame]:
|
|
|
|
|
390 |
output_format_df = output_format_df[output_format_df["column"] != ""]
|
391 |
format = output_format_df.to_dict(orient="records")
|
392 |
format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
|
393 |
+
input_preview_rows = [{k: json.loads(row[k]) for k in output_format_df["column"] if k in row} for row in pretty_input_preview_df.to_dict(orient="records")]
|
394 |
+
output_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_output_preview_df.to_dict(orient="records")]
|
395 |
num_examples = dataset_info["splits"][split]["num_examples"]
|
396 |
total = min(num_examples, max_num_rows)
|
397 |
print(f"ReWriting {dataset} with instruction '{prompt}'")
|