lhoestq HF staff committed on
Commit
231073c
1 Parent(s): 3a7f10a

ignore columns in input too

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -366,9 +366,10 @@ with gr.Blocks(css=css, js=js) as demo:
366
 
367
  @rewrite_preview_button.click(inputs=[dataset_search, pretty_input_preview, input_prompt, output_format_dataframe], outputs=[pretty_output_preview, rewrite_full_dataset_button, full_dataset_generation_label, full_dataset_generation_success_html, pretty_full_dataset_generation_output])
368
  def rewrite_preview(dataset: str, pretty_input_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame) -> Iterator[pd.DataFrame]:
369
- rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_input_preview_df.to_dict(orient="records")]
370
  format = output_format_df.to_dict(orient="records")
371
  format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
 
372
  output_rows = []
373
  print(f"(preview) ReWriting {dataset} with instruction '{prompt}'")
374
  yield {rewrite_full_dataset_button: gr.Button(interactive=False), full_dataset_generation_label: gr.Label(visible=False)}
@@ -386,11 +387,11 @@ with gr.Blocks(css=css, js=js) as demo:
386
 
387
  @rewrite_full_dataset_button.click(inputs=[dataset_search, subset_dropdown, split_dropdown, pretty_input_preview, pretty_output_preview, input_prompt, output_format_dataframe, dataset_info_json, select_namespace_dropdown, max_num_rows_dropdown], outputs=[full_dataset_generation_label, full_dataset_generation_success_html, pretty_output_preview, pretty_full_dataset_generation_output])
388
  def rewrite_full_dataset(dataset: str, subset: str, split: str, pretty_input_preview_df: pd.DataFrame, pretty_output_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame, dataset_info: dict[str, Any], namespace: str, max_num_rows: int, oauth_token: Optional[gr.OAuthToken]) -> Iterator[pd.DataFrame]:
389
- input_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_input_preview_df.to_dict(orient="records")]
390
- output_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_output_preview_df.to_dict(orient="records")]
391
  output_format_df = output_format_df[output_format_df["column"] != ""]
392
  format = output_format_df.to_dict(orient="records")
393
  format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
 
 
394
  num_examples = dataset_info["splits"][split]["num_examples"]
395
  total = min(num_examples, max_num_rows)
396
  print(f"ReWriting {dataset} with instruction '{prompt}'")
 
366
 
367
  @rewrite_preview_button.click(inputs=[dataset_search, pretty_input_preview, input_prompt, output_format_dataframe], outputs=[pretty_output_preview, rewrite_full_dataset_button, full_dataset_generation_label, full_dataset_generation_success_html, pretty_full_dataset_generation_output])
368
  def rewrite_preview(dataset: str, pretty_input_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame) -> Iterator[pd.DataFrame]:
369
+ output_format_df = output_format_df[output_format_df["column"] != ""]
370
  format = output_format_df.to_dict(orient="records")
371
  format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
372
+ rows = [{k: json.loads(row[k]) for k in output_format_df["column"] if k in row} for row in pretty_input_preview_df.to_dict(orient="records")]
373
  output_rows = []
374
  print(f"(preview) ReWriting {dataset} with instruction '{prompt}'")
375
  yield {rewrite_full_dataset_button: gr.Button(interactive=False), full_dataset_generation_label: gr.Label(visible=False)}
 
387
 
388
  @rewrite_full_dataset_button.click(inputs=[dataset_search, subset_dropdown, split_dropdown, pretty_input_preview, pretty_output_preview, input_prompt, output_format_dataframe, dataset_info_json, select_namespace_dropdown, max_num_rows_dropdown], outputs=[full_dataset_generation_label, full_dataset_generation_success_html, pretty_output_preview, pretty_full_dataset_generation_output])
389
  def rewrite_full_dataset(dataset: str, subset: str, split: str, pretty_input_preview_df: pd.DataFrame, pretty_output_preview_df: pd.DataFrame, prompt: str, output_format_df: pd.DataFrame, dataset_info: dict[str, Any], namespace: str, max_num_rows: int, oauth_token: Optional[gr.OAuthToken]) -> Iterator[pd.DataFrame]:
 
 
390
  output_format_df = output_format_df[output_format_df["column"] != ""]
391
  format = output_format_df.to_dict(orient="records")
392
  format = {"properties": {x["column"]: json.loads(x["type"]) for x in format}, "required": [x["column"] for x in format]}
393
+ input_preview_rows = [{k: json.loads(row[k]) for k in output_format_df["column"] if k in row} for row in pretty_input_preview_df.to_dict(orient="records")]
394
+ output_preview_rows = [{k: json.loads(v) for k, v in row.items()} for row in pretty_output_preview_df.to_dict(orient="records")]
395
  num_examples = dataset_info["splits"][split]["num_examples"]
396
  total = min(num_examples, max_num_rows)
397
  print(f"ReWriting {dataset} with instruction '{prompt}'")