fix preprocessing errors

#1
by Zhihui - opened
Files changed (1) hide show
  1. data_reviewer.py +26 -25
data_reviewer.py CHANGED
@@ -47,12 +47,6 @@ def load_and_display_sample(split, idx):
47
 
48
  # Process JSON data
49
  models = json.loads(sample["models"]) if isinstance(sample["models"], str) else sample["models"]
50
- meta = json.loads(sample["meta"]) if isinstance(sample["meta"], str) else sample["meta"]
51
- error_analysis = (
52
- json.loads(sample["human_error_analysis"])
53
- if isinstance(sample["human_error_analysis"], str)
54
- else sample["human_error_analysis"]
55
- )
56
 
57
  return (
58
  sample["image"], # image
@@ -63,9 +57,7 @@ def load_and_display_sample(split, idx):
63
  sample["query_source"], # query_source
64
  sample["query"], # query
65
  json.dumps(models, indent=2), # models_json
66
- json.dumps(meta, indent=2), # meta_json
67
  sample["rationale"], # rationale
68
- json.dumps(error_analysis, indent=2), # error_analysis_json
69
  sample["ground_truth"], # ground_truth
70
  f"Total samples: {len(dataset)}", # total_samples
71
  )
@@ -78,53 +70,64 @@ def create_data_viewer():
78
  initial_split = "test"
79
  initial_idx = 0
80
  initial_data = load_and_display_sample(initial_split, initial_idx)
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  with gr.Column():
83
  with gr.Row():
84
  dataset_split = gr.Radio(choices=["test"], value=initial_split, label="Dataset Split")
85
  sample_idx = gr.Number(label="Sample Index", value=initial_idx, minimum=0, step=1, interactive=True)
86
  total_samples = gr.Textbox(
87
- label="Total Samples", value=initial_data[12], interactive=False # Set initial total samples
88
  )
89
 
90
  with gr.Row():
91
  with gr.Column():
92
- image = gr.Image(label="Sample Image", type="pil", value=initial_data[0]) # Set initial image
93
- query = gr.Textbox(label="Query", value=initial_data[6], interactive=False) # Set initial query
94
 
95
  with gr.Column():
96
  sample_id = gr.Textbox(
97
- label="Sample ID", value=initial_data[1], interactive=False # Set initial sample ID
98
  )
99
  chosen_response = gr.TextArea(
100
- label="Chosen Response ✅", value=initial_data[2], interactive=False # Set initial chosen response
 
 
101
  )
102
  rejected_response = gr.TextArea(
103
  label="Rejected Response ❌",
104
- value=initial_data[3], # Set initial rejected response
105
  interactive=False,
106
  )
107
 
108
  with gr.Row(visible=not IGNORE_DETAILS):
109
- judge = gr.Textbox(label="Judge", value=initial_data[4], interactive=False) # Set initial judge
110
  query_source = gr.Textbox(
111
- label="Query Source", value=initial_data[5], interactive=False # Set initial query source
112
  )
113
 
114
  with gr.Row(visible=not IGNORE_DETAILS):
115
  with gr.Column():
116
- models_json = gr.JSON(label="Models", value=json.loads(initial_data[7])) # Set initial models
117
- meta_json = gr.JSON(label="Meta", value=json.loads(initial_data[8])) # Set initial meta
118
  rationale = gr.TextArea(
119
- label="Rationale", value=initial_data[9], interactive=False # Set initial rationale
120
  )
121
 
122
  with gr.Column():
123
- error_analysis_json = gr.JSON(
124
- label="Human Error Analysis", value=json.loads(initial_data[10]) # Set initial error analysis
125
- )
126
  ground_truth = gr.TextArea(
127
- label="Ground Truth", value=initial_data[11], interactive=False # Set initial ground truth
128
  )
129
 
130
  # Auto-update when any input changes
@@ -141,9 +144,7 @@ def create_data_viewer():
141
  query_source,
142
  query,
143
  models_json,
144
- meta_json,
145
  rationale,
146
- error_analysis_json,
147
  ground_truth,
148
  total_samples,
149
  ],
 
47
 
48
  # Process JSON data
49
  models = json.loads(sample["models"]) if isinstance(sample["models"], str) else sample["models"]
 
 
 
 
 
 
50
 
51
  return (
52
  sample["image"], # image
 
57
  sample["query_source"], # query_source
58
  sample["query"], # query
59
  json.dumps(models, indent=2), # models_json
 
60
  sample["rationale"], # rationale
 
61
  sample["ground_truth"], # ground_truth
62
  f"Total samples: {len(dataset)}", # total_samples
63
  )
 
70
  initial_split = "test"
71
  initial_idx = 0
72
  initial_data = load_and_display_sample(initial_split, initial_idx)
73
+ (
74
+ init_image,
75
+ init_sample_id,
76
+ init_chosen_response,
77
+ init_rejected_response,
78
+ init_judge,
79
+ init_query_source,
80
+ init_query,
81
+ init_models_json,
82
+ init_rationale,
83
+ init_ground_truth,
84
+ init_total_samples,
85
+ ) = initial_data
86
 
87
  with gr.Column():
88
  with gr.Row():
89
  dataset_split = gr.Radio(choices=["test"], value=initial_split, label="Dataset Split")
90
  sample_idx = gr.Number(label="Sample Index", value=initial_idx, minimum=0, step=1, interactive=True)
91
  total_samples = gr.Textbox(
92
+ label="Total Samples", value=init_total_samples, interactive=False # Set initial total samples
93
  )
94
 
95
  with gr.Row():
96
  with gr.Column():
97
+ image = gr.Image(label="Sample Image", type="pil", value=init_image) # Set initial image
98
+ query = gr.Textbox(label="Query", value=init_query, interactive=False) # Set initial query
99
 
100
  with gr.Column():
101
  sample_id = gr.Textbox(
102
+ label="Sample ID", value=init_sample_id, interactive=False # Set initial sample ID
103
  )
104
  chosen_response = gr.TextArea(
105
+ label="Chosen Response ✅",
106
+ value=init_chosen_response,
107
+ interactive=False, # Set initial chosen response
108
  )
109
  rejected_response = gr.TextArea(
110
  label="Rejected Response ❌",
111
+ value=init_rejected_response, # Set initial rejected response
112
  interactive=False,
113
  )
114
 
115
  with gr.Row(visible=not IGNORE_DETAILS):
116
+ judge = gr.Textbox(label="Judge", value=init_judge, interactive=False) # Set initial judge
117
  query_source = gr.Textbox(
118
+ label="Query Source", value=init_query_source, interactive=False # Set initial query source
119
  )
120
 
121
  with gr.Row(visible=not IGNORE_DETAILS):
122
  with gr.Column():
123
+ models_json = gr.JSON(label="Models", value=json.loads(init_models_json)) # Set initial models
 
124
  rationale = gr.TextArea(
125
+ label="Rationale", value=init_rationale, interactive=False # Set initial rationale
126
  )
127
 
128
  with gr.Column():
 
 
 
129
  ground_truth = gr.TextArea(
130
+ label="Ground Truth", value=init_ground_truth, interactive=False # Set initial ground truth
131
  )
132
 
133
  # Auto-update when any input changes
 
144
  query_source,
145
  query,
146
  models_json,
 
147
  rationale,
 
148
  ground_truth,
149
  total_samples,
150
  ],