wcy committed on
Commit
0803c45
1 Parent(s): ce41bcb
app.py CHANGED
@@ -11,36 +11,6 @@ def load_json(file_name):
11
  with open(file_path, "r", encoding="utf-8") as f:
12
  return json.load(f)
13
 
14
- # # 显示单个字典的信息
15
- # def display_dict(data):
16
- # st.write("### 文件信息")
17
- # st.write(f"**Path:** {data['path']}")
18
- # st.write(f"**Table ID:** {data['table_id']}")
19
- # st.write(f"**Section:** {data['section']}")
20
- # st.write("### Table")
21
- # st.markdown(data['table_html'], unsafe_allow_html=True)
22
- # st.write("### Context")
23
- # # 拼接 all_context 并高亮 target_context_ids 的句子
24
- # all_context = data["all_context"]
25
- # highlighted_context = ""
26
- # for idx, sentence in enumerate(all_context):
27
- # if idx == data["perturb_sentence_id"]:
28
- # highlighted_context += f"<span style='color:red;'>{sentence}</span> "
29
- # elif idx in data["target_context_ids"]:
30
- # highlighted_context += f"**{sentence}** "
31
- # else:
32
- # highlighted_context += sentence + " "
33
- # st.markdown(highlighted_context, unsafe_allow_html=True)
34
-
35
- # st.write("### Selected Paragraphs")
36
- # for paragraph in data["selected_paragraphs"]:
37
- # st.write(paragraph)
38
-
39
- # st.write("### Output")
40
- # st.write("**Perturbed Statement:**")
41
- # st.write(data["output"]["perturbed_statement"])
42
- # st.write("**Perturbed Explanation:**")
43
- # st.write(data["output"]["perturbed_explanation"])
44
  # 显示单个字典的信息
45
  def display_dict(data):
46
  st.write("### 文件信息")
@@ -73,6 +43,9 @@ def display_dict(data):
73
  st.write("### Selected Paragraphs")
74
  for paragraph in data["selected_paragraphs"]:
75
  st.write(paragraph)
 
 
 
76
 
77
  st.write("### Output")
78
  st.write("**Perturbed Statement:**")
 
11
  with open(file_path, "r", encoding="utf-8") as f:
12
  return json.load(f)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # 显示单个字典的信息
15
  def display_dict(data):
16
  st.write("### 文件信息")
 
43
  st.write("### Selected Paragraphs")
44
  for paragraph in data["selected_paragraphs"]:
45
  st.write(paragraph)
46
+ st.write("### perturb_sentence_id")
47
+ st.write(data["perturb_sentence_id"][0])
48
+ st.write(data["perturb_sentence_id"][1])
49
 
50
  st.write("### Output")
51
  st.write("**Perturbed Statement:**")
table_result/2407.00009v1_output.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "path": "table_paper/2407.00009v1.json",
4
+ "table_id": "1",
5
+ "section": "4.1",
6
+ "all_context": [
7
+ "The statistics of FPGA24 public benchmarks are summarized in Table 1 .",
8
+ "In FPGA24 contest, these circuits are obtained from different public benchmark suites and are then synthesized, placed, and routed on the target FPGA by using Vivado.",
9
+ "The routing solutions of all signal nets are removed for the contest task.",
10
+ "The benchmarks use the open-source FPGA Interchange Format (FPGAIF).",
11
+ "The nets in Table 1 include all signal nets to be routed and the connections represent the corresponding two-pin sub-nets to be routed.",
12
+ ""
13
+ ],
14
+ "target_context_ids": [
15
+ 0,
16
+ 4
17
+ ],
18
+ "selected_paragraphs": [
19
+ "[paragraph id = 0] The statistics of FPGA24 public benchmarks are summarized in Table 1 .",
20
+ "[paragraph id = 4] The nets in Table 1 include all signal nets to be routed and the connections represent the corresponding two-pin sub-nets to be routed."
21
+ ],
22
+ "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S3.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>. </span><span class=\"ltx_text\" id=\"S3.T1.3.2\" style=\"font-size:90%;\">Statistics of FPGA24 public benchmarks</span></figcaption>\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.4\" style=\"width:433.6pt;height:269pt;vertical-align:-1.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-1.6pt,1.0pt) scale(0.99276869630907,0.99276869630907) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T1.4.1\">\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.1.1\">Benchmark</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.1.2\">Nets (k)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.1.3\">Connections (k)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.1.4\">LUTs (k)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.1.5\">FFs (k)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.1.6\">DSPs</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.1.7\">BRAMs</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.2.1\">logicnets_jscl</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.2.2\">28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.2.3\">180</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.2.4\">31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.2.5\">2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.1.2.6\">0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.4.1.2.7\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.3.1\">boom_med_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.3.2\">54</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.3.3\">221</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.3.4\">36</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.3.5\">17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.3.6\">24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.3.7\">142</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.4.1\">vtr_mcml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.4.2\">71</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.4.3\">225</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.4.4\">43</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.4.5\">15</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.4.6\">105</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.4.7\">142</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.5.1\">rosetta_fd</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.5.2\">77</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.5.3\">230</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.5.4\">46</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.5.5\">39</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.5.6\">72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.5.7\">62</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.6.1\">corundum_25g</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.6.2\">166</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.6.3\">495</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.6.4\">73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.6.5\">96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.6.6\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.6.7\">221</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.7.1\">finn_radioml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.7.2\">110</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.7.3\">405</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.7.4\">74</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.7.5\">46</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.7.6\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.7.7\">25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.8.1\">vtr_lu64peeng</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.8.2\">143</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.8.3\">537</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.8.4\">90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.8.5\">36</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.8.6\">128</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.8.7\">303</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.9.1\">corescore_500</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.9.2\">179</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.9.3\">590</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.9.4\">96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.9.5\">116</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.9.6\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.9.7\">250</td>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S3.T1.4.1.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.10.1\">corescore_500_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.10.2\">175</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.10.3\">597</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.10.4\">96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.10.5\">116</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.10.6\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.10.7\">250</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.11.1\">mlcad_d181_lefttwo3rds</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.11.2\">361</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.11.3\">916</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.11.4\">155</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.11.5\">203</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.11.6\">1344</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.11.7\">405</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.12.1\">koios_dla_like_large</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.12.2\">509</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.12.3\">912</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.12.4\">189</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.12.5\">362</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.12.6\">2209</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.12.7\">192</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.13.1\">boom_soc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.13.2\">274</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" 
id=\"S3.T1.4.1.13.3\">1374</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.13.4\">227</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.13.5\">98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.13.6\">61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.13.7\">161</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.4.1.14.1\">ispd16_example2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.14.2\">449</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T1.4.1.14.3\">1455</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.14.4\">289</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.14.5\">234</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.14.6\">200</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.4.1.14.7\">384</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.4.1.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.15.1\">UltraScale+ xcvu3p</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T1.4.1.15.2\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.4.1.15.3\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T1.4.1.15.4\">394</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T1.4.1.15.5\">788</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T1.4.1.15.6\">2280</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T1.4.1.15.7\">720</td>\n</tr>\n</table>\n</span></div>\n</figure>",
23
+ "perturb_sentence_id": [
24
+ 0,
25
+ 1
26
+ ],
27
+ "output": {
28
+ "perturbed_statement": "[paragraph id = 0] The statistics of FPGA24 public benchmarks are summarized in Table 1. In FPGA24 contest, these circuits are obtained from different public benchmark suites and are then synthesized, placed, and routed on the target FPGA using Synopsys Design Compiler.",
29
+ "perturbed_explanation": "1. The original statement explains that Table 1 summarizes the statistics of FPGA24 public benchmarks and describes the usage of Vivado for synthesizing, placing, and routing the circuits on the FPGA, which aligns with common usage in FPGA design workflows. 2. The statement incorrectly replaces the tool used for synthesizing, placing, and routing from Vivado to Synopsys Design Compiler. Synopsys Design Compiler is typically used for ASIC design rather than FPGA design, making it inconsistent with the context, which specifies the use of Vivado for FPGA tasks."
30
+ }
31
+ },
32
+ {
33
+ "path": "table_paper/2407.00009v1.json",
34
+ "table_id": "2",
35
+ "section": "4.2",
36
+ "all_context": [
37
+ "The overall results of different methods are presented in Table 2 .",
38
+ "Compared with Vivado, RWRoute can significantly reduce the wirelength but incur considerable time overhead in some circuits, like mlcad_d181_lefttwo3rds and boom_soc.",
39
+ "Compared with both Vivado and RWRoute, our router can not only run two times faster on average but also further improve the wirelength in most cases, demonstrating the effectiveness of our proposed parallel framework.",
40
+ "In the following, we will conduct two ablation studies to discuss the contributions of different techniques in our proposed method.",
41
+ ""
42
+ ],
43
+ "target_context_ids": [
44
+ 0,
45
+ 1,
46
+ 2
47
+ ],
48
+ "selected_paragraphs": [
49
+ "[paragraph id = 0] The overall results of different methods are presented in Table 2 .",
50
+ "[paragraph id = 1] Compared with Vivado, RWRoute can significantly reduce the wirelength but incur considerable time overhead in some circuits, like mlcad_d181_lefttwo3rds and boom_soc.",
51
+ "[paragraph id = 2] Compared with both Vivado and RWRoute, our router can not only run two times faster on average but also further improve the wirelength in most cases, demonstrating the effectiveness of our proposed parallel framework."
52
+ ],
53
+ "table_html": "<figure class=\"ltx_table\" id=\"S3.T2\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S3.T2.2.1.1\" style=\"font-size:90%;\">Table 2</span>. </span><span class=\"ltx_text\" id=\"S3.T2.3.2\" style=\"font-size:90%;\">Overall performance. All metrics are the smaller the better.</span></figcaption><div class=\"ltx_flex_figure\">\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<div class=\"ltx_inline-block ltx_figure_panel ltx_transformed_outer\" id=\"S3.T2.4\" style=\"width:433.6pt;height:199.5pt;vertical-align:-0.7pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-97.3pt,44.6pt) scale(0.690201763473409,0.690201763473409) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T2.4.1\">\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.1.1\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T2.4.1.1.1.1\">Benchmark</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" colspan=\"3\" id=\"S3.T2.4.1.1.2\">Vivado</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" colspan=\"3\" id=\"S3.T2.4.1.1.3\">RWRoute</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"3\" id=\"S3.T2.4.1.1.4\">Ours</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.1\">Runtime (s)</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.2\">Wirelength</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.2.3\">Score</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.4\">Runtime (s)</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.5\">Wirelength</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.2.6\">Score</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.7\">Runtime (s)</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T2.4.1.2.8\">Wirelength</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.2.9\">Score</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.3.1\">logicnets_jscl</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.2\">78.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.3\">310</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.3.4\">101.50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.5\">52.03</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.3.6.1\">226</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.3.7\">69.43</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.3.8.1\">35.26</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.9\">234</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.4.1.3.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.3.10.1\">55.13</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.4.1\">boom_med_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.4.2.1\">139.33</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.3\">823</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.4.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.4.4.1\">207.70</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.5\">230.88</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.6\">969</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.4.7\">304.69</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T2.4.1.4.8\">144.50</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.4.9.1\">806</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.4.10\">210.65</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.5.1\">vtr_mcml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.2\">490.33</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.3\">666</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.5.4\">507.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.5\">243.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.6\">594</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.5.7\">278.22</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.5.8.1\">94.29</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.5.9.1\">584</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.5.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.5.10.1\">143.26</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.6.1\">rosetta_fd</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.2\">147.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.3\">888</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.6.4\">221.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.5\">161.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.6\">839</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.6.7\">229.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.8\"><span class=\"ltx_text ltx_font_bold\" 
id=\"S3.T2.4.1.6.8.1\">125.32</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.6.9.1\">804</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.6.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.6.10.1\">193.19</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.7.1\">corundum_25g</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.2\">-</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.3\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.7.4\">-</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.5\">249.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.7.6.1\">396</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.7.7\">264.25</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.7.8.1\">131.11</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.9\">500</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.7.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.7.10.1\">168.00</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.8.1\">finn_radioml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.2\">154.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.3\">338</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.8.4\">173.00</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.5\">119.88</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.6\">277</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.8.7\">135.59</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.8\"><span class=\"ltx_text 
ltx_font_bold\" id=\"S3.T2.4.1.8.8.1\">63.29</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.8.9.1\">251</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.8.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.8.10.1\">82.06</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.9.1\">vtr_lu64peeng</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.2\">218.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.3\">1728</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.9.4\">369.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.5\">226.57</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.6\">1412</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.9.7\">345.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.9.8.1\">114.12</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.9.9.1\">1333</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.9.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.9.10.1\">236.01</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.10.1\">corescore_500</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.2\">188.33</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.3\">751</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.10.4\">244.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.5\">158.84</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.6\">680</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.10.7\">210.96</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T2.4.1.10.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.10.8.1\">73.03</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.10.9.1\">668</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.10.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.10.10.1\">132.52</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.11.1\">corescore_500_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.2\">226.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.3\">861</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.11.4\">290.10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.5\">278.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.11.6.1\">687</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.11.7\">319.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.11.8.1\">138.63</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.9\">739</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.11.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.11.10.1\">198.67</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.12.1\">mlcad_d181_lefttwo3rds</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.12.2.1\">407.67</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.3\">1159</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.12.4\">482.80</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.5\">1,779.59</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T2.4.1.12.6\">809</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.12.7\">1,682.53</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.8\">409.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.12.9.1\">771</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.12.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.12.10.1\">445.93</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.13.1\">koios_dla_like_large</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.2\">542.33</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.3\">927</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.13.4\">580.80</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.5\">392.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.6\">548</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.13.7\">407.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.13.8.1\">181.47</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.13.9.1\">520</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.13.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.13.10.1\">215.33</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.14.1\">boom_soc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.2\">711.00</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.3\">2235</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.14.4\">863.40</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.5\">1,292.74</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T2.4.1.14.6\">1698</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.14.7\">1,333.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.14.8.1\">635.33</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.14.9.1\">1673</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.14.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.14.10.1\">739.10</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T2.4.1.15.1\">ispd16_example2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.2\">385.00</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.3\">1481</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.15.4\">494.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.5\">584.94</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.6\">1114</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.4.1.15.7\">637.85</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.15.8.1\">314.65</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.15.9.1\">939</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.4.1.15.10\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.4.1.15.10.1\">377.09</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.4.1.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.16.1\">Avg. 
Ratio</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.2\">2.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.3\">1.31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.16.4\">1.73</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.5\">2.10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.6\">1.03</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T2.4.1.16.7\">1.76</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.8\">1.00</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.9\">1.00</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S3.T2.4.1.16.10\">1.00</td>\n</tr>\n</table>\n</span></div>\n</div>\n<div class=\"ltx_flex_break\"></div>\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<p class=\"ltx_p ltx_figure_panel\" id=\"S3.T2.5\"><span class=\"ltx_text\" id=\"S3.T2.5.1\" style=\"font-size:80%;\">*Vivado fails to route the corundum_25g due to the failure in the DRC during the routing.</span></p>\n</div>\n</div>\n</figure>",
54
+ "perturb_sentence_id": [
55
+ 1,
56
+ 2
57
+ ],
58
+ "output": {
59
+ "perturbed_statement": "[paragraph id = 1] Compared with Vivado, RWRoute shows no significant improvement in wirelength and has no time overhead in any circuits, like mlcad_d181_lefttwo3rds and boom_soc. Compared with both Vivado and RWRoute, our router shows no speed improvement but slightly worsens the wirelength, questioning the effectiveness of our proposed parallel framework.",
60
+ "perturbed_explanation": "The original explanation highlights two key comparisons: 1) RWRoute significantly reduces wirelength but incurs time overhead compared to Vivado; 2) The proposed router is two times faster on average and improves wirelength compared to both Vivado and RWRoute, showcasing its effectiveness. 1) The statement is incorrect because it claims RWRoute shows no significant improvement in wirelength and has no time overhead in any circuits, which contradicts the context where RWRoute is noted for improving wirelength but having a time overhead. 2) The statement wrongly suggests that the proposed router neither improves speed nor wirelength, which contradicts the original context that it runs faster and improves wirelength, thereby demonstrating the effectiveness of the framework."
61
+ }
62
+ },
63
+ {
64
+ "path": "table_paper/2407.00009v1.json",
65
+ "table_id": "3",
66
+ "section": "4.3",
67
+ "all_context": [
68
+ "Firstly, we conduct an ablation study on the recursive partitioning ternary tree (RPTT) in our framework by replacing the RPTT with the single recursive partitioning tree in ParaDRo (Hoo and Kumar, 2018 ).",
69
+ "The comparison results, shown in Table 3 , reveal that the RPTT can reduce the runtime by 14% without obvious wirelength degradations.",
70
+ "Secondly, we study the effect of the hybrid updating strategy (HUS) for congestion coefficients.",
71
+ "We disable the HUS and apply the default updating strategy in RWRoute.",
72
+ "The results on the four congested designs, depicted in Figure 6 , show that our HUS can both improve the runtime and the wirelength for congested designs.",
73
+ "In particular, the runtime of mlcad_d181_lefttwo3rds is accelerated by around 4.5 times, and the wirelengths of mlcad_d181_lefttwo3rds and boom_med_pb are reduced by over 16%.",
74
+ ""
75
+ ],
76
+ "target_context_ids": [
77
+ 0,
78
+ 1
79
+ ],
80
+ "selected_paragraphs": [
81
+ "[paragraph id = 0] Firstly, we conduct an ablation study on the recursive partitioning ternary tree (RPTT) in our framework by replacing the RPTT with the single recursive partitioning tree in ParaDRo (Hoo and Kumar, 2018 ).",
82
+ "[paragraph id = 1] The comparison results, shown in Table 3 , reveal that the RPTT can reduce the runtime by 14% without obvious wirelength degradations."
83
+ ],
84
+ "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S4.T3.2.1.1\" style=\"font-size:90%;\">Table 3</span>. </span><span class=\"ltx_text\" id=\"S4.T3.3.2\" style=\"font-size:90%;\">The comparison between Ours w.o. RPTT and Ours. The ratios larger than 1 represent the quality degradation.</span></figcaption>\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T3.4\" style=\"width:433.6pt;height:404.9pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(72.2pt,-67.4pt) scale(1.49954817342014,1.49954817342014) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T3.4.1.1.1\">Benchmark</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.1.2\">Runtime (s)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.1.3\">Wirelength</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.1.4\">Score</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T3.4.1.2.1\">logicnets_jscl</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.2.2\">1.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.2.3\">0.98</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.1.2.4\">1.00</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.3.1\">boom_med_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.3.2\">1.15</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.3.3\">1.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.3.4\">1.10</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" 
id=\"S4.T3.4.1.4.1\">vtr_mcml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.4.2\">1.46</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.4.3\">1.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.4.4\">1.30</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.5.1\">rosetta_fd</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.5.2\">1.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.5.3\">1.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.5.4\">1.09</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.6.1\">corundum_25g</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.6.2\">1.03</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.6.3\">0.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.6.4\">0.95</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.7.1\">finn_radioml</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.7.2\">1.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.7.3\">1.04</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.7.4\">1.03</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.8.1\">vtr_lu64peeng</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.8.2\">1.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.8.3\">1.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.8.4\">1.06</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.9.1\">corescore_500</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.9.2\">1.08</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.9.3\">1.01</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.9.4\">1.04</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S4.T3.4.1.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.10.1\">corescore_500_pb</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.10.2\">1.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.10.3\">1.08</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.10.4\">1.10</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.11.1\">mlcad_d181_lefttwo3rds</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.11.2\">1.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.11.3\">1.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.11.4\">1.15</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.12.1\">koios_dla_like_large</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.12.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.12.3\">1.04</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.12.4\">1.12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.13.1\">boom_soc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.13.2\">1.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.13.3\">0.98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.13.4\">1.32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T3.4.1.14.1\">ispd16_example2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.14.2\">1.01</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.14.3\">0.99</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.4.1.14.4\">1.00</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.1.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T3.4.1.15.1\">Avg. 
Ratio</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T3.4.1.15.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T3.4.1.15.3\">1.01</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T3.4.1.15.4\">1.10</td>\n</tr>\n</table>\n</span></div>\n</figure>",
85
+ "perturb_sentence_id": [
86
+ 0,
87
+ 1
88
+ ],
89
+ "output": {
90
+ "perturbed_statement": "[paragraph id = 0] Firstly, we conduct an ablation study on the recursive partitioning ternary tree (RPTT) in our framework by replacing the RPTT with the single recursive partitioning tree in ParaDRo (Hoo and Kumar, 2018 ). The comparison results, shown in Table 3, reveal that the RPTT can increase the runtime by 14% without obvious wirelength degradations.",
91
+ "perturbed_explanation": "The original explanation is that the RPTT reduces the runtime by 14% without obvious wirelength degradations. 2. The statement is incorrect because it claims that RPTT increases the runtime by 14%, whereas the correct finding is that it reduces the runtime."
92
+ }
93
+ }
94
+ ]
table_result/2407.00010v1_output.json CHANGED
@@ -21,10 +21,13 @@
21
  "[paragraph id = 3] We note that the M1-Pro results only include the Llama-2 (7B) and Mistral (7B) results, as Falcon (7B) generally did not complete tasks in less than two orders of magnitude greater runtime."
22
  ],
23
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T1.3.4.1.1\">System Name</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.2\">CPU</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.3\">GPU(s) per Node</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.4\">DRAM per Node</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.5\">VRAM per GPU</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T1.3.5.1.1\">Macbook Pro</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.2\">10-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.3\">14-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.4\">32GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.5\">-</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T1.2.2.3\">Swing AMD+A100</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.1.1.1\">264-core AMD EPYC 7742</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.2\">8NVIDIA A100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.4\">1TB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T1.2.2.5\">40GB</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T1.3.3.2\">Palmetto Intel+V100</th>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.3\">40-Core Intel Xeon 6148G</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.1\">2NVIDIA V100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.4\">376GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.5\">16GB</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Our System Configurations</figcaption>\n</figure>",
24
- "perturb_sentence_id": 3,
 
 
 
25
  "output": {
26
- "perturbed_statement": "[paragraph id = 3] We note that the M1-Pro results only include the Falcon (7B) and Mistral (7B) results, as Llama-2 (7B) generally did not complete tasks in less than two orders of magnitude greater runtime.",
27
- "perturbed_explanation": "1. The original explanation mentions that the M1-Pro results include the Llama-2 (7B) and Mistral (7B) results, whilst Falcon (7B) was excluded due to runtime inefficiency. 2. The statement is incorrect as it claims Falcon (7B) and Mistral (7B) results are included instead of the actual Llama-2 (7B) and Mistral (7B). Falcon (7B) was noted for its omission due to runtime issues, which does not align with the claim made in the statement."
28
  }
29
  },
30
  {
@@ -53,10 +56,13 @@
53
  "[paragraph id = 1] We perform these experiments using the suite of systems outlined in Table 1 with the models outlined in Section 4.1 ."
54
  ],
55
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T1.3.4.1.1\">System Name</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.2\">CPU</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.3\">GPU(s) per Node</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.4\">DRAM per Node</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.5\">VRAM per GPU</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T1.3.5.1.1\">Macbook Pro</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.2\">10-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.3\">14-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.4\">32GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.5\">-</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T1.2.2.3\">Swing AMD+A100</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.1.1.1\">264-core AMD EPYC 7742</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.2\">8NVIDIA A100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.4\">1TB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T1.2.2.5\">40GB</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T1.3.3.2\">Palmetto Intel+V100</th>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.3\">40-Core Intel Xeon 6148G</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.1\">2NVIDIA V100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.4\">376GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.5\">16GB</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Our System Configurations</figcaption>\n</figure>",
56
- "perturb_sentence_id": 1,
 
 
 
57
  "output": {
58
- "perturbed_statement": "[paragraph id = 1] We perform these experiments using the suite of systems outlined in Table 1 with the models outlined in Section 4.1 .In our experiments on the Palmetto Intel+V100 system, the V100 GPU had an out-of-memory error beyond 512 output tokens for Falcon (7B).",
59
- "perturbed_explanation": "The original explanation: The V100 GPU experienced memory allocation issues when generating output exceeding the specified token limit (1024 tokens). 1. However, the statement mentions 512 output tokens as the limit, which contradicts the actual tested token capacity of the V100 GPU for this model. Hence, this difference establishes the inaccuracy of the statement's claim."
60
  }
61
  }
62
  ]
 
21
  "[paragraph id = 3] We note that the M1-Pro results only include the Llama-2 (7B) and Mistral (7B) results, as Falcon (7B) generally did not complete tasks in less than two orders of magnitude greater runtime."
22
  ],
23
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T1.3.4.1.1\">System Name</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.2\">CPU</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.3\">GPU(s) per Node</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.4\">DRAM per Node</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.5\">VRAM per GPU</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T1.3.5.1.1\">Macbook Pro</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.2\">10-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.3\">14-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.4\">32GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.5\">-</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T1.2.2.3\">Swing AMD+A100</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.1.1.1\">264-core AMD EPYC 7742</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.2\">8NVIDIA A100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.4\">1TB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T1.2.2.5\">40GB</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T1.3.3.2\">Palmetto Intel+V100</th>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.3\">40-Core Intel Xeon 6148G</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.1\">2NVIDIA V100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.4\">376GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.5\">16GB</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Our System Configurations</figcaption>\n</figure>",
24
+ "perturb_sentence_id": [
25
+ 3,
26
+ 4
27
+ ],
28
  "output": {
29
+ "perturbed_statement": "[paragraph id = 3] We note that the M1-Pro results only include the Llama-2 (7B) and Mistral (7B) results, as Falcon (7B) generally completed tasks in less than two orders of magnitude greater runtime.",
30
+ "perturbed_explanation": "The original explanation: The statement specifies that the M1-Pro results only include certain model results because Falcon (7B) generally did not perform efficiently, taking significantly longer to complete tasks. 2. The statement now incorrectly suggests that the Falcon (7B) completed tasks efficiently in less than two orders of magnitude greater runtime, which contradicts the reason given for its exclusion in the results. Instead, it should reflect that the Falcon (7B) had inefficient run times that excluded it from the results."
31
  }
32
  },
33
  {
 
56
  "[paragraph id = 1] We perform these experiments using the suite of systems outlined in Table 1 with the models outlined in Section 4.1 ."
57
  ],
58
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T1.3.4.1.1\">System Name</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.2\">CPU</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.3\">GPU(s) per Node</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.4\">DRAM per Node</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.3.4.1.5\">VRAM per GPU</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.3.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T1.3.5.1.1\">Macbook Pro</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.2\">10-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.3\">14-core M1 Pro</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.4\">32GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T1.3.5.1.5\">-</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T1.2.2.3\">Swing AMD+A100</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.1.1.1\">264-core AMD EPYC 7742</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.2\">8NVIDIA A100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.2.2.4\">1TB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T1.2.2.5\">40GB</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T1.3.3.2\">Palmetto Intel+V100</th>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.3\">40-Core Intel Xeon 6148G</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.1\">2NVIDIA V100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.4\">376GB</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T1.3.3.5\">16GB</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Our System Configurations</figcaption>\n</figure>",
59
+ "perturb_sentence_id": [
60
+ 1,
61
+ 2
62
+ ],
63
  "output": {
64
+ "perturbed_statement": "[paragraph id = 1] We perform these experiments using the suite of systems outlined in Table 1 with the models outlined in Section 4.1. In our experiments on the Palmetto Intel+V100 system, the V100 GPU had an out-of-memory error beyond 2048 output tokens for Falcon (7B).",
65
+ "perturbed_explanation": "1. We perform these experiments using the suite of systems outlined in Table 1 with the models outlined in Section 4.1. In our experiments on the Palmetto Intel+V100 system, the V100 GPU had an out-of-memory error beyond 1024 output tokens for Falcon (7B). The statement is incorrect because it claims the V100 GPU had an out-of-memory error beyond 2048 output tokens, whereas it actually occurred beyond 1024 output tokens, according to the context."
66
  }
67
  }
68
  ]
table_result/2407.00013v1_output.json CHANGED
@@ -29,10 +29,13 @@
29
  "[paragraph id = 5] This threshold selection ensures an optimal balance between cached context freshness and computational efficiency."
30
  ],
31
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T1.1.1.1.1.1\">Threshold</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" colspan=\"4\" id=\"S4.T1.1.1.1.2\" style=\"padding:2.5pt 8.0pt;\">Value</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.2.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.1\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.2\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.3\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.4\" style=\"padding:2.5pt 8.0pt;\">25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.1\" style=\"padding:2.5pt 8.0pt;\">Cache hit</th>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.2\" style=\"padding:2.5pt 8.0pt;\">174</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.3\" style=\"padding:2.5pt 8.0pt;\">179</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.4\" style=\"padding:2.5pt 8.0pt;\">184</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.5\" style=\"padding:2.5pt 8.0pt;\">186</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" 
id=\"S4.T1.1.4.4.1\" style=\"padding:2.5pt 8.0pt;\">Cache miss</th>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.2\" style=\"padding:2.5pt 8.0pt;\">26</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.3\" style=\"padding:2.5pt 8.0pt;\">21</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.4\" style=\"padding:2.5pt 8.0pt;\">16</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.5\" style=\"padding:2.5pt 8.0pt;\">14</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.1\" style=\"padding:2.5pt 8.0pt;\">Ratio</th>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.2\" style=\"padding:2.5pt 8.0pt;\">6.7</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.3\" style=\"padding:2.5pt 8.0pt;\">8.5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.4\" style=\"padding:2.5pt 8.0pt;\">11.5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.5\" style=\"padding:2.5pt 8.0pt;\">13.3</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Cache hit and cache miss ratios at different thresholds.</figcaption>\n</figure>",
32
- "perturb_sentence_id": 1,
 
 
 
33
  "output": {
34
- "perturbed_statement": "[paragraph id = 1] The threshold is systematically varied from 15 minutes to 30 minutes, in increments of 5 minutes, as indicated in Table 1. The results of this variation, visualized in Figure 4, suggest a trend of decreasing cache hits as the threshold value rises.",
35
- "perturbed_explanation": "1. The original statement accurately describes the range of variation for the threshold, which is from 10 minutes to 25 minutes, and the observed trend of cache hits increasing with the threshold value, supported by visual references in the table and figure.\n2. The statement is inaccurate because it alters the threshold range to 15–30 minutes, which is not the analyzed range, and misrepresents the trend by claiming a decrease in cache hits rather than an increase. Therefore, this statement contradicts the data analysis results provided."
36
  }
37
  },
38
  {
@@ -84,10 +87,13 @@
84
  "[paragraph id = 12] Therefore, a 20-minute threshold appears to be the optimal point for maintaining an efficient cache system, given the current configuration and workload."
85
  ],
86
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.1\" style=\"padding:2.5pt 8.0pt;\">No. of queries</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.2\" style=\"padding:2.5pt 8.0pt;\">Threshold</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.3\" style=\"padding:2.5pt 8.0pt;\">Cache Hit</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.4\" style=\"padding:2.5pt 8.0pt;\">Cache miss</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.5\" style=\"padding:2.5pt 8.0pt;\">Cache Hit Ratio</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\">150</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.3\" style=\"padding:2.5pt 8.0pt;\">528</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.4\" style=\"padding:2.5pt 8.0pt;\">72</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td 
ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.2\" style=\"padding:2.5pt 8.0pt;\">542</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.3\" style=\"padding:2.5pt 8.0pt;\">58</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.4\" style=\"padding:2.5pt 8.0pt;\">9.34</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.2\" style=\"padding:2.5pt 8.0pt;\">555</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.3\" style=\"padding:2.5pt 8.0pt;\">45</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.4\" style=\"padding:2.5pt 8.0pt;\">12.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.5.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.2\" style=\"padding:2.5pt 8.0pt;\">561</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.3\" style=\"padding:2.5pt 8.0pt;\">39</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.6.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.6.5.1.1\">250</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.3\" style=\"padding:2.5pt 8.0pt;\">880</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r 
ltx_border_t\" id=\"S4.T2.1.6.5.4\" style=\"padding:2.5pt 8.0pt;\">120</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.7.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.2\" style=\"padding:2.5pt 8.0pt;\">904</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.3\" style=\"padding:2.5pt 8.0pt;\">96</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.4\" style=\"padding:2.5pt 8.0pt;\">9.41</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.8.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.2\" style=\"padding:2.5pt 8.0pt;\">925</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.3\" style=\"padding:2.5pt 8.0pt;\">75</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.4\" style=\"padding:2.5pt 8.0pt;\">12.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.9.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.2\" style=\"padding:2.5pt 8.0pt;\">934</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.3\" style=\"padding:2.5pt 8.0pt;\">66</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.4\" style=\"padding:2.5pt 8.0pt;\">14.15</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.10.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.1\" rowspan=\"4\" 
style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.10.9.1.1\">350</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.3\" style=\"padding:2.5pt 8.0pt;\">1232</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.4\" style=\"padding:2.5pt 8.0pt;\">168</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.11.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.2\" style=\"padding:2.5pt 8.0pt;\">1266</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.3\" style=\"padding:2.5pt 8.0pt;\">134</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.4\" style=\"padding:2.5pt 8.0pt;\">9.44</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.12.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.2\" style=\"padding:2.5pt 8.0pt;\">1296</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.3\" style=\"padding:2.5pt 8.0pt;\">104</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.4\" style=\"padding:2.5pt 8.0pt;\">12.46</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.13.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.2\" style=\"padding:2.5pt 
8.0pt;\">1309</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.3\" style=\"padding:2.5pt 8.0pt;\">91</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.14.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.14.13.1.1\">500</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.3\" style=\"padding:2.5pt 8.0pt;\">1761</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.4\" style=\"padding:2.5pt 8.0pt;\">239</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.5\" style=\"padding:2.5pt 8.0pt;\">7.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.15.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.2\" style=\"padding:2.5pt 8.0pt;\">1809</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.3\" style=\"padding:2.5pt 8.0pt;\">191</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.4\" style=\"padding:2.5pt 8.0pt;\">9.47</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.16.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.2\" style=\"padding:2.5pt 8.0pt;\">1851</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.3\" 
style=\"padding:2.5pt 8.0pt;\">149</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.4\" style=\"padding:2.5pt 8.0pt;\">12.42</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.17.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.2\" style=\"padding:2.5pt 8.0pt;\">1870</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.3\" style=\"padding:2.5pt 8.0pt;\">130</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Cache hit and cache miss ratios at different number of entries.</figcaption>\n</figure>",
87
- "perturb_sentence_id": 0,
 
 
 
88
  "output": {
89
- "perturbed_statement": "[paragraph id = 0] In this subsection, the size of the incoming entries load was systematically varied, testing with 200, 250, 350, and 600 for each threshold from 10 to 25 minutes in increments of 5 minutes as shown in Table 2. The findings reveal a consistent pattern across all test cases.",
90
- "perturbed_explanation": "1. The original explanation highlights the systematic variation in the size of the incoming entries load to specific values: 150, 250, 350, and 500 entries. 2. The statement introduces different entry load sizes (200 and 600 instead of 150 and 500), which conflicts with the identified tested values. This alteration affects the understanding of the experimental setup and its methodology."
91
  }
92
  }
93
  ]
 
29
  "[paragraph id = 5] This threshold selection ensures an optimal balance between cached context freshness and computational efficiency."
30
  ],
31
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T1.1.1.1.1.1\">Threshold</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" colspan=\"4\" id=\"S4.T1.1.1.1.2\" style=\"padding:2.5pt 8.0pt;\">Value</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.2.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.1\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.2\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.3\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.2.2.4\" style=\"padding:2.5pt 8.0pt;\">25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.1\" style=\"padding:2.5pt 8.0pt;\">Cache hit</th>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.2\" style=\"padding:2.5pt 8.0pt;\">174</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.3\" style=\"padding:2.5pt 8.0pt;\">179</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.4\" style=\"padding:2.5pt 8.0pt;\">184</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.3.3.5\" style=\"padding:2.5pt 8.0pt;\">186</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" 
id=\"S4.T1.1.4.4.1\" style=\"padding:2.5pt 8.0pt;\">Cache miss</th>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.2\" style=\"padding:2.5pt 8.0pt;\">26</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.3\" style=\"padding:2.5pt 8.0pt;\">21</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.4\" style=\"padding:2.5pt 8.0pt;\">16</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.4.5\" style=\"padding:2.5pt 8.0pt;\">14</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.1\" style=\"padding:2.5pt 8.0pt;\">Ratio</th>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.2\" style=\"padding:2.5pt 8.0pt;\">6.7</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.3\" style=\"padding:2.5pt 8.0pt;\">8.5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.4\" style=\"padding:2.5pt 8.0pt;\">11.5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.5.5.5\" style=\"padding:2.5pt 8.0pt;\">13.3</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Cache hit and cache miss ratios at different thresholds.</figcaption>\n</figure>",
32
+ "perturb_sentence_id": [
33
+ 1,
34
+ 2
35
+ ],
36
  "output": {
37
+ "perturbed_statement": "[paragraph id = 1] The threshold is systematically varied from 10 minutes to 30 minutes, in increments of 5 minutes, as indicated in Table 1. The results of this variation, visualized in Figure 4, suggest a trend of increasing cache hits as the threshold value rises.",
38
+ "perturbed_explanation": "1. The original explanation describes how the threshold is systematically varied from 10 minutes to 25 minutes, ensuring a controlled evaluation of the caching system's performance in relation to cache hit and miss ratios. 2. The statement incorrectly states that the threshold varies from 10 minutes to 30 minutes, whereas the correct range mentioned in the context is up to 25 minutes. This introduces a factual error as the threshold was not tested up to 30 minutes in the described experiment."
39
  }
40
  },
41
  {
 
87
  "[paragraph id = 12] Therefore, a 20-minute threshold appears to be the optimal point for maintaining an efficient cache system, given the current configuration and workload."
88
  ],
89
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.1\" style=\"padding:2.5pt 8.0pt;\">No. of queries</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.2\" style=\"padding:2.5pt 8.0pt;\">Threshold</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.3\" style=\"padding:2.5pt 8.0pt;\">Cache Hit</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.4\" style=\"padding:2.5pt 8.0pt;\">Cache miss</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.5\" style=\"padding:2.5pt 8.0pt;\">Cache Hit Ratio</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\">150</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.3\" style=\"padding:2.5pt 8.0pt;\">528</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.4\" style=\"padding:2.5pt 8.0pt;\">72</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.2.1.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td 
ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.2\" style=\"padding:2.5pt 8.0pt;\">542</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.3\" style=\"padding:2.5pt 8.0pt;\">58</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.3.2.4\" style=\"padding:2.5pt 8.0pt;\">9.34</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.2\" style=\"padding:2.5pt 8.0pt;\">555</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.3\" style=\"padding:2.5pt 8.0pt;\">45</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.3.4\" style=\"padding:2.5pt 8.0pt;\">12.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.5.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.2\" style=\"padding:2.5pt 8.0pt;\">561</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.3\" style=\"padding:2.5pt 8.0pt;\">39</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.5.4.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.6.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.6.5.1.1\">250</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.3\" style=\"padding:2.5pt 8.0pt;\">880</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r 
ltx_border_t\" id=\"S4.T2.1.6.5.4\" style=\"padding:2.5pt 8.0pt;\">120</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.6.5.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.7.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.2\" style=\"padding:2.5pt 8.0pt;\">904</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.3\" style=\"padding:2.5pt 8.0pt;\">96</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.7.6.4\" style=\"padding:2.5pt 8.0pt;\">9.41</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.8.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.2\" style=\"padding:2.5pt 8.0pt;\">925</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.3\" style=\"padding:2.5pt 8.0pt;\">75</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.8.7.4\" style=\"padding:2.5pt 8.0pt;\">12.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.9.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.2\" style=\"padding:2.5pt 8.0pt;\">934</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.3\" style=\"padding:2.5pt 8.0pt;\">66</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.9.8.4\" style=\"padding:2.5pt 8.0pt;\">14.15</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.10.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.1\" rowspan=\"4\" 
style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.10.9.1.1\">350</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.3\" style=\"padding:2.5pt 8.0pt;\">1232</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.4\" style=\"padding:2.5pt 8.0pt;\">168</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.10.9.5\" style=\"padding:2.5pt 8.0pt;\">7.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.11.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.2\" style=\"padding:2.5pt 8.0pt;\">1266</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.3\" style=\"padding:2.5pt 8.0pt;\">134</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.11.10.4\" style=\"padding:2.5pt 8.0pt;\">9.44</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.12.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.2\" style=\"padding:2.5pt 8.0pt;\">1296</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.3\" style=\"padding:2.5pt 8.0pt;\">104</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.12.11.4\" style=\"padding:2.5pt 8.0pt;\">12.46</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.13.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.2\" style=\"padding:2.5pt 
8.0pt;\">1309</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.3\" style=\"padding:2.5pt 8.0pt;\">91</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.13.12.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.14.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.1\" rowspan=\"4\" style=\"padding:2.5pt 8.0pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.14.13.1.1\">500</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.2\" style=\"padding:2.5pt 8.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.3\" style=\"padding:2.5pt 8.0pt;\">1761</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.4\" style=\"padding:2.5pt 8.0pt;\">239</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.14.13.5\" style=\"padding:2.5pt 8.0pt;\">7.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.15.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.1\" style=\"padding:2.5pt 8.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.2\" style=\"padding:2.5pt 8.0pt;\">1809</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.3\" style=\"padding:2.5pt 8.0pt;\">191</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.15.14.4\" style=\"padding:2.5pt 8.0pt;\">9.47</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.16.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.1\" style=\"padding:2.5pt 8.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.2\" style=\"padding:2.5pt 8.0pt;\">1851</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.3\" 
style=\"padding:2.5pt 8.0pt;\">149</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T2.1.16.15.4\" style=\"padding:2.5pt 8.0pt;\">12.42</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.17.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.1\" style=\"padding:2.5pt 8.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.2\" style=\"padding:2.5pt 8.0pt;\">1870</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.3\" style=\"padding:2.5pt 8.0pt;\">130</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T2.1.17.16.4\" style=\"padding:2.5pt 8.0pt;\">14.38</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Cache hit and cache miss ratios at different number of entries.</figcaption>\n</figure>",
90
+ "perturb_sentence_id": [
91
+ 0,
92
+ 1
93
+ ],
94
  "output": {
95
+ "perturbed_statement": "[paragraph id = 0] In this subsection, the size of the incoming entries load was systematically varied, testing with 200, 300, 400, and 600 for each threshold from 10 to 25 minutes in increments of 5 minutes as shown in Table 2. The findings reveal a consistent pattern across all test cases.",
96
+ "perturbed_explanation": "1. The original statement discusses the systematic variation of incoming entries loads and mentions testing with specific entry sizes of 150, 250, 350, and 500 for each threshold from 10 to 25 minutes. 2. The statement is incorrect as it claims that the entry sizes varied as 200, 300, 400, and 600, which does not match the specified sizes of 150, 250, 350, and 500. This discrepancy changes the experimental setup and would lead to different results and conclusions."
97
  }
98
  }
99
  ]
table_result/2407.00014v2_output.json CHANGED
@@ -29,10 +29,13 @@
29
  "[paragraph id = 8] Detailed features information can be seen in Table 1 , the reasons to choose them will be explained in feature extraction."
30
  ],
31
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text\" id=\"S2.T1.12.1\" style=\"font-size:89%;\">Selected Features with a Monotonic Relationship</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.9.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.1.1.1\" style=\"font-size:90%;\">Feature name</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.2.1.1\" style=\"font-size:90%;\">and their abbreviation</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.2\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.2.1\" style=\"font-size:90%;\">Formula</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.2.1\" style=\"font-size:90%;\">Root Mean Square (RMS)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.2.2.2.1\" style=\"font-size:90%;\">Mean 
Absolute Value (MAV)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.3.3.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.3.2.1\" style=\"font-size:90%;\">Variance (VAR)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.4.4.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.4.2.1\" style=\"font-size:90%;\">Standard Deviation (SD)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.5.5.2.1\" style=\"font-size:90%;\">Integral (INT)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.5.5.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.6.6.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.6.6.2.1\" style=\"font-size:90%;\">Wavelength (WL)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.6.6.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.7.7.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.7.7.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.2.1.1\" style=\"font-size:90%;\">Deviation Value 
(DASDV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.7.7.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.8.8.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.8.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Mean</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.2.1.1\" style=\"font-size:90%;\">Value (DAMV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T1.8.8.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
32
- "perturb_sentence_id": 8,
 
 
 
33
  "output": {
34
- "perturbed_statement": "[paragraph id = 8] Detailed features information can be seen in Table 1, the reasons to choose them will be explained in feature extraction. The features extraction process transformed each segmented window into a 5-dimensional features space, hereby constructing a feature matrix for each channel, where is the total number of windows extracted from each channel.",
35
- "perturbed_explanation": "The original explanation: The feature extraction process utilized transforms each windowed segment into an 8-dimensional feature space, constructing a comprehensive feature matrix for every channel. 1. This ensures accurate and systematic representation of the data. The incorrectness in the statement arises from indicating that the transformation results in a \"5-dimensional feature space\" instead of the correct \"8-dimensional feature space,\" inaccurately representing the dimensionality of the derived features."
36
  }
37
  },
38
  {
@@ -72,10 +75,13 @@
72
  "[paragraph id = 18] And the per-channel feature extraction processing can avoid the differences introduced by variations in electrode patch placement, providing a detailed and robust dataset for analyzing muscle force."
73
  ],
74
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text\" id=\"S2.T1.12.1\" style=\"font-size:89%;\">Selected Features with a Monotonic Relationship</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.9.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.1.1.1\" style=\"font-size:90%;\">Feature name</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.2.1.1\" style=\"font-size:90%;\">and their abbreviation</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.2\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.2.1\" style=\"font-size:90%;\">Formula</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.2.1\" style=\"font-size:90%;\">Root Mean Square (RMS)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.2.2.2.1\" style=\"font-size:90%;\">Mean 
Absolute Value (MAV)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.3.3.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.3.2.1\" style=\"font-size:90%;\">Variance (VAR)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.4.4.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.4.2.1\" style=\"font-size:90%;\">Standard Deviation (SD)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.5.5.2.1\" style=\"font-size:90%;\">Integral (INT)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.5.5.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.6.6.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.6.6.2.1\" style=\"font-size:90%;\">Wavelength (WL)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.6.6.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.7.7.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.7.7.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.2.1.1\" style=\"font-size:90%;\">Deviation Value 
(DASDV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.7.7.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.8.8.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.8.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Mean</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.2.1.1\" style=\"font-size:90%;\">Value (DAMV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T1.8.8.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
75
- "perturb_sentence_id": 12,
 
 
 
76
  "output": {
77
- "perturbed_statement": "[paragraph id = 12] Therefore, the sEMG data mentioned mostly in this paper represents the time-frequency features. However, we found incorporating a broader set of features did not improve the accuracy of our linear regression model.",
78
- "perturbed_explanation": "1. The original explanation states that analyzing a wider range of features enhances the model's performance, which adds value to the research. 2. The statement suggests that expanding the feature set did not benefit the model's accuracy. This is incorrect because it contradicts the initial claim, which highlights the importance of a comprehensive feature selection in improving model performance."
79
  }
80
  },
81
  {
@@ -95,10 +101,13 @@
95
  "[paragraph id = 2] The specific neural network diagram is shown in Figure 6 and Table 2 ."
96
  ],
97
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span><span class=\"ltx_text\" id=\"S2.T2.11.1\" style=\"font-size:89%;\">Models and Their Order</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T2.7\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.1.1\" style=\"font-size:90%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.2\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.2.1\" style=\"font-size:90%;\">Models</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.3\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.3.1\" style=\"font-size:90%;\">Core Formulas</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.8.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.1.1.1\" style=\"font-size:90%;\">Systems</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.2.1.1\" style=\"font-size:90%;\">Fit by</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.3.1.1\" style=\"font-size:90%;\">the 
Model</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.3\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.2.2.3.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.3.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.1.1\">Near-</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.2.1\">linear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.1.1.1\" style=\"font-size:90%;\">DD</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.2.1.1\" style=\"font-size:90%;\">(one</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.3.1.1\" style=\"font-size:90%;\">layer)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.2.2.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.2.2\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.1.1.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.2.2.2\">\n<td class=\"ltx_td 
ltx_nopad_r ltx_align_left\" id=\"S2.T2.2.2.2.2.2.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.1.1.1\" style=\"font-size:90%;\">Second-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.2\"><span class=\"ltx_text\" id=\"S2.T2.3.3.2.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.3.3.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.3.3.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.3.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.1.1.1\" style=\"font-size:90%;\">First-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.2.1.1\" 
style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.4.4.2\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.4.4.2.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.2.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.1.1\">Nonlinear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.2.1\">complex</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.3\"><span class=\"ltx_text\" id=\"S2.T2.4.4.3.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.4.4.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.4.4.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.2.1\"><span 
class=\"ltx_text\" id=\"S2.T2.4.4.4.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.4\"><span class=\"ltx_text\" id=\"S2.T2.7.7.4.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.3.3\">\n<tr class=\"ltx_tr\" id=\"S2.T2.5.5.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.5.5.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.6.6.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.6.6.2.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.3.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.7.7.3.3.3.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.3.1.1\" 
style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n</tbody>\n</table>\n</figure>",
98
- "perturb_sentence_id": 2,
 
 
 
99
  "output": {
100
- "perturbed_statement": "[paragraph id = 2] The specific neural network diagram is shown in Figure 5 and Table 2.",
101
- "perturbed_explanation": "The original explanation: Figures and tables are commonly referenced to provide detailed information about specific components or data. By replacing 'Figure 6' with 'Figure 5', it incorrectly states the source location of the neural network diagram, potentially leading to confusion when attempting to locate the correct reference."
102
  }
103
  },
104
  {
@@ -126,10 +135,13 @@
126
  "[paragraph id = 4] Its formula can be seen in the Table 2 , and the one-layer DD model is capable of fitting up to a second-order system."
127
  ],
128
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span><span class=\"ltx_text\" id=\"S2.T2.11.1\" style=\"font-size:89%;\">Models and Their Order</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T2.7\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.1.1\" style=\"font-size:90%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.2\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.2.1\" style=\"font-size:90%;\">Models</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.3\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.3.1\" style=\"font-size:90%;\">Core Formulas</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.8.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.1.1.1\" style=\"font-size:90%;\">Systems</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.2.1.1\" style=\"font-size:90%;\">Fit by</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.3.1.1\" style=\"font-size:90%;\">the 
Model</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.3\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.2.2.3.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.3.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.1.1\">Near-</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.2.1\">linear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.1.1.1\" style=\"font-size:90%;\">DD</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.2.1.1\" style=\"font-size:90%;\">(one</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.3.1.1\" style=\"font-size:90%;\">layer)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.2.2.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.2.2\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.1.1.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.2.2.2\">\n<td class=\"ltx_td 
ltx_nopad_r ltx_align_left\" id=\"S2.T2.2.2.2.2.2.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.1.1.1\" style=\"font-size:90%;\">Second-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.2\"><span class=\"ltx_text\" id=\"S2.T2.3.3.2.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.3.3.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.3.3.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.3.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.1.1.1\" style=\"font-size:90%;\">First-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.2.1.1\" 
style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.4.4.2\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.4.4.2.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.2.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.1.1\">Nonlinear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.2.1\">complex</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.3\"><span class=\"ltx_text\" id=\"S2.T2.4.4.3.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.4.4.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.4.4.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.2.1\"><span 
class=\"ltx_text\" id=\"S2.T2.4.4.4.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.4\"><span class=\"ltx_text\" id=\"S2.T2.7.7.4.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.3.3\">\n<tr class=\"ltx_tr\" id=\"S2.T2.5.5.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.5.5.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.6.6.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.6.6.2.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.3.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.7.7.3.3.3.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.3.1.1\" 
style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n</tbody>\n</table>\n</figure>",
129
- "perturb_sentence_id": 4,
 
 
 
130
  "output": {
131
- "perturbed_statement": "[paragraph id = 4] Its formula can be seen in the Table 2 , and the one-layer DD model is capable of fitting up to a third-order system.Its excellent generalization ability and low computational complexity are the main reasons for our choice.",
132
- "perturbed_explanation": "The original statement emphasizes the model’s ability to fit up to a second-order system. 3. The statement is incorrect because it claims the model can fit a third-order system, which contradicts its specified capabilities of fitting only up to a second-order system. Thus, this statement misrepresents the model's accurate functionality."
133
  }
134
  },
135
  {
@@ -160,10 +172,13 @@
160
  "[paragraph id = 7] It is proved that the output of the models constructed by DD, LN, MLP and CNN can well estimate the direction of finger force."
161
  ],
162
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span><span class=\"ltx_text\" id=\"S3.T3.4.1\" style=\"font-size:89%;\">Offline Analyses Results</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T3.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.1.1\" style=\"font-size:90%;\">Output</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.2\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.2.1\" style=\"font-size:90%;\">Method</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.5.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Area Under the</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Curve (AUC)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.5.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.4.1.1.1.1\" 
style=\"font-size:90%;\">Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Error (SE)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.5\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.5.1\" style=\"font-size:90%;\">Accuracy</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.1.1\" style=\"font-size:90%;\">L1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.2\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.3\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.3.1\" style=\"font-size:90%;\">0.977887</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.4\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.4.1\" style=\"font-size:90%;\">0.000449</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.5\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.5.1\" style=\"font-size:90%;\">92.22%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.3.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.2\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.2.1\" style=\"font-size:90%;\">0.929772</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.3\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.3.1\" style=\"font-size:90%;\">0.000804</span></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T3.5.3.2.4\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.4.1\" style=\"font-size:90%;\">85.01%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.4.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.1\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.2\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.2.1\" style=\"font-size:90%;\">0.993835</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.3\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.3.1\" style=\"font-size:90%;\">0.000250</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.4\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.4.1\" style=\"font-size:90%;\">96.63%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.5.4\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.1\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.2\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.2.1\" style=\"font-size:90%;\">0.999411</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.3\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.3.1\" style=\"font-size:90%;\">0.000073</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.4\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.4.1\" style=\"font-size:90%;\">99.15%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.6.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.1.1\" style=\"font-size:90%;\">L2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.2\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.3\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.3.1\" style=\"font-size:90%;\">0.972789</span></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.4\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.4.1\" style=\"font-size:90%;\">0.000552</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.5\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.5.1\" style=\"font-size:90%;\">90.84%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.7.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.1\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.2\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.2.1\" style=\"font-size:90%;\">0.942453</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.3\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.3.1\" style=\"font-size:90%;\">0.000798</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.4\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.4.1\" style=\"font-size:90%;\">86.07%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.8.7\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.1\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.2\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.2.1\" style=\"font-size:90%;\">0.988339</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.3\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.3.1\" style=\"font-size:90%;\">0.000382</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.4\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.4.1\" style=\"font-size:90%;\">94.50%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.9.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.1\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.2\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.2.1\" style=\"font-size:90%;\">0.998866</span></td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.3\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.3.1\" style=\"font-size:90%;\">0.000113</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.4\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.4.1\" style=\"font-size:90%;\">98.66%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.10.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.1.1\" style=\"font-size:90%;\">L3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.2\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.3\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.3.1\" style=\"font-size:90%;\">0.982602</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.4\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.4.1\" style=\"font-size:90%;\">0.000398</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.5\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.5.1\" style=\"font-size:90%;\">93.79%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.11.10\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.1\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.2\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.2.1\" style=\"font-size:90%;\">0.968013</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.3\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.3.1\" style=\"font-size:90%;\">0.000541</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.4\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.4.1\" style=\"font-size:90%;\">91.45%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.12.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.1\"><span 
class=\"ltx_text\" id=\"S3.T3.5.12.11.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.2\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.2.1\" style=\"font-size:90%;\">0.992689</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.3\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.3.1\" style=\"font-size:90%;\">0.000272</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.4\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.4.1\" style=\"font-size:90%;\">96.18%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.13.12\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.1\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.2\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.2.1\" style=\"font-size:90%;\">0.999116</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.3\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.3.1\" style=\"font-size:90%;\">0.000089</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.4\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.4.1\" style=\"font-size:90%;\">98.85%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.14.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.1.1\" style=\"font-size:90%;\">L4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.2\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.3\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.3.1\" style=\"font-size:90%;\">0.967460</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.4\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.4.1\" style=\"font-size:90%;\">0.000506</span></td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.5\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.5.1\" style=\"font-size:90%;\">90.94%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.15.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.1\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.2\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.2.1\" style=\"font-size:90%;\">0.919576</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.3\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.3.1\" style=\"font-size:90%;\">0.000812</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.4\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.4.1\" style=\"font-size:90%;\">84.78%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.16.15\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.1\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.2\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.2.1\" style=\"font-size:90%;\">0.989969</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.3\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.3.1\" style=\"font-size:90%;\">0.000292</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.4\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.4.1\" style=\"font-size:90%;\">95.55%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.17.16\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.1\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.2\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.2.1\" style=\"font-size:90%;\">0.999032</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.3\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.3.1\" 
style=\"font-size:90%;\">0.000086</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.4\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.4.1\" style=\"font-size:90%;\">98.84%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.18.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T3.5.18.17.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.1.1\" style=\"font-size:90%;\">L5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.2\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.3\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.3.1\" style=\"font-size:90%;\">0.980862</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.4\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.4.1\" style=\"font-size:90%;\">0.000529</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.5\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.5.1\" style=\"font-size:90%;\">93.94%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.19.18\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.1\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.2\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.2.1\" style=\"font-size:90%;\">0.955773</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.3\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.3.1\" style=\"font-size:90%;\">0.000797</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.4\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.4.1\" style=\"font-size:90%;\">90.28%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.20.19\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.1\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.1.1\" 
style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.2\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.2.1\" style=\"font-size:90%;\">0.992517</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.3\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.3.1\" style=\"font-size:90%;\">0.000351</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.4\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.4.1\" style=\"font-size:90%;\">96.65%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.21.20\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.1\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.2\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.2.1\" style=\"font-size:90%;\">0.998840</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.3\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.3.1\" style=\"font-size:90%;\">0.000131</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.4\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.4.1\" style=\"font-size:90%;\">98.67%</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
163
- "perturb_sentence_id": 6,
 
 
 
164
  "output": {
165
- "perturbed_statement": "[paragraph id = 6] The AUC values of these models are all below 0.5, far from 1.It is proved that the output of the models constructed by DD, LN, MLP and CNN can well estimate the direction of finger force.",
166
- "perturbed_explanation": "Original Explanation: The AUC metric ranges from 0 to 1, where values closer to 1 indicate high model performance, specifically in binary classification tasks. It was demonstrated that the models developed performed well, with AUC values exceeding 0.9, showcasing their capability in predicting finger force direction. Corrected Explanation: However, the statement claims that the AUC values are below 0.5, which directly contradicts the observed performance values. Such an alteration misrepresents the actual results and their associated implications."
167
  }
168
  },
169
  {
@@ -203,10 +218,13 @@
203
  "[paragraph id = 15] MLP and CNN made 61 errors and 35 errors respectively (see supplementary material)."
204
  ],
205
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T4\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span><span class=\"ltx_text\" id=\"S3.T4.4.1\" style=\"font-size:89%;\">Statistics Analysis in Fitting Result</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T4.5\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T4.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.1\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.1.1\" style=\"font-size:90%;\">Network</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.2\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.3\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.3.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.4\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.4.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.5\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.5.1\" style=\"font-size:90%;\">CNN</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S3.T4.5.2.2.1\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.1.1\" style=\"font-size:90%;\">Error Times</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.2\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.2.1\" style=\"font-size:90%;\">10</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.3\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.3.1\" 
style=\"font-size:90%;\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.4\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.4.1\" style=\"font-size:90%;\">65</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.5\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.5.1\" style=\"font-size:90%;\">42</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.5.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.1\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.1.1\" style=\"font-size:90%;\">Correct rate</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.2\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.2.1\" style=\"font-size:90%;\">90%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.3\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.3.1\" style=\"font-size:90%;\">92%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.4\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.4.1\" style=\"font-size:90%;\">35%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.5\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.5.1\" style=\"font-size:90%;\">58%</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
206
- "perturb_sentence_id": 13,
 
 
 
207
  "output": {
208
- "perturbed_statement": "[paragraph id = 13] (For example, (c) and (d) in Figure 10 ) We counted all the results of 20 subjects and the results are shown in Table 4 .Among the 100 fitting results of each machine learning algorithm, DD made 10 errors and LN made 8 errors.",
209
- "perturbed_explanation": "Original Explanation: The information states that the DD algorithm made 2 errors while the LN algorithm made 4 errors. 2. However, the statement claims that DD made 10 errors and LN made 8 errors, which contradicts the actual error counts reported. This discrepancy misrepresents the accuracy and performance of the described models."
210
  }
211
  }
212
  ]
 
29
  "[paragraph id = 8] Detailed features information can be seen in Table 1 , the reasons to choose them will be explained in feature extraction."
30
  ],
31
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text\" id=\"S2.T1.12.1\" style=\"font-size:89%;\">Selected Features with a Monotonic Relationship</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.9.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.1.1.1\" style=\"font-size:90%;\">Feature name</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.2.1.1\" style=\"font-size:90%;\">and their abbreviation</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.2\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.2.1\" style=\"font-size:90%;\">Formula</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.2.1\" style=\"font-size:90%;\">Root Mean Square (RMS)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.2.2.2.1\" style=\"font-size:90%;\">Mean 
Absolute Value (MAV)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.3.3.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.3.2.1\" style=\"font-size:90%;\">Variance (VAR)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.4.4.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.4.2.1\" style=\"font-size:90%;\">Standard Deviation (SD)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.5.5.2.1\" style=\"font-size:90%;\">Integral (INT)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.5.5.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.6.6.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.6.6.2.1\" style=\"font-size:90%;\">Wavelength (WL)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.6.6.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.7.7.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.7.7.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.2.1.1\" style=\"font-size:90%;\">Deviation Value 
(DASDV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.7.7.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.8.8.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.8.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Mean</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.2.1.1\" style=\"font-size:90%;\">Value (DAMV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T1.8.8.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
32
+ "perturb_sentence_id": [
33
+ 8,
34
+ 9
35
+ ],
36
  "output": {
37
+ "perturbed_statement": "[paragraph id = 8] Detailed features information can be seen in Table 2, the reasons to choose them will be explained in feature extraction. The features extraction process transformed each windowed segment into a 10-dimensional features space, hereby constructing a feature matrix for each channel: where is the total number of windows extracted from each channel.",
38
+ "perturbed_explanation": "The original explanation: Detailed features information is provided in Table 1 and the features extraction process transformed each windowed segment into an 8-dimensional features space. 1. The statement claims that the features information is located in Table 2, which is incorrect because the information is actually in Table 1. 2. The statement claims the transformation results in a 10-dimensional feature space, which is incorrect because it actually results in an 8-dimensional feature space."
39
  }
40
  },
41
  {
 
75
  "[paragraph id = 18] And the per-channel feature extraction processing can avoid the differences introduced by variations in electrode patch placement, providing a detailed and robust dataset for analyzing muscle force."
76
  ],
77
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text\" id=\"S2.T1.12.1\" style=\"font-size:89%;\">Selected Features with a Monotonic Relationship</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.9.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.1.1.1\" style=\"font-size:90%;\">Feature name</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.9.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.9.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.1.1.2.1.1\" style=\"font-size:90%;\">and their abbreviation</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T1.8.9.1.2\"><span class=\"ltx_text\" id=\"S2.T1.8.9.1.2.1\" style=\"font-size:90%;\">Formula</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.2.1\" style=\"font-size:90%;\">Root Mean Square (RMS)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.2.2.2.1\" style=\"font-size:90%;\">Mean 
Absolute Value (MAV)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.3.3.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.3.2.1\" style=\"font-size:90%;\">Variance (VAR)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.4.4.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.4.2.1\" style=\"font-size:90%;\">Standard Deviation (SD)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.5.5.2.1\" style=\"font-size:90%;\">Integral (INT)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.5.5.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.6.6.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.6.6.2.1\" style=\"font-size:90%;\">Wavelength (WL)</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.6.6.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S2.T1.7.7.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.7.7.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.7.7.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.7.7.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.7.7.2.1.2.1.1\" style=\"font-size:90%;\">Deviation Value 
(DASDV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T1.7.7.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.8.8.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.8.8.2.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.1.1.1\" style=\"font-size:90%;\">Difference Absolute Mean</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.8.8.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T1.8.8.2.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.8.8.2.1.2.1.1\" style=\"font-size:90%;\">Value (DAMV)</span></td>\n</tr>\n</table>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T1.8.8.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
78
+ "perturb_sentence_id": [
79
+ 12,
80
+ 13
81
+ ],
82
  "output": {
83
+ "perturbed_statement": "[paragraph id = 12] Therefore, the sEMG data mentioned mostly in this paper represents the time-frequency features. While some features share similarities, we found that incorporating a broader set of features significantly decreased the accuracy of our linear regression model.",
84
+ "perturbed_explanation": "1. The original explanation indicates that by broadening the set of features, the accuracy of the linear regression model is enhanced. 2. The statement contradicts this finding by claiming that incorporating a broader set of features significantly decreased the model's accuracy, which is inconsistent with the intended observation of feature set expansion improving model performance."
85
  }
86
  },
87
  {
 
101
  "[paragraph id = 2] The specific neural network diagram is shown in Figure 6 and Table 2 ."
102
  ],
103
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span><span class=\"ltx_text\" id=\"S2.T2.11.1\" style=\"font-size:89%;\">Models and Their Order</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T2.7\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.1.1\" style=\"font-size:90%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.2\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.2.1\" style=\"font-size:90%;\">Models</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.3\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.3.1\" style=\"font-size:90%;\">Core Formulas</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.8.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.1.1.1\" style=\"font-size:90%;\">Systems</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.2.1.1\" style=\"font-size:90%;\">Fit by</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.3.1.1\" style=\"font-size:90%;\">the 
Model</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.3\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.2.2.3.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.3.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.1.1\">Near-</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.2.1\">linear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.1.1.1\" style=\"font-size:90%;\">DD</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.2.1.1\" style=\"font-size:90%;\">(one</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.3.1.1\" style=\"font-size:90%;\">layer)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.2.2.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.2.2\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.1.1.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.2.2.2\">\n<td class=\"ltx_td 
ltx_nopad_r ltx_align_left\" id=\"S2.T2.2.2.2.2.2.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.1.1.1\" style=\"font-size:90%;\">Second-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.2\"><span class=\"ltx_text\" id=\"S2.T2.3.3.2.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.3.3.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.3.3.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.3.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.1.1.1\" style=\"font-size:90%;\">First-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.2.1.1\" 
style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.4.4.2\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.4.4.2.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.2.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.1.1\">Nonlinear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.2.1\">complex</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.3\"><span class=\"ltx_text\" id=\"S2.T2.4.4.3.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.4.4.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.4.4.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.2.1\"><span 
class=\"ltx_text\" id=\"S2.T2.4.4.4.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.4\"><span class=\"ltx_text\" id=\"S2.T2.7.7.4.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.3.3\">\n<tr class=\"ltx_tr\" id=\"S2.T2.5.5.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.5.5.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.6.6.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.6.6.2.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.3.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.7.7.3.3.3.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.3.1.1\" 
style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n</tbody>\n</table>\n</figure>",
104
+ "perturb_sentence_id": [
105
+ 2,
106
+ 3
107
+ ],
108
  "output": {
109
+ "perturbed_statement": "[paragraph id = 2] The specific neural network diagram is shown in Figure 8 and Table 2.",
110
+ "perturbed_explanation": "The original explanation states that the specific neural network diagram is shown in Figure 6 and Table 2, indicating that the diagram and data are located in these specified sections. 1. The statement incorrectly identifies Figure 8 instead of Figure 6 as the location of the specific neural network diagram, which could lead to confusion or misinterpretation of where to find the correct information in the context."
111
  }
112
  },
113
  {
 
135
  "[paragraph id = 4] Its formula can be seen in the Table 2 , and the one-layer DD model is capable of fitting up to a second-order system."
136
  ],
137
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span><span class=\"ltx_text\" id=\"S2.T2.11.1\" style=\"font-size:89%;\">Models and Their Order</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T2.7\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.1.1\" style=\"font-size:90%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.2\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.2.1\" style=\"font-size:90%;\">Models</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.3\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.3.1\" style=\"font-size:90%;\">Core Formulas</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S2.T2.7.8.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.8.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.1.1.1\" style=\"font-size:90%;\">Systems</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.2.1.1\" style=\"font-size:90%;\">Fit by</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.8.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.8.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.8.1.4.1.3.1.1\" style=\"font-size:90%;\">the 
Model</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.3\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.2.2.3.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.3.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.1.1\">Near-</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.2.1\">linear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.2.2.3.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.3.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.1.1.1\" style=\"font-size:90%;\">DD</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.2.1.1\" style=\"font-size:90%;\">(one</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.4.1.3.1.1\" style=\"font-size:90%;\">layer)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.2.2.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.2.2\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.1.1.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.2.2.2\">\n<td class=\"ltx_td 
ltx_nopad_r ltx_align_left\" id=\"S2.T2.2.2.2.2.2.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.2.2.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.2.2.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.1.1.1\" style=\"font-size:90%;\">Second-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.2.2.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.2.2.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.2.2.5.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.2\"><span class=\"ltx_text\" id=\"S2.T2.3.3.2.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.3.3.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.3.3.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.3.3.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.3.3.3.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.1.1.1\" style=\"font-size:90%;\">First-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.2.1.1\" 
style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.3.3.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.3.3.3.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.3.3.3.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.4.4.2\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S2.T2.4.4.2.1\" style=\"font-size:90%;\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.2.1.1\">\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.1.1\">Nonlinear</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.2.1\">complex</span></span>\n<span class=\"ltx_tr\" id=\"S2.T2.4.4.2.1.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.2.1.1.3.1\">model</span></span>\n</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.3\"><span class=\"ltx_text\" id=\"S2.T2.4.4.3.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T2.4.4.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.4.4.1.1.1.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.4.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.4.4.4.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.2.1\"><span 
class=\"ltx_text\" id=\"S2.T2.4.4.4.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.4.4.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.4.4.4.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.4.4.4.1.3.1.1\" style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.4\"><span class=\"ltx_text\" id=\"S2.T2.7.7.4.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.3.3\">\n<tr class=\"ltx_tr\" id=\"S2.T2.5.5.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.5.5.1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.6.6.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.6.6.2.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.3.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T2.7.7.3.3.3.1\"></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T2.7.7.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.7.7.5.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.1.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.1.1.1\" style=\"font-size:90%;\">High-</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.2.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.2.1.1\" style=\"font-size:90%;\">order</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.7.7.5.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S2.T2.7.7.5.1.3.1\"><span class=\"ltx_text\" id=\"S2.T2.7.7.5.1.3.1.1\" 
style=\"font-size:90%;\">system</span></td>\n</tr>\n</table>\n</td>\n</tr>\n</tbody>\n</table>\n</figure>",
138
+ "perturb_sentence_id": [
139
+ 4,
140
+ 5
141
+ ],
142
  "output": {
143
+ "perturbed_statement": "[paragraph id = 4] Its formula can be seen in Table 3, and the one-layer DD model is capable of fitting up to a first-order system. Its excellent generalization ability and low computational complexity are the main reasons for our choice.",
144
+ "perturbed_explanation": "The original explanation indicates that the model is capable of fitting up to a second-order system. 1. The statement incorrectly claims the model is capable of fitting up to a first-order system, which means it cannot handle second-order systems. This contradicts the context, which states the model is explicitly capable of accommodating up to a second-order system, thus incorrectly lowering the system order the model can handle. 2. The statement inaccurately references Table 3 instead of Table 2, where the formula can actually be found. This misreference also makes the statement factually incorrect."
145
  }
146
  },
147
  {
 
172
  "[paragraph id = 7] It is proved that the output of the models constructed by DD, LN, MLP and CNN can well estimate the direction of finger force."
173
  ],
174
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span><span class=\"ltx_text\" id=\"S3.T3.4.1\" style=\"font-size:89%;\">Offline Analyses Results</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T3.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.1.1\" style=\"font-size:90%;\">Output</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.2\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.2.1\" style=\"font-size:90%;\">Method</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.5.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Area Under the</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Curve (AUC)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.5.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.4.1.1.1.1\" 
style=\"font-size:90%;\">Standard</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.5.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Error (SE)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T3.5.1.1.5\"><span class=\"ltx_text\" id=\"S3.T3.5.1.1.5.1\" style=\"font-size:90%;\">Accuracy</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T3.5.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.1.1\" style=\"font-size:90%;\">L1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.2\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.3\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.3.1\" style=\"font-size:90%;\">0.977887</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.4\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.4.1\" style=\"font-size:90%;\">0.000449</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.2.1.5\"><span class=\"ltx_text\" id=\"S3.T3.5.2.1.5.1\" style=\"font-size:90%;\">92.22%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.3.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.1\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.2\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.2.1\" style=\"font-size:90%;\">0.929772</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.3.2.3\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.3.1\" style=\"font-size:90%;\">0.000804</span></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T3.5.3.2.4\"><span class=\"ltx_text\" id=\"S3.T3.5.3.2.4.1\" style=\"font-size:90%;\">85.01%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.4.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.1\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.2\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.2.1\" style=\"font-size:90%;\">0.993835</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.3\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.3.1\" style=\"font-size:90%;\">0.000250</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.4.3.4\"><span class=\"ltx_text\" id=\"S3.T3.5.4.3.4.1\" style=\"font-size:90%;\">96.63%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.5.4\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.1\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.2\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.2.1\" style=\"font-size:90%;\">0.999411</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.3\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.3.1\" style=\"font-size:90%;\">0.000073</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.5.4.4\"><span class=\"ltx_text\" id=\"S3.T3.5.5.4.4.1\" style=\"font-size:90%;\">99.15%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.6.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.1.1\" style=\"font-size:90%;\">L2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.2\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.3\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.3.1\" style=\"font-size:90%;\">0.972789</span></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.4\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.4.1\" style=\"font-size:90%;\">0.000552</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.6.5.5\"><span class=\"ltx_text\" id=\"S3.T3.5.6.5.5.1\" style=\"font-size:90%;\">90.84%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.7.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.1\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.2\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.2.1\" style=\"font-size:90%;\">0.942453</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.3\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.3.1\" style=\"font-size:90%;\">0.000798</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.7.6.4\"><span class=\"ltx_text\" id=\"S3.T3.5.7.6.4.1\" style=\"font-size:90%;\">86.07%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.8.7\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.1\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.2\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.2.1\" style=\"font-size:90%;\">0.988339</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.3\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.3.1\" style=\"font-size:90%;\">0.000382</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.8.7.4\"><span class=\"ltx_text\" id=\"S3.T3.5.8.7.4.1\" style=\"font-size:90%;\">94.50%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.9.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.1\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.2\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.2.1\" style=\"font-size:90%;\">0.998866</span></td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.3\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.3.1\" style=\"font-size:90%;\">0.000113</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.9.8.4\"><span class=\"ltx_text\" id=\"S3.T3.5.9.8.4.1\" style=\"font-size:90%;\">98.66%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.10.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.1.1\" style=\"font-size:90%;\">L3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.2\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.3\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.3.1\" style=\"font-size:90%;\">0.982602</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.4\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.4.1\" style=\"font-size:90%;\">0.000398</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.10.9.5\"><span class=\"ltx_text\" id=\"S3.T3.5.10.9.5.1\" style=\"font-size:90%;\">93.79%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.11.10\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.1\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.2\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.2.1\" style=\"font-size:90%;\">0.968013</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.3\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.3.1\" style=\"font-size:90%;\">0.000541</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.11.10.4\"><span class=\"ltx_text\" id=\"S3.T3.5.11.10.4.1\" style=\"font-size:90%;\">91.45%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.12.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.1\"><span 
class=\"ltx_text\" id=\"S3.T3.5.12.11.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.2\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.2.1\" style=\"font-size:90%;\">0.992689</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.3\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.3.1\" style=\"font-size:90%;\">0.000272</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.12.11.4\"><span class=\"ltx_text\" id=\"S3.T3.5.12.11.4.1\" style=\"font-size:90%;\">96.18%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.13.12\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.1\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.2\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.2.1\" style=\"font-size:90%;\">0.999116</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.3\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.3.1\" style=\"font-size:90%;\">0.000089</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.13.12.4\"><span class=\"ltx_text\" id=\"S3.T3.5.13.12.4.1\" style=\"font-size:90%;\">98.85%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.14.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.1.1\" style=\"font-size:90%;\">L4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.2\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.3\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.3.1\" style=\"font-size:90%;\">0.967460</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.4\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.4.1\" style=\"font-size:90%;\">0.000506</span></td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S3.T3.5.14.13.5\"><span class=\"ltx_text\" id=\"S3.T3.5.14.13.5.1\" style=\"font-size:90%;\">90.94%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.15.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.1\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.2\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.2.1\" style=\"font-size:90%;\">0.919576</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.3\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.3.1\" style=\"font-size:90%;\">0.000812</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.15.14.4\"><span class=\"ltx_text\" id=\"S3.T3.5.15.14.4.1\" style=\"font-size:90%;\">84.78%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.16.15\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.1\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.1.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.2\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.2.1\" style=\"font-size:90%;\">0.989969</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.3\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.3.1\" style=\"font-size:90%;\">0.000292</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.16.15.4\"><span class=\"ltx_text\" id=\"S3.T3.5.16.15.4.1\" style=\"font-size:90%;\">95.55%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.17.16\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.1\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.2\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.2.1\" style=\"font-size:90%;\">0.999032</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.3\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.3.1\" 
style=\"font-size:90%;\">0.000086</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.17.16.4\"><span class=\"ltx_text\" id=\"S3.T3.5.17.16.4.1\" style=\"font-size:90%;\">98.84%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.18.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T3.5.18.17.1\" rowspan=\"4\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.1.1\" style=\"font-size:90%;\">L5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.2\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.3\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.3.1\" style=\"font-size:90%;\">0.980862</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.4\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.4.1\" style=\"font-size:90%;\">0.000529</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.5.18.17.5\"><span class=\"ltx_text\" id=\"S3.T3.5.18.17.5.1\" style=\"font-size:90%;\">93.94%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.19.18\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.1\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.1.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.2\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.2.1\" style=\"font-size:90%;\">0.955773</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.3\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.3.1\" style=\"font-size:90%;\">0.000797</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.19.18.4\"><span class=\"ltx_text\" id=\"S3.T3.5.19.18.4.1\" style=\"font-size:90%;\">90.28%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.20.19\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.1\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.1.1\" 
style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.2\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.2.1\" style=\"font-size:90%;\">0.992517</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.3\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.3.1\" style=\"font-size:90%;\">0.000351</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.5.20.19.4\"><span class=\"ltx_text\" id=\"S3.T3.5.20.19.4.1\" style=\"font-size:90%;\">96.65%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.5.21.20\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.1\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.1.1\" style=\"font-size:90%;\">CNN</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.2\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.2.1\" style=\"font-size:90%;\">0.998840</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.3\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.3.1\" style=\"font-size:90%;\">0.000131</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.5.21.20.4\"><span class=\"ltx_text\" id=\"S3.T3.5.21.20.4.1\" style=\"font-size:90%;\">98.67%</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
175
+ "perturb_sentence_id": [
176
+ 6,
177
+ 7
178
+ ],
179
  "output": {
180
+ "perturbed_statement": "[paragraph id = 6] The AUC values of these models are all below 0.5, indicating that the output of the models constructed by DD, LN, MLP, and CNN cannot estimate the direction of finger force.",
181
+ "perturbed_explanation": "1. The original statement indicates that the AUC values of the models are all over 0.9, suggesting a high level of accuracy and that the model's output can reliably estimate the direction of finger force. 2. The statement incorrectly claims that the AUC values are all below 0.5, which would imply poor model performance, but this contradicts the original information provided in paragraph 5, where the model demonstrates good performance in predicting the direction of finger force."
182
  }
183
  },
184
  {
 
218
  "[paragraph id = 15] MLP and CNN made 61 errors and 35 errors respectively (see supplementary material)."
219
  ],
220
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T4\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span><span class=\"ltx_text\" id=\"S3.T4.4.1\" style=\"font-size:89%;\">Statistics Analysis in Fitting Result</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T4.5\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T4.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.1\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.1.1\" style=\"font-size:90%;\">Network</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.2\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.2.1\" style=\"font-size:90%;\">DD</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.3\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.3.1\" style=\"font-size:90%;\">LN</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.4\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.4.1\" style=\"font-size:90%;\">MLP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt ltx_border_tt ltx_border_t\" id=\"S3.T4.5.1.1.5\"><span class=\"ltx_text\" id=\"S3.T4.5.1.1.5.1\" style=\"font-size:90%;\">CNN</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S3.T4.5.2.2.1\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.1.1\" style=\"font-size:90%;\">Error Times</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.2\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.2.1\" style=\"font-size:90%;\">10</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.3\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.3.1\" 
style=\"font-size:90%;\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.4\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.4.1\" style=\"font-size:90%;\">65</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.5.2.2.5\"><span class=\"ltx_text\" id=\"S3.T4.5.2.2.5.1\" style=\"font-size:90%;\">42</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.5.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.1\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.1.1\" style=\"font-size:90%;\">Correct rate</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.2\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.2.1\" style=\"font-size:90%;\">90%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.3\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.3.1\" style=\"font-size:90%;\">92%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.4\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.4.1\" style=\"font-size:90%;\">35%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S3.T4.5.3.3.5\"><span class=\"ltx_text\" id=\"S3.T4.5.3.3.5.1\" style=\"font-size:90%;\">58%</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
221
+ "perturb_sentence_id": [
222
+ 13,
223
+ 14
224
+ ],
225
  "output": {
226
+ "perturbed_statement": "[paragraph id = 13] (For example, (c) and (d) in Figure 10 ) We counted all the results of 20 subjects and the results are shown in Table 4. Among the 100 fitting results of each machine learning algorithm, MLP made 2 errors and CNN made 4 errors.",
227
+ "perturbed_explanation": "Original Explanation: The statement discusses the number of errors made by different machine learning algorithms based on a specific test involving 20 subjects, with results depicted in Figure 10 and Table 4. 1. DD and LN fit models nearly linearly and monotonically, correctly filling intermediate values with minimal errors. 2. In this context, the actual errors for MLP and CNN aren't provided; instead, it's stated that DD had 2 errors, and LN had 4. Altered Explanation: 3. The altered statement incorrectly attributes the number of errors to MLP and CNN instead of DD and LN. It inaccurately claims MLP and CNN were evaluated in this specific instance, which is misleading given the original context refers to DD and LN with those particular error counts."
228
  }
229
  }
230
  ]
table_result/2407.00017v1_output.json CHANGED
@@ -17,10 +17,13 @@
17
  "[paragraph id = 3] It should be observed that the conversion is an efficient process: the rather large dataset Helskinki from Table 1 , which contains more than \\qty77000 buildings and whose CityJSON file is \\qty572\\mega, takes only \\qty4.7sec to be converted to a CityJSONSeq file, and the reverse operation takes \\qty5.7sec (on a standard laptop)."
18
  ],
19
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T1.28.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S5.T1.29.2\" style=\"font-size:90%;\">The datasets used for the benchmark. </span></figcaption><div class=\"ltx_flex_figure\">\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<table class=\"ltx_tabular ltx_centering ltx_figure_panel ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.26\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.26.27.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S5.T1.26.27.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S5.T1.26.27.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.3.1\" style=\"font-size:80%;\">dataset</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S5.T1.26.27.1.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.5.1\" style=\"font-size:80%;\">size of file</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S5.T1.26.27.1.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.7.1\" style=\"font-size:80%;\">vertices</span></th>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T1.4.4\">\n<td class=\"ltx_td\" id=\"S5.T1.4.4.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.7.1\" style=\"font-size:80%;\">CityObjects</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.1.1.1.1\" style=\"font-size:80%;\">app.</span>\n</th>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.9.1\" style=\"font-size:80%;\">CityJSON</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.10.1\" style=\"font-size:80%;\">CityJSONSeq</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.2.2.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.2.2.2.1\" style=\"font-size:80%;\">compr.</span>\n</th>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.12.1\" style=\"font-size:80%;\">total</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.3.3.3\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.3.3.3.1\" style=\"font-size:80%;\">largest</span>\n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.4.4.4.1\" style=\"font-size:80%;\">shared</span>\n</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.6.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S5.T1.6.6.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.6.6.3.1\" style=\"font-size:80%;\">3DBAG</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.5.2\" style=\"font-size:80%;\">1110 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.8.2\" style=\"font-size:80%;\">6.7</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.9.2\" style=\"font-size:80%;\">5.9</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td 
ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.6.6.10.1\" style=\"font-size:80%;\">12%</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.5.5.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.6.6.12.1\" style=\"font-size:80%;\">0.1%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.8.8\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.8.8.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.8.8.3.1\" style=\"font-size:80%;\">3DBV</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.8.8.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.5.2\" style=\"font-size:80%;\">71634 misc</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.8.8.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.8.2\" style=\"font-size:80%;\">378</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.9\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.9.2\" style=\"font-size:80%;\">317</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.8.8.10.1\" style=\"font-size:80%;\">16%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.8.8.12.1\" style=\"font-size:80%;\">21.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.10.10\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.10.10.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.10.10.3.1\" style=\"font-size:80%;\">Helsinki</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.10.10.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.5.2\" style=\"font-size:80%;\">77231 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.10.10.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" 
id=\"S5.T1.10.10.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.8.2\" style=\"font-size:80%;\">572</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.9.2\" style=\"font-size:80%;\">412</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.10.10.10.1\" style=\"font-size:80%;\">28%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.9.9.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.10.10.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.12.12\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.12.12.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.12.12.3.1\" style=\"font-size:80%;\">Helsinki_tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.5.2\" style=\"font-size:80%;\">77231 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.6\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.8.2\" style=\"font-size:80%;\">713</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.9.2\" style=\"font-size:80%;\">644</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.10.1\" style=\"font-size:80%;\">10%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.11.11.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.14.14\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.14.14.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.14.14.3.1\" style=\"font-size:80%;\">Ingolstadt</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.4\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.14.14.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.5.2\" style=\"font-size:80%;\">55 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.14.14.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.8.2\" style=\"font-size:80%;\">4.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.9.2\" style=\"font-size:80%;\">3.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.14.14.10.1\" style=\"font-size:80%;\">25%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.13.13.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.14.14.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T1.16.16\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.16.16.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.16.16.3.1\" style=\"font-size:80%;\">Montréal</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.5.2\" style=\"font-size:80%;\">294 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.8.2\" style=\"font-size:80%;\">5.4</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.9.2\" style=\"font-size:80%;\">4.6</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.10.1\" style=\"font-size:80%;\">15%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.15.15.1\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.12.1\" style=\"font-size:80%;\">2.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.18.18\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.18.18.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.18.18.3.1\" style=\"font-size:80%;\">NYC</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.18.18.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.5.2\" style=\"font-size:80%;\">23777 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.18.18.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.8.2\" style=\"font-size:80%;\">105</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.9.2\" style=\"font-size:80%;\">95</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.10\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.18.18.10.1\" style=\"font-size:80%;\">10%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.17.17.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.18.18.12.1\" style=\"font-size:80%;\">0.8%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.20.20\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.20.20.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.20.20.3.1\" style=\"font-size:80%;\">Railway</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.5.2\" style=\"font-size:80%;\">50 misc</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.6.1\" style=\"font-size:80%;\">tex+mat</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.8.2\" style=\"font-size:80%;\">4.3</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" 
id=\"S5.T1.20.20.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.9.2\" style=\"font-size:80%;\">4.0</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.10.1\" style=\"font-size:80%;\">8%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.19.19.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.12.1\" style=\"font-size:80%;\">0.4%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.22.22\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.22.22.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.22.22.3.1\" style=\"font-size:80%;\">Rotterdam</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.5.2\" style=\"font-size:80%;\">853 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.8.2\" style=\"font-size:80%;\">2.6</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.9.2\" style=\"font-size:80%;\">2.7</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.10.1\" style=\"font-size:80%;\">-4%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.21.21.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.12.1\" style=\"font-size:80%;\">20.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.24.24\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.24.24.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.24.24.3.1\" style=\"font-size:80%;\">Vienna</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.24.24.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.5.1\">\\qty</span><span class=\"ltx_text\" 
id=\"S5.T1.24.24.5.2\" style=\"font-size:80%;\">307 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.24.24.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.24.24.8.2\" style=\"font-size:80%;\">5.4</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.24.24.9.2\" style=\"font-size:80%;\">4.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.24.24.10.1\" style=\"font-size:80%;\">11%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.23.23.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.24.24.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.26.26\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S5.T1.26.26.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.26.3.1\" style=\"font-size:80%;\">Zürich</span></td>\n<td class=\"ltx_td 
ltx_border_bb\" id=\"S5.T1.26.26.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.26.26.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.5.2\" style=\"font-size:80%;\">52834 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.26.26.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.8.2\" style=\"font-size:80%;\">279</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.9.2\" style=\"font-size:80%;\">247</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.26.26.10.1\" style=\"font-size:80%;\">11%</span></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.25.25.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.12\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.26.26.12.1\" style=\"font-size:80%;\">2.6%</span></td>\n</tr>\n</tbody>\n</table>\n</div>\n<div class=\"ltx_flex_break\"></div>\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<ul class=\"ltx_itemize ltx_centering ltx_figure_panel\" id=\"S5.I2\">\n<li class=\"ltx_item\" id=\"S5.I2.i1\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i1.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i1.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i1.p1.1.1\" style=\"font-size:80%;\"> appearance: ‘tex’ is textures stored; ‘mat’ is material stored</span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i2\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i2.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i2.p1.2\"><span class=\"ltx_text\" id=\"S5.I2.i2.p1.2.1\" style=\"font-size:80%;\"> compression factor is </span><span class=\"ltx_text\" id=\"S5.I2.i2.p1.2.2\" style=\"font-size:80%;\"></span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i3\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i3.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i3.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i3.p1.1.1\" style=\"font-size:80%;\"> number of vertices in the largest feature of the stream</span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i4\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i4.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i4.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i4.p1.1.1\" style=\"font-size:80%;\"> percentage of vertices that are used to represent different city objects</span></p>\n</div>\n</li>\n</ul>\n</div>\n</div>\n</figure>",
20
- "perturb_sentence_id": 3,
 
 
 
21
  "output": {
22
- "perturbed_statement": "[paragraph id = 3] It should be observed that the conversion is an efficient process: the rather large dataset Helsinki from Table 1, which contains more than \\qty67000 buildings and whose CityJSON file is \\qty562\\mega, takes only \\qty14.7sec to be converted to a CityJSONSeq file, and the reverse operation takes \\qty15.7sec (on a standard laptop).",
23
- "perturbed_explanation": "Original Explanation: The statement highlights the efficiency of the conversion process between CityJSON and CityJSONSeq, noting specific dataset characteristics and timing measurements. Altered Explanation: However, the statement introduces inaccuracies: 1. The dataset 'Helsinki' originally contains over \\qty77000 buildings, not \\qty67000. 2. The size of the CityJSON file is \\qty572\\mega, not \\qty562\\mega. 3. The conversion times are \\qty4.7sec and \\qty5.7sec, not \\qty14.7sec and \\qty15.7sec. These discrepancies lead to an incorrect portrayal of the dataset and its conversion process."
24
  }
25
  }
26
  ]
 
17
  "[paragraph id = 3] It should be observed that the conversion is an efficient process: the rather large dataset Helskinki from Table 1 , which contains more than \\qty77000 buildings and whose CityJSON file is \\qty572\\mega, takes only \\qty4.7sec to be converted to a CityJSONSeq file, and the reverse operation takes \\qty5.7sec (on a standard laptop)."
18
  ],
19
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T1.28.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S5.T1.29.2\" style=\"font-size:90%;\">The datasets used for the benchmark. </span></figcaption><div class=\"ltx_flex_figure\">\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<table class=\"ltx_tabular ltx_centering ltx_figure_panel ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.26\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.26.27.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S5.T1.26.27.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S5.T1.26.27.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.3.1\" style=\"font-size:80%;\">dataset</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S5.T1.26.27.1.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.5.1\" style=\"font-size:80%;\">size of file</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T1.26.27.1.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S5.T1.26.27.1.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.27.1.7.1\" style=\"font-size:80%;\">vertices</span></th>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T1.4.4\">\n<td class=\"ltx_td\" id=\"S5.T1.4.4.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.7.1\" style=\"font-size:80%;\">CityObjects</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.1.1.1.1\" style=\"font-size:80%;\">app.</span>\n</th>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.9.1\" style=\"font-size:80%;\">CityJSON</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.10.1\" style=\"font-size:80%;\">CityJSONSeq</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.2.2.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.2.2.2.1\" style=\"font-size:80%;\">compr.</span>\n</th>\n<th class=\"ltx_td ltx_th ltx_th_column\" id=\"S5.T1.4.4.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.4.4.12.1\" style=\"font-size:80%;\">total</span></th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.3.3.3\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.3.3.3.1\" style=\"font-size:80%;\">largest</span>\n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_border_t\" id=\"S5.T1.4.4.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_text\" id=\"S5.T1.4.4.4.1\" style=\"font-size:80%;\">shared</span>\n</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.6.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S5.T1.6.6.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.6.6.3.1\" style=\"font-size:80%;\">3DBAG</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.5.2\" style=\"font-size:80%;\">1110 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.8.2\" style=\"font-size:80%;\">6.7</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.6.6.9.2\" style=\"font-size:80%;\">5.9</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.6.6.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td 
ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.6.6.10.1\" style=\"font-size:80%;\">12%</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S5.T1.6.6.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T1.5.5.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S5.T1.6.6.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.6.6.12.1\" style=\"font-size:80%;\">0.1%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.8.8\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.8.8.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.8.8.3.1\" style=\"font-size:80%;\">3DBV</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.8.8.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.5.2\" style=\"font-size:80%;\">71634 misc</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.8.8.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.8.2\" style=\"font-size:80%;\">378</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.9\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.8.8.9.2\" style=\"font-size:80%;\">317</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.8.8.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.8.8.10.1\" style=\"font-size:80%;\">16%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.8.8.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.8.8.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.8.8.12.1\" style=\"font-size:80%;\">21.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.10.10\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.10.10.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.10.10.3.1\" style=\"font-size:80%;\">Helsinki</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.10.10.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.5.2\" style=\"font-size:80%;\">77231 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.10.10.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" 
id=\"S5.T1.10.10.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.8.2\" style=\"font-size:80%;\">572</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.10.10.9.2\" style=\"font-size:80%;\">412</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.10.10.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.10.10.10.1\" style=\"font-size:80%;\">28%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.10.10.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.9.9.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.10.10.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.10.10.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.12.12\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.12.12.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.12.12.3.1\" style=\"font-size:80%;\">Helsinki_tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.5.2\" style=\"font-size:80%;\">77231 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.6\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.12.12.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.8.2\" style=\"font-size:80%;\">713</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.12.12.9.2\" style=\"font-size:80%;\">644</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.12.12.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.10.1\" style=\"font-size:80%;\">10%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.12.12.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.11.11.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.12.12.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.12.12.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.14.14\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.14.14.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.14.14.3.1\" style=\"font-size:80%;\">Ingolstadt</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.4\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.14.14.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.5.2\" style=\"font-size:80%;\">55 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.14.14.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.8.2\" style=\"font-size:80%;\">4.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.14.14.9.2\" style=\"font-size:80%;\">3.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.14.14.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.14.14.10.1\" style=\"font-size:80%;\">25%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.14.14.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.13.13.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.14.14.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.14.14.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T1.16.16\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.16.16.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.16.16.3.1\" style=\"font-size:80%;\">Montréal</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.5.2\" style=\"font-size:80%;\">294 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.16.16.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.8.2\" style=\"font-size:80%;\">5.4</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.16.16.9.2\" style=\"font-size:80%;\">4.6</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.16.16.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.10.1\" style=\"font-size:80%;\">15%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.16.16.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.15.15.1\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.16.16.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.16.16.12.1\" style=\"font-size:80%;\">2.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.18.18\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.18.18.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.18.18.3.1\" style=\"font-size:80%;\">NYC</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.18.18.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.5.2\" style=\"font-size:80%;\">23777 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.18.18.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.8.2\" style=\"font-size:80%;\">105</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.18.18.9.2\" style=\"font-size:80%;\">95</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.18.18.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.10\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.18.18.10.1\" style=\"font-size:80%;\">10%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.18.18.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.17.17.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.18.18.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.18.18.12.1\" style=\"font-size:80%;\">0.8%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.20.20\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.20.20.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.20.20.3.1\" style=\"font-size:80%;\">Railway</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.5.2\" style=\"font-size:80%;\">50 misc</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.6.1\" style=\"font-size:80%;\">tex+mat</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.20.20.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.8.2\" style=\"font-size:80%;\">4.3</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" 
id=\"S5.T1.20.20.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.20.20.9.2\" style=\"font-size:80%;\">4.0</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.20.20.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.10.1\" style=\"font-size:80%;\">8%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.20.20.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.19.19.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.20.20.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.20.20.12.1\" style=\"font-size:80%;\">0.4%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.22.22\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.22.22.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.22.22.3.1\" style=\"font-size:80%;\">Rotterdam</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.5.2\" style=\"font-size:80%;\">853 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.6.1\" style=\"font-size:80%;\">tex</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T1.22.22.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.8.2\" style=\"font-size:80%;\">2.6</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.22.22.9.2\" style=\"font-size:80%;\">2.7</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.22.22.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.10.1\" style=\"font-size:80%;\">-4%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.22.22.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.21.21.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.22.22.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.22.22.12.1\" style=\"font-size:80%;\">20.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.24.24\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T1.24.24.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.24.24.3.1\" style=\"font-size:80%;\">Vienna</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.24.24.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.5.1\">\\qty</span><span class=\"ltx_text\" 
id=\"S5.T1.24.24.5.2\" style=\"font-size:80%;\">307 bldgs</span>\n</td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.24.24.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.24.24.8.2\" style=\"font-size:80%;\">5.4</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.24.24.9.2\" style=\"font-size:80%;\">4.8</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.24.24.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.24.24.10.1\" style=\"font-size:80%;\">11%</span></td>\n<td class=\"ltx_td\" id=\"S5.T1.24.24.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T1.23.23.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S5.T1.24.24.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.24.24.12.1\" style=\"font-size:80%;\">0.0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.26.26\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S5.T1.26.26.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T1.26.26.3.1\" style=\"font-size:80%;\">Zürich</span></td>\n<td class=\"ltx_td 
ltx_border_bb\" id=\"S5.T1.26.26.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.26.26.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.5.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.5.2\" style=\"font-size:80%;\">52834 bldgs</span>\n</td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.26.26.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.8.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.8.2\" style=\"font-size:80%;\">279</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.8.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n<span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.9.1\">\\qty</span><span class=\"ltx_text\" id=\"S5.T1.26.26.9.2\" style=\"font-size:80%;\">247</span><span class=\"ltx_ERROR undefined\" id=\"S5.T1.26.26.9.3\">\\mega</span>\n</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.26.26.10.1\" style=\"font-size:80%;\">11%</span></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S5.T1.26.26.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T1.25.25.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S5.T1.26.26.12\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S5.T1.26.26.12.1\" style=\"font-size:80%;\">2.6%</span></td>\n</tr>\n</tbody>\n</table>\n</div>\n<div class=\"ltx_flex_break\"></div>\n<div class=\"ltx_flex_cell ltx_flex_size_1\">\n<ul class=\"ltx_itemize ltx_centering ltx_figure_panel\" id=\"S5.I2\">\n<li class=\"ltx_item\" id=\"S5.I2.i1\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i1.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i1.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i1.p1.1.1\" style=\"font-size:80%;\"> appearance: ‘tex’ is textures stored; ‘mat’ is material stored</span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i2\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i2.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i2.p1.2\"><span class=\"ltx_text\" id=\"S5.I2.i2.p1.2.1\" style=\"font-size:80%;\"> compression factor is </span><span class=\"ltx_text\" id=\"S5.I2.i2.p1.2.2\" style=\"font-size:80%;\"></span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i3\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i3.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i3.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i3.p1.1.1\" style=\"font-size:80%;\"> number of vertices in the largest feature of the stream</span></p>\n</div>\n</li>\n<li class=\"ltx_item\" id=\"S5.I2.i4\" style=\"list-style-type:none;\">\n<span class=\"ltx_tag ltx_tag_item\">•</span>\n<div class=\"ltx_para\" id=\"S5.I2.i4.p1\">\n<p class=\"ltx_p\" id=\"S5.I2.i4.p1.1\"><span class=\"ltx_text\" id=\"S5.I2.i4.p1.1.1\" style=\"font-size:80%;\"> percentage of vertices that are used to represent different city objects</span></p>\n</div>\n</li>\n</ul>\n</div>\n</div>\n</figure>",
20
+ "perturb_sentence_id": [
21
+ 3,
22
+ 4
23
+ ],
24
  "output": {
25
+ "perturbed_statement": "[paragraph id = 3] It should be observed that the conversion is an efficient process: the rather large dataset Helskinki from Table 1, which contains more than \\qty77000 buildings and whose CityJSON file is \\qty772\\mega, takes only \\qty4.7sec to be converted to a CityJSONSeq file, and the reverse operation takes \\qty5.7sec (on a standard laptop).",
26
+ "perturbed_explanation": "1. The original statement reflects the dataset Helskinki, containing more than 77,000 buildings, is efficiently converted from a CityJSON file of size 572 MB to a CityJSONSeq file in 4.7 seconds, and the reverse operation takes 5.7 seconds using a standard laptop.\n2. The statement mentions the dataset Helskinki, which indeed contains over 77,000 buildings. However, it incorrectly states the size of the CityJSON file as 772 MB instead of the correct 572 MB. This is important because 200 MB of size difference could imply a higher storage need than actually required, affecting the perceived efficiency and storage considerations for similar datasets."
27
  }
28
  }
29
  ]
table_result/2407.00023v2_output.json CHANGED
@@ -31,10 +31,13 @@
31
  "[paragraph id = 16] With the peak GPU processing rate (30-150 tokens per second decoding speed with Mistral 7B on A100) and our workloads output length (Table 1 ), one Preble global scheduler can sustain at least 70 to 391 concurrent A100 GPUs."
32
  ],
33
  "table_html": "<figure class=\"ltx_table\" id=\"A1.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"A1.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"A1.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.1.1\">Workload</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.2.1\">Prompt Len</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.3.1\">Output Len</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.4.1\">Shared Prefix</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.5.1\">KeyPort.</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.6\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.6.1\">Req Share KeyPort.</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_l ltx_border_r\" id=\"A1.T1.1.2.2.1\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.2\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.3\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.4\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.2.2.4.1\">in Prompt</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.5\"><span 
class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.2.2.5.1\">in Prompt</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.6\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"A1.T1.1.3.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.3.1.1.1\">Toolbench</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.2\">(1835, 742)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.3\">(43, 16)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.4\">(85%, 13%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.5\">(76%, 16%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.6\">(39, 64)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.4.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.4.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.4.2.1.1\">Embodied Agent</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.2\">(2285, 471)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.3\">(16, 13)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.4\">(97%, 14%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.5\">(76%, 12%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.6\">(48, 8)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.5.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.5.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.5.3.1.1\">Programming</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.2\">(3871, 1656)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.3\">(190, 
343)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.4\">(97%, 7.4%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.5\">(78%, 13%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.6\">(126, 2157)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.6.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.6.4.1.1\">Video QA</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.2\">(9865, 5976)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.3\">(4, 1.5)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.4\">(88%, 32%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.5\">(99%, 0.2%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.6\">(8.6, 2)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.7.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r\" id=\"A1.T1.1.7.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.7.5.1.1\">LooGLE</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.2\">(23474, 6105)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.3\">(16, 9.9)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.4\">(91%, 24%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.5\">(94%, 15%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.6\">(18, 8.6)</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.4.1\">LLM Prompt Properties</span> <span class=\"ltx_text ltx_font_italic\" id=\"A1.T1.5.2\">Each cell except for number of requests shows 
(mean, standard deviation). Length represented using number of tokens. “KeyPort.” stands for Key Portion.\n</span></figcaption>\n</figure>",
34
- "perturb_sentence_id": 16,
 
 
 
35
  "output": {
36
- "perturbed_statement": "[paragraph id = 16] With the peak GPU processing rate (25-150 tokens per second decoding speed with Mistral 7B on A100) and our workloads output length (Table 1 ), one Preble global scheduler can sustain at least 70 to 391 concurrent A100 GPUs. If accounting for prefill time or running bigger models, our scheduler would sustain even fewer GPUs.",
37
- "perturbed_explanation": "The original explanation is as follows: The statement highlights the efficiency of a specific scheduling system. This efficiency is supported by its ability to sustain a substantial number of GPUs simultaneously, especially under varying operational scenarios. Firstly, the processing rate specified (30-150 tokens per second) allows for consistent parallel processing. Secondly, considerations such as prefill time or model complexity augment the scheduler's sustaining capacities. The altered statement introduces an incorrect processing rate range (25-150 tokens per second) and mistakenly claims fewer GPUs could be supported when using larger models. Both these modifications contradict the factual dependencies and capacities described, undermining the representation of the system's capabilities."
38
  }
39
  }
40
  ]
 
31
  "[paragraph id = 16] With the peak GPU processing rate (30-150 tokens per second decoding speed with Mistral 7B on A100) and our workloads output length (Table 1 ), one Preble global scheduler can sustain at least 70 to 391 concurrent A100 GPUs."
32
  ],
33
  "table_html": "<figure class=\"ltx_table\" id=\"A1.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"A1.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"A1.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.1.1\">Workload</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.2.1\">Prompt Len</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.3.1\">Output Len</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.4.1\">Shared Prefix</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.5.1\">KeyPort.</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"A1.T1.1.1.1.6\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.1.1.6.1\">Req Share KeyPort.</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_l ltx_border_r\" id=\"A1.T1.1.2.2.1\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.2\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.3\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.4\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.2.2.4.1\">in Prompt</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.5\"><span 
class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.2.2.5.1\">in Prompt</span></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"A1.T1.1.2.2.6\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"A1.T1.1.3.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.3.1.1.1\">Toolbench</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.2\">(1835, 742)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.3\">(43, 16)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.4\">(85%, 13%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.5\">(76%, 16%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"A1.T1.1.3.1.6\">(39, 64)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.4.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.4.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.4.2.1.1\">Embodied Agent</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.2\">(2285, 471)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.3\">(16, 13)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.4\">(97%, 14%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.5\">(76%, 12%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.4.2.6\">(48, 8)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.5.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.5.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.5.3.1.1\">Programming</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.2\">(3871, 1656)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.3\">(190, 
343)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.4\">(97%, 7.4%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.5\">(78%, 13%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.5.3.6\">(126, 2157)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r\" id=\"A1.T1.1.6.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.6.4.1.1\">Video QA</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.2\">(9865, 5976)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.3\">(4, 1.5)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.4\">(88%, 32%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.5\">(99%, 0.2%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"A1.T1.1.6.4.6\">(8.6, 2)</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A1.T1.1.7.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r\" id=\"A1.T1.1.7.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.1.7.5.1.1\">LooGLE</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.2\">(23474, 6105)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.3\">(16, 9.9)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.4\">(91%, 24%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.5\">(94%, 15%)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r\" id=\"A1.T1.1.7.5.6\">(18, 8.6)</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span class=\"ltx_text ltx_font_bold\" id=\"A1.T1.4.1\">LLM Prompt Properties</span> <span class=\"ltx_text ltx_font_italic\" id=\"A1.T1.5.2\">Each cell except for number of requests shows 
(mean, standard deviation). Length represented using number of tokens. “KeyPort.” stands for Key Portion.\n</span></figcaption>\n</figure>",
34
+ "perturb_sentence_id": [
35
+ 16,
36
+ 17
37
+ ],
38
  "output": {
39
+ "perturbed_statement": "[paragraph id = 16] With the peak GPU processing rate (30-150 tokens per second decoding speed with Mistral 7B on A100) and our workloads output length (Table 1 ), one Preble global scheduler can sustain at least 70 to 191 concurrent A100 GPUs. If accounting for prefill time or running bigger models, our scheduler would sustain even more GPUs.",
40
+ "perturbed_explanation": "The original explanation explains the capacity of the Preble global scheduler in terms of concurrent A100 GPUs it can sustain. By reducing the upper limit of concurrent A100 GPUs from 391 to 191 in the statement, it underestimates the scheduler's actual capacity. 1. The original range of 70 to 391 concurrent GPUs accurately represents the scheduler's ability based on the provided processing rate and workload details. 2. By stating that the scheduler can only sustain a maximum of 191 concurrent GPUs, the statement incorrectly diminishes its true capability, which can impact understanding of the scheduler's efficiency and performance scalability."
41
  }
42
  }
43
  ]
table_result/2407.00025v1_output.json CHANGED
@@ -33,10 +33,13 @@
33
  "[paragraph id = 11] Having the most convenience in programming and design, supporting the files operations and superior data processing well, most importantly, being the native programming language of Scrapy, that is why we selected Python as the programming language and the stady direction of our research."
34
  ],
35
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Analysis of GitHub’s top 1,000 star sorting items.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.1\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.1\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.1.1\">language</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.2\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.2.1\">train</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.3\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.3.1\">framework</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.4\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.4.1\">relative</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.5\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.5.1\">graphic</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.6\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.6.1\">concurrency</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.2.1\">\n<th class=\"ltx_td 
ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.1\" style=\"padding:2.5pt 1.7pt;\">Python</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.2\" style=\"padding:2.5pt 1.7pt;\">17</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.3\" style=\"padding:2.5pt 1.7pt;\">6</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.4\" style=\"padding:2.5pt 1.7pt;\">30</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.5\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.6\" style=\"padding:2.5pt 1.7pt;\">6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.3.2\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.1\" style=\"padding:2.5pt 1.7pt;\">Golang</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.2\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.3\" style=\"padding:2.5pt 1.7pt;\">8</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.4\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.6\" style=\"padding:2.5pt 1.7pt;\">3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.4.3\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th 
ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.1\" style=\"padding:2.5pt 1.7pt;\">PHP</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.2\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.3\" style=\"padding:2.5pt 1.7pt;\">3</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.4\" style=\"padding:2.5pt 1.7pt;\">4</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.5.4\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.1\" style=\"padding:2.5pt 1.7pt;\">Java</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.4\" style=\"padding:2.5pt 1.7pt;\">3</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.5\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.6.5\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" 
id=\"S3.T1.1.6.5.1\" style=\"padding:2.5pt 1.7pt;\">JavaScript</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.4\" style=\"padding:2.5pt 1.7pt;\">10</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.5\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.7.6\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.1\" style=\"padding:2.5pt 1.7pt;\">C#</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.4\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n</tbody>\n</table>\n</figure>",
36
- "perturb_sentence_id": 2,
 
 
 
37
  "output": {
38
- "perturbed_statement": "[paragraph id = 2] To summary and further analyse the relative web crawling framework for Scrapy, we make a survey and statistics for the top 1,000 web spider frameworks that sorted by the count of monthly active users in a descending order, and deleted the mistaken searched items from them, the result is shown as Table 1 .The parameter means the language used to program, the parameter represents the number of projects that is used for actual training.",
39
- "perturbed_explanation": "The original paragraph mentions the selection criteria for web spider frameworks being based on their 'liked stars number.' This suggests the study utilized a framework's popularity measure through user endorsements or ratings. However, the statement alters this criterion to 'the count of monthly active users,' which is not indicated as a basis of metric demonstration. This change introduces an inconsistency in the interpretation of study methodology, which undermines the factual representation of the conducted analysis."
40
  }
41
  },
42
  {
@@ -73,10 +76,13 @@
73
  "[paragraph id = 7] From the result we can see that the framework Anywhere can improve the generation and configuration efficiency of using Scrapy at a good level."
74
  ],
75
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Experiments to test the performance of Anywhere.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S5.T2.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.1\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.1.1\">framework</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.2\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.2.1\">task</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.3\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.3.1\">config</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.4\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.4.1\">time /s</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.5\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.5.1\">comparison</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.4\" style=\"padding:2.5pt 2.3pt;\">4-9</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.4\" style=\"padding:2.5pt 2.3pt;\">2-5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.5\" style=\"padding:2.5pt 2.3pt;\">200%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.4\" style=\"padding:2.5pt 2.3pt;\">6-12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.5.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.4\" style=\"padding:2.5pt 2.3pt;\">3-6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.5\" style=\"padding:2.5pt 2.3pt;\">200%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.6.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.4\" style=\"padding:2.5pt 2.3pt;\">26-30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.4\" style=\"padding:2.5pt 2.3pt;\">9-10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.5\" style=\"padding:2.5pt 2.3pt;\">300%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.8.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.4\" style=\"padding:2.5pt 2.3pt;\">29-40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.9.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.4\" style=\"padding:2.5pt 2.3pt;\">10-13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.5\" style=\"padding:2.5pt 2.3pt;\">300%</td>\n</tr>\n</tbody>\n</table>\n</figure>",
76
- "perturb_sentence_id": 5,
 
 
 
77
  "output": {
78
- "perturbed_statement": "[paragraph id = 5] As shown in Table 2 , the count number of the multiple projects is 5.The value interval of comparison is 70%.",
79
- "perturbed_explanation": "1. The original statement provides specific values: a count of projects as 3 and a comparison value interval of 50%. 2. The statement's alteration to claim a count of projects as 5 and a comparison interval of 70% introduces inaccuracies for both figures. Since these altered details do not correspond to the context provided, they are incorrect."
80
  }
81
  }
82
  ]
 
33
  "[paragraph id = 11] Having the most convenience in programming and design, supporting the files operations and superior data processing well, most importantly, being the native programming language of Scrapy, that is why we selected Python as the programming language and the stady direction of our research."
34
  ],
35
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Analysis of GitHub’s top 1,000 star sorting items.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.1\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.1\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.1.1\">language</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.2\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.2.1\">train</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.3\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.3.1\">framework</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.4\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.4.1\">relative</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.5\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.5.1\">graphic</span></th>\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S3.T1.1.1.1.6\" style=\"padding:2.5pt 1.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.1.6.1\">concurrency</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.2.1\">\n<th class=\"ltx_td 
ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.1\" style=\"padding:2.5pt 1.7pt;\">Python</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.2\" style=\"padding:2.5pt 1.7pt;\">17</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.3\" style=\"padding:2.5pt 1.7pt;\">6</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.4\" style=\"padding:2.5pt 1.7pt;\">30</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.5\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.2.1.6\" style=\"padding:2.5pt 1.7pt;\">6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.3.2\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.1\" style=\"padding:2.5pt 1.7pt;\">Golang</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.2\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.3\" style=\"padding:2.5pt 1.7pt;\">8</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.4\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.3.2.6\" style=\"padding:2.5pt 1.7pt;\">3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.4.3\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th 
ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.1\" style=\"padding:2.5pt 1.7pt;\">PHP</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.2\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.3\" style=\"padding:2.5pt 1.7pt;\">3</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.4\" style=\"padding:2.5pt 1.7pt;\">4</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.4.3.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.5.4\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.1\" style=\"padding:2.5pt 1.7pt;\">Java</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.4\" style=\"padding:2.5pt 1.7pt;\">3</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.5\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.5.4.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.6.5\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" 
id=\"S3.T1.1.6.5.1\" style=\"padding:2.5pt 1.7pt;\">JavaScript</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.4\" style=\"padding:2.5pt 1.7pt;\">10</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.5\" style=\"padding:2.5pt 1.7pt;\">1</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.1.6.5.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.7.6\">\n<th class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.1\" style=\"padding:2.5pt 1.7pt;\">C#</th>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.2\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.3\" style=\"padding:2.5pt 1.7pt;\">2</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.4\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.5\" style=\"padding:2.5pt 1.7pt;\">0</td>\n<td class=\"ltx_td ltx_nopad_l ltx_nopad_r ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S3.T1.1.7.6.6\" style=\"padding:2.5pt 1.7pt;\">0</td>\n</tr>\n</tbody>\n</table>\n</figure>",
36
+ "perturb_sentence_id": [
37
+ 2,
38
+ 3
39
+ ],
40
  "output": {
41
+ "perturbed_statement": "[paragraph id = 2] To summary and further analyse the relative web crawling framework for Scrapy, we make a survey and statistics for the top 1,000 web spider frameworks that sorted by the liked comments number in a descending order, and deleted the mistaken searched items from them, the result is shown as Table 1. The parameter means the language used to program, the parameter represents the number of projects that is used for actual training.",
42
+ "perturbed_explanation": "1. The statement refers to a survey and statistics made for web spider frameworks related to Scrapy. 2. The statement incorrectly states that the frameworks were sorted by the liked comments number, whereas the context specifies that they were sorted by the liked stars number. Comments are not mentioned as a criterion in the survey, marking a factual inconsistency in the statement."
43
  }
44
  },
45
  {
 
76
  "[paragraph id = 7] From the result we can see that the framework Anywhere can improve the generation and configuration efficiency of using Scrapy at a good level."
77
  ],
78
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Experiments to test the performance of Anywhere.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S5.T2.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.1\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.1.1\">framework</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.2\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.2.1\">task</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.3\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.3.1\">config</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.4\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.4.1\">time /s</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.1.1.5\" style=\"padding:2.5pt 2.3pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.1.1.1.5.1\">comparison</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.4\" style=\"padding:2.5pt 2.3pt;\">4-9</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_r ltx_border_t\" id=\"S5.T2.1.2.2.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.3.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.4\" style=\"padding:2.5pt 2.3pt;\">2-5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.3.3.5\" style=\"padding:2.5pt 2.3pt;\">200%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.4\" style=\"padding:2.5pt 2.3pt;\">6-12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.4.4.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.5.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.2\" style=\"padding:2.5pt 2.3pt;\">Single Project</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.4\" style=\"padding:2.5pt 2.3pt;\">3-6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.5.5.5\" style=\"padding:2.5pt 2.3pt;\">200%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.6.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.4\" style=\"padding:2.5pt 2.3pt;\">26-30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.6.6.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.3\" style=\"padding:2.5pt 2.3pt;\">No</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.4\" style=\"padding:2.5pt 2.3pt;\">9-10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.7.7.5\" style=\"padding:2.5pt 2.3pt;\">300%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.8.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.1\" style=\"padding:2.5pt 2.3pt;\">Scrapy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.4\" style=\"padding:2.5pt 2.3pt;\">29-40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.1.8.8.5\" style=\"padding:2.5pt 2.3pt;\">100%</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.1.9.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.1\" style=\"padding:2.5pt 2.3pt;\">Anywhere</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.2\" style=\"padding:2.5pt 2.3pt;\">Multiple Projects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.3\" style=\"padding:2.5pt 2.3pt;\">Yes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.4\" style=\"padding:2.5pt 2.3pt;\">10-13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T2.1.9.9.5\" style=\"padding:2.5pt 2.3pt;\">300%</td>\n</tr>\n</tbody>\n</table>\n</figure>",
79
+ "perturb_sentence_id": [
80
+ 5,
81
+ 6
82
+ ],
83
  "output": {
84
+ "perturbed_statement": "[paragraph id = 5] As shown in Table 2, the count number of the multiple projects is 4. The value interval of comparison is 30%.",
85
+ "perturbed_explanation": "The count number of the multiple projects is 3, not 4, which makes the statement factually incorrect. 2. The value interval of comparison is noted to be large due to significant individual differences, and altering it to 30% could inaccurately reflect the variation intended for comparison."
86
  }
87
  }
88
  ]
table_result/2407.00035v1_output.json CHANGED
@@ -97,10 +97,13 @@
97
  "[paragraph id = 50] Metrics deliver objective information about the external interface of a system, e.g., video upload throughput."
98
  ],
99
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
100
- "perturb_sentence_id": 47,
 
 
 
101
  "output": {
102
- "perturbed_statement": "[paragraph id = 47] After optimising the code using the point-to-point approach [28] instead of brute force, the time spent on this operation was reduced to 1% of the original time. Metrics, logs, and traces carry different types of information, as can be seen in Table 1.",
103
- "perturbed_explanation": "Original Explanation: The code optimisation through implementing the point-in-polygon approach significantly reduced processing time to 1%.\n2. The statement claims that the point-to-point approach was used for optimisation, but this is incorrect; the optimisation was achieved via the point-in-polygon method, as stated."
104
  }
105
  },
106
  {
@@ -149,10 +152,13 @@
149
  "[paragraph id = 25] However, logs and traces are structured differently and will benefit from other storage solutions, such as inverted index-based storage, due to the type of queries that are usually made to retrieve meaningful information from them [32 ]."
150
  ],
151
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
152
- "perturb_sentence_id": 25,
 
 
 
153
  "output": {
154
- "perturbed_statement": "[paragraph id = 25] However, logs and traces are structured oppositely and will benefit from conventional relational database storage, due to the type of queries that are usually made to retrieve meaningful information from them [32 ].Therefore, an observability data ingestion service on the fog should consider the data requirements that each instrumentation domain needs (see Table 1 ), while allowing cross-analysis to be performed.",
155
- "perturbed_explanation": "The original explanation correctly notes that logs and traces are structured differently and require appropriate storage solutions, such as inverted indices, for efficient query handling. By stating that relational database storage is suitable for logs and traces, the statement introduces a factual error, as such databases may not optimally support the querying methods typically associated with these data types [32 ]. This discrepancy highlights the importance of aligned storage solutions for specific data structures."
156
  }
157
  },
158
  {
@@ -215,10 +221,13 @@
215
  "[paragraph id = 32] The experiments show that it is possible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment."
216
  ],
217
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
218
- "perturb_sentence_id": 32,
 
 
 
219
  "output": {
220
- "perturbed_statement": "[paragraph id = 32] The experiments show that it is impossible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment. Moreover, the overhead of deploying an observability data life cycle can be high, even if properly managed.",
221
- "perturbed_explanation": "The original explanation: Observability in Fog computing systems allows for enhanced monitoring and performance optimization without significant additional resource demands. 1. The statement claims that achieving higher observability is unfeasible, which contradicts evidence supporting its benefits and feasibility. 2. Additionally, the statement suggests that high overhead for an observability data life cycle is inevitable, which is inaccurate since proper management can minimize this overhead."
222
  }
223
  },
224
  {
@@ -291,10 +300,13 @@
291
  "[paragraph id = 32] The experiments show that it is possible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment."
292
  ],
293
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S6.T4.2.1.1\" style=\"font-size:90%;\">Table 4</span>: </span><span class=\"ltx_text\" id=\"S6.T4.3.2\" style=\"font-size:90%;\">Mobile IoT-Roadbot assessment of each observability domain .</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S6.T4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.1.1\" style=\"font-size:80%;\">Tool</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.2.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.3.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.3.1.1.1.1\" style=\"font-size:80%;\">Data</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.3.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.3.1.2.1.1\" style=\"font-size:80%;\">Collection</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row 
ltx_border_tt\" id=\"S6.T4.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.4.1\" style=\"font-size:80%;\">Frequency</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.5.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.5.1.1.1.1\" style=\"font-size:80%;\">Volume</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.5.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.5.1.2.1.1\" style=\"font-size:80%;\">by Hour</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.6.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.6.1.1.1.1\" style=\"font-size:80%;\">IoT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.6.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.6.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.7\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.7.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.7.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.7.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.7.1.1.1.1\" style=\"font-size:80%;\">Fog</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.7.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.7.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.7.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.8.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.8.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.8.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.8.1.1.1.1\" style=\"font-size:80%;\">Fog Volume</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.8.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.8.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.8.1.2.1.1\" style=\"font-size:80%;\">(1 week)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.9.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.9.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.9.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text 
ltx_font_bold\" id=\"S6.T4.4.1.1.9.1.1.1.1\" style=\"font-size:80%;\">Cloud</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.9.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.9.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.9.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.10.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.10.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.10.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.10.1.1.1.1\" style=\"font-size:80%;\">Cloud Vol.</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.10.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.10.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.10.1.2.1.1\" style=\"font-size:80%;\">(2 months)</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.2.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.2.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.2.1.1.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.1.1.1.1.1\" style=\"font-size:80%;\">Node Exporter</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.2.1\" style=\"font-size:80%;\">Metrics</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.3.1\" style=\"font-size:80%;\">65KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.4.1\" style=\"font-size:80%;\">each 5s</span></th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.5.1\" style=\"font-size:80%;\">46 MB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.6.1\" style=\"font-size:80%;\">No</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.8.1\" style=\"font-size:80%;\">8.75 GB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.10.1\" style=\"font-size:80%;\">75 GB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.3.2\">\n<th class=\"ltx_td 
ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.1.1\" style=\"font-size:80%;\">Filebeat</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.2.1\" style=\"font-size:80%;\">Logs</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.3.1\" style=\"font-size:80%;\">1KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.4.1\" style=\"font-size:80%;\">each 1s</span></th>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.5.1\" style=\"font-size:80%;\">3.50 MB</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.6.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.8.1\" style=\"font-size:80%;\">0.67 GB</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.3.2.10\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.10.1\" style=\"font-size:80%;\">5.77 GB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.4.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.4.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.4.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.4.3.1.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.1.1.1.1.1\" style=\"font-size:80%;\">Open Telemetry</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.2.1\" style=\"font-size:80%;\">Traces</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.3.1\" style=\"font-size:80%;\">4KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.4.1\" style=\"font-size:80%;\">each 15s</span></th>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.5.1\" style=\"font-size:80%;\">1 MB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.6.1\" style=\"font-size:80%;\">No</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span 
class=\"ltx_text\" id=\"S6.T4.4.4.3.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.8.1\" style=\"font-size:80%;\">0.2 GB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.10.1\" style=\"font-size:80%;\">1.54 GB</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
294
- "perturb_sentence_id": 17,
 
 
 
295
  "output": {
296
- "perturbed_statement": "[paragraph id = 17] Using the strategies of removing the help text and changing the configuration of Node Exporter to expose only metrics about CPU, memory, disk, network, and power supply, and increasing the scrap delay to 5 seconds on Prometheus, we could reduce the volume of metric data on the Fog node by 60%, which positively affected CPU and memory usage by Prometheus. Regarding the logs generated by Mobile IoT-Roadbot while the trucks were moving around the city, they record information about 5G network analysis, such as latency and throughput, and contextual information (GNSS coordinates, truck speed, etc.).",
297
- "perturbed_explanation": "Original Explanation:\n1. The configuration adjustments described, including increasing the scrape delay to 10 seconds, effectively decreased the volume of metric data by 87% on the Fog node due to reduced data sampling frequency.\n2. This reduction lessened the resource load caused by Prometheus' data processing activities.\nExplanation of Incorrect Statement:\n1. The scrape delay was altered from 10 seconds to 5 seconds in the statement, which contradicts the provided context.\n2. This change would likely result in a smaller percentage reduction in data volume, and stating a 60% reduction diverges from the contextually given 87%."
298
  }
299
  }
300
  ]
 
97
  "[paragraph id = 50] Metrics deliver objective information about the external interface of a system, e.g., video upload throughput."
98
  ],
99
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
100
+ "perturb_sentence_id": [
101
+ 47,
102
+ 48
103
+ ],
104
  "output": {
105
+ "perturbed_statement": "[paragraph id = 47] After optimising the code using the point-in-polygon approach [28] instead of brute force, the time spent on this operation was reduced to 1% of the original time. All information about truck speed, call sequences, and geographic coordinates is stored in metrics.",
106
+ "perturbed_explanation": "1. Metrics, logs, and traces indeed carry different types of information, serving distinct roles in system observability, as suggested by the presence of each in Table 1. For example, metrics are generally numerical measures of system performance, logs are text files for recording events, and traces are records of service call paths. 2. The statement incorrectly claims that all information about truck speed, call sequences, and geographic coordinates is stored in metrics. According to the context, truck speed is a metric, geographic coordinates are found in logs, and call sequences are part of traces. Therefore, the statement inaccurately groups distinct observability data types under metrics, failing to recognize their unique roles."
107
  }
108
  },
109
  {
 
152
  "[paragraph id = 25] However, logs and traces are structured differently and will benefit from other storage solutions, such as inverted index-based storage, due to the type of queries that are usually made to retrieve meaningful information from them [32 ]."
153
  ],
154
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
155
+ "perturb_sentence_id": [
156
+ 25,
157
+ 26
158
+ ],
159
  "output": {
160
+ "perturbed_statement": "However, logs and traces are structured the same way and will benefit from relational database storage, contrary to the type of queries that are usually made to retrieve meaningful information from them [paragraph id = 25]. Therefore, an observability data ingestion service on the fog should consider the data requirements that each instrumentation domain needs (see Table 1), while allowing cross-analysis to be performed.",
161
+ "perturbed_explanation": "Logs and traces are structured differently and will benefit from other storage solutions, such as inverted index-based storage, due to the type of queries that are usually made to retrieve meaningful information from them. 1. The statement incorrectly claims that logs and traces are structured the same way, which suggests they do not require different storage solutions, contradicting the necessity for tailored storage solutions like inverted index-based storage that cater to their specific structures. 2. The statement wrongly suggests that relational database storage is beneficial for logs and traces, while in reality, such storage may not efficiently handle complex queries often needed for these types of data."
162
  }
163
  },
164
  {
 
221
  "[paragraph id = 32] The experiments show that it is possible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment."
222
  ],
223
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S2.T1.2.1.1\" style=\"font-size:90%;\">Table 1</span>: </span><span class=\"ltx_text\" id=\"S2.T1.3.2\" style=\"font-size:90%;\">The three most important domains of observability differ in their data characteristics.</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.1.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.2.1\" style=\"font-size:80%;\">Type</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.3.1\" style=\"font-size:80%;\">Query</span></th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S2.T1.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.4.1.1.4.1\" style=\"font-size:80%;\">Storage</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.4.2.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.1.1\" style=\"font-size:80%;\">Metric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.2.1\" style=\"font-size:80%;\">Numeric</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.3.1\" style=\"font-size:80%;\">Aggregations</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S2.T1.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.2.1.4.1\" style=\"font-size:80%;\">Time Series Database</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.3.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.1.1\" style=\"font-size:80%;\">Log</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.2.1\" style=\"font-size:80%;\">Semi/not structured strings</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S2.T1.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.3.1\" style=\"font-size:80%;\">Approximate string search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S2.T1.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.3.2.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.4.4.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.1.1\" style=\"font-size:80%;\">Trace</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.2.1\" 
style=\"font-size:80%;\">DAGs of duration of execution</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.3.1\" style=\"font-size:80%;\">Disassociated graph search</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S2.T1.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S2.T1.4.4.3.4.1\" style=\"font-size:80%;\">Inverted Index</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
224
+ "perturb_sentence_id": [
225
+ 32,
226
+ 33
227
+ ],
228
  "output": {
229
+ "perturbed_statement": "[paragraph id = 32] The experiments show that it is impossible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment. In addition, the overhead of deploying an observability data life cycle can be high, if properly managed.",
230
+ "perturbed_explanation": "1. The original explanation states that it is possible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment, and that the overhead of deploying an observability data life cycle can be low if properly managed. 2. The statement is incorrect because it claims that it is impossible to collect the benefits of achieving higher observability in a Fog computing environment, which contradicts the fact that the experiments show it is indeed possible to gain such benefits. Additionally, it incorrectly states that the overhead can be high, whereas the original statement indicates that, with proper management, the overhead can be low. These alterations change the implications of the experiments and the potential benefits of observability in Fog computing."
231
  }
232
  },
233
  {
 
300
  "[paragraph id = 32] The experiments show that it is possible to collect the benefits of achieving a higher level of observability for a system in a Fog computing environment."
301
  ],
302
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S6.T4.2.1.1\" style=\"font-size:90%;\">Table 4</span>: </span><span class=\"ltx_text\" id=\"S6.T4.3.2\" style=\"font-size:90%;\">Mobile IoT-Roadbot assessment of each observability domain .</span></figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S6.T4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.1.1\" style=\"font-size:80%;\">Tool</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.2.1\" style=\"font-size:80%;\">Domain</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T4.4.1.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.3.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.3.1.1.1.1\" style=\"font-size:80%;\">Data</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.3.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.3.1.2.1.1\" style=\"font-size:80%;\">Collection</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row 
ltx_border_tt\" id=\"S6.T4.4.1.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.4.1\" style=\"font-size:80%;\">Frequency</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.5.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.5.1.1.1.1\" style=\"font-size:80%;\">Volume</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.5.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.5.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.5.1.2.1.1\" style=\"font-size:80%;\">by Hour</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.6.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.6.1.1.1.1\" style=\"font-size:80%;\">IoT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.6.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.6.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.7\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.7.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.7.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.7.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.7.1.1.1.1\" style=\"font-size:80%;\">Fog</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.7.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.7.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.7.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.8.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.8.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.8.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.8.1.1.1.1\" style=\"font-size:80%;\">Fog Volume</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.8.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.8.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.8.1.2.1.1\" style=\"font-size:80%;\">(1 week)</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.9.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.9.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.9.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text 
ltx_font_bold\" id=\"S6.T4.4.1.1.9.1.1.1.1\" style=\"font-size:80%;\">Cloud</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.9.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.9.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.9.1.2.1.1\" style=\"font-size:80%;\">Storage</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T4.4.1.1.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.1.1.10.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.10.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.10.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.10.1.1.1.1\" style=\"font-size:80%;\">Cloud Vol.</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.1.1.10.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.1.1.10.1.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T4.4.1.1.10.1.2.1.1\" style=\"font-size:80%;\">(2 months)</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.2.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.2.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.2.1.1.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.1.1.1.1.1\" style=\"font-size:80%;\">Node Exporter</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.2\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.2.1\" style=\"font-size:80%;\">Metrics</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.3.1\" style=\"font-size:80%;\">65KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T4.4.2.1.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.4.1\" style=\"font-size:80%;\">each 5s</span></th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.5.1\" style=\"font-size:80%;\">46 MB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.6.1\" style=\"font-size:80%;\">No</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.8.1\" style=\"font-size:80%;\">8.75 GB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_t\" id=\"S6.T4.4.2.1.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.2.1.10.1\" style=\"font-size:80%;\">75 GB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.3.2\">\n<th class=\"ltx_td 
ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.1.1\" style=\"font-size:80%;\">Filebeat</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.2.1\" style=\"font-size:80%;\">Logs</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.3.1\" style=\"font-size:80%;\">1KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T4.4.3.2.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.4.1\" style=\"font-size:80%;\">each 1s</span></th>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.5.1\" style=\"font-size:80%;\">3.50 MB</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.6.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.8.1\" style=\"font-size:80%;\">0.67 GB</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S6.T4.4.3.2.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.3.2.10\" 
style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.3.2.10.1\" style=\"font-size:80%;\">5.77 GB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T4.4.4.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T4.4.4.3.1.1\">\n<tr class=\"ltx_tr\" id=\"S6.T4.4.4.3.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_left\" id=\"S6.T4.4.4.3.1.1.1.1\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.1.1.1.1.1\" style=\"font-size:80%;\">Open Telemetry</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.2\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.2.1\" style=\"font-size:80%;\">Traces</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.3\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.3.1\" style=\"font-size:80%;\">4KB</span></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S6.T4.4.4.3.4\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.4.1\" style=\"font-size:80%;\">each 15s</span></th>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.5\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.5.1\" style=\"font-size:80%;\">1 MB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.6\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.6.1\" style=\"font-size:80%;\">No</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.7\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span 
class=\"ltx_text\" id=\"S6.T4.4.4.3.7.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.8\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.8.1\" style=\"font-size:80%;\">0.2 GB</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.9\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.9.1\" style=\"font-size:80%;\">Yes</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_left ltx_border_bb\" id=\"S6.T4.4.4.3.10\" style=\"padding-top:-0.65pt;padding-bottom:-0.65pt;\"><span class=\"ltx_text\" id=\"S6.T4.4.4.3.10.1\" style=\"font-size:80%;\">1.54 GB</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
303
+ "perturb_sentence_id": [
304
+ 17,
305
+ 18
306
+ ],
307
  "output": {
308
+ "perturbed_statement": "[paragraph id = 17] Using the strategies of removing the help text and changing the configuration of Node Exporter to expose only metrics about CPU, memory, disk, network, and power supply, and increasing the scrap delay to 10 seconds on Prometheus, we could reduce the volume of metric data on the Fog node by 50%, which also positively affected CPU and memory usage by Prometheus. Regarding the logs generated by Mobile IoT-Roadbot while the trucks were moving around the city, they record information about 3G network analysis, such as latency and throughput, and contextual information (GNSS coordinates, truck speed, etc.).",
309
+ "perturbed_explanation": "Original Explanation: The first part of the sentence describes the strategy used to reduce the data collected by Prometheus, including changing the configuration of Node Exporter and increasing the scrap delay, which in turn significantly reduced the system's data volume. The second part gives context to the kind of logs generated by Mobile IoT-Roadbot while active. \n\n1. The statement incorrectly changes the reduction in the volume of metric data from 87% to 50%. The original context mentions an 87% reduction, indicating a more significant impact, thus erroneously changing the extent of the reduction. \n\n2. The statement incorrectly mentions the logs being for 3G network analysis instead of 5G. The context specifies that the logs recorded by Mobile IoT-Roadbot concern analysis of the 5G network, making the reference to 3G incorrect."
310
  }
311
  }
312
  ]
table_result/2407.00046v1_output.json CHANGED
@@ -34,10 +34,13 @@
34
  "[paragraph id = 4] Table 1 showcases the statistics and quantifies the speedup achieved in representative cases relative to IPC."
35
  ],
36
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span><span class=\"ltx_text ltx_font_bold\" id=\"S6.T1.38.1\">Statistics for Testing Scenarios.</span> This table details the total numbers of tetrahedra (#tets), Degrees of Freedom (#DOFs), and surface triangles (#tris). Key simulation parameters such as time step (), material density, Young’s Modulus (), Poisson Ratio (), collision offset (), and frictional coefficient () are provided. Additionally, the table includes both average and maximum numbers of constraints (#cons), the total number of Newton iterations per step, the average computational cost per step, and the comparative speedup achieved against IPC. Note that we simply use the same value for the friction mollification threshold and .</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S6.T1.36\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.9\"><span class=\"ltx_text\" id=\"S6.T1.22.8.9.1\" style=\"font-size:90%;\">Scenario</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.10\"><span class=\"ltx_text\" id=\"S6.T1.22.8.10.1\" style=\"font-size:90%;\">#tets / #DOFs / #tris</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.15.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.15.1.1.1\" style=\"font-size:90%;\"> (s)</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.18.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.18.4.4.3\">\n<tr class=\"ltx_tr\" id=\"S6.T1.16.2.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.16.2.2.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.1\" style=\"font-size:90%;\">density (kg/m</span><sup class=\"ltx_sup\" 
id=\"S6.T1.16.2.2.1.1.1.2\"><span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.2.1\" style=\"font-size:90%;\">3</span></sup><span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.3\" style=\"font-size:90%;\">),</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.18.4.4.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.18.4.4.3.3.2\">\n<span class=\"ltx_text\" id=\"S6.T1.18.4.4.3.3.2.1\" style=\"font-size:90%;\"> (Pa), </span>\n</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.20.6.6\">\n<span class=\"ltx_text\" id=\"S6.T1.20.6.6.1\" style=\"font-size:90%;\">, </span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.21.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.8\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.8.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.8.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.8.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.2.1.1\" style=\"font-size:90%;\">#cons</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.8.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.8.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.1.1.1\" style=\"font-size:90%;\">(avg. / </span><span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.1.1.2\" style=\"font-size:90%;\">)</span>\n</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.11\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.11.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.11.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.11.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.11.1.1.1.1\" style=\"font-size:90%;\">avg. 
#iters</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.11.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.11.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.11.1.2.1.1\" style=\"font-size:90%;\">(Newton)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.12\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.12.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.12.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.12.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.12.1.1.1.1\" style=\"font-size:90%;\">avg. cost</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.12.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.12.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.12.1.2.1.1\" style=\"font-size:90%;\">per-step (s)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S6.T1.22.8.13\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.13.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.13.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.13.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.13.1.1.1.1\" style=\"font-size:90%;\">speedup</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.13.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.13.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.13.1.2.1.1\" style=\"font-size:90%;\">vs. 
IPC</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.23.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.2\"><span class=\"ltx_text\" id=\"S6.T1.23.9.2.1\" style=\"font-size:90%;\">Puffer Balls on Nets</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.3\"><span class=\"ltx_text\" id=\"S6.T1.23.9.3.1\" style=\"font-size:90%;\">1.76M / 801K / 1.6M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.4\"><span class=\"ltx_text\" id=\"S6.T1.23.9.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.5\"><span class=\"ltx_text\" id=\"S6.T1.23.9.5.1\" style=\"font-size:90%;\">1e3, 5e5 / 1e9, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.6\"><span class=\"ltx_text\" id=\"S6.T1.23.9.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.7\"><span class=\"ltx_text\" id=\"S6.T1.23.9.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.8\"><span class=\"ltx_text\" id=\"S6.T1.23.9.8.1\" style=\"font-size:90%;\">228K / 292K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.9\"><span class=\"ltx_text\" id=\"S6.T1.23.9.9.1\" style=\"font-size:90%;\">156.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.10\"><span class=\"ltx_text\" id=\"S6.T1.23.9.10.1\" style=\"font-size:90%;\">427</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.23.9.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.26.12\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.4\"><span class=\"ltx_text\" id=\"S6.T1.26.12.4.1\" 
style=\"font-size:90%;\">Dragons-Pachinko</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.5\"><span class=\"ltx_text\" id=\"S6.T1.26.12.5.1\" style=\"font-size:90%;\">1.49M / 379K / 773K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.6\"><span class=\"ltx_text\" id=\"S6.T1.26.12.6.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.25.11.2\">\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.3\" style=\"font-size:90%;\">1e3, </span>\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.25.11.2.2\">\n<tr class=\"ltx_tr\" id=\"S6.T1.24.10.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.24.10.1.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.24.10.1.1.1.1.1\" style=\"font-size:90%;\">5e5 (</span><span class=\"ltx_text\" id=\"S6.T1.24.10.1.1.1.1.2\" style=\"font-size:90%;\">)/</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.25.11.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.25.11.2.2.2.1\">\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.2.2.1.1\" style=\"font-size:90%;\">1e6 (</span><span class=\"ltx_text\" id=\"S6.T1.25.11.2.2.2.1.2\" style=\"font-size:90%;\">)</span>\n</td>\n</tr>\n</table>\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.4\" style=\"font-size:90%;\">, 0.4</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.7\"><span class=\"ltx_text\" id=\"S6.T1.26.12.7.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.8\"><span class=\"ltx_text\" id=\"S6.T1.26.12.8.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.9\"><span class=\"ltx_text\" id=\"S6.T1.26.12.9.1\" style=\"font-size:90%;\">4.9K / 18K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r 
ltx_border_t\" id=\"S6.T1.26.12.10\"><span class=\"ltx_text\" id=\"S6.T1.26.12.10.1\" style=\"font-size:90%;\">41.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.11\"><span class=\"ltx_text\" id=\"S6.T1.26.12.11.1\" style=\"font-size:90%;\">29.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.26.12.3\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.27.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.2\"><span class=\"ltx_text\" id=\"S6.T1.27.13.2.1\" style=\"font-size:90%;\">Staircase-Armadillos</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.3\"><span class=\"ltx_text\" id=\"S6.T1.27.13.3.1\" style=\"font-size:90%;\">300K / 94K / 187K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.4\"><span class=\"ltx_text\" id=\"S6.T1.27.13.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.5\"><span class=\"ltx_text\" id=\"S6.T1.27.13.5.1\" style=\"font-size:90%;\">1e3, 7.5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.6\"><span class=\"ltx_text\" id=\"S6.T1.27.13.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.7\"><span class=\"ltx_text\" id=\"S6.T1.27.13.7.1\" style=\"font-size:90%;\">0.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.8\"><span class=\"ltx_text\" id=\"S6.T1.27.13.8.1\" style=\"font-size:90%;\">3.2K / 3.2K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.9\"><span class=\"ltx_text\" id=\"S6.T1.27.13.9.1\" style=\"font-size:90%;\">38</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.10\"><span class=\"ltx_text\" 
id=\"S6.T1.27.13.10.1\" style=\"font-size:90%;\">26.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.27.13.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.28.14\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.2\"><span class=\"ltx_text\" id=\"S6.T1.28.14.2.1\" style=\"font-size:90%;\">Staircase-Dragons</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.3\"><span class=\"ltx_text\" id=\"S6.T1.28.14.3.1\" style=\"font-size:90%;\">376K / 120K / 240K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.4\"><span class=\"ltx_text\" id=\"S6.T1.28.14.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.5\"><span class=\"ltx_text\" id=\"S6.T1.28.14.5.1\" style=\"font-size:90%;\">1e3, 7.5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.6\"><span class=\"ltx_text\" id=\"S6.T1.28.14.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.7\"><span class=\"ltx_text\" id=\"S6.T1.28.14.7.1\" style=\"font-size:90%;\">0.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.8\"><span class=\"ltx_text\" id=\"S6.T1.28.14.8.1\" style=\"font-size:90%;\">3K / 5.4K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.9\"><span class=\"ltx_text\" id=\"S6.T1.28.14.9.1\" style=\"font-size:90%;\">41.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.10\"><span class=\"ltx_text\" id=\"S6.T1.28.14.10.1\" style=\"font-size:90%;\">28.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.28.14.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.29.15\">\n<td class=\"ltx_td ltx_align_center ltx_border_r 
ltx_border_t\" id=\"S6.T1.29.15.2\"><span class=\"ltx_text\" id=\"S6.T1.29.15.2.1\" style=\"font-size:90%;\">Roller Test</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.3\"><span class=\"ltx_text\" id=\"S6.T1.29.15.3.1\" style=\"font-size:90%;\">100K / 31K / 62K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.4\"><span class=\"ltx_text\" id=\"S6.T1.29.15.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.5\"><span class=\"ltx_text\" id=\"S6.T1.29.15.5.1\" style=\"font-size:90%;\">1e3, 1e6, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.6\"><span class=\"ltx_text\" id=\"S6.T1.29.15.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.7\"><span class=\"ltx_text\" id=\"S6.T1.29.15.7.1\" style=\"font-size:90%;\">0.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.8\"><span class=\"ltx_text\" id=\"S6.T1.29.15.8.1\" style=\"font-size:90%;\">1.6K / 5.8K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.9\"><span class=\"ltx_text\" id=\"S6.T1.29.15.9.1\" style=\"font-size:90%;\">35.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.10\"><span class=\"ltx_text\" id=\"S6.T1.29.15.10.1\" style=\"font-size:90%;\">12.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.29.15.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.30.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.2\"><span class=\"ltx_text\" id=\"S6.T1.30.16.2.1\" style=\"font-size:90%;\">Armadillos &amp; Bowl</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.3\"><span 
class=\"ltx_text\" id=\"S6.T1.30.16.3.1\" style=\"font-size:90%;\">826K / 192K / 238K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.4\"><span class=\"ltx_text\" id=\"S6.T1.30.16.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.5\"><span class=\"ltx_text\" id=\"S6.T1.30.16.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.6\"><span class=\"ltx_text\" id=\"S6.T1.30.16.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.7\"><span class=\"ltx_text\" id=\"S6.T1.30.16.7.1\" style=\"font-size:90%;\">0.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.8\"><span class=\"ltx_text\" id=\"S6.T1.30.16.8.1\" style=\"font-size:90%;\">2.2K / 9.7K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.9\"><span class=\"ltx_text\" id=\"S6.T1.30.16.9.1\" style=\"font-size:90%;\">8.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.10\"><span class=\"ltx_text\" id=\"S6.T1.30.16.10.1\" style=\"font-size:90%;\">3.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.30.16.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.31.17.2.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.31.17.2.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.31.17.2.1.1.1.1\" style=\"font-size:90%;\">Crabs on Nets</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" 
id=\"S6.T1.31.17.2.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.31.17.2.1.2.1.1\" style=\"font-size:90%;\">(light crabs)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.3\"><span class=\"ltx_text\" id=\"S6.T1.31.17.3.1\" style=\"font-size:90%;\">2.2M / 810K / 1.2M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.4\"><span class=\"ltx_text\" id=\"S6.T1.31.17.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.5\"><span class=\"ltx_text\" id=\"S6.T1.31.17.5.1\" style=\"font-size:90%;\">1e2 / 1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.6\"><span class=\"ltx_text\" id=\"S6.T1.31.17.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.7\"><span class=\"ltx_text\" id=\"S6.T1.31.17.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.8\"><span class=\"ltx_text\" id=\"S6.T1.31.17.8.1\" style=\"font-size:90%;\">32K / 52K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.9\"><span class=\"ltx_text\" id=\"S6.T1.31.17.9.1\" style=\"font-size:90%;\">34.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.10\"><span class=\"ltx_text\" id=\"S6.T1.31.17.10.1\" style=\"font-size:90%;\">48.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.31.17.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.32.18\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.2\"><span class=\"ltx_text\" id=\"S6.T1.32.18.2.1\" style=\"font-size:90%;\">Twisting Rods</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" 
id=\"S6.T1.32.18.3\"><span class=\"ltx_text\" id=\"S6.T1.32.18.3.1\" style=\"font-size:90%;\">355K / 70.4K / 51.6K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.4\"><span class=\"ltx_text\" id=\"S6.T1.32.18.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.5\"><span class=\"ltx_text\" id=\"S6.T1.32.18.5.1\" style=\"font-size:90%;\">1e3, 1e7, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.6\"><span class=\"ltx_text\" id=\"S6.T1.32.18.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.7\"><span class=\"ltx_text\" id=\"S6.T1.32.18.7.1\" style=\"font-size:90%;\">0</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.8\"><span class=\"ltx_text\" id=\"S6.T1.32.18.8.1\" style=\"font-size:90%;\">617K / 5.7M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.9\"><span class=\"ltx_text\" id=\"S6.T1.32.18.9.1\" style=\"font-size:90%;\">24.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.10\"><span class=\"ltx_text\" id=\"S6.T1.32.18.10.1\" style=\"font-size:90%;\">15.54</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.32.18.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.33.19.2.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.33.19.2.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.33.19.2.1.1.1.1\" style=\"font-size:90%;\">Twisting</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" 
id=\"S6.T1.33.19.2.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.33.19.2.1.2.1.1\" style=\"font-size:90%;\">Cylindrical Mat</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.3\"><span class=\"ltx_text\" id=\"S6.T1.33.19.3.1\" style=\"font-size:90%;\">64K / 20.9K / 41.8K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.4\"><span class=\"ltx_text\" id=\"S6.T1.33.19.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.5\"><span class=\"ltx_text\" id=\"S6.T1.33.19.5.1\" style=\"font-size:90%;\">1e3, 1e7, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.6\"><span class=\"ltx_text\" id=\"S6.T1.33.19.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.7\"><span class=\"ltx_text\" id=\"S6.T1.33.19.7.1\" style=\"font-size:90%;\">0</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.8\"><span class=\"ltx_text\" id=\"S6.T1.33.19.8.1\" style=\"font-size:90%;\">60K / 147K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.9\"><span class=\"ltx_text\" id=\"S6.T1.33.19.9.1\" style=\"font-size:90%;\">18.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.10\"><span class=\"ltx_text\" id=\"S6.T1.33.19.10.1\" style=\"font-size:90%;\">5.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.33.19.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.34.20\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.2\"><span class=\"ltx_text\" id=\"S6.T1.34.20.2.1\" style=\"font-size:90%;\">Noodles-200</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.3\"><span 
class=\"ltx_text\" id=\"S6.T1.34.20.3.1\" style=\"font-size:90%;\">934K / 375K / 749K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.4\"><span class=\"ltx_text\" id=\"S6.T1.34.20.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.5\"><span class=\"ltx_text\" id=\"S6.T1.34.20.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.6\"><span class=\"ltx_text\" id=\"S6.T1.34.20.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.7\"><span class=\"ltx_text\" id=\"S6.T1.34.20.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.8\"><span class=\"ltx_text\" id=\"S6.T1.34.20.8.1\" style=\"font-size:90%;\">48.9K / 146.3K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.9\"><span class=\"ltx_text\" id=\"S6.T1.34.20.9.1\" style=\"font-size:90%;\">39.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.10\"><span class=\"ltx_text\" id=\"S6.T1.34.20.10.1\" style=\"font-size:90%;\">49.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.34.20.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.35.21\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.2\"><span class=\"ltx_text\" id=\"S6.T1.35.21.2.1\" style=\"font-size:90%;\">Noodles-300</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.3\"><span class=\"ltx_text\" id=\"S6.T1.35.21.3.1\" style=\"font-size:90%;\">1.4M / 562K / 1.1M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.4\"><span class=\"ltx_text\" id=\"S6.T1.35.21.4.1\" 
style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.5\"><span class=\"ltx_text\" id=\"S6.T1.35.21.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.6\"><span class=\"ltx_text\" id=\"S6.T1.35.21.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.7\"><span class=\"ltx_text\" id=\"S6.T1.35.21.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.8\"><span class=\"ltx_text\" id=\"S6.T1.35.21.8.1\" style=\"font-size:90%;\">132.1K / 276K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.9\"><span class=\"ltx_text\" id=\"S6.T1.35.21.9.1\" style=\"font-size:90%;\">60.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.10\"><span class=\"ltx_text\" id=\"S6.T1.35.21.10.1\" style=\"font-size:90%;\">109.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.35.21.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.36.22\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.1\">\n<span class=\"ltx_text\" id=\"S6.T1.36.22.1.1\" style=\"font-size:90%;\">T-rex </span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.2\"><span class=\"ltx_text\" id=\"S6.T1.36.22.2.1\" style=\"font-size:90%;\">9M / 2.2M / 2.9M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.3\"><span class=\"ltx_text\" id=\"S6.T1.36.22.3.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.4\"><span class=\"ltx_text\" id=\"S6.T1.36.22.4.1\" 
style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.5\"><span class=\"ltx_text\" id=\"S6.T1.36.22.5.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.6\"><span class=\"ltx_text\" id=\"S6.T1.36.22.6.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.7\"><span class=\"ltx_text\" id=\"S6.T1.36.22.7.1\" style=\"font-size:90%;\">100.5K / 308.4K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.8\"><span class=\"ltx_text\" id=\"S6.T1.36.22.8.1\" style=\"font-size:90%;\">25.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.9\"><span class=\"ltx_text\" id=\"S6.T1.36.22.9.1\" style=\"font-size:90%;\">183.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S6.T1.36.22.10\"><span class=\"ltx_text ltx_font_typewriter\" id=\"S6.T1.36.22.10.1\" style=\"font-size:90%;\">N/A</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
37
- "perturb_sentence_id": 4,
 
 
 
38
  "output": {
39
- "perturbed_statement": "[paragraph id = 4] Table 1 showcases the results and quantifies the speedup achieved in representative cases relative to LRU. In the study by Lan et al.",
40
- "perturbed_explanation": "1. The original statement implies that Table 1 discusses speedup metrics comparing performance to IPC. 2. The altered statement replaces 'IPC' with 'LRU,' which changes the reference point in the comparison. This adjustment is incorrect since the comparative analysis involving Table 1, as mentioned, is relative to IPC, not LRU."
41
  }
42
  }
43
  ]
 
34
  "[paragraph id = 4] Table 1 showcases the statistics and quantifies the speedup achieved in representative cases relative to IPC."
35
  ],
36
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span><span class=\"ltx_text ltx_font_bold\" id=\"S6.T1.38.1\">Statistics for Testing Scenarios.</span> This table details the total numbers of tetrahedra (#tets), Degrees of Freedom (#DOFs), and surface triangles (#tris). Key simulation parameters such as time step (), material density, Young’s Modulus (), Poisson Ratio (), collision offset (), and frictional coefficient () are provided. Additionally, the table includes both average and maximum numbers of constraints (#cons), the total number of Newton iterations per step, the average computational cost per step, and the comparative speedup achieved against IPC. Note that we simply use the same value for the friction mollification threshold and .</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S6.T1.36\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.9\"><span class=\"ltx_text\" id=\"S6.T1.22.8.9.1\" style=\"font-size:90%;\">Scenario</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.10\"><span class=\"ltx_text\" id=\"S6.T1.22.8.10.1\" style=\"font-size:90%;\">#tets / #DOFs / #tris</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.15.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.15.1.1.1\" style=\"font-size:90%;\"> (s)</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.18.4.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.18.4.4.3\">\n<tr class=\"ltx_tr\" id=\"S6.T1.16.2.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.16.2.2.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.1\" style=\"font-size:90%;\">density (kg/m</span><sup class=\"ltx_sup\" 
id=\"S6.T1.16.2.2.1.1.1.2\"><span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.2.1\" style=\"font-size:90%;\">3</span></sup><span class=\"ltx_text\" id=\"S6.T1.16.2.2.1.1.1.3\" style=\"font-size:90%;\">),</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.18.4.4.3.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.18.4.4.3.3.2\">\n<span class=\"ltx_text\" id=\"S6.T1.18.4.4.3.3.2.1\" style=\"font-size:90%;\"> (Pa), </span>\n</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.20.6.6\">\n<span class=\"ltx_text\" id=\"S6.T1.20.6.6.1\" style=\"font-size:90%;\">, </span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.21.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.8\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.8.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.8.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.8.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.2.1.1\" style=\"font-size:90%;\">#cons</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.8.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.8.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.1.1.1\" style=\"font-size:90%;\">(avg. / </span><span class=\"ltx_text\" id=\"S6.T1.22.8.8.1.1.1.2\" style=\"font-size:90%;\">)</span>\n</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.11\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.11.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.11.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.11.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.11.1.1.1.1\" style=\"font-size:90%;\">avg. 
#iters</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.11.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.11.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.11.1.2.1.1\" style=\"font-size:90%;\">(Newton)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S6.T1.22.8.12\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.12.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.12.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.12.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.12.1.1.1.1\" style=\"font-size:90%;\">avg. cost</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.12.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.12.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.12.1.2.1.1\" style=\"font-size:90%;\">per-step (s)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S6.T1.22.8.13\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.22.8.13.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.13.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.13.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.13.1.1.1.1\" style=\"font-size:90%;\">speedup</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.22.8.13.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.22.8.13.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.22.8.13.1.2.1.1\" style=\"font-size:90%;\">vs. 
IPC</span></td>\n</tr>\n</table>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.23.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.2\"><span class=\"ltx_text\" id=\"S6.T1.23.9.2.1\" style=\"font-size:90%;\">Puffer Balls on Nets</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.3\"><span class=\"ltx_text\" id=\"S6.T1.23.9.3.1\" style=\"font-size:90%;\">1.76M / 801K / 1.6M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.4\"><span class=\"ltx_text\" id=\"S6.T1.23.9.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.5\"><span class=\"ltx_text\" id=\"S6.T1.23.9.5.1\" style=\"font-size:90%;\">1e3, 5e5 / 1e9, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.6\"><span class=\"ltx_text\" id=\"S6.T1.23.9.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.7\"><span class=\"ltx_text\" id=\"S6.T1.23.9.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.8\"><span class=\"ltx_text\" id=\"S6.T1.23.9.8.1\" style=\"font-size:90%;\">228K / 292K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.9\"><span class=\"ltx_text\" id=\"S6.T1.23.9.9.1\" style=\"font-size:90%;\">156.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.23.9.10\"><span class=\"ltx_text\" id=\"S6.T1.23.9.10.1\" style=\"font-size:90%;\">427</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.23.9.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.26.12\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.4\"><span class=\"ltx_text\" id=\"S6.T1.26.12.4.1\" 
style=\"font-size:90%;\">Dragons-Pachinko</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.5\"><span class=\"ltx_text\" id=\"S6.T1.26.12.5.1\" style=\"font-size:90%;\">1.49M / 379K / 773K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.6\"><span class=\"ltx_text\" id=\"S6.T1.26.12.6.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.25.11.2\">\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.3\" style=\"font-size:90%;\">1e3, </span>\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.25.11.2.2\">\n<tr class=\"ltx_tr\" id=\"S6.T1.24.10.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.24.10.1.1.1.1\">\n<span class=\"ltx_text\" id=\"S6.T1.24.10.1.1.1.1.1\" style=\"font-size:90%;\">5e5 (</span><span class=\"ltx_text\" id=\"S6.T1.24.10.1.1.1.1.2\" style=\"font-size:90%;\">)/</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.25.11.2.2.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.25.11.2.2.2.1\">\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.2.2.1.1\" style=\"font-size:90%;\">1e6 (</span><span class=\"ltx_text\" id=\"S6.T1.25.11.2.2.2.1.2\" style=\"font-size:90%;\">)</span>\n</td>\n</tr>\n</table>\n<span class=\"ltx_text\" id=\"S6.T1.25.11.2.4\" style=\"font-size:90%;\">, 0.4</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.7\"><span class=\"ltx_text\" id=\"S6.T1.26.12.7.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.8\"><span class=\"ltx_text\" id=\"S6.T1.26.12.8.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.9\"><span class=\"ltx_text\" id=\"S6.T1.26.12.9.1\" style=\"font-size:90%;\">4.9K / 18K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r 
ltx_border_t\" id=\"S6.T1.26.12.10\"><span class=\"ltx_text\" id=\"S6.T1.26.12.10.1\" style=\"font-size:90%;\">41.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.26.12.11\"><span class=\"ltx_text\" id=\"S6.T1.26.12.11.1\" style=\"font-size:90%;\">29.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.26.12.3\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.27.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.2\"><span class=\"ltx_text\" id=\"S6.T1.27.13.2.1\" style=\"font-size:90%;\">Staircase-Armadillos</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.3\"><span class=\"ltx_text\" id=\"S6.T1.27.13.3.1\" style=\"font-size:90%;\">300K / 94K / 187K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.4\"><span class=\"ltx_text\" id=\"S6.T1.27.13.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.5\"><span class=\"ltx_text\" id=\"S6.T1.27.13.5.1\" style=\"font-size:90%;\">1e3, 7.5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.6\"><span class=\"ltx_text\" id=\"S6.T1.27.13.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.7\"><span class=\"ltx_text\" id=\"S6.T1.27.13.7.1\" style=\"font-size:90%;\">0.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.8\"><span class=\"ltx_text\" id=\"S6.T1.27.13.8.1\" style=\"font-size:90%;\">3.2K / 3.2K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.9\"><span class=\"ltx_text\" id=\"S6.T1.27.13.9.1\" style=\"font-size:90%;\">38</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.27.13.10\"><span class=\"ltx_text\" 
id=\"S6.T1.27.13.10.1\" style=\"font-size:90%;\">26.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.27.13.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.28.14\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.2\"><span class=\"ltx_text\" id=\"S6.T1.28.14.2.1\" style=\"font-size:90%;\">Staircase-Dragons</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.3\"><span class=\"ltx_text\" id=\"S6.T1.28.14.3.1\" style=\"font-size:90%;\">376K / 120K / 240K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.4\"><span class=\"ltx_text\" id=\"S6.T1.28.14.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.5\"><span class=\"ltx_text\" id=\"S6.T1.28.14.5.1\" style=\"font-size:90%;\">1e3, 7.5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.6\"><span class=\"ltx_text\" id=\"S6.T1.28.14.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.7\"><span class=\"ltx_text\" id=\"S6.T1.28.14.7.1\" style=\"font-size:90%;\">0.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.8\"><span class=\"ltx_text\" id=\"S6.T1.28.14.8.1\" style=\"font-size:90%;\">3K / 5.4K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.9\"><span class=\"ltx_text\" id=\"S6.T1.28.14.9.1\" style=\"font-size:90%;\">41.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.28.14.10\"><span class=\"ltx_text\" id=\"S6.T1.28.14.10.1\" style=\"font-size:90%;\">28.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.28.14.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.29.15\">\n<td class=\"ltx_td ltx_align_center ltx_border_r 
ltx_border_t\" id=\"S6.T1.29.15.2\"><span class=\"ltx_text\" id=\"S6.T1.29.15.2.1\" style=\"font-size:90%;\">Roller Test</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.3\"><span class=\"ltx_text\" id=\"S6.T1.29.15.3.1\" style=\"font-size:90%;\">100K / 31K / 62K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.4\"><span class=\"ltx_text\" id=\"S6.T1.29.15.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.5\"><span class=\"ltx_text\" id=\"S6.T1.29.15.5.1\" style=\"font-size:90%;\">1e3, 1e6, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.6\"><span class=\"ltx_text\" id=\"S6.T1.29.15.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.7\"><span class=\"ltx_text\" id=\"S6.T1.29.15.7.1\" style=\"font-size:90%;\">0.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.8\"><span class=\"ltx_text\" id=\"S6.T1.29.15.8.1\" style=\"font-size:90%;\">1.6K / 5.8K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.9\"><span class=\"ltx_text\" id=\"S6.T1.29.15.9.1\" style=\"font-size:90%;\">35.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.29.15.10\"><span class=\"ltx_text\" id=\"S6.T1.29.15.10.1\" style=\"font-size:90%;\">12.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.29.15.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.30.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.2\"><span class=\"ltx_text\" id=\"S6.T1.30.16.2.1\" style=\"font-size:90%;\">Armadillos &amp; Bowl</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.3\"><span 
class=\"ltx_text\" id=\"S6.T1.30.16.3.1\" style=\"font-size:90%;\">826K / 192K / 238K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.4\"><span class=\"ltx_text\" id=\"S6.T1.30.16.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.5\"><span class=\"ltx_text\" id=\"S6.T1.30.16.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.6\"><span class=\"ltx_text\" id=\"S6.T1.30.16.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.7\"><span class=\"ltx_text\" id=\"S6.T1.30.16.7.1\" style=\"font-size:90%;\">0.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.8\"><span class=\"ltx_text\" id=\"S6.T1.30.16.8.1\" style=\"font-size:90%;\">2.2K / 9.7K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.9\"><span class=\"ltx_text\" id=\"S6.T1.30.16.9.1\" style=\"font-size:90%;\">8.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.30.16.10\"><span class=\"ltx_text\" id=\"S6.T1.30.16.10.1\" style=\"font-size:90%;\">3.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.30.16.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.31.17.2.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.31.17.2.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.31.17.2.1.1.1.1\" style=\"font-size:90%;\">Crabs on Nets</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.31.17.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" 
id=\"S6.T1.31.17.2.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.31.17.2.1.2.1.1\" style=\"font-size:90%;\">(light crabs)</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.3\"><span class=\"ltx_text\" id=\"S6.T1.31.17.3.1\" style=\"font-size:90%;\">2.2M / 810K / 1.2M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.4\"><span class=\"ltx_text\" id=\"S6.T1.31.17.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.5\"><span class=\"ltx_text\" id=\"S6.T1.31.17.5.1\" style=\"font-size:90%;\">1e2 / 1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.6\"><span class=\"ltx_text\" id=\"S6.T1.31.17.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.7\"><span class=\"ltx_text\" id=\"S6.T1.31.17.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.8\"><span class=\"ltx_text\" id=\"S6.T1.31.17.8.1\" style=\"font-size:90%;\">32K / 52K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.9\"><span class=\"ltx_text\" id=\"S6.T1.31.17.9.1\" style=\"font-size:90%;\">34.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.31.17.10\"><span class=\"ltx_text\" id=\"S6.T1.31.17.10.1\" style=\"font-size:90%;\">48.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.31.17.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.32.18\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.2\"><span class=\"ltx_text\" id=\"S6.T1.32.18.2.1\" style=\"font-size:90%;\">Twisting Rods</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" 
id=\"S6.T1.32.18.3\"><span class=\"ltx_text\" id=\"S6.T1.32.18.3.1\" style=\"font-size:90%;\">355K / 70.4K / 51.6K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.4\"><span class=\"ltx_text\" id=\"S6.T1.32.18.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.5\"><span class=\"ltx_text\" id=\"S6.T1.32.18.5.1\" style=\"font-size:90%;\">1e3, 1e7, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.6\"><span class=\"ltx_text\" id=\"S6.T1.32.18.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.7\"><span class=\"ltx_text\" id=\"S6.T1.32.18.7.1\" style=\"font-size:90%;\">0</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.8\"><span class=\"ltx_text\" id=\"S6.T1.32.18.8.1\" style=\"font-size:90%;\">617K / 5.7M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.9\"><span class=\"ltx_text\" id=\"S6.T1.32.18.9.1\" style=\"font-size:90%;\">24.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.32.18.10\"><span class=\"ltx_text\" id=\"S6.T1.32.18.10.1\" style=\"font-size:90%;\">15.54</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.32.18.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S6.T1.33.19.2.1\">\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S6.T1.33.19.2.1.1.1\"><span class=\"ltx_text\" id=\"S6.T1.33.19.2.1.1.1.1\" style=\"font-size:90%;\">Twisting</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.33.19.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" 
id=\"S6.T1.33.19.2.1.2.1\"><span class=\"ltx_text\" id=\"S6.T1.33.19.2.1.2.1.1\" style=\"font-size:90%;\">Cylindrical Mat</span></td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.3\"><span class=\"ltx_text\" id=\"S6.T1.33.19.3.1\" style=\"font-size:90%;\">64K / 20.9K / 41.8K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.4\"><span class=\"ltx_text\" id=\"S6.T1.33.19.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.5\"><span class=\"ltx_text\" id=\"S6.T1.33.19.5.1\" style=\"font-size:90%;\">1e3, 1e7, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.6\"><span class=\"ltx_text\" id=\"S6.T1.33.19.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.7\"><span class=\"ltx_text\" id=\"S6.T1.33.19.7.1\" style=\"font-size:90%;\">0</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.8\"><span class=\"ltx_text\" id=\"S6.T1.33.19.8.1\" style=\"font-size:90%;\">60K / 147K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.9\"><span class=\"ltx_text\" id=\"S6.T1.33.19.9.1\" style=\"font-size:90%;\">18.8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.33.19.10\"><span class=\"ltx_text\" id=\"S6.T1.33.19.10.1\" style=\"font-size:90%;\">5.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.33.19.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.34.20\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.2\"><span class=\"ltx_text\" id=\"S6.T1.34.20.2.1\" style=\"font-size:90%;\">Noodles-200</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.3\"><span 
class=\"ltx_text\" id=\"S6.T1.34.20.3.1\" style=\"font-size:90%;\">934K / 375K / 749K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.4\"><span class=\"ltx_text\" id=\"S6.T1.34.20.4.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.5\"><span class=\"ltx_text\" id=\"S6.T1.34.20.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.6\"><span class=\"ltx_text\" id=\"S6.T1.34.20.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.7\"><span class=\"ltx_text\" id=\"S6.T1.34.20.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.8\"><span class=\"ltx_text\" id=\"S6.T1.34.20.8.1\" style=\"font-size:90%;\">48.9K / 146.3K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.9\"><span class=\"ltx_text\" id=\"S6.T1.34.20.9.1\" style=\"font-size:90%;\">39.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.34.20.10\"><span class=\"ltx_text\" id=\"S6.T1.34.20.10.1\" style=\"font-size:90%;\">49.5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.34.20.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.35.21\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.2\"><span class=\"ltx_text\" id=\"S6.T1.35.21.2.1\" style=\"font-size:90%;\">Noodles-300</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.3\"><span class=\"ltx_text\" id=\"S6.T1.35.21.3.1\" style=\"font-size:90%;\">1.4M / 562K / 1.1M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.4\"><span class=\"ltx_text\" id=\"S6.T1.35.21.4.1\" 
style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.5\"><span class=\"ltx_text\" id=\"S6.T1.35.21.5.1\" style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.6\"><span class=\"ltx_text\" id=\"S6.T1.35.21.6.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.7\"><span class=\"ltx_text\" id=\"S6.T1.35.21.7.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.8\"><span class=\"ltx_text\" id=\"S6.T1.35.21.8.1\" style=\"font-size:90%;\">132.1K / 276K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.9\"><span class=\"ltx_text\" id=\"S6.T1.35.21.9.1\" style=\"font-size:90%;\">60.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S6.T1.35.21.10\"><span class=\"ltx_text\" id=\"S6.T1.35.21.10.1\" style=\"font-size:90%;\">109.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T1.35.21.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T1.36.22\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.1\">\n<span class=\"ltx_text\" id=\"S6.T1.36.22.1.1\" style=\"font-size:90%;\">T-rex </span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.2\"><span class=\"ltx_text\" id=\"S6.T1.36.22.2.1\" style=\"font-size:90%;\">9M / 2.2M / 2.9M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.3\"><span class=\"ltx_text\" id=\"S6.T1.36.22.3.1\" style=\"font-size:90%;\">1/30</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.4\"><span class=\"ltx_text\" id=\"S6.T1.36.22.4.1\" 
style=\"font-size:90%;\">1e3, 5e5, 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.5\"><span class=\"ltx_text\" id=\"S6.T1.36.22.5.1\" style=\"font-size:90%;\">1e-3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.6\"><span class=\"ltx_text\" id=\"S6.T1.36.22.6.1\" style=\"font-size:90%;\">0.3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.7\"><span class=\"ltx_text\" id=\"S6.T1.36.22.7.1\" style=\"font-size:90%;\">100.5K / 308.4K</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.8\"><span class=\"ltx_text\" id=\"S6.T1.36.22.8.1\" style=\"font-size:90%;\">25.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S6.T1.36.22.9\"><span class=\"ltx_text\" id=\"S6.T1.36.22.9.1\" style=\"font-size:90%;\">183.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S6.T1.36.22.10\"><span class=\"ltx_text ltx_font_typewriter\" id=\"S6.T1.36.22.10.1\" style=\"font-size:90%;\">N/A</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
37
+ "perturb_sentence_id": [
38
+ 4,
39
+ 5
40
+ ],
41
  "output": {
42
+ "perturbed_statement": "[paragraph id = 4] Table 1 showcases the statistics and quantifies the speedup achieved in irrelevant cases relative to IPC. In the study by Lan et al.",
43
+ "perturbed_explanation": "1. The original explanation states that Table 1 presents statistics and quantifies speedup in representative cases.\n2. The statement incorrectly mentions 'irrelevant cases' rather than 'representative cases'. This error changes the focus and suggests that the data is not relevant to the core focus of the study, thereby misrepresenting the study's findings."
44
  }
45
  }
46
  ]
table_result/2407.00056v1_output.json CHANGED
The diff for this file is too large to render. See raw diff
 
table_result/2407.00062v1_output.json CHANGED
@@ -26,10 +26,13 @@
26
  "[paragraph id = 6] This is because mode cannot produce decimal-precision ratings it also defaults to randomness when a mode can t be determined."
27
  ],
28
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Mean and Standard Deviation of MAE for Neighbourhood Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.3.1.1\">Mean of Neighbours</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.3.1.2.1\">1.27</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.3.1.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.4.2.1\">Median of Neighbours</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.4.2.2.1\">1.27</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.4.2.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.4.2.3.1\">0.19</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.5.3.1\">Mode of Neighbours</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.5.3.2\">1.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.5.3.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" 
id=\"S5.T2.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
29
- "perturb_sentence_id": 3,
 
 
 
30
  "output": {
31
- "perturbed_statement": "[paragraph id = 3] The performance of these recommenders can be seen in Table 2 . We observe that the mode of neighbors was the best algorithm in this range.",
32
- "perturbed_explanation": "1. The original statement states that the mean and median of neighbors algorithms showed the best performance in the given range.\n2. The statement introduces the factual error by replacing \"mean and median\" with \"mode,\" which the context highlights as having inferior performance due to its limitations in precision and stochastic behavior. Therefore, this modification contradicts the provided information."
33
  }
34
  },
35
  {
@@ -62,10 +65,13 @@
62
  "[paragraph id = 9] This implies that Jaccard indexes seem to hold more valuable information than mere trust connections."
63
  ],
64
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Mean and Standard Deviation of MAE for Random Walk Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.3.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S5.T4.2.3.1.1\">Jaccard Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.2.3.1.2.1\">1.14</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.3.1.3\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.4.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.4.2.1\">Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.4.2.2\">1.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.4.2.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.5.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.5.3.1\">Median of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.5.3.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.2.5.3.3.1\">0.19</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.6.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.6.4.1\">Universal Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T4.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.7.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.7.5.1\">Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
65
- "perturb_sentence_id": 6,
 
 
 
66
  "output": {
67
- "perturbed_statement": "[paragraph id = 6] The performance of both MC algorithms, as compared with the others can be seen in Table 4 . We observe that the Monte Carlo algorithm performs worse than the previous models.",
68
- "perturbed_explanation": "The original explanation states that *the performance of the Monte Carlo algorithm outperforms the previous models*. 1. The comparative analysis suggests better efficiency presented by these algorithms. However, the statement now claims that *the Monte Carlo algorithm performs worse than the previous models*. This contradicts the analysis outcome as observed in its results."
69
  }
70
  },
71
  {
@@ -103,10 +109,13 @@
103
  "[paragraph id = 11] By overcoming the constraint of only considering direct neighbors, the JMoM and Jaccard WA models can produce decent results, with a very simple algorithm."
104
  ],
105
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 5: </span>Mean and Standard Deviation of MAE for Full-graph Jaccard Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T5.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T5.2.3.1.1\">Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.3.1.2.1\">1.05</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.3.1.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.4.2.1\">Jaccard Monte-Carlo Random Walk</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.4.2.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.4.2.3\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.5.3.1\">Jaccard MoM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.5.3.2\">1.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.5.3.3\">0.30</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T5.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.7.5.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.7.5.3.1\">0.21</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
106
- "perturb_sentence_id": 7,
 
 
 
107
  "output": {
108
- "perturbed_statement": "[paragraph id = 7] As can be observed from Table 5 , the random walk performed significantly better than the JMoM.",
109
- "perturbed_explanation": "1. The original explanation justified that the JMoM performed reasonably well, with its mean accuracy surpassing that of the random walk. 2. However, the statement introduces an incorrect assertion that the random walk performed significantly better, which contradicts the context's data indicating JMoM's superior mean accuracy over random walk."
110
  }
111
  },
112
  {
@@ -133,10 +142,13 @@
133
  "[paragraph id = 5] A potential reason for the Item-Jaccard WA model performing so well is that a user s social circle may not be the best predictor for who is similar to them, rather relating people by their items themselves does a better job."
134
  ],
135
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T6\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 6: </span>Mean and Standard Deviation of MAE for Item Rating Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T6.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T6.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T6.2.3.1.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T6.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T6.2.3.1.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T6.2.3.1.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.4.2.1\">Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.4.2.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.4.2.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.5.3.1\">Item-Rating Difference WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.5.3.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.5.3.3\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T6.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.7.5.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T6.2.7.5.3.1\">0.21</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
136
- "perturb_sentence_id": 2,
 
 
 
137
  "output": {
138
- "perturbed_statement": "[paragraph id = 2] The performance of these alternative WA models can be seen in Table 6 .We can see that the previous Jaccard WA outperforms the Item-Jaccard form of WA.",
139
- "perturbed_explanation": "The original explanation highlights: 1. The Item-Jaccard WA model outperforms the previous Jaccard WA model, showcasing its superior performance. In the altered statement, the claim suggests that the previous Jaccard WA model performs better than the Item-Jaccard WA model, which is factually incorrect as per the provided context discussing the superior performance of the Item-Jaccard WA model."
140
  }
141
  },
142
  {
@@ -169,10 +181,13 @@
169
  "[paragraph id = 7] It is evident the intra-item similarity conveys some information as we perform better than random, hence this could be useful in combination models in the coming sections."
170
  ],
171
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T7\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 7: </span>Mean and Standard Deviation of MAE for Intra-Item Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T7.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T7.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T7.2.3.1.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T7.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T7.2.3.1.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T7.2.3.1.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.4.2.1\">Intra-Item WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.4.2.2\">1.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.4.2.3\">0.14</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.5.3.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.5.3.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.5.3.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.6.4.1\">Intra-Item WA (Pearson)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.6.4.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T7.2.6.4.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T7.2.6.4.3.1\">0.13</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
172
- "perturb_sentence_id": 6,
 
 
 
173
  "output": {
174
- "perturbed_statement": "[paragraph id = 6] This is an unremarkable result, potentially this might not be a better metric for determining the similarity of items nor could it be applied in [16 ]. It is claimed the intra-item similarity fails to convey important information as we do not perform better than random, hence this might not be useful in combination models in the coming sections.",
175
- "perturbed_explanation": "The original explanation highlights the following: 1. The result presented in paragraph 6 suggests that intra-item similarity could potentially serve as a beneficial metric for determining item similarity and supports the effectiveness of combined models. 2. This is based on the observation that performance exceeds random outcomes, which indicates the conveyance of meaningful information. However, the statement contradicts this by describing the result as unremarkable and asserting that intra-item similarity fails to provide meaningful contributions, which incorrectly counters the logical deductions presented in the original analysis."
176
  }
177
  },
178
  {
@@ -200,10 +215,13 @@
200
  "[paragraph id = 6] Hence, we have successfully combined the intra-item and item-rating information to achieve a better result."
201
  ],
202
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T9\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 9: </span>Mean and Standard Deviation of MAE for WIRD WA Algorithm</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S6.T9.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S6.T9.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T9.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T9.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T9.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T9.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T9.2.3.1.1\">WIRD WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T9.2.3.1.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T9.2.3.1.3\">0.28</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.4.2.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T9.2.4.2.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.4.2.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.5.3.1\">Intra-Item WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.5.3.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T9.2.5.3.3.1\">0.13</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.6.4.1\">Item-Rating Difference WA</th>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S6.T9.2.6.4.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.6.4.3\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.7.5.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.7.5.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.7.5.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.8.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.8.6.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.8.6.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.8.6.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
203
- "perturb_sentence_id": 4,
 
 
 
204
  "output": {
205
- "perturbed_statement": "[paragraph id = 4] The results can be observed in Figure 5: The WIRD model significantly underperformed compared to the Item-Jaccard WA model, and also did not surpass the standalone intra-item information or item-rating difference WA approaches.",
206
- "perturbed_explanation": "The initial explanation states: First, the WIRD model performed quite well, although slightly below the Item-Jaccard WA model, while surpassing the standalone approaches. 2. The statement now incorrectly suggests that the WIRD model underperformed both compared to the Item-Jaccard WA model and the standalone approaches, contradicting the actual analysis results."
207
  }
208
  },
209
  {
@@ -289,10 +307,13 @@
289
  "[paragraph id = 30] Hence, in a scenario where providing consistently good predictions for all users is of importance, the introduction of intra-item information could facilitate this."
290
  ],
291
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T11\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 11: </span>Mean and Standard Deviation of MAE for All Algorithms on Different Data Sets</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T11.6\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T11.6.7.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S7.T11.6.7.1.1\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S7.T11.6.7.1.1.1\">Algorithm</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S7.T11.6.7.1.2\">Epinions</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S7.T11.6.7.1.3\">FilmTrust</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S7.T11.6.7.1.4\">CiaoDVD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.2.2.2\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"S7.T11.6.6.7\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.3.3.3\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.4.4.4\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"S7.T11.6.6.8\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.5.5.5\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.6.6.6\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T11.6.8.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S7.T11.6.8.1.1\">Jaccard Item-Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.2\"><span class=\"ltx_text 
ltx_font_bold\" id=\"S7.T11.6.8.1.2.1\">1.00</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.3\">0.26</td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S7.T11.6.8.1.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.5.1\">0.66</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S7.T11.6.8.1.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.8.1\">0.53</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.9.1\">0.28</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.9.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.9.2.1\">Item-Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.2\">1.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.9.2.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.9.2.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.8\">0.58</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.9\">0.32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.10.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.10.3.1\">Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.3\">0.25</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.10.3.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.5\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.10.3.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.10.3.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.8\">1.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.9\">0.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.11.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.11.4.1\">WIRD WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.3\">0.28</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.11.4.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.11.4.6.1\">0.04</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.11.4.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.9\">0.37</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.12.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.12.5.1\">Jaccard Item-Jaccard JII Combination WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.2\">1.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.3\">0.22</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.12.5.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.5\">0.69</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.6\">0.06</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.12.5.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.8\">0.63</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.9\">0.30</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.13.6\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.13.6.1\">JWIRD WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.2\">1.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.3\">0.27</td>\n<td 
class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.13.6.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.13.6.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.9\">0.40</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.14.7\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.14.7.1\">Jaccard MoM</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.2\">1.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.3\">0.30</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.14.7.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.5\">1.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.6\">0.09</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.14.7.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.8\">1.77</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.9\">0.37</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.15.8\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.15.8.1\">Jaccard Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.3\">0.22</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.15.8.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.5\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.15.8.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.8\">1.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.9\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.16.9\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.16.9.1\">Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.16.9.2\">1.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.16.9.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.5\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.16.9.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.8\">1.82</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.9\">0.35</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.17.10\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.17.10.1\">Item-Rating Difference WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.3\">0.49</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.17.10.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.17.10.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.8\">0.79</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.9\">0.41</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.18.11\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.18.11.1\">Jaccard Intra-Item WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.2\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.18.11.3.1\">0.13</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.18.11.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.5\">1.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.6\">0.06</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.18.11.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.8\">1.75</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.18.11.9\">0.32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.19.12\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.19.12.1\">Intra-Item WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.2\">1.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.3\">0.14</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.19.12.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.5\">1.18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.19.12.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.8\">1.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.9\">0.45</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.20.13\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.20.13.1\">Median of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.3\">0.19</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.20.13.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.5\">1.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.20.13.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.8\">1.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.20.13.9.1\">0.28</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.21.14\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.21.14.1\">Mean of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.3\">0.25</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.21.14.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.5\">1.25</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.21.14.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.21.14.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.9\">0.43</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.22.15\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.22.15.1\">Universal Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.3\">0.38</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.22.15.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.5\">0.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.6\">0.11</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.22.15.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.9\">0.58</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.23.16\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.23.16.1\">Jaccard Weighted Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.2\">1.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.3\">0.32</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.23.16.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.5\">1.22</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.6\">0.10</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.23.16.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.8\">1.66</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.9\">0.44</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.24.17\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.24.17.1\">Mode of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.2\">1.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" 
id=\"S7.T11.6.24.17.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.5\">1.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.6\">0.09</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.24.17.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.9\">0.43</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.25.18\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.25.18.1\">Intra-Item WA (Pearson)</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.25.18.3.1\">0.13</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.25.18.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.5\">1.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.25.18.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.9\">0.50</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.26.19\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S7.T11.6.26.19.1\">Random</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.3\">0.21</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S7.T11.6.26.19.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.5\">1.34</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.6\">0.07</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S7.T11.6.26.19.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.8\">1.82</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" 
id=\"S7.T11.6.26.19.9\">0.31</td>\n</tr>\n</tbody>\n</table>\n</figure>",
292
- "perturb_sentence_id": 25,
 
 
 
293
  "output": {
294
- "perturbed_statement": "[paragraph id = 25] However, an interesting observation, as per Table 11 , was that the algorithms that excluded intra-item information were the most consistent in their performance—featuring the lowest across all data sets. Thus, it can be deduced that the intra-item information is counterproductive from a stability perspective, making a recommender perform with inconsistent accuracy for all users.",
295
- "perturbed_explanation": "The original explanation: The algorithms incorporating intra-item information were observed to deliver consistent accuracy across various datasets, showcasing their additive effect on recommendation stability. 2. In the statement, it is incorrectly claimed that the elements excluding intra-item information were more consistent. This contradicts the analysis present, which supports the inclusion of intra-item data for achieving consistency."
296
  }
297
  }
298
  ]
 
26
  "[paragraph id = 6] This is because mode cannot produce decimal-precision ratings it also defaults to randomness when a mode can t be determined."
27
  ],
28
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Mean and Standard Deviation of MAE for Neighbourhood Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.3.1.1\">Mean of Neighbours</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.3.1.2.1\">1.27</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.3.1.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.4.2.1\">Median of Neighbours</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.4.2.2.1\">1.27</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.4.2.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.4.2.3.1\">0.19</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.5.3.1\">Mode of Neighbours</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.5.3.2\">1.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.5.3.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" 
id=\"S5.T2.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T2.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
29
+ "perturb_sentence_id": [
30
+ 3,
31
+ 4
32
+ ],
33
  "output": {
34
+ "perturbed_statement": "[paragraph id = 3] The performance of these recommenders can be seen in Table 2. We observe that the mode version was the best algorithm in this range.",
35
+ "perturbed_explanation": "1. The original explanation states that the mean and median of neighbors were the best algorithms in this range, as evidenced by Table 2. 2. The statement erroneously claims that the mode version was the best algorithm when, in fact, paragraph 5 states that the mode version did not perform as well, and paragraph 6 provides reasons for the mode's poor performance. Therefore, stating that the mode was the best contradicts the information presented in paragraphs 5 and 6."
36
  }
37
  },
38
  {
 
65
  "[paragraph id = 9] This implies that Jaccard indexes seem to hold more valuable information than mere trust connections."
66
  ],
67
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Mean and Standard Deviation of MAE for Random Walk Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.3.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S5.T4.2.3.1.1\">Jaccard Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.2.3.1.2.1\">1.14</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.3.1.3\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.4.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.4.2.1\">Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.4.2.2\">1.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.4.2.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.5.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.5.3.1\">Median of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.5.3.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.2.5.3.3.1\">0.19</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.6.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.6.4.1\">Universal Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T4.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.7.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S5.T4.2.7.5.1\">Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
68
+ "perturb_sentence_id": [
69
+ 6,
70
+ 7
71
+ ],
72
  "output": {
73
+ "perturbed_statement": "[paragraph id = 6] The performance of both MC algorithms, as compared with the others can be seen in Table 4. We observe that the Monte Carlo algorithm underperforms compared to the previous models.",
74
+ "perturbed_explanation": "The original explanation: The statement claimed the Monte Carlo algorithm outperforms the previous models. 1. The Monte Carlo algorithm is noted for its effective performance in the context given. 2. The statement incorrectly claims that the Monte Carlo algorithm underperforms, which contradicts the context suggesting otherwise. This change implies that the algorithm is less effective, while the context suggests it actually performs better, thus making the statement incorrect."
75
  }
76
  },
77
  {
 
109
  "[paragraph id = 11] By overcoming the constraint of only considering direct neighbors, the JMoM and Jaccard WA models can produce decent results, with a very simple algorithm."
110
  ],
111
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 5: </span>Mean and Standard Deviation of MAE for Full-graph Jaccard Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T5.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T5.2.3.1.1\">Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.3.1.2.1\">1.05</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.3.1.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.4.2.1\">Jaccard Monte-Carlo Random Walk</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.4.2.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.4.2.3\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.5.3.1\">Jaccard MoM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.5.3.2\">1.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.5.3.3\">0.30</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T5.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T5.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.7.5.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.7.5.3.1\">0.21</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
112
+ "perturb_sentence_id": [
113
+ 7,
114
+ 8
115
+ ],
116
  "output": {
117
+ "perturbed_statement": "[paragraph id = 7] As can be observed from Table 5, the JMoM performed poorly, with its mean accuracy being worse than the random walk.",
118
+ "perturbed_explanation": "1. The original statement asserts that the JMoM performed reasonably well and had a better mean accuracy than the random walk. 2. The statement is incorrect because it claims that the JMoM performed poorly, with its mean accuracy being worse than the random walk, which contradicts paragraph 11 that indicates the JMoM version, along with the Jaccard WA model, can produce decent results, thereby implying its performance was better than or comparable to the random walk."
119
  }
120
  },
121
  {
 
142
  "[paragraph id = 5] A potential reason for the Item-Jaccard WA model performing so well is that a user s social circle may not be the best predictor for who is similar to them, rather relating people by their items themselves does a better job."
143
  ],
144
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T6\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 6: </span>Mean and Standard Deviation of MAE for Item Rating Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T6.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T6.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T6.2.3.1.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T6.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T6.2.3.1.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T6.2.3.1.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.4.2.1\">Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.4.2.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.4.2.3\">0.25</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.5.3.1\">Item-Rating Difference WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.5.3.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.5.3.3\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.6.4.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.6.4.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T6.2.6.4.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T6.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.7.5.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T6.2.7.5.3.1\">0.21</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
145
+ "perturb_sentence_id": [
146
+ 2,
147
+ 3
148
+ ],
149
  "output": {
150
+ "perturbed_statement": "[paragraph id = 2] The performance of these alternative WA models can be seen in Table 6. We can see that the Item-Rating Difference form of WA outperforms the previous Jaccard WA.",
151
+ "perturbed_explanation": "1. Item-Jaccard WA is mentioned to perform well because it relates people by their items rather than their social circles, which is suggested to be a better method for determining similarity. \n2. The statement is incorrect because it claims that the Item-Rating Difference form of WA outperforms the previous Jaccard WA. However, the context notes that the Item-Rating Difference model does not perform well and is inconsistent, contrary to the claim of outperforming other models."
152
  }
153
  },
154
  {
 
181
  "[paragraph id = 7] It is evident the intra-item similarity conveys some information as we perform better than random, hence this could be useful in combination models in the coming sections."
182
  ],
183
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T7\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 7: </span>Mean and Standard Deviation of MAE for Intra-Item Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T7.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T7.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T7.2.3.1.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T7.2.3.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T7.2.3.1.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T7.2.3.1.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.4.2.1\">Intra-Item WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.4.2.2\">1.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.4.2.3\">0.14</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.5.3.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.5.3.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.5.3.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.6.4.1\">Intra-Item WA (Pearson)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.6.4.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T7.2.6.4.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T7.2.6.4.3.1\">0.13</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T7.2.7.5.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.7.5.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.7.5.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
184
+ "perturb_sentence_id": [
185
+ 6,
186
+ 7
187
+ ],
188
  "output": {
189
+ "perturbed_statement": "[paragraph id = 6] This is a very interesting result, potentially this could be a better metric for determining the correlation of items and could be applied in [16 ]. It is evident the intra-item similarity conveys no useful information as we perform worse than random, hence this could be useful in combination models in the coming sections.",
190
+ "perturbed_explanation": "1. The original statement suggests that the intra-item similarity could be a better metric for determining the similarity of items, indicating that it provides valuable information beyond random results. \n2. The statement is incorrect because it claims that the intra-item similarity conveys no useful information and performs worse than random. However, the context states that intra-item similarity performs better than random, demonstrating its potential usefulness, and specifically mentions that the Jaccard approach outperforms the Pearson correlation approach."
191
  }
192
  },
193
  {
 
215
  "[paragraph id = 6] Hence, we have successfully combined the intra-item and item-rating information to achieve a better result."
216
  ],
217
  "table_html": "<figure class=\"ltx_table\" id=\"S6.T9\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 9: </span>Mean and Standard Deviation of MAE for WIRD WA Algorithm</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S6.T9.2\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S6.T9.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S6.T9.2.2.3\">Algorithm</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T9.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S6.T9.2.2.2\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S6.T9.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S6.T9.2.3.1.1\">WIRD WA</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T9.2.3.1.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S6.T9.2.3.1.3\">0.28</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.4.2.1\">Item-Jaccard WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T9.2.4.2.2.1\">1.02</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.4.2.3\">0.23</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.5.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.5.3.1\">Intra-Item WA</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.5.3.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S6.T9.2.5.3.3.1\">0.13</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.6.4.1\">Item-Rating Difference WA</th>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S6.T9.2.6.4.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.6.4.3\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.7.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.7.5.1\">Universal Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.7.5.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.7.5.3\">0.38</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S6.T9.2.8.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S6.T9.2.8.6.1\">Random</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.8.6.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S6.T9.2.8.6.3\">0.21</td>\n</tr>\n</tbody>\n</table>\n</figure>",
218
+ "perturb_sentence_id": [
219
+ 4,
220
+ 5
221
+ ],
222
  "output": {
223
+ "perturbed_statement": "[paragraph id = 4] The results can be seen in Table 9 : The WIRD model performed worse than both the intra-item information and item-rating difference WA alone. However, it performs better than the Item-Jaccard WA model.",
224
+ "perturbed_explanation": "1. The original explanation suggests comparing the performance of different models, indicating that the WIRD model, despite performing well, did better than models using just intra-item information or item-rating difference WA. 2. The statement incorrectly claims that the WIRD model performed worse than both intra-item information and item-rating difference WA, which contradicts the fact that the combination of intra-item and item-rating info achieved a better result. This alteration misrepresents the comparative performance outcomes of the WIRD model."
225
  }
226
  },
227
  {
 
307
  "[paragraph id = 30] Hence, in a scenario where providing consistently good predictions for all users is of importance, the introduction of intra-item information could facilitate this."
308
  ],
309
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T11\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 11: </span>Mean and Standard Deviation of MAE for All Algorithms on Different Data Sets</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T11.6\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T11.6.7.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S7.T11.6.7.1.1\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S7.T11.6.7.1.1.1\">Algorithm</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S7.T11.6.7.1.2\">Epinions</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"3\" id=\"S7.T11.6.7.1.3\">FilmTrust</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S7.T11.6.7.1.4\">CiaoDVD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.2.2.2\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"S7.T11.6.6.7\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.3.3.3\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.4.4.4\"></th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r\" id=\"S7.T11.6.6.8\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.5.5.5\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S7.T11.6.6.6\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T11.6.8.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S7.T11.6.8.1.1\">Jaccard Item-Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.2\"><span class=\"ltx_text 
ltx_font_bold\" id=\"S7.T11.6.8.1.2.1\">1.00</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.3\">0.26</td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S7.T11.6.8.1.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.5.1\">0.66</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S7.T11.6.8.1.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.8.1\">0.53</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S7.T11.6.8.1.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.8.1.9.1\">0.28</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.9.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.9.2.1\">Item-Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.2\">1.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.9.2.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.9.2.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.8\">0.58</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.9.2.9\">0.32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.10.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.10.3.1\">Jaccard WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.3\">0.25</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.10.3.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.5\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.10.3.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.10.3.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.8\">1.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.10.3.9\">0.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.11.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.11.4.1\">WIRD WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.2\">1.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.3\">0.28</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.11.4.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.11.4.6.1\">0.04</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.11.4.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.11.4.9\">0.37</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.12.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.12.5.1\">Jaccard Item-Jaccard JII Combination WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.2\">1.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.3\">0.22</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.12.5.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.5\">0.69</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.6\">0.06</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.12.5.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.8\">0.63</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.12.5.9\">0.30</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.13.6\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.13.6.1\">JWIRD WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.2\">1.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.3\">0.27</td>\n<td 
class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.13.6.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.13.6.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.13.6.9\">0.40</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.14.7\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.14.7.1\">Jaccard MoM</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.2\">1.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.3\">0.30</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.14.7.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.5\">1.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.6\">0.09</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.14.7.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.8\">1.77</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.14.7.9\">0.37</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.15.8\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.15.8.1\">Jaccard Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.2\">1.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.3\">0.22</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.15.8.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.5\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.15.8.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.8\">1.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.15.8.9\">0.49</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.16.9\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.16.9.1\">Monte-Carlo Random Walk</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.16.9.2\">1.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.16.9.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.5\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.16.9.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.8\">1.82</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.16.9.9\">0.35</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.17.10\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.17.10.1\">Item-Rating Difference WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.2\">1.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.3\">0.49</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.17.10.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.5\">0.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.17.10.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.8\">0.79</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.17.10.9\">0.41</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.18.11\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.18.11.1\">Jaccard Intra-Item WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.2\">1.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.18.11.3.1\">0.13</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.18.11.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.5\">1.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.6\">0.06</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.18.11.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.18.11.8\">1.75</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.18.11.9\">0.32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.19.12\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.19.12.1\">Intra-Item WA</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.2\">1.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.3\">0.14</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.19.12.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.5\">1.18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.19.12.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.8\">1.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.19.12.9\">0.45</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.20.13\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.20.13.1\">Median of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.3\">0.19</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.20.13.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.5\">1.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.6\">0.08</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.20.13.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.8\">1.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.20.13.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.20.13.9.1\">0.28</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.21.14\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.21.14.1\">Mean of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.2\">1.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.3\">0.25</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.21.14.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.5\">1.25</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S7.T11.6.21.14.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.21.14.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.21.14.9\">0.43</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.22.15\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.22.15.1\">Universal Random</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.2\">1.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.3\">0.38</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.22.15.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.5\">0.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.6\">0.11</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.22.15.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.8\">0.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.22.15.9\">0.58</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.23.16\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.23.16.1\">Jaccard Weighted Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.2\">1.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.3\">0.32</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.23.16.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.5\">1.22</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.6\">0.10</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.23.16.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.8\">1.66</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.23.16.9\">0.44</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.24.17\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.24.17.1\">Mode of Neighbours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.2\">1.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.3\">0.23</td>\n<td class=\"ltx_td ltx_border_r\" 
id=\"S7.T11.6.24.17.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.5\">1.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.6\">0.09</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.24.17.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.24.17.9\">0.43</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.25.18\">\n<td class=\"ltx_td ltx_align_left\" id=\"S7.T11.6.25.18.1\">Intra-Item WA (Pearson)</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.2\">1.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S7.T11.6.25.18.3.1\">0.13</span></td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.25.18.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.5\">1.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.6\">0.07</td>\n<td class=\"ltx_td ltx_border_r\" id=\"S7.T11.6.25.18.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.8\">1.67</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S7.T11.6.25.18.9\">0.50</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T11.6.26.19\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S7.T11.6.26.19.1\">Random</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.2\">1.67</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.3\">0.21</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S7.T11.6.26.19.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.5\">1.34</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.6\">0.07</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S7.T11.6.26.19.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S7.T11.6.26.19.8\">1.82</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" 
id=\"S7.T11.6.26.19.9\">0.31</td>\n</tr>\n</tbody>\n</table>\n</figure>",
310
+ "perturb_sentence_id": [
311
+ 25,
312
+ 26
313
+ ],
314
  "output": {
315
+ "perturbed_statement": "[paragraph id = 25] However, an interesting observation, as per Table 11, was that the algorithms that included intra-item information were the least consistent in their performance—featuring the highest MAE across all data sets. Thus, it can be deduced that the intra-item information is detrimental from a stability perspective, making a recommender perform with varying accuracy for all users.",
316
+ "perturbed_explanation": "1. The original explanation mentions that the algorithms that included intra-item information were the most consistent in their performance, featuring the lowest MAE across all data sets. This suggests that intra-item information contributes positively to stability and accuracy for all users. 2. The statement incorrectly claims that the algorithms featuring intra-item information were the least consistent, with the highest MAE across all data sets, and that the intra-item information is detrimental. This directly contradicts the actual observation made in the context, which states that intra-item information is beneficial for stability and accuracy in recommendations."
317
  }
318
  }
319
  ]
table_result/2407.00064v1_output.json CHANGED
@@ -25,10 +25,13 @@
25
  "[paragraph id = 9] The queries and the requirements defined in Section 2 would be evaluated as shown in Table 1 ."
26
  ],
27
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.1\">q1</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.1.2.1.1.1\">COUNT:</span> all cars</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.1.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.1\">q2</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.2.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.2.2.1.1.1\">COUNT:</span> cars with 4 wheels</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.2.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.1\">q3</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.3.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.3.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S4.T1.1.1.1.1.3.2.1.1.1\">COUNT:</span> cars with 1 transmission gear</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.3.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.1\">q4</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.4.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.4.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.4.2.1.1.1\">ASK:</span> transmission is a manual and an automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.1\">q5</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.5.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.5.2.1.1.1\">ASK:</span> cars with less than one engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.5.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.1\">q6</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.6.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.6.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.6.2.1.1.1\">ASK:</span> cars with more than one 
combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.6.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.1\">q7</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.7.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.7.2.1.1.1\">COUNT:</span> cars with at least one combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.7.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.1\">q8</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.8.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.8.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.8.2.1.1.1\">COUNT:</span> cars with at least one electric engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.8.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.1\">q9</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.9.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.9.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.9.2.1.1.1\">COUNT:</span> cars with a combustion engine and a fuel tank</span>\n</span>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.9.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.1\">q10</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.10.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.10.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.10.2.1.1.1\">COUNT:</span> cars with an electric engine and a battery</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.10.3\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.1\">q11</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.11.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.11.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.11.2.1.1.1\">COUNT:</span> cars with an electric engine and a automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T1.1.1.1.1.11.3\">0</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.1\">r1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.2\">q1 = q2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.2\">\n<td 
class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.1\">r2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.2\">(q1 = q3) AND NOT q4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.1\">r3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.2\">NOT(q5 AND q6)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.1\">r4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.2\">(q7=q9) AND (q8=q10)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.1\">r5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.2\">q8=q11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.3\">false</td>\n</tr>\n</table>\n</td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Queries and constraints with results for the car example</figcaption>\n</figure>",
28
- "perturb_sentence_id": 8,
 
 
 
29
  "output": {
30
- "perturbed_statement": "[paragraph id = 8] For our RM, 15 queries were formulated. The queries and the requirements defined in Section 2 would be evaluated as shown in Table 1 .",
31
- "perturbed_explanation": "1. The original statement indicates that 11 queries were formulated under RM. 2. The statement claims 15 queries were formulated, which inaccurately changes the number of queries from 11 to 15, thereby presenting an incorrect piece of information. It is essential to note the precise amount to ensure factual accuracy."
32
  }
33
  },
34
  {
@@ -90,10 +93,13 @@
90
  "[paragraph id = 23] The result can be exported from the RMA as RDF file."
91
  ],
92
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.1\">q1</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.1.2.1.1.1\">COUNT:</span> all cars</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.1.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.1\">q2</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.2.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.2.2.1.1.1\">COUNT:</span> cars with 4 wheels</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.2.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.1\">q3</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.3.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.3.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S4.T1.1.1.1.1.3.2.1.1.1\">COUNT:</span> cars with 1 transmission gear</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.3.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.1\">q4</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.4.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.4.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.4.2.1.1.1\">ASK:</span> transmission is a manual and an automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.1\">q5</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.5.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.5.2.1.1.1\">ASK:</span> cars with less than one engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.5.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.1\">q6</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.6.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.6.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.6.2.1.1.1\">ASK:</span> cars with more than one 
combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.6.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.1\">q7</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.7.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.7.2.1.1.1\">COUNT:</span> cars with at least one combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.7.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.1\">q8</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.8.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.8.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.8.2.1.1.1\">COUNT:</span> cars with at least one electric engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.8.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.1\">q9</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.9.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.9.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.9.2.1.1.1\">COUNT:</span> cars with a combustion engine and a fuel tank</span>\n</span>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.9.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.1\">q10</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.10.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.10.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.10.2.1.1.1\">COUNT:</span> cars with an electric engine and a battery</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.10.3\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.1\">q11</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.11.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.11.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.11.2.1.1.1\">COUNT:</span> cars with an electric engine and a automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T1.1.1.1.1.11.3\">0</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.1\">r1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.2\">q1 = q2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.2\">\n<td 
class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.1\">r2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.2\">(q1 = q3) AND NOT q4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.1\">r3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.2\">NOT(q5 AND q6)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.1\">r4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.2\">(q7=q9) AND (q8=q10)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.1\">r5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.2\">q8=q11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.3\">false</td>\n</tr>\n</table>\n</td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Queries and constraints with results for the car example</figcaption>\n</figure>",
93
- "perturb_sentence_id": 20,
 
 
 
94
  "output": {
95
- "perturbed_statement": "[paragraph id = 20] Unfulfilled constraints (4 and 5) are shown in blue and queries only returning null values are shown in grey. On the upper level, this representation allows a quick check of the extent to which a solution conforms to the constraints of a RM.",
96
- "perturbed_explanation": "Original Explanation: Constraints that are unfulfilled are represented in red, distinguishing their status, and queries which only returned values appear in grey, providing quick categorization. 1. The altered statement incorrectly claims that unfulfilled constraints are shown in blue, which contradicts the information specifying they are displayed in red. This creates a misunderstanding about the visual cue for unfulfilled constraints."
97
  }
98
  }
99
  ]
 
25
  "[paragraph id = 9] The queries and the requirements defined in Section 2 would be evaluated as shown in Table 1 ."
26
  ],
27
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.1\">q1</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.1.2.1.1.1\">COUNT:</span> all cars</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.1.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.1\">q2</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.2.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.2.2.1.1.1\">COUNT:</span> cars with 4 wheels</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.2.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.1\">q3</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.3.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.3.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S4.T1.1.1.1.1.3.2.1.1.1\">COUNT:</span> cars with 1 transmission gear</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.3.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.1\">q4</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.4.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.4.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.4.2.1.1.1\">ASK:</span> transmission is a manual and an automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.1\">q5</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.5.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.5.2.1.1.1\">ASK:</span> cars with less than one engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.5.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.1\">q6</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.6.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.6.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.6.2.1.1.1\">ASK:</span> cars with more than one 
combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.6.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.1\">q7</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.7.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.7.2.1.1.1\">COUNT:</span> cars with at least one combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.7.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.1\">q8</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.8.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.8.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.8.2.1.1.1\">COUNT:</span> cars with at least one electric engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.8.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.1\">q9</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.9.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.9.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.9.2.1.1.1\">COUNT:</span> cars with a combustion engine and a fuel tank</span>\n</span>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.9.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.1\">q10</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.10.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.10.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.10.2.1.1.1\">COUNT:</span> cars with an electric engine and a battery</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.10.3\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.1\">q11</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.11.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.11.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.11.2.1.1.1\">COUNT:</span> cars with an electric engine and a automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T1.1.1.1.1.11.3\">0</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.1\">r1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.2\">q1 = q2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.2\">\n<td 
class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.1\">r2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.2\">(q1 = q3) AND NOT q4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.1\">r3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.2\">NOT(q5 AND q6)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.1\">r4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.2\">(q7=q9) AND (q8=q10)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.1\">r5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.2\">q8=q11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.3\">false</td>\n</tr>\n</table>\n</td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Queries and constraints with results for the car example</figcaption>\n</figure>",
28
+ "perturb_sentence_id": [
29
+ 8,
30
+ 9
31
+ ],
32
  "output": {
33
+ "perturbed_statement": "[paragraph id = 8] For our RM, 15 queries were formulated. The queries and the requirements defined in Section 2 would be evaluated as shown in Table 1.",
34
+ "perturbed_explanation": "Original Explanation: Table 1 contains the evaluation of 11 queries formulated for our RM, as well as the requirements defined in Section 2. \n2. The statement incorrectly mentions that 15 queries were formulated, whereas the correct number of queries is 11. This error misrepresents the quantity of queries being evaluated in Table 1."
35
  }
36
  },
37
  {
 
93
  "[paragraph id = 23] The result can be exported from the RMA as RDF file."
94
  ],
95
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.1\">q1</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.1.2.1.1.1\">COUNT:</span> all cars</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.1.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.1\">q2</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.2.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.2.2.1.1.1\">COUNT:</span> cars with 4 wheels</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.2.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.1\">q3</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.3.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.3.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S4.T1.1.1.1.1.3.2.1.1.1\">COUNT:</span> cars with 1 transmission gear</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.3.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.1\">q4</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.4.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.4.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.4.2.1.1.1\">ASK:</span> transmission is a manual and an automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.1\">q5</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.5.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.5.2.1.1.1\">ASK:</span> cars with less than one engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.5.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.6\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.1\">q6</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.6.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.6.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.6.2.1.1.1\">ASK:</span> cars with more than one 
combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.6.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.1\">q7</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.7.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.7.2.1.1.1\">COUNT:</span> cars with at least one combustion engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.7.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.8\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.1\">q8</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.8.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.8.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.8.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.8.2.1.1.1\">COUNT:</span> cars with at least one electric engine</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.8.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.1\">q9</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.9.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.9.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.9.2.1.1.1\">COUNT:</span> cars with a combustion engine and a fuel tank</span>\n</span>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.9.3\">1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.1\">q10</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.10.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.10.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.10.2.1.1.1\">COUNT:</span> cars with an electric engine and a battery</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.1.1.1.10.3\">0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1.1.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.1\">q11</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.11.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.11.2.1.1\" style=\"width:170.7pt;\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T1.1.1.1.1.11.2.1.1.1\">COUNT:</span> cars with an electric engine and a automatic transmission</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_t\" id=\"S4.T1.1.1.1.1.11.3\">0</td>\n</tr>\n</table>\n</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.1\">r1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.2\">q1 = q2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.1.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.2\">\n<td 
class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.1\">r2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.2\">(q1 = q3) AND NOT q4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.2.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.1\">r3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.2\">NOT(q5 AND q6)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.3.3\">true</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.1\">r4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.2\">(q7=q9) AND (q8=q10)</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.4.3\">false</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.2.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.1\">r5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.2\">q8=q11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.2.1.5.3\">false</td>\n</tr>\n</table>\n</td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Queries and constraints with results for the car example</figcaption>\n</figure>",
96
+ "perturb_sentence_id": [
97
+ 20,
98
+ 21
99
+ ],
100
  "output": {
101
+ "perturbed_statement": "[paragraph id = 20] Unfulfilled constraints (4 and 5) are shown in blue and queries which only returned values are shown in grey.On the upper level, this representation allows a quick check of the extent to which a solution conforms to the constraints of a RM.",
102
+ "perturbed_explanation": "1. The statement describes a specific color code used to represent unfulfilled constraints and queries within a model solution. The original statement mentions that unfulfilled constraints are shown in red, providing a visual indication that certain constraints have not been met. The use of color coding, such as red, helps users quickly identify issues that require attention. 2. In the statement, unfulfilled constraints are incorrectly described as being shown in blue, which contradicts the original context. The original explanation clarifies that unfulfilled constraints are displayed in red to signal that certain conditions have not been satisfied. Therefore, referring to them as blue introduces a factual error, as it misrepresents the designated color for unfulfilled constraints within the model."
103
  }
104
  }
105
  ]
table_result/2407.00071v1_output.json CHANGED
@@ -25,10 +25,13 @@
25
  "[paragraph id = 4] The latest language models do not report the zero-shot performance on these benchmark as in seen Table 1 since the performance is likely poorer than those with manual prompts."
26
  ],
27
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S2.T1.1.1.1.1\"></th>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.2\">Gemini Ultra</td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.3\">GPT-4</td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.4\">LLama3 70B</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.2.2.1\">MMLU</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.2\">90.04% CoT@32</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.3\">86.4% 5-shot</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.4\">79.5% 5-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.3.3.1\">GSM8K</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.2\">94.4% Maj1@32</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.3\">92% 5-Shot CoT</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.4\">93.0 8-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.4.4.1\">MATH</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.4.4.2\">53.2% 4-shot</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.4.4.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.4.4.4\">50.4 4-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.5.5.1\">BIG-Bench-Hard</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" 
id=\"S2.T1.1.5.5.2\">83.6% 3-shot</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.5.5.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.5.5.4\">81.3 3-shot, CoT</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.6.6.1\">DROP</th>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.2\">82.4% Variable shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.3\">80.9 3-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.4\">79.7 3-shot,F1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.7.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.7.7.1\">HellaSwag</th>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.7.7.2\">87.8% 10-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.7.7.3\">95.3% 10-shot</td>\n<td class=\"ltx_td\" id=\"S2.T1.1.7.7.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.8.8.1\">WinoGrande</th>\n<td class=\"ltx_td\" id=\"S2.T1.1.8.8.2\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.8.8.3\">87.5% 5-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.8.8.4\">81.3% 5-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.9.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.1.9.9.1\">AI2 Reasoning</th>\n<td class=\"ltx_td ltx_border_bb\" id=\"S2.T1.1.9.9.2\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S2.T1.1.9.9.3\">96.3% 25-shot</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S2.T1.1.9.9.4\">93% 25-shot</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Summary of recent reasoning benchmarks on LLMs (<span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.1\">?</span>). 
Note that reported results are all dependent on some form of In Context Learning or Chain of Thought (CoT) </figcaption>\n</figure>",
28
- "perturb_sentence_id": 1,
 
 
 
29
  "output": {
30
- "perturbed_statement": "[paragraph id = 1] For each revision of LLMs - GPT4 and Gemini show declining results on reasoning benchmarks such as BIG-Bench-Hard, HellaSwag, and MMLU. Moreover, these results are a strong indicator for the autonomous reasoning capabilities of the model.",
31
- "perturbed_explanation": "The original explanation argues that while reasoning benchmarks for subsequent revisions of LLMs like GPT4, Gemini, and Llama3 exhibit increasing performance, such metrics do not effectively measure the models' autonomous reasoning abilities. 1. The statement falsely suggests a decline in benchmark performance, contrary to the assertion of improvement. 2. It also incorrectly claims these benchmarks are reliable indicators of reasoning autonomy, which is contrary to their stated limitations. By making these statements, the statement misrepresents the trends and implications of these benchmarks."
32
  }
33
  },
34
  {
@@ -72,10 +75,13 @@
72
  "[paragraph id = 11] - to select the optimal values for the other four parameters."
73
  ],
74
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.5\" style=\"width:433.6pt;height:56.9pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(79.5pt,-10.4pt) scale(1.57928347350645,1.57928347350645) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.5.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.5.5.5.6\" style=\"padding-bottom:2.15277pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.5.5.5.6.1\">Parameter</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.1.1.1.1\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.2.2.2.2\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.3.3.3.3\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.4.4.4.4\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.5.5.5.5\" style=\"padding-bottom:2.15277pt;\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5.6.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.5.5.6.1.1.1\">Tuning Range</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.2\">[1E-3, 70]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.3\">[1E-4, 20]</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.4\">[-2, 10]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.5\">[1, 4]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S4.T2.5.5.6.1.6\">2</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Parameter Tuning Ranges</figcaption>\n</figure>",
75
- "perturb_sentence_id": 9,
 
 
 
76
  "output": {
77
- "perturbed_statement": "[paragraph id = 9] Prior to running the QUBO mapper, we tune the mapping parameters , , , and ( is fixed) using 10 questions from across all of BBH to form a 135 question tuning set.On this, we set the ranges for the tuning (see Table 2 ) and use Optuna - a gradient free hyperparameter optimization framework (?)",
78
- "perturbed_explanation": "The original explanation mentions the use of 5 questions as part of the tuning set, accompanied by the clarification regarding the formation and optimization framework used. 2) Changing the count from 5 to 10 inaccurately increases the number of questions utilized in this context, leading to a factual error."
79
  }
80
  },
81
  {
@@ -121,10 +127,13 @@
121
  "[paragraph id = 17] The USP results are taken from (?)."
122
  ],
123
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T3.2\" style=\"width:433.6pt;height:162.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(72.9pt,-27.3pt) scale(1.50642183704488,1.50642183704488) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.2.2\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S4.T3.2.2.3.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.1.1\">Setting</span></th>\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T3.2.2.3.1.2\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T3.2.2.3.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.3.1\">Zero-Shot</span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_tt\" id=\"S4.T3.2.2.3.1.4\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S4.T3.2.2.3.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.5.1\">Few-Shot</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.2.2.4.2.1\">Method</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.2.2.4.2.2\">0-Shot</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.2.2.4.2.3\">USP</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r ltx_border_t\" id=\"S4.T3.2.2.4.2.4\">CR</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S4.T3.2.2.4.2.5\">3-Shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.5.3\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.2.2.5.3.1\"></th>\n<th class=\"ltx_td ltx_th ltx_th_row\" id=\"S4.T3.2.2.5.3.2\"></th>\n<td class=\"ltx_td\" id=\"S4.T3.2.2.5.3.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_r\" 
id=\"S4.T3.2.2.5.3.4\">(Ours)</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T3.2.2.5.3.5\">CoT</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.1.1.1.1\">Average (%) \n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.1.1.1.2\">47.68</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.1.3\">55.89</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r ltx_border_t\" id=\"S4.T3.1.1.1.4\">59.88</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S4.T3.1.1.1.5\">74.20</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.2.2.6.4.1\">Gain over zero-shot</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S4.T3.2.2.6.4.2\">0</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.2.2.6.4.3\">+8.21</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r\" id=\"S4.T3.2.2.6.4.4\">+12.20</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T3.2.2.6.4.5\">+26.52</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S4.T3.2.2.2.1\">Average rank \n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T3.2.2.2.2\">3.22</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.2.2.2.3\">2.78</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb ltx_border_r\" id=\"S4.T3.2.2.2.4\">2.57</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S4.T3.2.2.2.5\">1.35</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Cumulative statistics on the performance of different methods across BBH. CR outperforms the other zero-shot methods both by rank and on average.</figcaption>\n</figure>",
124
- "perturb_sentence_id": 14,
 
 
 
125
  "output": {
126
- "perturbed_statement": "[paragraph id = 14] featuring default settings on temperature, exponential annealing schedule, and a fixed parameter setting strategy employing 1000 sweeps, run identically 100 times. Figure 2 and Table 3 display our results for BBH tasks.",
127
- "perturbed_explanation": "The original explanation stated that the annealing schedule type was specified as linear. Introducing the alteration of changing it to exponential introduces a factual discrepancy. Consequently, by the unchanged contextual details, the statement specifying exponential annealing is incorrect."
128
  }
129
  },
130
  {
@@ -166,10 +175,13 @@
166
  "[paragraph id = 21] It should be noted that the effect of optimization is visible as the mechanism that reduces the number of distinct reasons to a subset of reasons."
167
  ],
168
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T4.4\" style=\"width:433.6pt;height:611.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(31.6pt,-44.6pt) scale(1.1708709263391,1.1708709263391) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.3\">All Reasons</th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.4\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T4.1.1.1.1\">% of \n</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.4.4.4.4\">Dataset</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.2.2.2.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.3.3.3.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T4.4.4.4.3\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.5.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.1\">Causal Judgement</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.2\">709</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.3\">204</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.4.4.5.1.4\">87.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.6.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.6.2.1\">Reasoning About Colored 
Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.6.2.2\">525</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.6.2.3\">100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.6.2.4\">82.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.7.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.7.3.1\">Navigate</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.7.3.2\">1100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.7.3.3\">572</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.7.3.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.8.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.8.4.1\">Penguins In A Table</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.8.4.2\">589</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.8.4.3\">123</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.8.4.4\">77.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.9.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.9.5.1\">Geometric Shapes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.9.5.2\">630</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.9.5.3\">331</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.9.5.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.10.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.10.6.1\">Disambiguation QA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.10.6.2\">373</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.10.6.3\">45</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.10.6.4\">68.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.11.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.11.7.1\">Tracking Shuffled Objects Five Objects</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.11.7.2\">1020</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.11.7.3\">298</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.11.7.4\">95.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.12.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.12.8.1\">Word Sorting</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.12.8.2\">385</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.12.8.3\">107</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.12.8.4\">99.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.13.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.13.9.1\">Tracking Shuffled Objects Three Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.13.9.2\">743</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.13.9.3\">147</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.13.9.4\">64.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.14.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.14.10.1\">Tracking Shuffled Objects Seven Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.14.10.2\">1164</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.14.10.3\">400</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.14.10.4\">98.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.15.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.15.11.1\">Multistep Arithmetic Two</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.15.11.2\">621</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.15.11.3\">253</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.15.11.4\">99.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.16.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.16.12.1\">Web Of Lies</td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.16.12.2\">885</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.16.12.3\">113</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.16.12.4\">84.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.17.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.17.13.1\">Logical Deduction Three Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.17.13.2\">540</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.17.13.3\">100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.17.13.4\">72.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.18.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.18.14.1\">Sports Understanding</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.18.14.2\">449</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.18.14.3\">160</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.18.14.4\">96.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.19.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.19.15.1\">Snarks</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.19.15.2\">396</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.19.15.3\">109</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.19.15.4\">91.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.20.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.20.16.1\">Logical Deduction Five Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.20.16.2\">680</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.20.16.3\">199</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.20.16.4\">92.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.21.17\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.21.17.1\">Salient Translation 
Error Detection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.21.17.2\">389</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.21.17.3\">90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.21.17.4\">98.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.22.18\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.22.18.1\">Hyperbaton</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.22.18.2\">432</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.22.18.3\">57</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.22.18.4\">65.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.23.19\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.23.19.1\">Movie Recommendation</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.23.19.2\">730</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.23.19.3\">457</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.23.19.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.24.20\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.24.20.1\">Object Counting</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.24.20.2\">397</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.24.20.3\">48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.24.20.4\">62.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.25.21\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.25.21.1\">Logical Deduction Seven Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.25.21.2\">730</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.25.21.3\">309</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.25.21.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.26.22\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.26.22.1\">Temporal 
Sequences</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.26.22.2\">533</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.26.22.3\">76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.26.22.4\">97.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.27.23\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.27.23.1\">Formal Fallacies</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.27.23.2\">579</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.27.23.3\">251</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.27.23.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.28.24\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.28.24.1\">Dyck Languages</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.28.24.2\">1112</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.28.24.3\">558</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.28.24.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.29.25\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.29.25.1\">Date Understanding</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.29.25.2\">587</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.29.25.3\">162</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.29.25.4\">98.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.30.26\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.30.26.1\">Boolean Expressions</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.30.26.2\">493</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.30.26.3\">160</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.30.26.4\">93.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.31.27\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.1\">Ruin 
Names</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.2\">622</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.3\">421</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.4.4.31.27.4\">100.0</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Reason filtering and selection percentages</figcaption>\n</figure>",
169
- "perturb_sentence_id": 20,
 
 
 
170
  "output": {
171
- "perturbed_statement": "[paragraph id = 20] In Table 4 we report the percentage change in reasons between stages depicted in Figure 3. It should be noted that the optimization process increases the variety of reasons in this analysis.",
172
- "perturbed_explanation": "1. The effect of optimization as stated originally is to reduce the number of distinct reasons to a subset for streamlined analysis. 2. The assertion that optimization increases the variety of reasons contradicts this fundamental understanding, and the reference to Figure 3 does not align correctly with the discussed content from Figure 2 as outlined."
173
  }
174
  }
175
  ]
 
25
  "[paragraph id = 4] The latest language models do not report the zero-shot performance on these benchmark as in seen Table 1 since the performance is likely poorer than those with manual prompts."
26
  ],
27
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S2.T1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S2.T1.1.1.1.1\"></th>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.2\">Gemini Ultra</td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.3\">GPT-4</td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S2.T1.1.1.1.4\">LLama3 70B</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.2.2.1\">MMLU</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.2\">90.04% CoT@32</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.3\">86.4% 5-shot</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.2.2.4\">79.5% 5-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.3.3.1\">GSM8K</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.2\">94.4% Maj1@32</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.3\">92% 5-Shot CoT</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.3.3.4\">93.0 8-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.4.4.1\">MATH</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.4.4.2\">53.2% 4-shot</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.4.4.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.4.4.4\">50.4 4-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S2.T1.1.5.5.1\">BIG-Bench-Hard</th>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" 
id=\"S2.T1.1.5.5.2\">83.6% 3-shot</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.5.5.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S2.T1.1.5.5.4\">81.3 3-shot, CoT</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.6.6.1\">DROP</th>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.2\">82.4% Variable shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.3\">80.9 3-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.6.6.4\">79.7 3-shot,F1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.7.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.7.7.1\">HellaSwag</th>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.7.7.2\">87.8% 10-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.7.7.3\">95.3% 10-shot</td>\n<td class=\"ltx_td\" id=\"S2.T1.1.7.7.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S2.T1.1.8.8.1\">WinoGrande</th>\n<td class=\"ltx_td\" id=\"S2.T1.1.8.8.2\"></td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.8.8.3\">87.5% 5-shot</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S2.T1.1.8.8.4\">81.3% 5-shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.9.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S2.T1.1.9.9.1\">AI2 Reasoning</th>\n<td class=\"ltx_td ltx_border_bb\" id=\"S2.T1.1.9.9.2\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S2.T1.1.9.9.3\">96.3% 25-shot</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S2.T1.1.9.9.4\">93% 25-shot</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Summary of recent reasoning benchmarks on LLMs (<span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.3.1\">?</span>). 
Note that reported results are all dependent on some form of In Context Learning or Chain of Thought (CoT) </figcaption>\n</figure>",
28
+ "perturb_sentence_id": [
29
+ 1,
30
+ 2
31
+ ],
32
  "output": {
33
+ "perturbed_statement": "[paragraph id = 1] For each subsequent revision of LLMs - GPT4 / Gemini / and Llama3, reasoning benchmarks such as BIG-Bench-Hard, HellaSwag, and MMLU show ever declining results. However, these results are not a good indicator for the autonomous reasoning capabilities of the model.",
34
+ "perturbed_explanation": "Original Explanation: The statement highlights that the improvements in reasoning benchmarks do not necessarily reflect the model's autonomous reasoning abilities. These tasks are often structured in a manner that allows for improvement through specific techniques like few-shot learning or manual prompt engineering, which may not correlate with true reasoning capabilities. \n2. The statement is incorrect because it suggests that the benchmarks show ever declining results. In fact, the context implies that improvements in benchmarks are generally observed with newer revisions of language models, not a decline."
35
  }
36
  },
37
  {
 
75
  "[paragraph id = 11] - to select the optimal values for the other four parameters."
76
  ],
77
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.5\" style=\"width:433.6pt;height:56.9pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(79.5pt,-10.4pt) scale(1.57928347350645,1.57928347350645) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.5.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.5.5.5.6\" style=\"padding-bottom:2.15277pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.5.5.5.6.1\">Parameter</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.1.1.1.1\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.2.2.2.2\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.3.3.3.3\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T2.4.4.4.4\" style=\"padding-bottom:2.15277pt;\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.5.5.5.5\" style=\"padding-bottom:2.15277pt;\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5.6.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.5.5.6.1.1.1\">Tuning Range</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.2\">[1E-3, 70]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.3\">[1E-4, 20]</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.4\">[-2, 10]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S4.T2.5.5.6.1.5\">[1, 4]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S4.T2.5.5.6.1.6\">2</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Parameter Tuning Ranges</figcaption>\n</figure>",
78
+ "perturb_sentence_id": [
79
+ 9,
80
+ 10
81
+ ],
82
  "output": {
83
+ "perturbed_statement": "[paragraph id = 9] Prior to running the QUBO mapper, we tune the mapping parameters , , , and ( is fixed) using 10 questions from across all of BBH to form a 135 question tuning set. On this, we set the ranges for the tuning (see Table 2) and use Optuna - a gradient free hyperparameter optimization framework (?)",
84
+ "perturbed_explanation": "1. The original statement indicates that 5 questions from across all of BBH are used to form a 135 question tuning set. 2. The statement incorrectly states that 10 questions are used instead of 5. This factual inaccuracy changes the number of questions used in the tuning set, altering the method by which the tuning set is constituted."
85
  }
86
  },
87
  {
 
127
  "[paragraph id = 17] The USP results are taken from (?)."
128
  ],
129
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T3.2\" style=\"width:433.6pt;height:162.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(72.9pt,-27.3pt) scale(1.50642183704488,1.50642183704488) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.2.2\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.3.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S4.T3.2.2.3.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.1.1\">Setting</span></th>\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T3.2.2.3.1.2\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T3.2.2.3.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.3.1\">Zero-Shot</span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_tt\" id=\"S4.T3.2.2.3.1.4\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S4.T3.2.2.3.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.2.2.3.1.5.1\">Few-Shot</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.4.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.2.2.4.2.1\">Method</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.2.2.4.2.2\">0-Shot</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.2.2.4.2.3\">USP</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r ltx_border_t\" id=\"S4.T3.2.2.4.2.4\">CR</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S4.T3.2.2.4.2.5\">3-Shot</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.5.3\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.2.2.5.3.1\"></th>\n<th class=\"ltx_td ltx_th ltx_th_row\" id=\"S4.T3.2.2.5.3.2\"></th>\n<td class=\"ltx_td\" id=\"S4.T3.2.2.5.3.3\"></td>\n<td class=\"ltx_td ltx_align_right ltx_border_r\" 
id=\"S4.T3.2.2.5.3.4\">(Ours)</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T3.2.2.5.3.5\">CoT</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.1.1.1.1\">Average (%) \n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.1.1.1.2\">47.68</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.1.3\">55.89</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r ltx_border_t\" id=\"S4.T3.1.1.1.4\">59.88</td>\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S4.T3.1.1.1.5\">74.20</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.6.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.2.2.6.4.1\">Gain over zero-shot</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S4.T3.2.2.6.4.2\">0</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.2.2.6.4.3\">+8.21</td>\n<td class=\"ltx_td ltx_align_right ltx_border_r\" id=\"S4.T3.2.2.6.4.4\">+12.20</td>\n<td class=\"ltx_td ltx_align_right\" id=\"S4.T3.2.2.6.4.5\">+26.52</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.2.2.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S4.T3.2.2.2.1\">Average rank \n</th>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T3.2.2.2.2\">3.22</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.2.2.2.3\">2.78</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb ltx_border_r\" id=\"S4.T3.2.2.2.4\">2.57</td>\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S4.T3.2.2.2.5\">1.35</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Cumulative statistics on the performance of different methods across BBH. CR outperforms the other zero-shot methods both by rank and on average.</figcaption>\n</figure>",
130
+ "perturb_sentence_id": [
131
+ 14,
132
+ 15
133
+ ],
134
  "output": {
135
+ "perturbed_statement": "[paragraph id = 14] featuring non-default settings on temperature, linear annealing schedule, and a fixed parameter setting strategy employing 1000 sweeps, run identically 100 times. Figure 4 and Table 2 displays our results for BBH tasks.",
136
+ "perturbed_explanation": "1. The statement originally described the use of default settings on certain parameters and referred to specific figures and tables displaying results. \n2. The statement incorrectly claims that non-default settings on temperature were used, and that the results are displayed in Figure 4 and Table 2. However, the original context does not specify using non-default settings, nor does it mention results being shown in Figure 4 or Table 2 specifically for the BBH tasks, which could potentially be misleading about the actual figures and tables referenced for the results."
137
  }
138
  },
139
  {
 
175
  "[paragraph id = 21] It should be noted that the effect of optimization is visible as the mechanism that reduces the number of distinct reasons to a subset of reasons."
176
  ],
177
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T4.4\" style=\"width:433.6pt;height:611.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(31.6pt,-44.6pt) scale(1.1708709263391,1.1708709263391) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.3\">All Reasons</th>\n<th class=\"ltx_td ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S4.T4.1.1.1.4\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T4.1.1.1.1\">% of \n</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.4.4.4.4\">Dataset</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.2.2.2.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T4.3.3.3.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T4.4.4.4.3\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.5.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.1\">Causal Judgement</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.2\">709</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T4.4.4.5.1.3\">204</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.4.4.5.1.4\">87.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.6.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.6.2.1\">Reasoning About Colored 
Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.6.2.2\">525</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.6.2.3\">100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.6.2.4\">82.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.7.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.7.3.1\">Navigate</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.7.3.2\">1100</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.7.3.3\">572</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.7.3.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.8.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.8.4.1\">Penguins In A Table</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.8.4.2\">589</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.8.4.3\">123</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.8.4.4\">77.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.9.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.9.5.1\">Geometric Shapes</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.9.5.2\">630</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.9.5.3\">331</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.9.5.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.10.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.10.6.1\">Disambiguation QA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.10.6.2\">373</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.10.6.3\">45</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.10.6.4\">68.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.11.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.11.7.1\">Tracking Shuffled Objects Five Objects</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.11.7.2\">1020</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.11.7.3\">298</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.11.7.4\">95.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.12.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.12.8.1\">Word Sorting</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.12.8.2\">385</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.12.8.3\">107</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.12.8.4\">99.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.13.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.13.9.1\">Tracking Shuffled Objects Three Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.13.9.2\">743</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.13.9.3\">147</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.13.9.4\">64.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.14.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.14.10.1\">Tracking Shuffled Objects Seven Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.14.10.2\">1164</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.14.10.3\">400</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.14.10.4\">98.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.15.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.15.11.1\">Multistep Arithmetic Two</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.15.11.2\">621</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.15.11.3\">253</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.15.11.4\">99.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.16.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.16.12.1\">Web Of Lies</td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.16.12.2\">885</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.16.12.3\">113</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.16.12.4\">84.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.17.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.17.13.1\">Logical Deduction Three Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.17.13.2\">540</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.17.13.3\">100</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.17.13.4\">72.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.18.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.18.14.1\">Sports Understanding</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.18.14.2\">449</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.18.14.3\">160</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.18.14.4\">96.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.19.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.19.15.1\">Snarks</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.19.15.2\">396</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.19.15.3\">109</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.19.15.4\">91.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.20.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.20.16.1\">Logical Deduction Five Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.20.16.2\">680</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.20.16.3\">199</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.20.16.4\">92.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.21.17\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.21.17.1\">Salient Translation 
Error Detection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.21.17.2\">389</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.21.17.3\">90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.21.17.4\">98.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.22.18\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.22.18.1\">Hyperbaton</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.22.18.2\">432</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.22.18.3\">57</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.22.18.4\">65.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.23.19\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.23.19.1\">Movie Recommendation</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.23.19.2\">730</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.23.19.3\">457</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.23.19.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.24.20\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.24.20.1\">Object Counting</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.24.20.2\">397</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.24.20.3\">48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.24.20.4\">62.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.25.21\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.25.21.1\">Logical Deduction Seven Objects</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.25.21.2\">730</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.25.21.3\">309</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.25.21.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.26.22\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.26.22.1\">Temporal 
Sequences</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.26.22.2\">533</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.26.22.3\">76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.26.22.4\">97.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.27.23\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.27.23.1\">Formal Fallacies</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.27.23.2\">579</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.27.23.3\">251</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.27.23.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.28.24\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.28.24.1\">Dyck Languages</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.28.24.2\">1112</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.28.24.3\">558</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.28.24.4\">100.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.29.25\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.29.25.1\">Date Understanding</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.29.25.2\">587</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.29.25.3\">162</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.29.25.4\">98.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.30.26\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T4.4.4.30.26.1\">Boolean Expressions</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.30.26.2\">493</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T4.4.4.30.26.3\">160</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.30.26.4\">93.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4.31.27\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.1\">Ruin 
Names</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.2\">622</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S4.T4.4.4.31.27.3\">421</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.4.4.31.27.4\">100.0</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Reason filtering and selection percentages</figcaption>\n</figure>",
178
+ "perturb_sentence_id": [
179
+ 20,
180
+ 21
181
+ ],
182
  "output": {
183
+ "perturbed_statement": "[paragraph id = 20] In Table 4 we report the number of sampled reasons in stages before and after the stages depicted in Figure 3. It should be noted that the effect of optimization is visible as the mechanism that reduces the number of distinct reasons to a subset of reasons.",
184
+ "perturbed_explanation": "The original explanation highlights that Table 4 reports the number of sampled reasons before and after the stages depicted in Figure 2, illustrating the effect of optimization. 1. The statement mentions Figure 3 instead of Figure 2. This is factually incorrect because the stages referred to in Table 4 correspond to Figure 2, not Figure 3. By pointing to the wrong figure, the statement misleads the reader about the source of the depicted stages."
185
  }
186
  }
187
  ]
table_result/2407.00073v2_output.json CHANGED
@@ -38,10 +38,13 @@
38
  "[paragraph id = 8] Then, we have and ."
39
  ],
40
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Computation Cost of the Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.5.6.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.5.6.1.1\">Algorithms</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S5.T1.5.6.1.2\">Computation Cost</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.1.1.2\">KeyRegis</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.2.2.2\">KeyDerive</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.3.3.2\">KeyUpdate</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.4.4.2\">DCBEncrypt</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.5.5.2\">DCBDecrypt</th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T1.5.5.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
41
- "perturb_sentence_id": 0,
 
 
 
42
  "output": {
43
- "perturbed_statement": "[paragraph id = 0] Table 1 presents the computational complexity of our NI-CBE protocol. In this table, the computational cost of Encrypt algorithm is not analyzed since this algorithm needs to be run multiple times.",
44
- "perturbed_explanation": "The original explanation: The computation cost of the GlobeSteup algorithm is not analyzed since this algorithm only needs to be run once. In contrast, the statement misguidedly mentions the Encrypt algorithm as not being analyzed while also incorrectly supposing it runs multiple times. However, in the provided context, there’s no indication that the Encrypt algorithm's cost isn't analyzed or that it's executed multiple times, making this interpretation factually incorrect."
45
  }
46
  }
47
  ]
 
38
  "[paragraph id = 8] Then, we have and ."
39
  ],
40
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T1\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Computation Cost of the Algorithms</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S5.T1.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T1.5.6.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.5.6.1.1\">Algorithms</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S5.T1.5.6.1.2\">Computation Cost</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.1.1.2\">KeyRegis</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.1.1.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.2.2.2\">KeyDerive</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.2.2.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.3.3.2\">KeyUpdate</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.3.3.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.4.4.2\">DCBEncrypt</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T1.4.4.1\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S5.T1.5.5.2\">DCBDecrypt</th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_b ltx_border_r ltx_border_t\" id=\"S5.T1.5.5.1\"></td>\n</tr>\n</tbody>\n</table>\n</figure>",
41
+ "perturb_sentence_id": [
42
+ 0,
43
+ 1
44
+ ],
45
  "output": {
46
+ "perturbed_statement": "[paragraph id = 0] Table 1 presents the computational complexity of our NI-CBE protocol. In this table, the computation cost of the Encrypt algorithm is not analyzed since this algorithm only needs to be run once.",
47
+ "perturbed_explanation": "Original Explanation: The computation cost of the GlobeSteup algorithm is not analyzed since it is only needed to be run once, indicating that its infrequent use makes it less critical to include in regular computational complexity analysis. 2. The statement is incorrect because it incorrectly states that the computation cost of the Encrypt algorithm is not analyzed due to it running only once. The Encrypt algorithm is not mentioned in the context in relation to this particular aspect, and its cost depends on multiple calculations involving group members, making it a crucial part of computational analysis."
48
  }
49
  }
50
  ]
table_result/2407.00075v2_output.json CHANGED
@@ -46,10 +46,13 @@
46
  "[paragraph id = 26] We present our results in Table 1 , with additional discussion in Section C.4 ."
47
  ],
48
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S3.T1.46\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.46.47.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S3.T1.46.47.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.2.1\">Fact Amnesia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.3.1\">Rule Suppression</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.4.1\">State Coercion</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1\">\n<td class=\"ltx_td\" id=\"S3.T1.1.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n Values</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Attn. 
Weights</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Size</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.6.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.2.2.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.6.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.3.3.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.3.3.2.1\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.4.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.4.4.3.1\"></span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.7.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.8.1\">Atk <span class=\"ltx_text\" id=\"S3.T1.6.6.8.1.1\" style=\"color:#2CA02C;\">✓</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.9.1\">Atk <span class=\"ltx_text\" id=\"S3.T1.6.6.9.1.1\" style=\"color:#D62728;\">✗</span></span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.10.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.5.5.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" 
id=\"S3.T1.5.5.4.1\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.5.1\"></span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.16.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.8.8.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.9.9.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.10.10.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.11.11.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.12.12.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.13.13.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.14.14.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.15.15.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.16.16.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.26.26\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.17.17.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.18.18.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.19.19.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T1.20.20.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.21.21.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.22.22.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.23.23.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.24.24.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.25.25.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.26.26.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.36.36\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.27.27.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.28.28.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.29.29.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.30.30.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.31.31.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.32.32.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.33.33.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.34.34.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.35.35.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.36.36.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.46.46\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.37.37.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.38.38.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.39.39.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.40.40.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.41.41.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.42.42.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.43.43.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.44.44.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.45.45.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.46.46.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>\nLearned attacks attain high ASR against all three properties and mirror theory-based attacks.\nWe used reasoners with dimension .\n(Fact Amnesia) The average magnitude of the targeted entries () of is larger than the non-targeted entries ().\n(Rule Suppression) The suppressed rule receives less attention in the attacked case.\n(State Coercion)\nThe average entry-wise magnitude of is larger than that of the prefix .\n</figcaption>\n</figure>",
49
- "perturb_sentence_id": 26,
 
 
 
50
  "output": {
51
- "perturbed_statement": "[paragraph id = 26] We present our results in Table 2 , with additional discussion in Section C.5 .",
52
- "perturbed_explanation": "1. The original explanation describes that the results are available in Table 1 and further discussed in Section C.4. 2. The statement incorrectly mentions Table 2 and Section C.5, which do not align with the referenced sections. This creates inconsistencies in information referencing."
53
  }
54
  },
55
  {
@@ -126,10 +129,13 @@
126
  "[paragraph id = 32] Our theory-based fact amnesia and state coercion use adversarial suffixes with large magnitudes in specific coordinates."
127
  ],
128
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.17\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.17.18.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S4.T2.17.18.1.1\"></td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S4.T2.17.18.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.2.1\">Fact Amnesia</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S4.T2.17.18.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.3.1\">Rule Suppression</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.17.18.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.4.1\">State Coercion</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.1\">ASR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.1\">SSR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.1\">ASR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.1\">SSR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.1.6.1\">ASR</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.2.2.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.5\">—</td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.6\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.3.3.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.4.4.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.11.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.6.6.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.7.7.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.8.8.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.9.9.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.11.11.6\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.17.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.12.12.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.13.13.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.14.14.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.15.15.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.16.16.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.17.17.6\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>\nGCG jailbreaks succeed against fine-tuned GPT-2 models over 100 samples of each attack.\nExtending the example of <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00075v2#S4.F4\" title=\"In 4 Experiments with Large Language Models ‣ Logicbreaks: A Framework for Understanding Subversion of Rule-based Inference\"><span class=\"ltx_text ltx_ref_tag\">Fig.</span> <span class=\"ltx_text ltx_ref_tag\">4</span></a>, the following output would count in the SSR, but <span class=\"ltx_text ltx_font_bold ltx_font_italic\" id=\"S4.T2.20.1\">not</span> in the ASR.\n<span class=\"ltx_text 
ltx_font_italic\" id=\"S4.T2.21.2\">”I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.1\" style=\"color:#0B5394;\">Log</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.2\" style=\"color:#0B5394;\">Stick</span>. I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.3\" style=\"color:#0B5394;\">Brick</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.4\" style=\"color:#0B5394;\">Stone Stairs</span>. I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.5\" style=\"color:#0B5394;\">Brick</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.6\" style=\"color:#0B5394;\">Sheep</span>. I cannot create any other items.”</span>\n</figcaption>\n</figure>",
129
- "perturb_sentence_id": 28,
 
 
 
130
  "output": {
131
- "perturbed_statement": "[paragraph id = 28] We show results in Table 2 and give further details in Section D.3 .We remark that while rule suppression corresponds with completeness, the condition checked here is maximality, i.e., that all facts were derived.",
132
- "perturbed_explanation": "1. The original explanation outlines that rule suppression is related to maximality but the evaluated condition pertains to incompleteness, indicating some facts might not be derived. 2. The statement alters the association by claiming rule suppression corresponds with completeness and maximality checks all fact derivations, contrary to the specified content, leading to a misrepresentation of the analysis provided in the text."
133
  }
134
  },
135
  {
@@ -206,10 +212,13 @@
206
  "[paragraph id = 37] Moreover, we found that substituting these shared tokens from the suffix with the token “and” significantly reduces the ASR, which we call the Substitution ASR."
207
  ],
208
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S4.T3.14\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.14.15.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S4.T3.14.15.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S4.T3.14.15.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.14.15.1.2.1\">Fact Amnesia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S4.T3.14.15.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.14.15.1.3.1\">State Coercion</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.2\">Overlap</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.3\">Substitution ASR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.4\">Overlap</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.5\">Substitution ASR</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.2.2.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.4\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.5\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.3.3.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.3\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.9.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.6.6.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.7.7.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.8.8.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.9.9.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.14.14\">\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T3.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.13.13.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.14.14.5\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>\nSalient tokens from the target commonly appear in the suffix found by GCG, as measured by the overlap.\nOn average, there is a non-trivial overlap of the salient tokens in the target and the adversarial suffix.\nSubstituting the overlapping salient tokens with <span class=\"ltx_text ltx_font_italic\" id=\"S4.T3.16.1\">“and”</span> reduces the ASR.\n</figcaption>\n</figure>",
209
- "perturb_sentence_id": 32,
 
 
 
210
  "output": {
211
  "perturbed_statement": "[paragraph id = 32] Our theory-based fact amnesia and state coercion use adversarial suffixes with small magnitudes in specific coordinates. Such a choice of coordinates increases or decreases the values of some target proposition that is to be present or absent in the successive proof state.",
212
- "perturbed_explanation": "Original Explanation: The focus on adversarial suffixes with large magnitudes facilitates their effectiveness in manipulating the proof state. 1. The statement claims that adversarial suffixes exhibit 'small magnitudes,' which contradicts the original emphasis on 'large magnitudes' required for achieving the significant impact outlined in the theory. 2. Utilizing adversarial suffixes with insufficient magnitude might fail to exert the intended influence on coordinate adjustments, undermining the goals of fact amnesia and state coercion."
213
  }
214
  },
215
  {
@@ -280,10 +289,13 @@
280
  "[paragraph id = 35] Interestingly, we observed this phenomenon for GCG-generated jailbreaks: the targeted propositions frequently appear in the adversarial suffix."
281
  ],
282
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.6\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.6.7.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T4.6.7.1.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"12\" id=\"S4.T4.6.7.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.6.7.1.2.1\">Attention Weight on the Suppressed Rule (by layer)</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.6.8.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.6.8.2.1\">Step/Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.2\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.3\">2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.5\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.6\">5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.7\">6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.8\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.9\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.10\">9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.11\">10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.12\">11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.13\">12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.1.1.1\">\n <span class=\"ltx_text\" id=\"S4.T4.1.1.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.2\">0.58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S4.T4.1.1.3\">0.15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.4\">0.06</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.5\">0.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.6\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.7.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.8.1\">0.91</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.9.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.10\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.11\">0.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.12\">0.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.13\">0.57</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.2.2.1\">\n <span class=\"ltx_text\" id=\"S4.T4.2.2.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.3\">0.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.4\">0.04</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.5\">0.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.6\">0.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.7\">0.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.8\">0.25</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.9\">0.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.10\">0.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.11\">0.20</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T4.2.2.12\">0.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.13\">0.28</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.3.3.1\">\n <span class=\"ltx_text\" id=\"S4.T4.3.3.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.2\">0.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.3\">0.24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.4\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.5\">0.75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.6\">0.16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.7.1\">1.00</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.8.1\">0.91</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.9.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.10\">0.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.11\">0.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.12\">0.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.13\">0.61</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.4.4.1\">\n <span class=\"ltx_text\" id=\"S4.T4.4.4.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.3\">0.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.4\">0.10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.5\">0.20</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.6\">0.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.7\">0.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.8\">0.25</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.9\">0.18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.10\">0.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.11\">0.10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.12\">0.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.13\">0.31</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.5.5.1\">\n <span class=\"ltx_text\" id=\"S4.T4.5.5.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.2\">0.50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.3\">0.26</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.4\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.5\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.6\">0.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.7.1\">0.88</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.8.1\">0.78</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.9.1\">0.97</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.10\">0.42</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.11\">0.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.12\">0.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.13\">0.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.6.6\">\n<th 
class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T4.6.6.1\">\n <span class=\"ltx_text\" id=\"S4.T4.6.6.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.2\">0.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.3\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.4\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.5\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.6\">0.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.7\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.8\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.9\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.10\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.11\">0.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.12\">0.12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.13\">0.17</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>\nGCG-based rule suppression on GPT-2 produces attention weights that align with theory.\nWe track the difference in attention between the last token of a rule and the last token of the generation, and the suppression effect is most pronounced at layers 6, 7, and 8.\n</figcaption>\n</figure>",
283
- "perturb_sentence_id": 33,
 
 
 
284
  "output": {
285
- "perturbed_statement": "[paragraph id = 33] Such a choice of coordinates decreases the values of all target propositions ensuring their absence in the successive proof state, whereas a negative value in our theory-based suffix is linked to avoiding the use of its associated tokens in a text-based suffix.",
286
- "perturbed_explanation": "1. The original explanation relates to how coordinates influence the representation of propositions within the proof state. 2. The statement is incorrect because it inaccurately asserts that all target propositions are decreased and omits the nuances regarding the positive or negative associations, diverging from the intended context where proposition values can vary based on specific choices."
287
  }
288
  },
289
  {
@@ -352,10 +364,13 @@
352
  "[paragraph id = 40] Our theoretical analysis suggests that rules may be suppressed from activating if their attention is reduced."
353
  ],
354
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T5\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T5.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T5.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T5.1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"16\" id=\"S4.T5.1.1.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.1.1.2.1\">Attention Weight on the Suppressed Rule (by layer)</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T5.1.2.2.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">9</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">16</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.3.3.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.3.3.1.1\" style=\"color:#D62728;\">✗</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.43</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.5.1\">0.80</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.7\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.10.1\">0.98</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.13.1\">0.93</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.68</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.17.1\">0.87</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T5.1.4.4.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.4.4.1.1\" style=\"color:#2CA02C;\">✓</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.12</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.36</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.56</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.43</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.49</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.45</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.50</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.58</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.5.5.1\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">17</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">19</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">21</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">22</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">23</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">26</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">29</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.16\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.6.6.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.6.6.1.1\" style=\"color:#D62728;\">✗</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.2.1\">0.99</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.6.1\">0.89</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.7.1\">0.85</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.11\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.12.1\">0.82</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.47</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T5.1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.7.7.1.1\" style=\"color:#2CA02C;\">✓</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.7\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.41</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.55</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.35</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.49</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.31</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 5: </span>\nRule suppression on Llama-2 produces attention weights that align with the theory.\nAttention weights between the last token and the tokens of the suppressed rules are lower for multiple layers when the adversarial suffix is present.\n</figcaption>\n</figure>",
355
- "perturb_sentence_id": 39,
 
 
 
356
  "output": {
357
- "perturbed_statement": "[paragraph id = 39] Result 4: Theory-Predicted Attention Patterns Emerge in Automated Jailbreaks. Our theoretical analysis suggests that rules may be enhanced if their attention is reduced.",
358
- "perturbed_explanation": "Original Explanation: Our theoretical analysis suggests that rules may be suppressed from activating if their attention is reduced, indicating a correlation between attention levels and rule enforcement. 1. The statement erroneously states that rules may be enhanced if their attention is reduced. 2. This directly contradicts the observation that diminished attention typically leads to suppression, not enhancement."
359
  }
360
  }
361
  ]
 
46
  "[paragraph id = 26] We present our results in Table 1 , with additional discussion in Section C.4 ."
47
  ],
48
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S3.T1.46\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.46.47.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S3.T1.46.47.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.2.1\">Fact Amnesia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.3.1\">Rule Suppression</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T1.46.47.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.46.47.1.4.1\">State Coercion</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1\">\n<td class=\"ltx_td\" id=\"S3.T1.1.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">\n Values</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Attn. 
Weights</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T1.1.1.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" colspan=\"2\" id=\"S3.T1.1.1.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Size</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.6.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.2.2.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.6.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.3.3.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.3.3.2.1\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.4.4.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.4.4.3.1\"></span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.7.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.8.1\">Atk <span class=\"ltx_text\" id=\"S3.T1.6.6.8.1.1\" style=\"color:#2CA02C;\">✓</span></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.9.1\">Atk <span class=\"ltx_text\" id=\"S3.T1.6.6.9.1.1\" style=\"color:#D62728;\">✗</span></span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.10.1\">ASR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.5.5.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" 
id=\"S3.T1.5.5.4.1\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S3.T1.6.6.5.1\"></span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.16.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.8.8.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.9.9.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.10.10.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.11.11.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.12.12.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.13.13.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.14.14.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.15.15.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.16.16.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.26.26\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.17.17.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.18.18.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.19.19.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T1.20.20.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.21.21.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.22.22.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.23.23.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.24.24.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.25.25.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.26.26.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.36.36\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.27.27.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.28.28.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.29.29.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.30.30.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.31.31.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.32.32.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.33.33.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.34.34.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.35.35.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.36.36.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.46.46\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.37.37.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.38.38.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.39.39.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.40.40.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.41.41.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.42.42.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.43.43.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.44.44.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.45.45.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.46.46.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>\nLearned attacks attain high ASR against all three properties and mirror theory-based attacks.\nWe used reasoners with dimension .\n(Fact Amnesia) The average magnitude of the targeted entries () of is larger than the non-targeted entries ().\n(Rule Suppression) The suppressed rule receives less attention in the attacked case.\n(State Coercion)\nThe average entry-wise magnitude of is larger than that of the prefix .\n</figcaption>\n</figure>",
49
+ "perturb_sentence_id": [
50
+ 26,
51
+ 27
52
+ ],
53
  "output": {
54
+ "perturbed_statement": "[paragraph id = 26] We present our results in Table 2 , with additional discussion in Section C.4 .",
55
+ "perturbed_explanation": "1. The original explanation refers to the results being presented in Table 1 and further discussion in Section C.4, suggesting that data is organized and discussed systematically in specified sections of the document. 2. The statement is incorrect because it mentions that the results are presented in Table 2, whereas there is no indication in the context that Table 2 exists or is used to present results. The correct reference is to Table 1."
56
  }
57
  },
58
  {
 
129
  "[paragraph id = 32] Our theory-based fact amnesia and state coercion use adversarial suffixes with large magnitudes in specific coordinates."
130
  ],
131
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.17\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.17.18.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S4.T2.17.18.1.1\"></td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S4.T2.17.18.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.2.1\">Fact Amnesia</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" colspan=\"2\" id=\"S4.T2.17.18.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.3.1\">Rule Suppression</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.17.18.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.17.18.1.4.1\">State Coercion</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.1\">ASR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.1\">SSR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.1\">ASR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.1\">SSR</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.1.6.1\">ASR</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.2.2.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.5\">—</td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.6\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.3.3.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.4.4.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.5.5.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.11.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.6.6.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.7.7.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.8.8.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.9.9.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.11.11.6\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.17.17\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.12.12.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.13.13.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.14.14.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.15.15.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.16.16.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.17.17.6\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>\nGCG jailbreaks succeed against fine-tuned GPT-2 models over 100 samples of each attack.\nExtending the example of <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00075v2#S4.F4\" title=\"In 4 Experiments with Large Language Models ‣ Logicbreaks: A Framework for Understanding Subversion of Rule-based Inference\"><span class=\"ltx_text ltx_ref_tag\">Fig.</span> <span class=\"ltx_text ltx_ref_tag\">4</span></a>, the following output would count in the SSR, but <span class=\"ltx_text ltx_font_bold ltx_font_italic\" id=\"S4.T2.20.1\">not</span> in the ASR.\n<span class=\"ltx_text 
ltx_font_italic\" id=\"S4.T2.21.2\">”I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.1\" style=\"color:#0B5394;\">Log</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.2\" style=\"color:#0B5394;\">Stick</span>. I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.3\" style=\"color:#0B5394;\">Brick</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.4\" style=\"color:#0B5394;\">Stone Stairs</span>. I have <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.5\" style=\"color:#0B5394;\">Brick</span>, and so I can create <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.21.2.6\" style=\"color:#0B5394;\">Sheep</span>. I cannot create any other items.”</span>\n</figcaption>\n</figure>",
132
+ "perturb_sentence_id": [
133
+ 28,
134
+ 29
135
+ ],
136
  "output": {
137
+ "perturbed_statement": "[paragraph id = 28] We show results in Table 2 and give further details in Section D.3. We remark that while rule suppression corresponds with incompleteness, the condition checked here is maximality, i.e., that all facts were successfully derived.",
138
+ "perturbed_explanation": "1. The original explanation emphasizes that rule suppression is linked to maximality, yet incompleteness is the condition being checked in this context, meaning some facts were not derived. 2. The statement incorrectly claims that rule suppression corresponds with incompleteness and that maximality is the condition being checked, which is incorrect because, as shown in the context, it is incompleteness that implies non-maximality. The correct detail is that the condition checked here is incompleteness, not maximality; the statement has reversed the correct relationship between rule suppression and the conditions being checked."
139
  }
140
  },
141
  {
 
212
  "[paragraph id = 37] Moreover, we found that substituting these shared tokens from the suffix with the token “and” significantly reduces the ASR, which we call the Substitution ASR."
213
  ],
214
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S4.T3.14\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.14.15.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S4.T3.14.15.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S4.T3.14.15.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.14.15.1.2.1\">Fact Amnesia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S4.T3.14.15.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T3.14.15.1.3.1\">State Coercion</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.2\">Overlap</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.3\">Substitution ASR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.4\">Overlap</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.1.5\">Substitution ASR</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.2.2.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.4\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.5\">—</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.3.3.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.4.4.3\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.9.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.6.6.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.7.7.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.8.8.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.9.9.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.14.14\">\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T3.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.13.13.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.14.14.5\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>\nSalient tokens from the target commonly appear in the suffix found by GCG, as measured by the overlap.\nOn average, there is a non-trivial overlap of the salient tokens in the target and the adversarial suffix.\nSubstituting the overlapping salient tokens with <span class=\"ltx_text ltx_font_italic\" id=\"S4.T3.16.1\">“and”</span> reduces the ASR.\n</figcaption>\n</figure>",
215
+ "perturb_sentence_id": [
216
+ 32,
217
+ 33
218
+ ],
219
  "output": {
220
  "perturbed_statement": "[paragraph id = 32] Our theory-based fact amnesia and state coercion use adversarial suffixes with small magnitudes in specific coordinates. Such a choice of coordinates increases or decreases the values of some target proposition that is to be present or absent in the successive proof state.",
221
+ "perturbed_explanation": "1. The original explanation refers to adversarial suffixes with large magnitudes in specific coordinates, facilitating changes in the state of certain target propositions. 2. The statement claims that adversarial suffixes with small magnitudes are used, which contradicts the idea that large magnitudes are necessary for influencing target propositions effectively. This introduces a factual error as it implies that minimal impact on the propositions is sufficient, which is misleading based on the context."
222
  }
223
  },
224
  {
 
289
  "[paragraph id = 35] Interestingly, we observed this phenomenon for GCG-generated jailbreaks: the targeted propositions frequently appear in the adversarial suffix."
290
  ],
291
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.6\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.6.7.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T4.6.7.1.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"12\" id=\"S4.T4.6.7.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.6.7.1.2.1\">Attention Weight on the Suppressed Rule (by layer)</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.6.8.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.6.8.2.1\">Step/Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.2\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.3\">2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.5\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.6\">5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.7\">6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.8\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.9\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.10\">9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.11\">10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.12\">11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.6.8.2.13\">12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.1.1.1\">\n <span class=\"ltx_text\" id=\"S4.T4.1.1.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.2\">0.58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S4.T4.1.1.3\">0.15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.4\">0.06</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.5\">0.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.6\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.7.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.8.1\">0.91</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.1.9.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.10\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.11\">0.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.12\">0.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.1.13\">0.57</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.2.2.1\">\n <span class=\"ltx_text\" id=\"S4.T4.2.2.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.3\">0.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.4\">0.04</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.5\">0.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.6\">0.05</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.7\">0.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.8\">0.25</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.9\">0.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.10\">0.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.11\">0.20</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T4.2.2.12\">0.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.2.2.13\">0.28</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.3.3.1\">\n <span class=\"ltx_text\" id=\"S4.T4.3.3.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.2\">0.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.3\">0.24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.4\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.5\">0.75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.6\">0.16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.7.1\">1.00</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.8.1\">0.91</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.3.3.9.1\">0.95</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.10\">0.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.11\">0.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.12\">0.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.3.3.13\">0.61</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T4.4.4.1\">\n <span class=\"ltx_text\" id=\"S4.T4.4.4.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.3\">0.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.4\">0.10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.5\">0.20</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.6\">0.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.7\">0.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.8\">0.25</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.9\">0.18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.10\">0.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.11\">0.10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.12\">0.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.4.4.13\">0.31</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T4.5.5.1\">\n <span class=\"ltx_text\" id=\"S4.T4.5.5.1.1\" style=\"color:#D62728;\">✗</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.2\">0.50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.3\">0.26</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.4\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.5\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.6\">0.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.7.1\">0.88</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.8\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.8.1\">0.78</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.5.5.9.1\">0.97</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.10\">0.42</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.11\">0.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.12\">0.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.5.5.13\">0.36</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.6.6\">\n<th 
class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T4.6.6.1\">\n <span class=\"ltx_text\" id=\"S4.T4.6.6.1.1\" style=\"color:#2CA02C;\">✓</span>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.2\">0.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.3\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.4\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.5\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.6\">0.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.7\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.8\">0.07</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.9\">0.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.10\">0.05</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.11\">0.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.12\">0.12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T4.6.6.13\">0.17</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>\nGCG-based rule suppression on GPT-2 produces attention weights that align with theory.\nWe track the difference in attention between the last token of a rule and the last token of the generation, and the suppression effect is most pronounced at layers 6, 7, and 8.\n</figcaption>\n</figure>",
292
+ "perturb_sentence_id": [
293
+ 33,
294
+ 34
295
+ ],
296
  "output": {
297
+ "perturbed_statement": "[paragraph id = 33] Such a choice of coordinates increases or decreases the values of some target proposition that is to be present or absent in the initial proof state. Intuitively, a large positive value in our theory-based suffix is unrelated to using its associated tokens in a text-based suffix.",
298
+ "perturbed_explanation": "1. The original statement implies that the theory-based suffix has an analogous relationship to a text-based suffix, suggesting a correlation or connection between their components. 2. The statement is incorrect because it claims the large positive value in a theory-based suffix is unrelated to its associated tokens in a text-based suffix, contradicting the original assertion of their analogy and linkage."
299
  }
300
  },
301
  {
 
364
  "[paragraph id = 40] Our theoretical analysis suggests that rules may be suppressed from activating if their attention is reduced."
365
  ],
366
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T5\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T5.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T5.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_tt\" id=\"S4.T5.1.1.1.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"16\" id=\"S4.T5.1.1.1.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.1.1.2.1\">Attention Weight on the Suppressed Rule (by layer)</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T5.1.2.2.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">6</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">9</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">10</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">11</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">12</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.2.2.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">16</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.3.3.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.3.3.1.1\" style=\"color:#D62728;\">✗</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.43</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.5.1\">0.80</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.7\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.10.1\">0.98</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.13.1\">0.93</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.68</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.3.3.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.3.3.17.1\">0.87</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T5.1.4.4.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.4.4.1.1\" style=\"color:#2CA02C;\">✓</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.12</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.36</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.56</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.43</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.49</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.45</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.50</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T5.1.4.4.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.58</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.5.5.1\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">Atk?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">17</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">19</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">21</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">22</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">23</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">25</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">26</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">29</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.16\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.5.5.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">32</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T5.1.6.6.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.6.6.1.1\" style=\"color:#D62728;\">✗</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.2.1\">0.99</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.6.1\">0.89</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.7\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.7.1\">0.85</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.64</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.11\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T5.1.6.6.12.1\">0.82</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.52</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T5.1.6.6.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.47</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T5.1.7.7.1\" style=\"padding-left:4.0pt;padding-right:4.0pt;\"><span class=\"ltx_text\" id=\"S4.T5.1.7.7.1.1\" style=\"color:#2CA02C;\">✓</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.2\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.3\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.4\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.5\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.6\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.46</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.7\" 
style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.8\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.41</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.9\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.10\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.11\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.12\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.55</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.13\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.35</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.14\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.15\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.16\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.49</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T5.1.7.7.17\" style=\"padding-left:4.0pt;padding-right:4.0pt;\">0.31</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 5: </span>\nRule suppression on Llama-2 produces attention weights that align with the theory.\nAttention weights between the last token and the tokens of the suppressed rules are lower for multiple layers when the adversarial suffix is present.\n</figcaption>\n</figure>",
367
+ "perturb_sentence_id": [
368
+ 39,
369
+ 40
370
+ ],
371
  "output": {
372
+ "perturbed_statement": "[paragraph id = 39] Result 4: Theory-Predicted Attention Patterns Emerge in Automated Jailbreaks. Our theoretical analysis suggests that rules may be enhanced from activating if their attention is reduced.",
373
+ "perturbed_explanation": "The original explanation is: Our theoretical analysis suggests that rules may be suppressed from activating if their attention is reduced. This implies that attention reduction can dampen or diminish the activation of certain rules. However, the statement suggests that rules may be enhanced from activating if their attention is reduced, which is incorrect because for rules to be enhanced or augmented in their activation, typically an increase in attention would be necessary, not a reduction. This would contradict the dynamics described in the original context."
374
  }
375
  }
376
  ]
table_result/2407.00079v3_output.json CHANGED
@@ -35,10 +35,13 @@
35
  "[paragraph id = 9] LRUCache performs best under this dataset s patterns, likely due to the temporal proximity in request utilization."
36
  ],
37
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Cache hit rates under different cache policies and capacities.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.1.1\" style=\"font-size:90%;\">Block capacity</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T1.4.1.1.2\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.2.1\" style=\"font-size:90%;\">Inf</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.3\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.3.1\" style=\"font-size:90%;\">100000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.4\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.4.1\" style=\"font-size:90%;\">50000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.5\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.5.1\" style=\"font-size:90%;\">30000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.6\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.6.1\" style=\"font-size:90%;\">10000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.7\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.7.1\" style=\"font-size:90%;\">1000</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.4.2.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T1.4.2.1.1\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.1.1\" 
style=\"font-size:90%;\">LRUCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T1.4.2.1.2\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.3\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.3.1\" style=\"font-size:90%;\">0.51</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.4\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.4.1\" style=\"font-size:90%;\">0.50</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.5\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.5.1\" style=\"font-size:90%;\">0.48</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.6\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.6.1\" style=\"font-size:90%;\">0.40</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.7\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.4.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T1.4.3.2.1\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.1.1\" style=\"font-size:90%;\">LFUCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T1.4.3.2.2\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.3\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.3.1\" style=\"font-size:90%;\">0.51</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.4\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.4.1\" style=\"font-size:90%;\">0.49</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.5\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.5.1\" style=\"font-size:90%;\">0.43</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.6\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.6.1\" 
style=\"font-size:90%;\">0.35</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.7\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.4.4.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T1.4.4.3.1\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.1.1\" style=\"font-size:90%;\">LengthAwareCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T1.4.4.3.2\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.3\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.3.1\" style=\"font-size:90%;\">0.50</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.4\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.4.1\" style=\"font-size:90%;\">0.48</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.5\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.5.1\" style=\"font-size:90%;\">0.42</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.6\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.6.1\" style=\"font-size:90%;\">0.35</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.7\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
38
- "perturb_sentence_id": 5,
 
 
 
39
  "output": {
40
- "perturbed_statement": "[paragraph id = 5] Increasing the cache capacity from 1,000 to 50,000 blocks boosts the cache hit ratio from 30% to 70%.Further capacity increases show minimal improvement.",
41
- "perturbed_explanation": "The original explanation for the improvement in cache hit ratio primarily stems from the increased availability of cached data blocks when cache capacity is expanded. This facilitates a reduction in cache misses for frequently accessed data. 2. However, the statement suggests an improvement from 30% to 70%, which deviates from the provided data indicating an increase to 50%. This exaggerates the impact of increasing cache capacity, misrepresenting the scale of quality improvement."
42
  }
43
  },
44
  {
@@ -83,10 +86,13 @@
83
  "[paragraph id = 9] For other scenarios, we simulated requests using a Poisson arrival process and controlled the request rate through RPS (Requests per Second)."
84
  ],
85
  "table_html": "<figure class=\"ltx_table\" id=\"S8.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Datasets used in the end-to-end experiment.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S8.T2.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S8.T2.4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.1\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.1.1\" style=\"font-size:90%;\">Dataset</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.2\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.2.1\" style=\"font-size:90%;\">Avg Input Length</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.3\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.3.1\" style=\"font-size:90%;\">Avg Output Length</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.4\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.4.1\" style=\"font-size:90%;\">Cache Ratio</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.5\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.5.1\" style=\"font-size:90%;\">Arrival Pattern</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S8.T2.4.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.1\">\n<span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.1\" style=\"font-size:90%;\">ArXiv Summarization </span><cite class=\"ltx_cite ltx_citemacro_cite\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.2.1\" style=\"font-size:90%;\">[</span><a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00079v3#bib.bib26\" title=\"\">26</a><span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.3.2\" style=\"font-size:90%;\">]</span></cite>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.2\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.2.1\" style=\"font-size:90%;\">8088</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.3\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.3.1\" style=\"font-size:90%;\">229</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.4\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.4.1\" style=\"font-size:90%;\">~0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.5\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.3.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.1\">\n<span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.1\" style=\"font-size:90%;\">L-Eval </span><cite class=\"ltx_cite ltx_citemacro_cite\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.2.1\" style=\"font-size:90%;\">[</span><a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00079v3#bib.bib27\" title=\"\">27</a><span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.3.2\" style=\"font-size:90%;\">]</span></cite>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.2\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.2.1\" style=\"font-size:90%;\">19019</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.3\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.3.1\" style=\"font-size:90%;\">72</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.4\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.4.1\" style=\"font-size:90%;\">&gt;80%</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.5\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.4.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.1\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.1.1\" style=\"font-size:90%;\">Simulated Data</span></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S8.T2.4.4.3.2\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.2.1\" style=\"font-size:90%;\">16k, 32k, 64k, 128k</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.3\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.3.1\" style=\"font-size:90%;\">512</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.4\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.4.1\" style=\"font-size:90%;\">50%</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.5\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.5.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.1\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.1.1\" style=\"font-size:90%;\">Real Data</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.2\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.2.1\" style=\"font-size:90%;\">7955</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.3\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.3.1\" style=\"font-size:90%;\">194</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.4\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.4.1\" style=\"font-size:90%;\">~50%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.5\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.5.1\" style=\"font-size:90%;\">Timestamp-based</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
86
- "perturb_sentence_id": 5,
 
 
 
87
  "output": {
88
- "perturbed_statement": "[paragraph id = 5] Dataset and Workload Building upon previous research [15 , 8 , 14 ], we selected or designed the datasets as outlined in Table 4. In addition to utilizing non-public datasets, we generated a batch of simulated data featuring predefined lengths and prefix cache ratios for our tests.",
89
- "perturbed_explanation": "The original statement provides an accurate depiction of the dataset preparation process: 1. The datasets were outlined in Table 2, and public datasets were utilized for generating simulated data. The altered statement introduces inaccuracies: 1. It incorrectly specifies Table 4 instead of Table 2 as the reference for the dataset outline. 2. It claims non-public datasets were utilized, which contradicts the original statement that mentions public datasets."
90
  }
91
  }
92
  ]
 
35
  "[paragraph id = 9] LRUCache performs best under this dataset s patterns, likely due to the temporal proximity in request utilization."
36
  ],
37
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Cache hit rates under different cache policies and capacities.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.1.1\" style=\"font-size:90%;\">Block capacity</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T1.4.1.1.2\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.2.1\" style=\"font-size:90%;\">Inf</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.3\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.3.1\" style=\"font-size:90%;\">100000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.4\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.4.1\" style=\"font-size:90%;\">50000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.5\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.5.1\" style=\"font-size:90%;\">30000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.6\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.6.1\" style=\"font-size:90%;\">10000</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T1.4.1.1.7\"><span class=\"ltx_text\" id=\"S4.T1.4.1.1.7.1\" style=\"font-size:90%;\">1000</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.4.2.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T1.4.2.1.1\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.1.1\" 
style=\"font-size:90%;\">LRUCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S4.T1.4.2.1.2\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.3\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.3.1\" style=\"font-size:90%;\">0.51</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.4\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.4.1\" style=\"font-size:90%;\">0.50</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.5\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.5.1\" style=\"font-size:90%;\">0.48</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.6\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.6.1\" style=\"font-size:90%;\">0.40</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.4.2.1.7\"><span class=\"ltx_text\" id=\"S4.T1.4.2.1.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.4.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T1.4.3.2.1\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.1.1\" style=\"font-size:90%;\">LFUCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row\" id=\"S4.T1.4.3.2.2\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.3\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.3.1\" style=\"font-size:90%;\">0.51</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.4\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.4.1\" style=\"font-size:90%;\">0.49</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.5\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.5.1\" style=\"font-size:90%;\">0.43</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.6\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.6.1\" 
style=\"font-size:90%;\">0.35</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.4.3.2.7\"><span class=\"ltx_text\" id=\"S4.T1.4.3.2.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.4.4.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T1.4.4.3.1\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.1.1\" style=\"font-size:90%;\">LengthAwareCache</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T1.4.4.3.2\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.2.1\" style=\"font-size:90%;\">0.51</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.3\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.3.1\" style=\"font-size:90%;\">0.50</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.4\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.4.1\" style=\"font-size:90%;\">0.48</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.5\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.5.1\" style=\"font-size:90%;\">0.42</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.6\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.6.1\" style=\"font-size:90%;\">0.35</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T1.4.4.3.7\"><span class=\"ltx_text\" id=\"S4.T1.4.4.3.7.1\" style=\"font-size:90%;\">0.30</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
38
+ "perturb_sentence_id": [
39
+ 5,
40
+ 6
41
+ ],
42
  "output": {
43
+ "perturbed_statement": "[paragraph id = 5] Increasing the cache capacity from 1,000 to 60,000 blocks boosts the cache hit ratio from 30% to 60%. Further capacity increases show minimal improvement.",
44
+ "perturbed_explanation": "1. The original explanation suggests that increasing cache capacity from 1,000 to 50,000 blocks improves the cache hit ratio from 30% to 50%, indicating a specific relationship between capacity and hit ratio. 2. The statement alters this relationship, incorrectly suggesting that increasing capacity to 60,000 blocks results in a 60% hit ratio. The context does not support this claim, and it falsely implies that increasing capacity beyond 50,000 continues to significantly improve the hit ratio, contrary to the noted minimal improvement beyond a certain point."
45
  }
46
  },
47
  {
 
86
  "[paragraph id = 9] For other scenarios, we simulated requests using a Poisson arrival process and controlled the request rate through RPS (Requests per Second)."
87
  ],
88
  "table_html": "<figure class=\"ltx_table\" id=\"S8.T2\">\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Datasets used in the end-to-end experiment.</figcaption>\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S8.T2.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S8.T2.4.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.1\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.1.1\" style=\"font-size:90%;\">Dataset</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.2\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.2.1\" style=\"font-size:90%;\">Avg Input Length</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.3\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.3.1\" style=\"font-size:90%;\">Avg Output Length</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.4\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.4.1\" style=\"font-size:90%;\">Cache Ratio</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S8.T2.4.1.1.5\"><span class=\"ltx_text\" id=\"S8.T2.4.1.1.5.1\" style=\"font-size:90%;\">Arrival Pattern</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S8.T2.4.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.1\">\n<span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.1\" style=\"font-size:90%;\">ArXiv Summarization </span><cite class=\"ltx_cite ltx_citemacro_cite\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.2.1\" style=\"font-size:90%;\">[</span><a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00079v3#bib.bib26\" title=\"\">26</a><span class=\"ltx_text\" id=\"S8.T2.4.2.1.1.3.2\" style=\"font-size:90%;\">]</span></cite>\n</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.2\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.2.1\" style=\"font-size:90%;\">8088</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.3\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.3.1\" style=\"font-size:90%;\">229</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.4\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.4.1\" style=\"font-size:90%;\">~0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S8.T2.4.2.1.5\"><span class=\"ltx_text\" id=\"S8.T2.4.2.1.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.3.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.1\">\n<span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.1\" style=\"font-size:90%;\">L-Eval </span><cite class=\"ltx_cite ltx_citemacro_cite\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.2.1\" style=\"font-size:90%;\">[</span><a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00079v3#bib.bib27\" title=\"\">27</a><span class=\"ltx_text\" id=\"S8.T2.4.3.2.1.3.2\" style=\"font-size:90%;\">]</span></cite>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.2\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.2.1\" style=\"font-size:90%;\">19019</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.3\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.3.1\" style=\"font-size:90%;\">72</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.4\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.4.1\" style=\"font-size:90%;\">&gt;80%</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.3.2.5\"><span class=\"ltx_text\" id=\"S8.T2.4.3.2.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.4.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.1\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.1.1\" style=\"font-size:90%;\">Simulated Data</span></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S8.T2.4.4.3.2\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.2.1\" style=\"font-size:90%;\">16k, 32k, 64k, 128k</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.3\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.3.1\" style=\"font-size:90%;\">512</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.4\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.4.1\" style=\"font-size:90%;\">50%</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S8.T2.4.4.3.5\"><span class=\"ltx_text\" id=\"S8.T2.4.4.3.5.1\" style=\"font-size:90%;\">Poisson Process</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S8.T2.4.5.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.1\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.1.1\" style=\"font-size:90%;\">Real Data</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.2\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.2.1\" style=\"font-size:90%;\">7955</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.3\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.3.1\" style=\"font-size:90%;\">194</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.4\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.4.1\" style=\"font-size:90%;\">~50%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S8.T2.4.5.4.5\"><span class=\"ltx_text\" id=\"S8.T2.4.5.4.5.1\" style=\"font-size:90%;\">Timestamp-based</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
89
+ "perturb_sentence_id": [
90
+ 5,
91
+ 6
92
+ ],
93
  "output": {
94
+ "perturbed_statement": "[paragraph id = 5] Dataset and Workload Building upon previous research [15 , 8 , 14 ], we selected or designed the datasets as outlined in Table 2. In addition to utilizing public datasets, we generated a batch of simulated data featuring variable timestamps and postfix cache ratios for our experiments.",
95
+ "perturbed_explanation": "In the context, the original explanation states that simulated data was generated with predefined lengths and prefix cache ratios. 1. The statement refers to 'variable timestamps' instead of 'predefined lengths,' which is a factual alteration because there is no mention of varying timestamps in the experiments described. 2. The statement refers to 'postfix cache ratios' instead of 'prefix cache ratios,' introducing another factual error because the term 'postfix cache ratios' is not consistent with the original focus on prefix cache management in the dataset."
96
  }
97
  }
98
  ]
table_result/2407.00082v1_output.json CHANGED
The diff for this file is too large to render. See raw diff
 
table_result/2407.00085v1_output.json CHANGED
@@ -51,10 +51,13 @@
51
  "[paragraph id = 17] As a point of reference the linear model in (Varian and Choi, 2009 ) returns a monthly R2 of 0.79 over the training data using both lagged sales and Google Trends."
52
  ],
53
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.4\">Frequency</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.5\">Embedding</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.6\">Model</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.2.2.2\">Test R<sup class=\"ltx_sup\" id=\"S4.T1.2.2.2.1\">2</sup> \n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T1.3.3.3\">Test MAPE (%) \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.4.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.2\">Categorical</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.3\">Lasso</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.4\">0.5869</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.3.4.1.5\">10.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.5.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.2\">Categorical</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.5.2.4\">0.5381</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.5.2.5\">10.85</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.6.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.6.3.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" 
id=\"S4.T1.3.6.3.2\">SLaM</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.6.3.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.6.3.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.6.3.4.1\">0.7486</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.6.3.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.6.3.5.1\">7.12</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.7.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.1\">Monthly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.2\">SLaM</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.7.4.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.7.4.4.1\">0.9065</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.7.4.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.7.4.5.1\">3.03</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Baseline Regional Auto Models with search and indicator multipliers - fit metrics reported at the national level.</figcaption>\n</figure>",
54
- "perturb_sentence_id": 17,
 
 
 
55
  "output": {
56
- "perturbed_statement": "[paragraph id = 17] As a point of reference the linear model in (Varian and Choi, 2009 ) returns a monthly R2 of 0.82 over the training data using both lagged sales and Google Trends.While automotive sales are used in this paper, we expect that our approach can be used to greatly improve nowcasts across economic indicators.",
57
- "perturbed_explanation": "The statement contains an altered value for the reported monthly R2. 1. The original statement specifies that the model by Varian and Choi achieved a monthly R2 of 0.79 over the training data. 2. The altered statement incorrects this to 0.82, which misrepresents the findings reported. Thus, the corrected information should be reinstated to accurately reflect the source material."
58
  }
59
  },
60
  {
@@ -94,10 +97,13 @@
94
  "[paragraph id = 16] Our method is generally on par or better than the best AR approaches."
95
  ],
96
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.10\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.3.3\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T2.3.3.4\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T2.1.1.1\">Test MAPE(%) \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T2.3.3.3\">Test \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.4.4.2\">Logistic Regression</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.4.4.1\">24.9 0.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.4.4.3\">.98</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.5.5.2\">MLP</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.5.5.1\">7.3 1.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.5.5.3\">.99</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.11.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.10.11.1.1\">Google Flu Trends <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.10.11.1.2\">[9.5 - 33.1]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.10.11.1.3\">[.66 - .97]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.12.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.12.2.1\">Elastic Net <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span 
class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.12.2.2\">[9.8 - 15.1]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.12.2.3\">[.92 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.13.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.13.3.1\">Guassian Process <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.13.3.2\">[9.4 - 14.6]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.13.3.3\">[.94 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.14.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.10.14.4.1\">AR <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.10.14.4.2\">[6.7 - 14.3]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.10.14.4.3\">[.88 - .98]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.15.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.15.5.1\">AR+Google Flu Trends <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.15.5.2\">[6.2 - 12.5]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.15.5.3\">[.88 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.6.6\">\n<th 
class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.6.6.2\">AR+Elastic Net <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.6.6.3\">[5.1 - 8.7]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.6.6.1\">[.93 - ]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.7.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.7.7.2\">AR+Guassian Process <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.7.7.3\">[5.0 - 8.6]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.7.7.1\">[.93 - ]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.8.8.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.2.1\">CoSMo (Ours)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.1.1\">5.5 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.8.8.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.3.1\">.99</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.10.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.10.10.3.1\">CoSMo (Ours, Test selection)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.9.9.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.9.9.1.1\">3.9 0.1</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.2\">\n<span class=\"ltx_text 
ltx_font_bold\" id=\"S4.T2.10.10.2.1\">1</span>\n</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Benchmarking ILI flu rate prediction at the national level. We show the standard deviation of MAPE for our experiments; we omit this metric for the Pearson coefficient because it was close to zero for all experiments.</figcaption>\n</figure>",
97
- "perturb_sentence_id": 15,
 
 
 
98
  "output": {
99
- "perturbed_statement": "[paragraph id = 15] The autoregressive (AR) entries in Table 2 represent methods that include a 3-week lag of the most recent ILI rate. Our method is generally inferior to the best AR approaches.",
100
- "perturbed_explanation": "1. The original explanation emphasizes that Table 2 outlines autoregressive (AR) methods leveraging ILI data with a 1-week or 2-week lag, and our approach is typically equal to or better in performance compared to the leading AR methods. 2. The statement now incorrectly claims that AR methods utilize a 3-week lag, which conflicts with the description of the methodology in the context, and asserts that our approach is generally inferior to AR methods, which contradicts documented indications of performance superiority."
101
  }
102
  },
103
  {
@@ -136,10 +142,13 @@
136
  "[paragraph id = 7] The zero-shot inference performs better in the opposite direction, (.99 ), perhaps leveraging the greater number of training examples and taking advantage of the easier task of national modeling."
137
  ],
138
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T5\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T5.10\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T5.2.2.3\">Training Data</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T5.2.2.4\">Eval Data</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T5.1.1.1\">Test MAPE(%)\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T5.2.2.2\">Test \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T5.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.4.4.3\">State</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.4.4.4\">State</th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.4.4.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.6.6.3\">National</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.6.6.4\">State</th>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.6.6.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.8.8.3\">National</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.8.8.4\">National</th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.8.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.10.10\">\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_row ltx_border_r\" id=\"S4.T5.10.10.3\">State</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.10.10.4\">National</th>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.10.10.2\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 5. </span>Zero-shot evaluation for Flu ILI rate prediction. The zero-shot examples are the rows where there is a mismatch between the Training Data column and the Eval Data column. The rows with alignment serve as comparison points. </figcaption>\n</figure>",
139
- "perturb_sentence_id": 6,
 
 
 
140
  "output": {
141
- "perturbed_statement": "[paragraph id = 6] The model has a surprising capability to infer with some success (.78) national-level flu rates, in the test period, without ever being trained on national-level targets. The zero-shot inference performs better in the opposite direction, (.92), perhaps leveraging the greater number of training examples and taking advantage of the easier task of state-level modeling.",
142
- "perturbed_explanation": "Explanation: 1. The original explanation states that the model was able to infer state-level flu rates without training on state-level targets (zero-shot inference), achieving a performance metric of 0.78 during the test period, with the opposite direction displaying an improved performance metric of 0.99 due to leveraging more training data and the simpler nature of modeling at the national level. 2. The statement claims incorrect data by stating the inference pertains to national-level flu rates without training on national-level targets and provides altered performance metrics (0.92 rather than the correct 0.99). These assertions differ from the given contextual details and thus render the statement factually inaccurate."
143
  }
144
  },
145
  {
@@ -181,10 +190,13 @@
181
  "[paragraph id = 11] We can see that the sT5 models perform similar to their all-language counter parts, where as performance for MLSE considerable lowers."
182
  ],
183
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T6\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T6.14\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T6.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.2.2.3\"></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.1.1.1\">Test MAPE(%)\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.2.2.2\">Test \n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.4.4.3\">MLSE (baseline)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.4.4.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.6.6.3\">sT5 Base</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.6.6.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.8.8.3\">sT5 Large</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.8.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.10.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.10.10.3\">MLSE (English only)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.10.10.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.12.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.12.12.3\">sT5 Base (English only)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.12.12.2\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S4.T6.14.14\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.14.14.3\">sT5 Large (English only)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.13.13.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.14.14.2\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 6. </span>National ILI rate modeling results from using different embedding functions from a variety of LMs.</figcaption>\n</figure>",
184
- "perturb_sentence_id": 5,
 
 
 
185
  "output": {
186
- "perturbed_statement": "[paragraph id = 5] Table 6 shows the results from using different search embeddings created using the MLSE model.Surprisingly, larger capacity models like sT5 Base (110M parameters) and sT5 Large (335M parameters) significantly outperform the oversized MLSE model.",
187
- "perturbed_explanation": "Table 6 shows the results from using different search embeddings created using the sT5 Base (110M parameters) and sT5 Large (335M parameters) models. Surprisingly, larger capacity models like sT5 Base and sT5 Large do not outperform the smaller capacity MLSE model. 1. These results imply that larger model capacity does not necessarily correlate with superior performance when compared to smaller, diversified models like MLSE. 2. Contrary to the information provided, the statement suggests the sT5 models notably outperform the MLSE model, which conflicts with the observed outcomes where MLSE's performances are competitive or better in multilingual contexts. Therefore, the revised statement introduces a factual inconsistency."
188
  }
189
  }
190
  ]
 
51
  "[paragraph id = 17] As a point of reference the linear model in (Varian and Choi, 2009 ) returns a monthly R2 of 0.79 over the training data using both lagged sales and Google Trends."
52
  ],
53
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.3\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.4\">Frequency</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.5\">Embedding</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.3.3.6\">Model</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_r\" id=\"S4.T1.2.2.2\">Test R<sup class=\"ltx_sup\" id=\"S4.T1.2.2.2.1\">2</sup> \n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T1.3.3.3\">Test MAPE (%) \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.4.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.2\">Categorical</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.3\">Lasso</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S4.T1.3.4.1.4\">0.5869</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.3.4.1.5\">10.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.5.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.2\">Categorical</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.5.2.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.5.2.4\">0.5381</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.5.2.5\">10.85</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.6.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.6.3.1\">Weekly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" 
id=\"S4.T1.3.6.3.2\">SLaM</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.6.3.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.6.3.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.6.3.4.1\">0.7486</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.6.3.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.6.3.5.1\">7.12</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.3.7.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.1\">Monthly</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.2\">SLaM</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T1.3.7.4.3\">CoSMo</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T1.3.7.4.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.7.4.4.1\">0.9065</span></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.3.7.4.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.3.7.4.5.1\">3.03</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Baseline Regional Auto Models with search and indicator multipliers - fit metrics reported at the national level.</figcaption>\n</figure>",
54
+ "perturb_sentence_id": [
55
+ 17,
56
+ 18
57
+ ],
58
  "output": {
59
+ "perturbed_statement": "[paragraph id = 17] As a point of reference the linear model in (Varian and Choi, 2009 ) returns a monthly R2 of 0.79 over the training data using both lagged sales and Google Trends. While automotive sales are used in this paper, we expect that our approach can be used to greatly improve forecasts across economic sectors.",
60
+ "perturbed_explanation": "1. The statement compares the model's capabilities by referencing a linear model from Varian and Choi (2009), highlighting an R2 of 0.79 and mentioning the use of lagged sales and Google Trends. 2. By changing 'nowcasts' to 'forecasts', the altered statement implies a traditional forecasting approach rather than the more immediate estimation that nowcasting provides. Nowcasting typically involves estimating current or very near-term conditions, while forecasting may refer to a longer-term prediction. This change misrepresents the original context of the statement, which emphasizes the capability to produce quick, current estimates in economic indicators."
61
  }
62
  },
63
  {
 
97
  "[paragraph id = 16] Our method is generally on par or better than the best AR approaches."
98
  ],
99
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.10\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.3.3\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T2.3.3.4\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r\" id=\"S4.T2.1.1.1\">Test MAPE(%) \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S4.T2.3.3.3\">Test \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.4.4.2\">Logistic Regression</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.4.4.1\">24.9 0.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.4.4.3\">.98</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.5.5.2\">MLP</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.5.5.1\">7.3 1.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.5.5.3\">.99</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.11.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.10.11.1.1\">Google Flu Trends <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.10.11.1.2\">[9.5 - 33.1]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.10.11.1.3\">[.66 - .97]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.12.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.12.2.1\">Elastic Net <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span 
class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.12.2.2\">[9.8 - 15.1]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.12.2.3\">[.92 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.13.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.13.3.1\">Guassian Process <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.13.3.2\">[9.4 - 14.6]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.13.3.3\">[.94 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.14.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.10.14.4.1\">AR <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.10.14.4.2\">[6.7 - 14.3]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.10.14.4.3\">[.88 - .98]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.15.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.15.5.1\">AR+Google Flu Trends <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.10.15.5.2\">[6.2 - 12.5]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.15.5.3\">[.88 - .99]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.6.6\">\n<th 
class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.6.6.2\">AR+Elastic Net <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.6.6.3\">[5.1 - 8.7]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.6.6.1\">[.93 - ]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.7.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.7.7.2\">AR+Guassian Process <cite class=\"ltx_cite ltx_citemacro_citep\">(Lampos et al<span class=\"ltx_text\">.</span>, <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00085v1#bib.bib18\" title=\"\">2015</a>)</cite>\n</th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.7.7.3\">[5.0 - 8.6]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.7.7.1\">[.93 - ]</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.8.8.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.2.1\">CoSMo (Ours)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S4.T2.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.1.1\">5.5 0.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.8.8.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.8.8.3.1\">.99</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.10.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.10.10.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.10.10.3.1\">CoSMo (Ours, Test selection)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S4.T2.9.9.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.9.9.1.1\">3.9 0.1</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.2\">\n<span class=\"ltx_text 
ltx_font_bold\" id=\"S4.T2.10.10.2.1\">1</span>\n</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Benchmarking ILI flu rate prediction at the national level. We show the standard deviation of MAPE for our experiments; we omit this metric for the Pearson coefficient because it was close to zero for all experiments.</figcaption>\n</figure>",
100
+ "perturb_sentence_id": [
101
+ 15,
102
+ 16
103
+ ],
104
  "output": {
105
+ "perturbed_statement": "[paragraph id = 15] The autoregressive (AR) entries in Table 2 represent methods that include either a 3-week or 4-week lag of the most recent ILI rate. Our method is generally on par or better than the best AR approaches.",
106
+ "perturbed_explanation": "The original explanation states: 1) The autoregressive (AR) entries in Table 2 include a 1-week or 2-week lag of the most recent ILI rate. The statement is incorrect because: 2) The statement changes the lag period to 3-week or 4-week, which is factually inaccurate as the context refers to a 1-week or 2-week lag. This introduces a misunderstanding about the methodology and accuracy of the AR models compared to other methods."
107
  }
108
  },
109
  {
 
142
  "[paragraph id = 7] The zero-shot inference performs better in the opposite direction, (.99 ), perhaps leveraging the greater number of training examples and taking advantage of the easier task of national modeling."
143
  ],
144
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T5\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T5.10\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T5.2.2.3\">Training Data</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r\" id=\"S4.T5.2.2.4\">Eval Data</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T5.1.1.1\">Test MAPE(%)\n</th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column\" id=\"S4.T5.2.2.2\">Test \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T5.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.4.4.3\">State</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.4.4.4\">State</th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.4.4.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.6.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.6.6.3\">National</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.6.6.4\">State</th>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.6.6.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.8.8.3\">National</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T5.8.8.4\">National</th>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T5.8.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T5.10.10\">\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_row ltx_border_r\" id=\"S4.T5.10.10.3\">State</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T5.10.10.4\">National</th>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T5.10.10.2\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 5. </span>Zero-shot evaluation for Flu ILI rate prediction. The zero-shot examples are the rows where there is a mismatch between the Training Data column and the Eval Data column. The rows with alignment serve as comparison points. </figcaption>\n</figure>",
145
+ "perturb_sentence_id": [
146
+ 6,
147
+ 7
148
+ ],
149
  "output": {
150
+ "perturbed_statement": "[paragraph id = 6] The model has a surprising capability to infer with some success (.78) national flu rates, in the test period, without ever being trained on national-level targets. The zero-shot inference performs worse in the opposite direction, (.80), perhaps due to fewer training examples and the increased complexity of state-level modeling.",
151
+ "perturbed_explanation": "1. The statement originally explains the model's capability to infer state-level flu rates with some success without being trained on state-level targets, and that the zero-shot inference performs better when predicting parent-level data (countries) than child-level data (states) due to having more training examples and the simplicity of national modeling. 2. The statement incorrectly claims that the model infers national flu rates without ever being trained on national-level targets, which contradicts the context explaining that the national-level model was indeed trained on national-level targets. 3. Additionally, the statement erroneously states that the zero-shot inference performs worse when predicting parent-level data, which is the opposite of what is actually described, i.e., predicting parent-level data is reported to be more successful."
152
  }
153
  },
154
  {
 
190
  "[paragraph id = 11] We can see that the sT5 models perform similar to their all-language counter parts, where as performance for MLSE considerable lowers."
191
  ],
192
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T6\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T6.14\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T6.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.2.2.3\"></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.1.1.1\">Test MAPE(%)\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.2.2.2\">Test \n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.4.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.4.4.3\">MLSE (baseline)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.4.4.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.6.6.3\">sT5 Base</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.6.6.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.8.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.8.8.3\">sT5 Large</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.8.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.10.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T6.10.10.3\">MLSE (English only)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T6.10.10.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T6.12.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.12.12.3\">sT5 Base (English only)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.12.12.2\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S4.T6.14.14\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T6.14.14.3\">sT5 Large (English only)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.13.13.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T6.14.14.2\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 6. </span>National ILI rate modeling results from using different embedding functions from a variety of LMs.</figcaption>\n</figure>",
193
+ "perturb_sentence_id": [
194
+ 5,
195
+ 6
196
+ ],
197
  "output": {
198
+ "perturbed_statement": "[paragraph id = 5] Table 6 shows the results from using different search embeddings created using the sT5 Base (110M parameters) and sT5 Large (335M parameters) models. Surprisingly, smaller capacity models like sT5 Base and sT5 Large outperform the larger capacity MLSE model.",
199
+ "perturbed_explanation": "Original Explanation: Table 6 showcases the performance results when using different model configurations for search embeddings. Surprisingly, despite their larger model size, the sT5 Base and sT5 Large do not outperform the smaller MLSE model in the context provided. 1. The perturbed statement erroneously claims that the smaller capacity models like sT5 Base and sT5 Large outperform the MLSE model. However, the context explicitly states that the performance of sT5 models is similar and not superior to the MLSE model in the multi-language setting, with MLSE experiencing a considerable reduction in performance when using English-only data."
200
  }
201
  }
202
  ]
table_result/2407.00087v2_output.json CHANGED
The diff for this file is too large to render. See raw diff
 
table_result/2407.00088v1_output.json CHANGED
@@ -45,10 +45,13 @@
45
  "[paragraph id = 4] This evaluation guarantees T-MAC s cross-platform compatibility and consistent performance across different instruction sets and various edge deployment scenarios."
46
  ],
47
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T2.2\" style=\"width:433.6pt;height:129.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(35.8pt,-10.7pt) scale(1.19793527173856,1.19793527173856) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.1.1\">Device</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.2\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.2.1\">Processor</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.3.1\">Performance</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.4.1\">Max. 
Memory</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T2.2.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.2.2.1.1\">Cores</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T2.2.1.2.2.2\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.2.2.2.1\">Bandwidth</span> (GB/s)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.3.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.1\">M2-Ultra</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.2\">Apple M2-Ultra</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.3\">16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.4\">819.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.4.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.1\">Raspberry Pi 5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.2\">ARM Cortex-A76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.3\">4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.4\">17.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.5.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.1\">Jetson AGX Orin</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.2\">ARM Cortex-A78AE</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.3\">12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.4\">204.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.1\">Surface Book 3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.2\">Intel Core i5-1035G7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.3\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" 
id=\"S5.T2.2.1.6.4.4\">58.2</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T2.3.1.1\" style=\"font-size:90%;\">Table 2</span>. </span><span class=\"ltx_text\" id=\"S5.T2.4.2\" style=\"font-size:90%;\">Hardware device specification.</span></figcaption>\n</figure>",
48
- "perturb_sentence_id": 0,
 
 
 
49
  "output": {
50
- "perturbed_statement": "[paragraph id = 0] As shown in Table 2 , we evaluate T-MAC across four distinct edge devices. These devices range from high-performance ones like Intel Xeon to less powerful ones like Cortex A357.",
51
- "perturbed_explanation": "The original explanation highlighted the evaluation of T-MAC across a range of devices, such as the high-performance M2-Ultra and the less powerful Raspberry Pi. Now, addressing the statement: 1. The high-performance evaluation platform mentioned is not an Intel Xeon; instead, it refers to M2-Ultra as per the context. 2. The less powerful device named Cortex A357 is not specified within the context provided in this form. Hence, the modified statement introduces inaccuracies concerning the specific devices discussed for evaluation."
52
  }
53
  },
54
  {
@@ -92,10 +95,13 @@
92
  "[paragraph id = 15] The fast aggregation can further enhance performance, but at the cost of model quality."
93
  ],
94
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.4\" style=\"width:433.6pt;height:128.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(34.1pt,-10.1pt) scale(1.18686896846672,1.18686896846672) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T4.4.4.5.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.1.1\">Framework</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.2.1\">Throughput</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.3.1\">WikiText2</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.4.1\">lambada_openai</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.5.1\">WinoGrande</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.1.1.1.1\">Tokens/sec \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.2.2.2.2\">PPL \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.3.3.3.3\">PPL \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.4.4.4.4\">Acc. 
\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.6.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.4.4.6.1.1\">Un-quantized</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.2\">3.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.3\">5.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.4\">12.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.5\">71.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.7.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T4.4.4.7.2.1\">llama.cpp</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.2\">5.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.3\">5.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.4\">12.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.5\">70.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.8.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T4.4.4.8.3.1\">T-MAC</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.2\">7.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.3\">5.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.4\">12.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.5\">70.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.9.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T4.4.4.9.4.1\">T-MAC (+FA)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.2\">8.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.3\">6.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.4\">13.99</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.5\">67.8</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption 
ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T4.10.3.1\" style=\"font-size:90%;\">Table 4</span>. </span><span class=\"ltx_text\" id=\"S5.T4.8.2\" style=\"font-size:90%;\">End-to-end throughput and model quality of Llama-2-7B-4bit on M2-Ultra with single-thread. T-MAC improves throughput by 1.3 compared to llama.cpp with the same model quality. Fast Aggregation (FA) can further improve the throughput gain to 1.6, but the model quality will drop because of the numerical error of current CPU instructions.</span></figcaption>\n</figure>",
95
- "perturb_sentence_id": 11,
 
 
 
96
  "output": {
97
- "perturbed_statement": "[paragraph id = 11] The evaluation is performed on four different tasks: WikiText-2 (merity2016pointer, ), lambada_openai (paperno-etal-2016-lambada, ; radford2019language, ), a custom summarization task (smith2020summarization, ), and WinoGrande (ai2:winogrande, ) for question answering accuracy (the higher the better). As shown in Table 4 , on all of these tasks, T-MAC delivers the same results compared to llama.cpp, suggesting that the error introduced by T-MAC is negligible for real-world models.",
98
- "perturbed_explanation": "The original explanation states: 1. The evaluation mentioned involves only three tasks: WikiText-2, lambada_openai, and WinoGrande, each targeting specific metrics like perplexity or accuracy. 2. Upon reviewing the statement, it inaccurately mentions 'four different tasks' and includes a 'custom summarization task,' which does not align with the outlined tasks. This discrepancy renders the statement factually incorrect."
99
  }
100
  },
101
  {
@@ -131,10 +137,13 @@
131
  "[paragraph id = 10] Compared to llama.cpp on GPU, although T-MAC only achieves 78 throughput, T-MAC only needs 34 power, resulting in 2.3 energy efficiency."
132
  ],
133
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T5.2\" style=\"width:355.6pt;height:131.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(56.0pt,-20.7pt) scale(1.46000059019698,1.46000059019698) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.1.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T5.2.1.1.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.1.1\">Framework</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.2.1\">Throughput</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.3.1\">Power</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.4.1\">Energy</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.1\">Tokens/sec</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.2\">W</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.3\">J/token</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.3.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T5.2.1.3.1.1\">llama.cpp (CPU)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.1.3.1.2\">7.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.1.3.1.3\">15.0</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S5.T5.2.1.3.1.4\">2.12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.4.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T5.2.1.4.2.1\">llama.cpp (GPU)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.4.2.2.1\">20.03</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.3\">30.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.4\">1.54</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.5.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T5.2.1.5.3.1\">T-MAC (CPU)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.2\">15.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.5.3.3.1\">10.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.5.3.4.1\">0.66</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T5.3.1.1\" style=\"font-size:90%;\">Table 5</span>. </span><span class=\"ltx_text\" id=\"S5.T5.4.2\" style=\"font-size:90%;\">Llama-2-7B-2bit end-to-end inference throughput, power and energy comparisons on NVIDIA Jetson AGX Orin.</span></figcaption>\n</figure>",
134
- "perturb_sentence_id": 10,
 
 
 
135
  "output": {
136
- "perturbed_statement": "[paragraph id = 10] Compared to llama.cpp on GPU, although T-MAC only achieves 3.1 throughput, T-MAC only needs 34 power, resulting in 0.9 energy efficiency.Note that Figure 11 shows T-MAC outperforms the GPU on the mpGEMV kernels.",
137
- "perturbed_explanation": "Original Explanation: The provided statement was incorrect because (1) it mentions GPU while the original context discusses CPU; (2) it presents contradictory values for throughput, power, and energy efficiency than those explained, which were throughput at 2.2, power at 69, and energy efficiency at 3.2. (3) It alters specific claims about Figure 11 inconsistently. Continuing, these alterations contradict the defined and observed facts shown. T-MAC’s efficiency improvements are quantifiable and linked to the CPU benchmarks as highlighted."
138
  }
139
  }
140
  ]
 
45
  "[paragraph id = 4] This evaluation guarantees T-MAC s cross-platform compatibility and consistent performance across different instruction sets and various edge deployment scenarios."
46
  ],
47
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T2.2\" style=\"width:433.6pt;height:129.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(35.8pt,-10.7pt) scale(1.19793527173856,1.19793527173856) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.1.1\">Device</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.2\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.2.1\">Processor</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.3.1\">Performance</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.1.1.4.1\">Max. 
Memory</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T2.2.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.2.2.1.1\">Cores</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T2.2.1.2.2.2\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S5.T2.2.1.2.2.2.1\">Bandwidth</span> (GB/s)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.3.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.1\">M2-Ultra</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.2\">Apple M2-Ultra</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.3\">16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.3.1.4\">819.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.4.2\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.1\">Raspberry Pi 5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.2\">ARM Cortex-A76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.3\">4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.4.2.4\">17.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.5.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.1\">Jetson AGX Orin</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.2\">ARM Cortex-A78AE</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.3\">12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T2.2.1.5.3.4\">204.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.1\">Surface Book 3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.2\">Intel Core i5-1035G7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.6.4.3\">4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" 
id=\"S5.T2.2.1.6.4.4\">58.2</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T2.3.1.1\" style=\"font-size:90%;\">Table 2</span>. </span><span class=\"ltx_text\" id=\"S5.T2.4.2\" style=\"font-size:90%;\">Hardware device specification.</span></figcaption>\n</figure>",
48
+ "perturb_sentence_id": [
49
+ 0,
50
+ 1
51
+ ],
52
  "output": {
53
+ "perturbed_statement": "[paragraph id = 0] As shown in Table 2, we evaluate T-MAC across three distinct edge devices. These devices range from high-performance ones like M2-Ultra to less powerful ones like Raspberry Pi.",
54
+ "perturbed_explanation": "1. The original explanation states that T-MAC is evaluated across four distinct edge devices, ensuring a comprehensive assessment of its performance. \n2. The statement is incorrect because it mentions that T-MAC is evaluated across three distinct edge devices, reducing the scope of the evaluation inaccurately. The context implies a wider range of devices is used in the assessment."
55
  }
56
  },
57
  {
 
95
  "[paragraph id = 15] The fast aggregation can further enhance performance, but at the cost of model quality."
96
  ],
97
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.4\" style=\"width:433.6pt;height:128.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(34.1pt,-10.1pt) scale(1.18686896846672,1.18686896846672) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.4.4\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.5.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T4.4.4.5.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.1.1\">Framework</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.2.1\">Throughput</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.3.1\">WikiText2</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.4.1\">lambada_openai</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T4.4.4.5.1.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.4.4.5.1.5.1\">WinoGrande</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.1.1.1.1\">Tokens/sec \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.2.2.2.2\">PPL \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.3.3.3.3\">PPL \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T4.4.4.4.4\">Acc. 
\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.6.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.4.4.6.1.1\">Un-quantized</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.2\">3.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.3\">5.80</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.4\">12.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.4.4.6.1.5\">71.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.7.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T4.4.4.7.2.1\">llama.cpp</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.2\">5.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.3\">5.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.4\">12.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.7.2.5\">70.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.8.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S5.T4.4.4.8.3.1\">T-MAC</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.2\">7.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.3\">5.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.4\">12.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.4.4.8.3.5\">70.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.4.4.9.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T4.4.4.9.4.1\">T-MAC (+FA)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.2\">8.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.3\">6.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.4\">13.99</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.4.4.9.4.5\">67.8</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption 
ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T4.10.3.1\" style=\"font-size:90%;\">Table 4</span>. </span><span class=\"ltx_text\" id=\"S5.T4.8.2\" style=\"font-size:90%;\">End-to-end throughput and model quality of Llama-2-7B-4bit on M2-Ultra with single-thread. T-MAC improves throughput by 1.3 compared to llama.cpp with the same model quality. Fast Aggregation (FA) can further improve the throughput gain to 1.6, but the model quality will drop because of the numerical error of current CPU instructions.</span></figcaption>\n</figure>",
98
+ "perturb_sentence_id": [
99
+ 11,
100
+ 12
101
+ ],
102
  "output": {
103
+ "perturbed_statement": "[paragraph id = 11] The evaluation is performed on three different tasks: WikiText-2 (merity2016pointer, ) and lambada_openai (paperno-etal-2016-lambada, ; radford2019language, ) for perplexity (the higher the better), and WinoGrande (ai2:winogrande, ) for question answering accuracy (the higher the better. As shown in Table 4 , on all of the three tasks, T-MAC delivers significantly better results compared to llama.cpp, suggesting that the error introduced by T-MAC is negligible for real-world models.",
104
+ "perturbed_explanation": "1. The Original Explanation discusses the performance of T-MAC compared to llama.cpp across several tasks. It is mentioned that the evaluation is performed on three tasks, with the goal for perplexity being lower (indicating better performance) and for question-answering accuracy being higher. T-MAC reportedly achieves results consistent with llama.cpp, demonstrating its negligible error impact.\n2. The statement incorrectly suggests that higher perplexity is better, whereas lower perplexity is preferable as it indicates better model performance. Furthermore, it inaccurately claims that T-MAC delivers significantly better results than llama.cpp across all tasks, contradicting the context's conclusion of T-MAC delivering the same results as llama.cpp."
105
  }
106
  },
107
  {
 
137
  "[paragraph id = 10] Compared to llama.cpp on GPU, although T-MAC only achieves 78 throughput, T-MAC only needs 34 power, resulting in 2.3 energy efficiency."
138
  ],
139
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T5.2\" style=\"width:355.6pt;height:131.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(56.0pt,-20.7pt) scale(1.46000059019698,1.46000059019698) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.1.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T5.2.1.1.1.1\" rowspan=\"2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.1.1\">Framework</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.2.1\">Throughput</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.3.1\">Power</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.1.1.4.1\">Energy</span></th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.1\">Tokens/sec</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.2\">W</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column\" id=\"S5.T5.2.1.2.2.3\">J/token</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.3.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T5.2.1.3.1.1\">llama.cpp (CPU)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.1.3.1.2\">7.08</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T5.2.1.3.1.3\">15.0</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S5.T5.2.1.3.1.4\">2.12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.4.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T5.2.1.4.2.1\">llama.cpp (GPU)</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.4.2.2.1\">20.03</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.3\">30.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.4.2.4\">1.54</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.5.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T5.2.1.5.3.1\">T-MAC (CPU)</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.2\">15.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.5.3.3.1\">10.4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.5.3.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T5.2.1.5.3.4.1\">0.66</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T5.3.1.1\" style=\"font-size:90%;\">Table 5</span>. </span><span class=\"ltx_text\" id=\"S5.T5.4.2\" style=\"font-size:90%;\">Llama-2-7B-2bit end-to-end inference throughput, power and energy comparisons on NVIDIA Jetson AGX Orin.</span></figcaption>\n</figure>",
140
+ "perturb_sentence_id": [
141
+ 10,
142
+ 11
143
+ ],
144
  "output": {
145
+ "perturbed_statement": "[paragraph id = 10] Compared to llama.cpp on GPU, T-MAC only achieves 78 throughput and requires 34 power, resulting in 3.2 energy efficiency. Note that Figure 11 shows T-MAC outperforms the GPU on the mpGEMV kernels.",
146
+ "perturbed_explanation": "1. T-MAC improves the throughput to 2.2, not 78. 2. The power consumption with T-MAC is reduced to 69, not 34. 3. The energy efficiency with T-MAC in comparison to llama.cpp on CPU is 3.2. However, the statement should not specify that this value results from comparing with the GPU, as this value was specifically mentioned relative to CPU. Therefore, attributing 3.2 energy efficiency to the GPU comparison is incorrect."
147
  }
148
  }
149
  ]
table_result/2407.00091v1_output.json CHANGED
@@ -37,10 +37,13 @@
37
  "[paragraph id = 5] The second phase of our testing investigates how users react to various values of ."
38
  ],
39
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.25\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.2.2.2\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.3.3.3\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.4.4.4\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.5.5.5\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.9.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.9.9.5\">Number of map pins</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.6.6.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.7.7.2\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.8.8.3\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.9.9.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.13.13\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.13.13.5\">Average booking probability</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.13.13.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.17.17\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.17.17.5\">Average total price</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.14.14.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.15.15.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.16.16.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.17.17.4\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.21.21\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.21.21.5\">Average number of reviews</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.18.18.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.19.19.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.20.20.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.21.21.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.25.25\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.25.25.5\">Average review rating</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.22.22.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.23.23.2\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.24.24.3\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.25.25.4\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S3.T1.31.3.1\" style=\"font-size:90%;\">Table 1</span>. </span><span class=\"ltx_text\" id=\"S3.T1.29.2\" style=\"font-size:90%;\">Offline exploration of compared against a baseline with no filtering, which is conceptually equivalent to .</span></figcaption>\n</figure>",
40
- "perturb_sentence_id": 1,
 
 
 
41
  "output": {
42
- "perturbed_statement": "[paragraph id = 1] Table 1 presents the aggregate statistics of map-results unrelated to different values of . The lower bound of the exploration is determined by technical limitations, and the upper bound the result of proportional effects.",
43
- "perturbed_explanation": "The original explanation: 1. Table 1 in the context provides the aggregate statistics for map-results corresponding to various parameter values. 2. The bounds for the exploration are influenced by product experience at the lower end and diminishing effects at the upper end. However, the statement modifies these aspects: 1. It wrongly indicates that map-results are unrelated to the parameter values, contradicting the analysis described. 2. It incorrectly states the determinants for exploration bounds as 'technical limitations' for the lower bound and 'proportional effects' for the upper, which are inconsistent with the indicated factors of 'product experience considerations' and 'diminishing effects'."
44
  }
45
  }
46
  ]
 
37
  "[paragraph id = 5] The second phase of our testing investigates how users react to various values of ."
38
  ],
39
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.25\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.5.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.2.2.2\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.3.3.3\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.4.4.4\"></th>\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.5.5.5\"></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.9.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.9.9.5\">Number of map pins</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.6.6.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.7.7.2\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.8.8.3\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S3.T1.9.9.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.13.13\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.13.13.5\">Average booking probability</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.13.13.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.17.17\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.17.17.5\">Average total price</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.14.14.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.15.15.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.16.16.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.17.17.4\"></td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.21.21\">\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.21.21.5\">Average number of reviews</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.18.18.1\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.19.19.2\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.20.20.3\"></td>\n<td class=\"ltx_td ltx_align_left\" id=\"S3.T1.21.21.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.25.25\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.25.25.5\">Average review rating</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.22.22.1\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.23.23.2\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.24.24.3\"></td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S3.T1.25.25.4\"></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S3.T1.31.3.1\" style=\"font-size:90%;\">Table 1</span>. </span><span class=\"ltx_text\" id=\"S3.T1.29.2\" style=\"font-size:90%;\">Offline exploration of compared against a baseline with no filtering, which is conceptually equivalent to .</span></figcaption>\n</figure>",
40
+ "perturb_sentence_id": [
41
+ 1,
42
+ 2
43
+ ],
44
  "output": {
45
+ "perturbed_statement": "[paragraph id = 1] Table 1 summarizes the effect of the Bookability Filter on different user demographics. The lower bound of the exploration is set by product experience considerations, and the upper bound the result of diminishing effects.",
46
+ "perturbed_explanation": "1. The original explanation states that Table 1 summarizes the aggregate statistics of map-results corresponding to different values of an unspecified variable. The lower bound of exploration is determined by product experience considerations, while the upper bound is due to diminishing effects. 2. The statement is incorrect because it claims that Table 1 summarizes the effect of the Bookability Filter on different user demographics, which is not mentioned in the original context. Instead, the context indicates that Table 1 validates the effect of the Bookability Filter on map-results, not on user demographics."
47
  }
48
  }
49
  ]
table_result/2407.00100v1_output.json CHANGED
@@ -31,10 +31,13 @@
31
  "[paragraph id = 6] Lastly, IDAICL notably enhances worst-case accuracy and diminishes performance variance across different seeds, showcasing its ability to improve prediction stability."
32
  ],
33
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T1.300\" style=\"width:433.6pt;height:437.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-59.7pt,60.3pt) scale(0.784039515230472,0.784039515230472) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T1.300.300\">\n<tr class=\"ltx_tr\" id=\"S3.T1.300.300.301\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.1\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.1.1\" style=\"background-color:#D9D9D9;\">PLM</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.2\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.2.1\" style=\"background-color:#D9D9D9;\">Method</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.3\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.3.1\" style=\"background-color:#D9D9D9;\">m</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.4\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.4.1\" style=\"background-color:#D9D9D9;\">SST-2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.5\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.5.1\" style=\"background-color:#D9D9D9;\">SST-5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.6\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.6.1\" style=\"background-color:#D9D9D9;\">MR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.7\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.7.1\" style=\"background-color:#D9D9D9;\">CR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.8\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.8.1\" 
style=\"background-color:#D9D9D9;\">Amazon</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.9\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.9.1\" style=\"background-color:#D9D9D9;\">Subj</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.10\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.10.1\" style=\"background-color:#D9D9D9;\">TREC</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.11\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.11.1\" style=\"background-color:#D9D9D9;\">DBPedia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.12\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.12.1\" style=\"background-color:#D9D9D9;\">AGNews</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.13\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.13.1\" style=\"background-color:#D9D9D9;\">CB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.10.10.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.11\" rowspan=\"12\"><span class=\"ltx_text\" id=\"S3.T1.10.10.10.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.10.10.10.11.1.1\" style=\"width:6.8pt;height:53.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:53.4pt;transform:translate(-23.28pt,-23.28pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S3.T1.10.10.10.11.1.1.1\">GPT-2 0.8B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.12\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.10.10.10.13.1\">4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S3.T1.2.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.4.4.4.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.5.5.5.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.6.6.6.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.7.7.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.8.8.8.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.9.9.9.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.10.10.10.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.20.20.20\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.20.20.20.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.11.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.12.12.12.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.13.13.13.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.14.14.14.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.15.15.15.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.16.16.16.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.17.17.17.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.18.18.18.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.19.19.19.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.20.20.20.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.30.30.30\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.30.30.30.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.30.30.30.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.30.30.30.12.1\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.21.21.21.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.22.22.22.2\"></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.23.23.23.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.24.24.24.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.25.25.25.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.26.26.26.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.27.27.27.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.28.28.28.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.29.29.29.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.30.30.30.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.40.40.40\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.40.40.40.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.31.31.31.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.32.32.32.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.33.33.33.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.34.34.34.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.35.35.35.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.36.36.36.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.37.37.37.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.38.38.38.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.39.39.39.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.40.40.40.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.50.50.50\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.50.50.50.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.50.50.50.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.50.50.50.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.41.41.41.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.42.42.42.2\"></td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S3.T1.43.43.43.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.44.44.44.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.45.45.45.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.46.46.46.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.47.47.47.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.48.48.48.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.49.49.49.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.50.50.50.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.60.60.60\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.60.60.60.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.51.51.51.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.52.52.52.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.53.53.53.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.54.54.54.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.55.55.55.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.56.56.56.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.57.57.57.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.58.58.58.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.59.59.59.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.60.60.60.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.70.70.70\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.70.70.70.11\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.70.70.70.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.70.70.70.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.61.61.61.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.62.62.62.2\"></td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S3.T1.63.63.63.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.64.64.64.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.65.65.65.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.66.66.66.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.67.67.67.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.68.68.68.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.69.69.69.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.70.70.70.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.80.80.80\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.80.80.80.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.71.71.71.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.72.72.72.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.73.73.73.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.74.74.74.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.75.75.75.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.76.76.76.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.77.77.77.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.78.78.78.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.79.79.79.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.80.80.80.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.90.90.90\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.90.90.90.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.90.90.90.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.90.90.90.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.81.81.81.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.82.82.82.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.83.83.83.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.84.84.84.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.85.85.85.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.86.86.86.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.87.87.87.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.88.88.88.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.89.89.89.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.90.90.90.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.100.100.100\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.100.100.100.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.91.91.91.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.92.92.92.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.93.93.93.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.94.94.94.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.95.95.95.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.96.96.96.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.97.97.97.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.98.98.98.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.99.99.99.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.100.100.100.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.110.110.110\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.110.110.110.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.110.110.110.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.110.110.110.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.101.101.101.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.102.102.102.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.103.103.103.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.104.104.104.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.105.105.105.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.106.106.106.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.107.107.107.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.108.108.108.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.109.109.109.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.110.110.110.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.120.120.120\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.120.120.120.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.111.111.111.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.112.112.112.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.113.113.113.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.114.114.114.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.115.115.115.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.116.116.116.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.117.117.117.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.118.118.118.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.119.119.119.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.120.120.120.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.130.130.130\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.11\" rowspan=\"12\"><span class=\"ltx_text\" id=\"S3.T1.130.130.130.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.130.130.130.11.1.1\" style=\"width:6.8pt;height:53.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:53.4pt;transform:translate(-23.28pt,-23.28pt) rotate(-90deg) ;\">\n<span 
class=\"ltx_p\" id=\"S3.T1.130.130.130.11.1.1.1\">GPT-2 1.5B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.12\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.130.130.130.13.1\">4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.121.121.121.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.122.122.122.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.123.123.123.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.124.124.124.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.125.125.125.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.126.126.126.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.127.127.127.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.128.128.128.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.129.129.129.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.130.130.130.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.140.140.140\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.140.140.140.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.131.131.131.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.132.132.132.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.133.133.133.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.134.134.134.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.135.135.135.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.136.136.136.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.137.137.137.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.138.138.138.8\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.139.139.139.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.140.140.140.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.150.150.150\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.150.150.150.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.150.150.150.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.150.150.150.12.1\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.141.141.141.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.142.142.142.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.143.143.143.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.144.144.144.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.145.145.145.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.146.146.146.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.147.147.147.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.148.148.148.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.149.149.149.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.150.150.150.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.160.160.160\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.160.160.160.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.151.151.151.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.152.152.152.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.153.153.153.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.154.154.154.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.155.155.155.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.156.156.156.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.157.157.157.7\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.158.158.158.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.159.159.159.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.160.160.160.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.170.170.170\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.170.170.170.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.170.170.170.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.170.170.170.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.161.161.161.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.162.162.162.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.163.163.163.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.164.164.164.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.165.165.165.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.166.166.166.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.167.167.167.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.168.168.168.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.169.169.169.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.170.170.170.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.180.180.180\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.180.180.180.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.171.171.171.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.172.172.172.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.173.173.173.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.174.174.174.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.175.175.175.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.176.176.176.6\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.177.177.177.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.178.178.178.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.179.179.179.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.180.180.180.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.190.190.190\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.190.190.190.11\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.190.190.190.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.190.190.190.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.181.181.181.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.182.182.182.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.183.183.183.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.184.184.184.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.185.185.185.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.186.186.186.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.187.187.187.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.188.188.188.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.189.189.189.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.190.190.190.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.200.200.200\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.200.200.200.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.191.191.191.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.192.192.192.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.193.193.193.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.194.194.194.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.195.195.195.5\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.196.196.196.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.197.197.197.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.198.198.198.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.199.199.199.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.200.200.200.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.210.210.210\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.210.210.210.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.210.210.210.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.210.210.210.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.201.201.201.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.202.202.202.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.203.203.203.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.204.204.204.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.205.205.205.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.206.206.206.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.207.207.207.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.208.208.208.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.209.209.209.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.210.210.210.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.220.220.220\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.220.220.220.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.211.211.211.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.212.212.212.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.213.213.213.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.214.214.214.4\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.215.215.215.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.216.216.216.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.217.217.217.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.218.218.218.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.219.219.219.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.220.220.220.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.230.230.230\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.230.230.230.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.230.230.230.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.230.230.230.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.221.221.221.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.222.222.222.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.223.223.223.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.224.224.224.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.225.225.225.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.226.226.226.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.227.227.227.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.228.228.228.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.229.229.229.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.230.230.230.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.240.240.240\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.240.240.240.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.231.231.231.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.232.232.232.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.233.233.233.3\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T1.234.234.234.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.235.235.235.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.236.236.236.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.237.237.237.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.238.238.238.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.239.239.239.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.240.240.240.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.250.250.250\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.11\" rowspan=\"6\"><span class=\"ltx_text\" id=\"S3.T1.250.250.250.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.250.250.250.11.1.1\" style=\"width:6.8pt;height:42.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:42.2pt;transform:translate(-17.66pt,-17.66pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S3.T1.250.250.250.11.1.1.1\">GPT-Neo</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.12\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.250.250.250.13.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.241.241.241.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.242.242.242.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.243.243.243.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.244.244.244.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.245.245.245.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.246.246.246.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.247.247.247.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S3.T1.248.248.248.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.249.249.249.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.250.250.250.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.260.260.260\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.260.260.260.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.251.251.251.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.252.252.252.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.253.253.253.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.254.254.254.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.255.255.255.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.256.256.256.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.257.257.257.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.258.258.258.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.259.259.259.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.260.260.260.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.270.270.270\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.270.270.270.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.270.270.270.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.270.270.270.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.261.261.261.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.262.262.262.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.263.263.263.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.264.264.264.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.265.265.265.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.266.266.266.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.267.267.267.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.268.268.268.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.269.269.269.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.270.270.270.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.280.280.280\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.280.280.280.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.271.271.271.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.272.272.272.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.273.273.273.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.274.274.274.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.275.275.275.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.276.276.276.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.277.277.277.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.278.278.278.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.279.279.279.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.280.280.280.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.290.290.290\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.290.290.290.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S3.T1.290.290.290.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.290.290.290.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.281.281.281.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.282.282.282.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.283.283.283.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.284.284.284.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.285.285.285.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.286.286.286.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.287.287.287.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.288.288.288.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.289.289.289.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.290.290.290.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.300.300.300\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S3.T1.300.300.300.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.291.291.291.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.292.292.292.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.293.293.293.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.294.294.294.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.295.295.295.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.296.296.296.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.297.297.297.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.298.298.298.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.299.299.299.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.300.300.300.10\"></td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Comparison results of three PLMs.\nTwo numbers indicate the mean accuracy (%) and standard deviation over different seeds. The best and second-best results per PLM per dataset are highlighted in bold and underlined, respectively. \"+IDAICL\" means that the current approach is used in conjunction with IDAICL. 
The results for different numbers of demonstration examples (i.e., values) using the GPT-Neo model are illustrated in Figure <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00100v1#S4.F3\" title=\"Figure 3 ‣ 4.1 Models and Datasets ‣ 4 Experimental Setup ‣ Enhancing In-Context Learning via Implicit Demonstration Augmentation\"><span class=\"ltx_text ltx_ref_tag\">3</span></a>.\n</figcaption>\n</figure>",
34
- "perturb_sentence_id": 3,
 
 
 
35
  "output": {
36
- "perturbed_statement": "[paragraph id = 3] Compared to Vanilla ICL, IDAICL outperforms by an average of 7.7% and 8.4% across diverse datasets and values for GPT-2 with 0.8B and 1.5B parameters, respectively. Secondly, in comparison to other ICL baselines like Channel ICL, MetaICL, and EPR, the integration of IDAICL does not consistently deliver notable performance improvements, questioning the efficacy of enhancing demonstrations for refined predictions.",
37
- "perturbed_explanation": "1. IDAICL demonstrates superior performance compared to Vanilla ICL, highlighting its contribution toward efficient learning implementation strategies. 2. The original statement emphasizes the consistent enhancement achieved by comparing IDAICL's integration with other ICL baselines. However, the statement erroneously alters the improvement percentages for Vanilla ICL and inaccurately asserts inconsistency in IDAICL's performance improvement against other baselines, contrary to the reliable enhancements observed. Such inconsistencies misrepresent the established findings and undermine the significance of IDAICL's applications invoked."
38
  }
39
  },
40
  {
@@ -55,10 +58,13 @@
55
  "[paragraph id = 1] Table 2 presents the comparison results for the LLaMA models, where IDAICL consistently achieves state-of-the-art performance, except for TREC using the LLaMA model with 33B parameters."
56
  ],
57
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.100\" style=\"width:433.6pt;height:169.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-36.5pt,14.3pt) scale(0.855879733045295,0.855879733045295) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.100.100\">\n<tr class=\"ltx_tr\" id=\"S4.T2.100.100.101\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.100.100.101.1\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.1.1\" style=\"background-color:#D9D9D9;\">PLM</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.100.100.101.2\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.2.1\" style=\"background-color:#D9D9D9;\">Method</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.3\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.3.1\" style=\"background-color:#D9D9D9;\">SST-2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.4\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.4.1\" style=\"background-color:#D9D9D9;\">SST-5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.5\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.5.1\" style=\"background-color:#D9D9D9;\">MR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.6\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.6.1\" style=\"background-color:#D9D9D9;\">CR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.7\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.7.1\" style=\"background-color:#D9D9D9;\">Subj</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.8\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.8.1\" 
style=\"background-color:#D9D9D9;\">TREC</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.9\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.9.1\" style=\"background-color:#D9D9D9;\">DBPedia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.10\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.10.1\" style=\"background-color:#D9D9D9;\">AGNews</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.11\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.11.1\" style=\"background-color:#D9D9D9;\">CB</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.12\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.12.1\" style=\"background-color:#D9D9D9;\">Avg.</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.9.9.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.9.9.9.10\" rowspan=\"5\"><span class=\"ltx_text\" id=\"S4.T2.9.9.9.10.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T2.9.9.9.10.1.1\" style=\"width:6.8pt;height:54.6pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:54.6pt;transform:translate(-23.88pt,-23.88pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S4.T2.9.9.9.10.1.1.1\">LLaMA 13B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.9.9.9.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.2.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.4.4.4.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.5.5.5.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.6.6.6.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S4.T2.7.7.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.8.8.8.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.9.9.9.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.9.9.9.12\">72.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.18.18.18\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.18.18.18.10\">ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.11.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.12.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.13.13.13.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.14.14.14.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.15.15.15.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.16.16.16.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.17.17.17.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.18.18.18.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.18.18.18.11\">77.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.31.31.31\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.22.22.22.4\">\n<span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.1\">P</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.2\">RO</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.3\">C</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.4\">A</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.23.23.23.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.24.24.24.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.25.25.25.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.26.26.26.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.27.27.27.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.28.28.28.10\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.29.29.29.11\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.30.30.30.12\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.31.31.31.13\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.31.31.31.14\"><span class=\"ltx_text ltx_framed ltx_framed_underline\" id=\"S4.T2.31.31.31.14.1\">77.9</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.40.40.40\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.40.40.40.10\">D-ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.32.32.32.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.33.33.33.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.34.34.34.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.35.35.35.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.36.36.36.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.37.37.37.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.38.38.38.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.39.39.39.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.40.40.40.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.40.40.40.11\">77.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.50.50.50\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.50.50.50.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.41.41.41.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.42.42.42.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.43.43.43.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.44.44.44.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.45.45.45.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.46.46.46.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.47.47.47.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.48.48.48.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.49.49.49.9\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.50.50.50.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.59.59.59\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_tt\" id=\"S4.T2.59.59.59.10\" rowspan=\"5\"><span class=\"ltx_text\" id=\"S4.T2.59.59.59.10.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T2.59.59.59.10.1.1\" style=\"width:6.8pt;height:54.6pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:54.6pt;transform:translate(-23.88pt,-23.88pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S4.T2.59.59.59.10.1.1.1\">LLaMA 33B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.59.59.59.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.51.51.51.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.52.52.52.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.53.53.53.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.54.54.54.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.55.55.55.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.56.56.56.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.57.57.57.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.58.58.58.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.59.59.59.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.59.59.59.12\">76.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.68.68.68\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.68.68.68.10\">ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.60.60.60.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.61.61.61.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.62.62.62.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.63.63.63.4\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.64.64.64.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.65.65.65.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.66.66.66.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.67.67.67.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.68.68.68.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.68.68.68.11\">78.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.81.81.81\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.72.72.72.4\">\n<span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.1\">P</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.2\">RO</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.3\">C</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.4\">A</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.73.73.73.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.74.74.74.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.75.75.75.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.76.76.76.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.77.77.77.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.78.78.78.10\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.79.79.79.11\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.80.80.80.12\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.81.81.81.13\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.81.81.81.14\">78.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.90.90.90\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.90.90.90.10\">D-ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.82.82.82.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.83.83.83.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.84.84.84.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.85.85.85.4\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.86.86.86.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.87.87.87.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.88.88.88.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.89.89.89.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.90.90.90.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.90.90.90.11\"><span class=\"ltx_text ltx_framed ltx_framed_underline\" id=\"S4.T2.90.90.90.11.1\">78.6</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.100.100.100\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S4.T2.100.100.100.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.91.91.91.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.92.92.92.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.93.93.93.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.94.94.94.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.95.95.95.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.96.96.96.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.97.97.97.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.98.98.98.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.99.99.99.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.100.100.100.10\"></td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Comparison results of Macro-F1 for the LLaMA model with 13B and 33B parameters, setting to 4.\n</figcaption>\n</figure>",
58
- "perturb_sentence_id": 1,
 
 
 
59
  "output": {
60
- "perturbed_statement": "[paragraph id = 1] Table 2 presents the comparison results for the LLaMA models, where IDAICL consistently achieves state-of-the-art performance, except for SST-2 using the LLaMA model with 13B parameters. These findings suggest that IDAICL which leverages statistical information derived from external training data for prediction calibration, generally outperforms methods relying on estimated biases for correction.",
61
- "perturbed_explanation": "Original Explanation: The findings highlight that IDAICL utilizes statistical information from the input data distribution for effective prediction calibration, leading to superior performance against other methods in most evaluated scenarios. However, this observation does not hold for TREC when using the LLaMA model with 33B parameters as indicated in the statement.\\n1. The assessment specifying SST-2 as the dataset where IDAICL does not achieve optimal performance contradicts the context, as this limitation pertains to TREC with the 33B parameter model rather than SST-2.\\n2. The characterization of statistical insights sourcing from external training data as the foundation of IDAICL's performance improvement is incorrect, as the calibration relies on the distribution of the input data directly, not external data."
62
  }
63
  },
64
  {
@@ -86,10 +92,13 @@
86
  "[paragraph id = 4] This phenomenon is understandable as statistical properties inherently encompass richer global information compared to individual demonstrations."
87
  ],
88
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S5.T3.16\">\n<tr class=\"ltx_tr\" id=\"S5.T3.16.17\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S5.T3.16.17.1\"><span class=\"ltx_text\" id=\"S5.T3.16.17.1.1\" style=\"background-color:#D9D9D9;\">Dataset</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.2\"><span class=\"ltx_text\" id=\"S5.T3.16.17.2.1\" style=\"background-color:#D9D9D9;\">0-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.3\"><span class=\"ltx_text\" id=\"S5.T3.16.17.3.1\" style=\"background-color:#D9D9D9;\">1-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.4\"><span class=\"ltx_text\" id=\"S5.T3.16.17.4.1\" style=\"background-color:#D9D9D9;\">4-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.5\"><span class=\"ltx_text\" id=\"S5.T3.16.17.5.1\" style=\"background-color:#D9D9D9;\">IDAICL</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.4.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S5.T3.4.4.5\">SST-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.4.4.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.8.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S5.T3.8.8.5\">SST-5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.6.6.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.7.7.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.8.8.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T3.12.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S5.T3.12.12.5\">MR</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.10.10.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.11.11.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.12.12.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.16.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S5.T3.16.16.5\">Subj</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.13.13.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.14.14.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.15.15.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.16.16.4\"></td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Accuracy comparison between Vanilla ICL and IDAICL based solely on statistical properties, using the GPT-2 model with 0.8B parameters.\n</figcaption>\n</figure>",
89
- "perturb_sentence_id": 2,
 
 
 
90
  "output": {
91
- "perturbed_statement": "[paragraph id = 2] As shown in Table 3 , IDAICL relying solely on statistical properties performs consistently worse than Vanilla ICL across scenarios with zero, one, and even four demonstrations. This deemphasizes the role of prior statistics obtained from training data in PLMs predictions.",
92
- "perturbed_explanation": "Original Explanation: The effectiveness of IDAICL leveraging statistical properties draws its strength from the rich information provided by global statistics, leading to superior performance over Vanilla ICL in diverse scenarios. Altered Explanation: The statement claiming that IDAICL performs worse than Vanilla ICL contradicts the evidence presented, which actually highlights the advantageous performance of IDAICL utilizing statistical properties. Thus, the altered claim is inconsistent with the findings provided."
93
  }
94
  },
95
  {
@@ -147,10 +156,13 @@
147
  "[paragraph id = 24] Some templates achieve higher average performance than others."
148
  ],
149
  "table_html": "<figure class=\"ltx_table\" id=\"A8.T10\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"A8.T10.1\" style=\"width:433.6pt;height:1039.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-34.7pt,83.2pt) scale(0.86199860309836,0.86199860309836) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1\">\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.1\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.1.1\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.1.1\" style=\"background-color:#D9D9D9;\">Format ID</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.1.2\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.2.1\" style=\"background-color:#D9D9D9;\">Prompt</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"A8.T10.1.1.1.3\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.3.1\" style=\"background-color:#D9D9D9;\">Label names</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.2.1\">1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.2.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.2.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.2.1\">\n<span 
class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Answer: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"A8.T10.1.1.2.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.3.1\">2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.3.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.3.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r 
ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.3.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.4.1\">3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.4.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.4.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.4.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.4.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">My review for last night’s film: This movie is amazing! 
The critics agreed that this movie was good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.4.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.4.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">My review for last night’s film: Horrific movie, don’t see it. The critics agreed that this movie was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.4.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.5.1\">4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.5.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.5.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Here is what our critics think for this month’s films.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">One of our critics wrote \"This movie is amazing!\". 
Her sentiment towards the film was positive.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">One of our critics wrote \"Horrific movie, don’t see it\". Her sentiment towards the film was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.5.3\">positive / negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.6.1\">5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.6.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.6.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Critical reception [ edit ]</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">In a contemporary review, Roger Ebert wrote \"This movie is amazing!\". 
Entertainment Weekly agreed, and\nthe overall critical reception of the film was good.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">In a contemporary review, Roger Ebert wrote \"Horrific movie, don’t see it\". Entertainment Weekly agreed, and\nthe overall critical reception of the film was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.6.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.7.1\">6</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.7.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.7.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Positive Review? 
Yes</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Positive Review?</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.7.3\">Yes / No</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.8.1\">7</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.8.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.8.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.2.1.1\">\n<span class=\"ltx_p\" 
id=\"A8.T10.1.1.8.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Question: Is the sentiment of the above review Positive or Negative?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Question: Is the sentiment of the above review Positive or Negative?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.8.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.9.1\">8</td>\n<td 
class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.9.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.9.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Question: Did the author think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"A8.T10.1.1.9.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Question: Did the author think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.9.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.10.1\">9</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.10.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.10.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Question: Did the author of the following tweet think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Tweet: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" 
id=\"A8.T10.1.1.10.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Question: Did the author of the following tweet think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Tweet: Horrific movie, don’t see it</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.10.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.11.1\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.11.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.11.2.1\"></span><span 
class=\"ltx_text\" id=\"A8.T10.1.1.11.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.11.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.11.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.11.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! My overall feeling was that the movie was good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.11.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.11.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. My overall feeling was that the movie was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.11.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.11.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.12.1\">11</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.12.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.12.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.12.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.12.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! 
I liked the movie.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.12.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.12.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. I</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.12.3\">liked / hated</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.13.1\">12</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.13.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.13.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.13.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.13.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! My friend asked me if I would give the movie 0 or 5 stars, I said 5</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.13.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.13.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. 
My friend asked me if I would give the movie 0 or 5 stars, I said</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.13.3\">0 / 5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.14.1\">13</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.14.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.14.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Input: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Sentiment: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Input: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.4.1\">\n<span class=\"ltx_inline-block 
ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Sentiment:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.14.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.15.1\">14</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.15.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.15.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Positive: True</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r 
ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Positive:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.15.3\">True / False</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.16.1\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.16.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.16.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Stars: 5</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see 
it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Stars:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"A8.T10.1.1.16.3\">5 / 0</td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 10: </span>The templates employed for examining the influence of formats on the SST-2 dataset, following those outlined by Zhao et al. <cite class=\"ltx_cite ltx_citemacro_cite\">Zhao et al. (<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00100v1#bib.bib62\" title=\"\">2021</a>)</cite>. An example from the training data is used for illustration.</figcaption>\n</figure>",
150
- "perturb_sentence_id": 21,
 
 
 
151
  "output": {
152
- "perturbed_statement": "[paragraph id = 21] (2021 ).The templates are introduced in Figure 5 of the Appendix.",
153
- "perturbed_explanation": "Original Explanation: The original statement correctly notes that the templates are elaborated in Table 10 of the Appendix. 2. The statement is incorrect because it claims the templates are introduced in Figure 5 of the Appendix, which is not substantiated by the context provided, making this assertion factually inaccurate."
154
  }
155
  }
156
  ]
 
31
  "[paragraph id = 6] Lastly, IDAICL notably enhances worst-case accuracy and diminishes performance variance across different seeds, showcasing its ability to improve prediction stability."
32
  ],
33
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T1.300\" style=\"width:433.6pt;height:437.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-59.7pt,60.3pt) scale(0.784039515230472,0.784039515230472) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T1.300.300\">\n<tr class=\"ltx_tr\" id=\"S3.T1.300.300.301\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.1\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.1.1\" style=\"background-color:#D9D9D9;\">PLM</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.2\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.2.1\" style=\"background-color:#D9D9D9;\">Method</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.300.300.301.3\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.3.1\" style=\"background-color:#D9D9D9;\">m</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.4\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.4.1\" style=\"background-color:#D9D9D9;\">SST-2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.5\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.5.1\" style=\"background-color:#D9D9D9;\">SST-5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.6\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.6.1\" style=\"background-color:#D9D9D9;\">MR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.7\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.7.1\" style=\"background-color:#D9D9D9;\">CR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.8\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.8.1\" 
style=\"background-color:#D9D9D9;\">Amazon</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.9\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.9.1\" style=\"background-color:#D9D9D9;\">Subj</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.10\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.10.1\" style=\"background-color:#D9D9D9;\">TREC</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.11\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.11.1\" style=\"background-color:#D9D9D9;\">DBPedia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.12\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.12.1\" style=\"background-color:#D9D9D9;\">AGNews</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.300.300.301.13\"><span class=\"ltx_text\" id=\"S3.T1.300.300.301.13.1\" style=\"background-color:#D9D9D9;\">CB</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.10.10.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.11\" rowspan=\"12\"><span class=\"ltx_text\" id=\"S3.T1.10.10.10.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.10.10.10.11.1.1\" style=\"width:6.8pt;height:53.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:53.4pt;transform:translate(-23.28pt,-23.28pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S3.T1.10.10.10.11.1.1.1\">GPT-2 0.8B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.12\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.10.10.10.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.10.10.10.13.1\">4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S3.T1.2.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.4.4.4.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.5.5.5.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.6.6.6.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.7.7.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.8.8.8.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.9.9.9.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.10.10.10.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.20.20.20\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.20.20.20.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.11.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.12.12.12.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.13.13.13.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.14.14.14.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.15.15.15.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.16.16.16.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.17.17.17.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.18.18.18.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.19.19.19.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.20.20.20.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.30.30.30\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.30.30.30.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.30.30.30.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.30.30.30.12.1\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.21.21.21.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.22.22.22.2\"></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.23.23.23.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.24.24.24.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.25.25.25.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.26.26.26.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.27.27.27.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.28.28.28.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.29.29.29.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.30.30.30.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.40.40.40\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.40.40.40.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.31.31.31.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.32.32.32.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.33.33.33.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.34.34.34.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.35.35.35.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.36.36.36.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.37.37.37.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.38.38.38.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.39.39.39.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.40.40.40.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.50.50.50\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.50.50.50.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.50.50.50.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.50.50.50.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.41.41.41.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.42.42.42.2\"></td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_t\" id=\"S3.T1.43.43.43.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.44.44.44.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.45.45.45.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.46.46.46.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.47.47.47.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.48.48.48.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.49.49.49.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.50.50.50.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.60.60.60\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.60.60.60.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.51.51.51.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.52.52.52.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.53.53.53.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.54.54.54.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.55.55.55.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.56.56.56.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.57.57.57.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.58.58.58.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.59.59.59.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.60.60.60.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.70.70.70\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.70.70.70.11\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.70.70.70.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.70.70.70.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.61.61.61.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.62.62.62.2\"></td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S3.T1.63.63.63.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.64.64.64.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.65.65.65.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.66.66.66.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.67.67.67.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.68.68.68.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.69.69.69.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.70.70.70.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.80.80.80\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.80.80.80.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.71.71.71.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.72.72.72.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.73.73.73.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.74.74.74.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.75.75.75.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.76.76.76.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.77.77.77.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.78.78.78.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.79.79.79.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.80.80.80.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.90.90.90\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.90.90.90.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.90.90.90.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.90.90.90.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.81.81.81.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.82.82.82.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.83.83.83.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.84.84.84.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.85.85.85.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.86.86.86.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.87.87.87.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.88.88.88.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.89.89.89.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.90.90.90.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.100.100.100\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.100.100.100.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.91.91.91.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.92.92.92.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.93.93.93.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.94.94.94.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.95.95.95.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.96.96.96.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.97.97.97.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.98.98.98.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.99.99.99.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.100.100.100.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.110.110.110\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.110.110.110.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.110.110.110.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.110.110.110.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.101.101.101.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.102.102.102.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.103.103.103.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.104.104.104.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.105.105.105.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.106.106.106.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.107.107.107.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.108.108.108.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.109.109.109.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.110.110.110.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.120.120.120\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.120.120.120.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.111.111.111.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.112.112.112.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.113.113.113.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.114.114.114.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.115.115.115.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.116.116.116.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.117.117.117.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.118.118.118.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.119.119.119.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.120.120.120.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.130.130.130\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.11\" rowspan=\"12\"><span class=\"ltx_text\" id=\"S3.T1.130.130.130.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.130.130.130.11.1.1\" style=\"width:6.8pt;height:53.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:53.4pt;transform:translate(-23.28pt,-23.28pt) rotate(-90deg) ;\">\n<span 
class=\"ltx_p\" id=\"S3.T1.130.130.130.11.1.1.1\">GPT-2 1.5B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.12\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.130.130.130.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.130.130.130.13.1\">4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.121.121.121.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.122.122.122.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.123.123.123.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.124.124.124.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.125.125.125.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.126.126.126.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.127.127.127.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.128.128.128.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.129.129.129.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.130.130.130.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.140.140.140\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.140.140.140.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.131.131.131.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.132.132.132.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.133.133.133.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.134.134.134.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.135.135.135.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.136.136.136.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.137.137.137.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.138.138.138.8\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.139.139.139.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.140.140.140.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.150.150.150\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.150.150.150.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.150.150.150.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.150.150.150.12.1\">8</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.141.141.141.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.142.142.142.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.143.143.143.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.144.144.144.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.145.145.145.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.146.146.146.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.147.147.147.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.148.148.148.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.149.149.149.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.150.150.150.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.160.160.160\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.160.160.160.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.151.151.151.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.152.152.152.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.153.153.153.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.154.154.154.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.155.155.155.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.156.156.156.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.157.157.157.7\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.158.158.158.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.159.159.159.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.160.160.160.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.170.170.170\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.170.170.170.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.170.170.170.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.170.170.170.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.161.161.161.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.162.162.162.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.163.163.163.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.164.164.164.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.165.165.165.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.166.166.166.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.167.167.167.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.168.168.168.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.169.169.169.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.170.170.170.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.180.180.180\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.180.180.180.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.171.171.171.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.172.172.172.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.173.173.173.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.174.174.174.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.175.175.175.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.176.176.176.6\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.177.177.177.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.178.178.178.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.179.179.179.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.180.180.180.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.190.190.190\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.190.190.190.11\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.190.190.190.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.190.190.190.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.181.181.181.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.182.182.182.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.183.183.183.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.184.184.184.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.185.185.185.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.186.186.186.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.187.187.187.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.188.188.188.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.189.189.189.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.190.190.190.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.200.200.200\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.200.200.200.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.191.191.191.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.192.192.192.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.193.193.193.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.194.194.194.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.195.195.195.5\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.196.196.196.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.197.197.197.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.198.198.198.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.199.199.199.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.200.200.200.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.210.210.210\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.210.210.210.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.210.210.210.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.210.210.210.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.201.201.201.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.202.202.202.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.203.203.203.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.204.204.204.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.205.205.205.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.206.206.206.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.207.207.207.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.208.208.208.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.209.209.209.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.210.210.210.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.220.220.220\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.220.220.220.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.211.211.211.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.212.212.212.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.213.213.213.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.214.214.214.4\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T1.215.215.215.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.216.216.216.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.217.217.217.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.218.218.218.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.219.219.219.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.220.220.220.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.230.230.230\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.230.230.230.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.230.230.230.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.230.230.230.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.221.221.221.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.222.222.222.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.223.223.223.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.224.224.224.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.225.225.225.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.226.226.226.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.227.227.227.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.228.228.228.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.229.229.229.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.230.230.230.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.240.240.240\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.240.240.240.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.231.231.231.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.232.232.232.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.233.233.233.3\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S3.T1.234.234.234.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.235.235.235.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.236.236.236.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.237.237.237.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.238.238.238.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.239.239.239.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.240.240.240.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.250.250.250\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.11\" rowspan=\"6\"><span class=\"ltx_text\" id=\"S3.T1.250.250.250.11.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T1.250.250.250.11.1.1\" style=\"width:6.8pt;height:42.2pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:42.2pt;transform:translate(-17.66pt,-17.66pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S3.T1.250.250.250.11.1.1.1\">GPT-Neo</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.12\">MetaICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" id=\"S3.T1.250.250.250.13\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.250.250.250.13.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.241.241.241.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.242.242.242.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.243.243.243.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.244.244.244.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.245.245.245.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.246.246.246.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.247.247.247.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S3.T1.248.248.248.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.249.249.249.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T1.250.250.250.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.260.260.260\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.260.260.260.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.251.251.251.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.252.252.252.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.253.253.253.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.254.254.254.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.255.255.255.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.256.256.256.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.257.257.257.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.258.258.258.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.259.259.259.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.260.260.260.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.270.270.270\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.270.270.270.11\">Channel ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T1.270.270.270.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.270.270.270.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.261.261.261.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.262.262.262.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.263.263.263.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.264.264.264.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.265.265.265.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.266.266.266.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.267.267.267.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.268.268.268.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.269.269.269.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.270.270.270.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.280.280.280\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S3.T1.280.280.280.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.271.271.271.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.272.272.272.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.273.273.273.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.274.274.274.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.275.275.275.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.276.276.276.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.277.277.277.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.278.278.278.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.279.279.279.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T1.280.280.280.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.290.290.290\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"S3.T1.290.290.290.11\">EPR</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r ltx_border_t\" id=\"S3.T1.290.290.290.12\" rowspan=\"2\"><span class=\"ltx_text\" id=\"S3.T1.290.290.290.12.1\">12</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.281.281.281.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.282.282.282.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.283.283.283.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.284.284.284.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.285.285.285.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T1.286.286.286.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.287.287.287.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.288.288.288.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.289.289.289.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T1.290.290.290.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.300.300.300\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S3.T1.300.300.300.11\">+IDAICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.291.291.291.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.292.292.292.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.293.293.293.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.294.294.294.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.295.295.295.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.296.296.296.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.297.297.297.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.298.298.298.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.299.299.299.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T1.300.300.300.10\"></td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Comparison results of three PLMs.\nTwo numbers indicate the mean accuracy (%) and standard deviation over different seeds. The best and second-best results per PLM per dataset are highlighted in bold and underlined, respectively. \"+IDAICL\" means that the current approach is used in conjunction with IDAICL. 
The results for different numbers of demonstration examples (i.e., values) using the GPT-Neo model are illustrated in Figure <a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00100v1#S4.F3\" title=\"Figure 3 ‣ 4.1 Models and Datasets ‣ 4 Experimental Setup ‣ Enhancing In-Context Learning via Implicit Demonstration Augmentation\"><span class=\"ltx_text ltx_ref_tag\">3</span></a>.\n</figcaption>\n</figure>",
34
+ "perturb_sentence_id": [
35
+ 3,
36
+ 4
37
+ ],
38
  "output": {
39
+ "perturbed_statement": "[paragraph id = 3] Compared to Vanilla ICL, IDAICL outperforms by an average of 17.7% and 18.4% across diverse datasets and values for GPT-2 with 0.8B and 1.5B parameters, respectively. Secondly, in comparison to other ICL baselines like MetaICL, MetaICL, and EPR, the integration of IDAICL consistently delivers notable performance improvements, emphasizing the efficacy of enhancing demonstrations for refined predictions.",
40
+ "perturbed_explanation": "Original Explanation: The original statement highlights IDAICL's superior performance over Vanilla ICL by providing specific percentage improvements for both 0.8B and 1.5B parameter models. It further reiterates IDAICL's effectiveness in comparison with other ICL baselines, including Channel ICL, MetaICL, and EPR. 1. The statement incorrectly repeats MetaICL instead of mentioning the correct baseline, Channel ICL, after MetaICL. This error falsely indicates that MetaICL is being compared to itself, misrepresenting the diversity of the ICL baselines used for comparison."
41
  }
42
  },
43
  {
 
58
  "[paragraph id = 1] Table 2 presents the comparison results for the LLaMA models, where IDAICL consistently achieves state-of-the-art performance, except for TREC using the LLaMA model with 33B parameters."
59
  ],
60
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.100\" style=\"width:433.6pt;height:169.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-36.5pt,14.3pt) scale(0.855879733045295,0.855879733045295) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.100.100\">\n<tr class=\"ltx_tr\" id=\"S4.T2.100.100.101\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.100.100.101.1\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.1.1\" style=\"background-color:#D9D9D9;\">PLM</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.100.100.101.2\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.2.1\" style=\"background-color:#D9D9D9;\">Method</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.3\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.3.1\" style=\"background-color:#D9D9D9;\">SST-2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.4\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.4.1\" style=\"background-color:#D9D9D9;\">SST-5</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.5\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.5.1\" style=\"background-color:#D9D9D9;\">MR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.6\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.6.1\" style=\"background-color:#D9D9D9;\">CR</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.7\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.7.1\" style=\"background-color:#D9D9D9;\">Subj</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.8\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.8.1\" 
style=\"background-color:#D9D9D9;\">TREC</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.9\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.9.1\" style=\"background-color:#D9D9D9;\">DBPedia</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.10\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.10.1\" style=\"background-color:#D9D9D9;\">AGNews</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.11\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.11.1\" style=\"background-color:#D9D9D9;\">CB</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.100.100.101.12\"><span class=\"ltx_text\" id=\"S4.T2.100.100.101.12.1\" style=\"background-color:#D9D9D9;\">Avg.</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.9.9.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.9.9.9.10\" rowspan=\"5\"><span class=\"ltx_text\" id=\"S4.T2.9.9.9.10.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T2.9.9.9.10.1.1\" style=\"width:6.8pt;height:54.6pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:54.6pt;transform:translate(-23.88pt,-23.88pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S4.T2.9.9.9.10.1.1.1\">LLaMA 13B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.9.9.9.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.2.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.4.4.4.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.5.5.5.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.6.6.6.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" 
id=\"S4.T2.7.7.7.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.8.8.8.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.9.9.9.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.9.9.9.12\">72.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.18.18.18\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.18.18.18.10\">ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.10.10.10.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.11.11.11.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.12.12.12.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.13.13.13.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.14.14.14.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.15.15.15.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.16.16.16.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.17.17.17.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.18.18.18.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.18.18.18.11\">77.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.31.31.31\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.22.22.22.4\">\n<span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.1\">P</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.2\">RO</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.3\">C</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.22.22.22.4.4\">A</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.23.23.23.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.24.24.24.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.25.25.25.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.26.26.26.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.27.27.27.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.28.28.28.10\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.29.29.29.11\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.30.30.30.12\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.31.31.31.13\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.31.31.31.14\"><span class=\"ltx_text ltx_framed ltx_framed_underline\" id=\"S4.T2.31.31.31.14.1\">77.9</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.40.40.40\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.40.40.40.10\">D-ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.32.32.32.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.33.33.33.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.34.34.34.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.35.35.35.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.36.36.36.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.37.37.37.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.38.38.38.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.39.39.39.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.40.40.40.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.40.40.40.11\">77.8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.50.50.50\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.50.50.50.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.41.41.41.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.42.42.42.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.43.43.43.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.44.44.44.4\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.45.45.45.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.46.46.46.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.47.47.47.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.48.48.48.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.49.49.49.9\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.50.50.50.10\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.59.59.59\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_tt\" id=\"S4.T2.59.59.59.10\" rowspan=\"5\"><span class=\"ltx_text\" id=\"S4.T2.59.59.59.10.1\">\n<span class=\"ltx_inline-block ltx_transformed_outer\" id=\"S4.T2.59.59.59.10.1.1\" style=\"width:6.8pt;height:54.6pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"width:54.6pt;transform:translate(-23.88pt,-23.88pt) rotate(-90deg) ;\">\n<span class=\"ltx_p\" id=\"S4.T2.59.59.59.10.1.1.1\">LLaMA 33B</span>\n</span></span></span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S4.T2.59.59.59.11\">Vanilla ICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.51.51.51.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.52.52.52.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.53.53.53.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.54.54.54.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.55.55.55.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.56.56.56.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.57.57.57.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.58.58.58.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.59.59.59.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S4.T2.59.59.59.12\">76.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.68.68.68\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.68.68.68.10\">ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.60.60.60.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.61.61.61.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.62.62.62.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.63.63.63.4\"></td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.64.64.64.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.65.65.65.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.66.66.66.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.67.67.67.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.68.68.68.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.68.68.68.11\">78.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.81.81.81\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.72.72.72.4\">\n<span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.1\">P</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.2\">RO</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.3\">C</span><span class=\"ltx_text ltx_markedasmath\" id=\"S4.T2.72.72.72.4.4\">A</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.73.73.73.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.74.74.74.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.75.75.75.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.76.76.76.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.77.77.77.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.78.78.78.10\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.79.79.79.11\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.80.80.80.12\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.81.81.81.13\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.81.81.81.14\">78.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.90.90.90\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S4.T2.90.90.90.10\">D-ConCa</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.82.82.82.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.83.83.83.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.84.84.84.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.85.85.85.4\"></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T2.86.86.86.5\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.87.87.87.6\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.88.88.88.7\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.89.89.89.8\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.90.90.90.9\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.90.90.90.11\"><span class=\"ltx_text ltx_framed ltx_framed_underline\" id=\"S4.T2.90.90.90.11.1\">78.6</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.100.100.100\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S4.T2.100.100.100.11\">IDAICL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.91.91.91.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.92.92.92.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.93.93.93.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.94.94.94.4\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.95.95.95.5\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.96.96.96.6\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.97.97.97.7\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.98.98.98.8\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.99.99.99.9\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.100.100.100.10\"></td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Comparison results of Macro-F1 for the LLaMA model with 13B and 33B parameters, setting to 4.\n</figcaption>\n</figure>",
61
+ "perturb_sentence_id": [
62
+ 1,
63
+ 2
64
+ ],
65
  "output": {
66
+ "perturbed_statement": "[paragraph id = 1] Table 2 presents the comparison results for the LLaMA models, where IDAICL consistently achieves state-of-the-art performance, except for the IMDB using the LLaMA model with 33B parameters. These findings suggest that IDAICL which leverages statistical information derived from the input data distribution for prediction calibration, generally outperforms methods relying on estimated biases for correction.",
67
+ "perturbed_explanation": "These findings suggest that IDAICL which leverages statistical information derived from the input data distribution for prediction calibration, generally outperforms methods relying on estimated biases for correction. However, the statement incorrectly says that IDAICL did not achieve state-of-the-art performance for the IMDB dataset, when it actually refers to the TREC dataset. This change alters the specific dataset where IDAICL did not reach state-of-the-art performance, making the statement factually incorrect."
68
  }
69
  },
70
  {
 
92
  "[paragraph id = 4] This phenomenon is understandable as statistical properties inherently encompass richer global information compared to individual demonstrations."
93
  ],
94
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_align_middle\" id=\"S5.T3.16\">\n<tr class=\"ltx_tr\" id=\"S5.T3.16.17\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S5.T3.16.17.1\"><span class=\"ltx_text\" id=\"S5.T3.16.17.1.1\" style=\"background-color:#D9D9D9;\">Dataset</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.2\"><span class=\"ltx_text\" id=\"S5.T3.16.17.2.1\" style=\"background-color:#D9D9D9;\">0-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.3\"><span class=\"ltx_text\" id=\"S5.T3.16.17.3.1\" style=\"background-color:#D9D9D9;\">1-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.4\"><span class=\"ltx_text\" id=\"S5.T3.16.17.4.1\" style=\"background-color:#D9D9D9;\">4-shot</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.16.17.5\"><span class=\"ltx_text\" id=\"S5.T3.16.17.5.1\" style=\"background-color:#D9D9D9;\">IDAICL</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.4.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"S5.T3.4.4.5\">SST-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.3.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.4.4.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.8.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S5.T3.8.8.5\">SST-5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.5.5.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.6.6.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.7.7.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.8.8.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S5.T3.12.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r\" id=\"S5.T3.12.12.5\">MR</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.9.9.1\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.10.10.2\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.11.11.3\"></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.12.12.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.16.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r\" id=\"S5.T3.16.16.5\">Subj</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.13.13.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.14.14.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.15.15.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.16.16.4\"></td>\n</tr>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Accuracy comparison between Vanilla ICL and IDAICL based solely on statistical properties, using the GPT-2 model with 0.8B parameters.\n</figcaption>\n</figure>",
95
+ "perturb_sentence_id": [
96
+ 2,
97
+ 3
98
+ ],
99
  "output": {
100
+ "perturbed_statement": "[paragraph id = 2] As shown in Table 3 , IDAICL relying solely on statistical properties generally performs worse than Vanilla ICL across scenarios with zero, one, and even four demonstrations. This de-emphasizes the crucial role of prior statistics obtained from training data in PLMs predictions.",
101
+ "perturbed_explanation": "The original explanation states that relying on statistical properties allows IDAICL to distinctly outperform Vanilla ICL in various scenarios, highlighting the importance of prior statistics in PLM predictions. 1. The statement claims that IDAICL relying solely on statistical properties performs worse than Vanilla ICL, which contradicts the original context that suggests IDAICL actually outperforms Vanilla ICL in those scenarios. 2. The conclusion that this de-emphasizes the role of prior statistics is incorrect because the original context indicates the importance of prior statistics in enhancing performance, not diminishing it."
102
  }
103
  },
104
  {
 
156
  "[paragraph id = 24] Some templates achieve higher average performance than others."
157
  ],
158
  "table_html": "<figure class=\"ltx_table\" id=\"A8.T10\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"A8.T10.1\" style=\"width:433.6pt;height:1039.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-34.7pt,83.2pt) scale(0.86199860309836,0.86199860309836) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1\">\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.1\" style=\"background-color:#D9D9D9;\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.1.1\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.1.1\" style=\"background-color:#D9D9D9;\">Format ID</span></td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.1.2\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.2.1\" style=\"background-color:#D9D9D9;\">Prompt</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"A8.T10.1.1.1.3\"><span class=\"ltx_text\" id=\"A8.T10.1.1.1.3.1\" style=\"background-color:#D9D9D9;\">Label names</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.2\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.2.1\">1</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_tt\" id=\"A8.T10.1.1.2.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.2.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.2.1\">\n<span 
class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Answer: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.2.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.2.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.2.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.2.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"A8.T10.1.1.2.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.3.1\">2</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.3.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.3.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r 
ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.3.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.3.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.3.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.3.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.3.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.4\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.4.1\">3</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.4.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.4.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.4.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.4.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">My review for last night’s film: This movie is amazing! 
The critics agreed that this movie was good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.4.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.4.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.4.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">My review for last night’s film: Horrific movie, don’t see it. The critics agreed that this movie was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.4.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.4.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.5\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.5.1\">4</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.5.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.5.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Here is what our critics think for this month’s films.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">One of our critics wrote \"This movie is amazing!\". 
Her sentiment towards the film was positive.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.5.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.5.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.5.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">One of our critics wrote \"Horrific movie, don’t see it\". Her sentiment towards the film was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.5.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.5.3\">positive / negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.6\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.6.1\">5</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.6.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.6.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Critical reception [ edit ]</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">In a contemporary review, Roger Ebert wrote \"This movie is amazing!\". 
Entertainment Weekly agreed, and\nthe overall critical reception of the film was good.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.6.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.6.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.6.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">In a contemporary review, Roger Ebert wrote \"Horrific movie, don’t see it\". Entertainment Weekly agreed, and\nthe overall critical reception of the film was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.6.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.6.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.7\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.7.1\">6</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.7.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.7.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Positive Review? 
Yes</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.7.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.7.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.7.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Positive Review?</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.7.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.7.3\">Yes / No</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.8.1\">7</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.8.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.8.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.2.1.1\">\n<span class=\"ltx_p\" 
id=\"A8.T10.1.1.8.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Question: Is the sentiment of the above review Positive or Negative?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Question: Is the sentiment of the above review Positive or Negative?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.8.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.8.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.8.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.8.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.8.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.9.1\">8</td>\n<td 
class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.9.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.9.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Question: Did the author think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"A8.T10.1.1.9.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Question: Did the author think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.9.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.9.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.9.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.9.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.9.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.10\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.10.1\">9</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.10.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.10.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Question: Did the author of the following tweet think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Tweet: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" 
id=\"A8.T10.1.1.10.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Answer: good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Question: Did the author of the following tweet think that the movie was good or bad?</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.5\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.5.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.5.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.5.1.1.1\" style=\"width:327.2pt;\">Tweet: Horrific movie, don’t see it</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.10.2.2.1.6\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.10.2.2.1.6.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.10.2.2.1.6.1.1.1\" style=\"width:327.2pt;\">Answer:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.10.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.10.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.11\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.11.1\">10</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.11.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.11.2.1\"></span><span 
class=\"ltx_text\" id=\"A8.T10.1.1.11.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.11.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.11.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.11.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! My overall feeling was that the movie was good</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.11.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.11.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.11.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. My overall feeling was that the movie was</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.11.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.11.3\">good / bad</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.12\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.12.1\">11</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.12.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.12.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.12.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.12.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! 
I liked the movie.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.12.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.12.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.12.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. I</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.12.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.12.3\">liked / hated</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.13\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.13.1\">12</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.13.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.13.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.13.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.13.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">This movie is amazing! My friend asked me if I would give the movie 0 or 5 stars, I said 5</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.13.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.13.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.13.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Horrific movie, don’t see it. 
My friend asked me if I would give the movie 0 or 5 stars, I said</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.13.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.13.3\">0 / 5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.14\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.14.1\">13</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.14.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.14.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Input: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Sentiment: Positive</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Input: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.14.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.4.1\">\n<span class=\"ltx_inline-block 
ltx_align_top\" id=\"A8.T10.1.1.14.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.14.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Sentiment:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.14.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.14.3\">Positive / Negative</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.15\">\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.15.1\">14</td>\n<td class=\"ltx_td ltx_align_left ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.15.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.15.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Positive: True</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.15.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r 
ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.15.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.15.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Positive:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.15.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"A8.T10.1.1.15.3\">True / False</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"A8.T10.1.1.16\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.16.1\">15</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb ltx_border_r ltx_border_t\" id=\"A8.T10.1.1.16.2\">\n<span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.1\"></span><span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"A8.T10.1.1.16.2.2.1\">\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.1.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.1.1.1.1\" style=\"width:327.2pt;\">Review: This movie is amazing!</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.2\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.2.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.2.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.2.1.1.1\" style=\"width:327.2pt;\">Stars: 5</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.3\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.3.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.3.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.3.1.1.1\" style=\"width:327.2pt;\">Review: Horrific movie, don’t see 
it.</span>\n</span></span></span>\n<span class=\"ltx_tr\" id=\"A8.T10.1.1.16.2.2.1.4\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"A8.T10.1.1.16.2.2.1.4.1.1\">\n<span class=\"ltx_p\" id=\"A8.T10.1.1.16.2.2.1.4.1.1.1\" style=\"width:327.2pt;\">Stars:</span>\n</span></span></span>\n</span></span><span class=\"ltx_text\" id=\"A8.T10.1.1.16.2.3\"></span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"A8.T10.1.1.16.3\">5 / 0</td>\n</tr>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 10: </span>The templates employed for examining the influence of formats on the SST-2 dataset, following those outlined by Zhao et al. <cite class=\"ltx_cite ltx_citemacro_cite\">Zhao et al. (<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00100v1#bib.bib62\" title=\"\">2021</a>)</cite>. An example from the training data is used for illustration.</figcaption>\n</figure>",
159
+ "perturb_sentence_id": [
160
+ 21,
161
+ 22
162
+ ],
163
  "output": {
164
+ "perturbed_statement": "[paragraph id = 21] (2021). The templates are presented in Table 5 of the Appendix.",
165
+ "perturbed_explanation": "1. The original statement indicates that the templates are elaborated in Table 10 of the Appendix. 2. The statement incorrectly mentions Table 5 instead of Table 10, which changes the reference location of the templates in the appendix, making it factually incorrect."
166
  }
167
  }
168
  ]
table_result/2407.00101v1_output.json CHANGED
@@ -26,10 +26,13 @@
26
  "[paragraph id = 5] For better performance, the difference in accuracy should be positive and that loss should be negative."
27
  ],
28
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.2\">(300,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.3\">(300,64)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.4\">(500,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.5\">(500,64)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T1.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.2\">1.374</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.3\">-0.516</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.4\">1.366</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.5\">1.291</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T1.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.2\">-0.047</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.3\">0.001</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.4\">-0.053</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.5\">-0.022</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S7.T1.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.2\">-0.047</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.3\">-0.001</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.4\">-0.054</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.5\">-0.023</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for MNIST dataset. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
29
- "perturb_sentence_id": 4,
 
 
 
30
  "output": {
31
- "perturbed_statement": "[paragraph id = 4] Table 1 shows the difference of the metrics like accuracy and speed between our algorithm and asynchronous algorithm averaged over the test subset. For better performance, the difference in accuracy should be greater than or equal to zero and speed should be negative.",
32
- "perturbed_explanation": "1. Initially, the metrics compared were accuracy and loss over the training interval, not accuracy and speed, nor over a limited subset like the test data. 2. While it is correct that higher performance is indicated by positive differences in accuracy, speed is not typically represented as a metric difference and cannot logically be negative under usual interpretations, leading to an incoherent assertion."
33
  }
34
  },
35
  {
@@ -63,10 +66,13 @@
63
  "[paragraph id = 9] It is able to achieve higher accuracy and lower loss as compared to asynchronous and synchronous algorithms."
64
  ],
65
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T2.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.2\">(300,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.3\">(300,64)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.4\">(500,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.5\">(500,64)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T2.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.2\">4.849</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.3\">2.435</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.4\">3.468</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.5\">2.884</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T2.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.2\">-0.137</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.3\">-0.066</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.4\">-0.092</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.5\">-0.080</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S7.T2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.2\">-0.139</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.3\">-0.067</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.4\">-0.091</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.5\">-0.082</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for CIFAR-10 dataset. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
66
- "perturb_sentence_id": 7,
 
 
 
67
  "output": {
68
- "perturbed_statement": "[paragraph id = 7] Table 2 and plots 6 and 7 show similar statistics as that for CIFAR-10. We can clearly note here that our algorithms demonstrate comparable performance to both of the other algorithms.",
69
- "perturbed_explanation": "1. The original explanation highlighted that Table 2 and plots 6 and 7 illustrated a clear speedup of the authors' algorithms relative to the other algorithms. 2. The statement is factually incorrect because it incorrectly asserts that the algorithms exhibit performance comparable to the others, which is contrary to the context indicating a significant speedup."
70
  }
71
  },
72
  {
@@ -92,10 +98,13 @@
92
  "[paragraph id = 3] We hypothesized that as the batch size increases, the difference should decrease since asynchronous algorithms start providing updates with high confidence."
93
  ],
94
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T3.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T3.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.2\">8</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.3\">16</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.4\">32</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.5\">64</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.6\">128</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T3.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.2\">4.896</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.3\">5.183</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.4\">4.222</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.5\">3.304</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.6\">2.599</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T3.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.2\">-0.141</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.3\">-0.141</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.4\">-0.117</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.5\">-0.089</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.6\">-0.072</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T3.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.2\">-0.143</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.3\">-0.141</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.4\">-0.114</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.5\">-0.088</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.6\">-0.068</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for various batch sizes and constant step size of 500. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
95
- "perturb_sentence_id": 2,
 
 
 
96
  "output": {
97
- "perturbed_statement": "[paragraph id = 2] Table 3 shows the difference in metrics like accuracy and efficiency between our algorithm and the asynchronous algorithm averaged over the entire testing interval. We hypothesized that as the batch size increases, the difference should increase since asynchronous algorithms start providing updates with low certainty.",
98
- "perturbed_explanation": "The original explanation states two key ideas: (1) the difference in metrics like accuracy and loss between the algorithms are analyzed over the training interval, and (2) it is hypothesized that increasing batch size decreases the difference due to heightened confidence in asynchronous updates. The statement is incorrect because: (3) it incorrectly states the metrics as accuracy and efficiency instead of accuracy and loss, altering their analytical significance, (4) it specifies the interval as testing instead of training, misrepresenting the context's application phase, and (5) it reverses the hypothesized impact of increasing batch size on difference, making the direction inconsistent with the stated rationale."
99
  }
100
  }
101
  ]
 
26
  "[paragraph id = 5] For better performance, the difference in accuracy should be positive and that loss should be negative."
27
  ],
28
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T1\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.2\">(300,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.3\">(300,64)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.4\">(500,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T1.1.1.5\">(500,64)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T1.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.2\">1.374</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.3\">-0.516</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.4\">1.366</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.2.1.5\">1.291</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T1.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.2\">-0.047</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.3\">0.001</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.4\">-0.053</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T1.1.3.2.5\">-0.022</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S7.T1.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.2\">-0.047</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.3\">-0.001</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.4\">-0.054</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T1.1.4.3.5\">-0.023</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for MNIST dataset. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
29
+ "perturb_sentence_id": [
30
+ 4,
31
+ 5
32
+ ],
33
  "output": {
34
+ "perturbed_statement": "[paragraph id = 4] Table 1 shows the difference of the metrics like accuracy and loss between our algorithm and asynchronous algorithm averaged over the entire training interval. For better performance, the difference in accuracy should be negative and that loss should be negative.",
35
+ "perturbed_explanation": "Original Explanation: The original statement correctly asserts that for better performance, the difference in accuracy should be positive and that in loss should be negative. This implies that our algorithm should exceed the asynchronous algorithm in terms of accuracy, and have a lower loss. 1. The statement is incorrect because it claims that for better performance, the difference in accuracy should be negative. If the difference in accuracy is negative, it would mean that our algorithm performs worse in terms of accuracy compared to the asynchronous algorithm. Thus, a negative difference in accuracy would not indicate better performance."
36
  }
37
  },
38
  {
 
66
  "[paragraph id = 9] It is able to achieve higher accuracy and lower loss as compared to asynchronous and synchronous algorithms."
67
  ],
68
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T2.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.2\">(300,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.3\">(300,64)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.4\">(500,32)</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T2.1.1.5\">(500,64)</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T2.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.2\">4.849</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.3\">2.435</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.4\">3.468</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.2.1.5\">2.884</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T2.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.2\">-0.137</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.3\">-0.066</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.4\">-0.092</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T2.1.3.2.5\">-0.080</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S7.T2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.2\">-0.139</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.3\">-0.067</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.4\">-0.091</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T2.1.4.3.5\">-0.082</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for CIFAR-10 dataset. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
69
+ "perturb_sentence_id": [
70
+ 7,
71
+ 8
72
+ ],
73
  "output": {
74
+ "perturbed_statement": "[paragraph id = 7] Table 2 and plots 6 and 7 show similar statistics as that for CIFAR-10. We can clearly note here that our algorithms show significant slowdown as compared to both of the other algorithms.",
75
+ "perturbed_explanation": "The original explanation stated: Table 2 and plots 6 and 7 demonstrate statistics in which the algorithms show notable speedup compared to other algorithms, indicating superior performance. 1. Changing 'MNIST' to 'CIFAR-10': The original statement described the statistics as being similar to those for MNIST, providing context for comparison. However, the statement now inaccurately implies the statistics are merely related to CIFAR-10 without juxtaposing this against any other dataset. 2. Changing 'speedup' to 'slowdown': The original explanation highlighted the improved performance of the algorithms through speedup. The statement inaccurately reflects a 'slowdown', which contradicts the achievement of higher accuracy and performance described in the context."
76
  }
77
  },
78
  {
 
98
  "[paragraph id = 3] We hypothesized that as the batch size increases, the difference should decrease since asynchronous algorithms start providing updates with high confidence."
99
  ],
100
  "table_html": "<figure class=\"ltx_table\" id=\"S7.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S7.T3.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S7.T3.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.2\">8</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.3\">16</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.4\">32</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.5\">64</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_t\" id=\"S7.T3.1.1.6\">128</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S7.T3.1.2.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.1\">Test Accuracy</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.2\">4.896</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.3\">5.183</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.4\">4.222</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.5\">3.304</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.2.1.6\">2.599</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T3.1.3.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.1\">Test loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.2\">-0.141</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.3\">-0.141</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.4\">-0.117</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.5\">-0.089</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S7.T3.1.3.2.6\">-0.072</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S7.T3.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_l ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.1\">Train loss</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.2\">-0.143</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.3\">-0.141</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.4\">-0.114</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.5\">-0.088</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b ltx_border_r ltx_border_t\" id=\"S7.T3.1.4.3.6\">-0.068</td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3. </span>Difference between the metric for our algorithm and asynchronous algorithm averaged over entire training interval for various batch sizes and constant step size of 500. For better performance, difference in accuracy should be positive and that loss should be negative</figcaption>\n</figure>",
101
+ "perturb_sentence_id": [
102
+ 2,
103
+ 3
104
+ ],
105
  "output": {
106
+ "perturbed_statement": "Table 3 shows the effect of batch sizes only on the metrics like accuracy and loss within our algorithm during the initial training phase. We hypothesized that as the batch size increases, the difference should increase since asynchronous algorithms start providing high-volume updates.",
107
+ "perturbed_explanation": "The original explanation states the comparison between our algorithm and the asynchronous algorithm, hypothesizing that an increase in batch size should decrease the difference between them during the entire training interval, as larger batch sizes in asynchronous algorithms provide updates with higher confidence. 1. The statement incorrectly suggests that Table 3 only shows the effect of batch sizes on the metrics within our algorithm during the initial training phase, rather than comparing it with the asynchronous algorithm and over the entire training interval. 2. The statement incorrectly claims the increase in batch size would increase the difference, whereas it should decrease it, since the asynchronous algorithm provides updates with high confidence as the batch size increases."
108
  }
109
  }
110
  ]
table_result/2407.00102v1_output.json CHANGED
@@ -40,10 +40,13 @@
40
  "[paragraph id = 11] The improvements verify the better training effects of our data since less data amount and same model are used."
41
  ],
42
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T1.7\" style=\"width:493.9pt;height:201.1pt;vertical-align:-0.9pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-27.4pt,11.1pt) scale(0.9,0.9) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.7.7\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.3.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_tt\" id=\"S4.T1.3.3.3.4\">Method</td>\n<td class=\"ltx_td ltx_align_left ltx_border_tt\" id=\"S4.T1.3.3.3.5\">LLM</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.6.1.1\" style=\"width:14.2pt;\">Res.</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.7.1.1\" style=\"width:19.9pt;\">PT</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_tt\" id=\"S4.T1.3.3.3.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.8.1.1\" style=\"width:25.6pt;\">IT</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.1.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.1\" style=\"width:22.8pt;\">VQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.9.1.1\" style=\"width:22.8pt;\">GQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" 
id=\"S4.T1.3.3.3.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.10.1.1\" style=\"width:22.8pt;\">VisWiz</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.2.2.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.2.2.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.2.2.2.2.1.1\" style=\"width:22.8pt;\">SQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.3.1.1\" style=\"width:22.8pt;\">VQA</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.8.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.8.1.1\">BLIP-2<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib19\" title=\"\">19</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.8.1.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.7.7.8.1.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.5.1.1\" style=\"width:25.6pt;\">-</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" 
id=\"S4.T1.7.7.8.1.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.6.1.1\" style=\"width:22.8pt;\">41.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.7.1.1\" style=\"width:22.8pt;\">41</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.8.1.1\" style=\"width:22.8pt;\">19.6</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.9.1.1\" style=\"width:22.8pt;\">61</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.10.1.1\" style=\"width:22.8pt;\">42.5</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.9.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.9.2.1\">InstructBLIP<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib9\" title=\"\">9</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.9.2.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.4\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.9.2.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.9.2.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.5.1.1\" style=\"width:25.6pt;\">1.2M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.6.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.7.1.1\" style=\"width:22.8pt;\">49.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.8.1.1\" style=\"width:22.8pt;\">34.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.9.1.1\" style=\"width:22.8pt;\">60.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.10.1.1\" style=\"width:22.8pt;\">50.1</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.10.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.10.3.1\">InstructBLIP<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib9\" title=\"\">9</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" 
id=\"S4.T1.7.7.10.3.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.10.3.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.5.1.1\" style=\"width:25.6pt;\">1.2M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.6.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.7.1.1\" style=\"width:22.8pt;\">49.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.8.1.1\" style=\"width:22.8pt;\">33.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.9.1.1\" style=\"width:22.8pt;\">63.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.10\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.10.3.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.10.1.1\" style=\"width:22.8pt;\">50.7</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.11.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.11.4.1\">Shikra<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib6\" title=\"\">6</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.11.4.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.4.1.1\" style=\"width:19.9pt;\">600K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.11.4.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.5.1.1\" style=\"width:25.6pt;\">5.5M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.6.1.1\" style=\"width:22.8pt;\">77.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.7.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.8.1.1\" 
style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.10.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.12.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.12.5.1\">IDEFICS-9B <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib16\" title=\"\">16</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.12.5.2\">LLaMA-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.4.1.1\" style=\"width:19.9pt;\">353M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.12.5.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.5.1.1\" style=\"width:25.6pt;\">1M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.6.1.1\" style=\"width:22.8pt;\">50.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify 
ltx_align_top\" id=\"S4.T1.7.7.12.5.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.7.1.1\" style=\"width:22.8pt;\">38.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.8.1.1\" style=\"width:22.8pt;\">35.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.10.1.1\" style=\"width:22.8pt;\">25.9</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.13.6\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.13.6.1\">IDEFICS-80B<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib16\" title=\"\">16</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.13.6.2\">LLaMA-65B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.4.1.1\" style=\"width:19.9pt;\">353M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.13.6.5\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.13.6.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.5.1.1\" style=\"width:25.6pt;\">1M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.6.1.1\" style=\"width:22.8pt;\">60.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.7.1.1\" style=\"width:22.8pt;\">45.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.8.1.1\" style=\"width:22.8pt;\">36.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.10.1.1\" style=\"width:22.8pt;\">30.9</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.5.5.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.5.5.5.3\">Qwen-VL<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib1\" title=\"\">1</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.5.5.5.4\">Qwen-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.5.1.1\" 
style=\"width:14.2pt;\">448</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.4.4.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.4.4.4.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.4.4.4.1.1.1\" style=\"width:19.9pt;\">1.4B<sup class=\"ltx_sup\" id=\"S4.T1.4.4.4.1.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.5.5.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.2.1.1\" style=\"width:25.6pt;\">50M<sup class=\"ltx_sup\" id=\"S4.T1.5.5.5.2.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.6.1.1\" style=\"width:22.8pt;\">78.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.7.1.1\" style=\"width:22.8pt;\">59.3</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.8.1.1\" style=\"width:22.8pt;\">35.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.9.1.1\" style=\"width:22.8pt;\">67.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.10.1.1\" style=\"width:22.8pt;\">63.8</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.7\">\n<td class=\"ltx_td 
ltx_align_left\" id=\"S4.T1.7.7.7.3\">Qwen-VL-Chat<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib1\" title=\"\">1</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.7.4\">Qwen-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.5.1.1\" style=\"width:14.2pt;\">448</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.6.6.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.6.6.6.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.6.6.6.1.1.1\" style=\"width:19.9pt;\">1.4B<sup class=\"ltx_sup\" id=\"S4.T1.6.6.6.1.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.2.1.1\" style=\"width:25.6pt;\">50M<sup class=\"ltx_sup\" id=\"S4.T1.7.7.7.2.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.6.1.1\" style=\"width:22.8pt;\">78.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.7.1.1\" style=\"width:22.8pt;\">57.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.8.1.1\" style=\"width:22.8pt;\">38.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.9\">\n<span class=\"ltx_inline-block 
ltx_align_top\" id=\"S4.T1.7.7.7.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.9.1.1\" style=\"width:22.8pt;\">68.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.10.1.1\" style=\"width:22.8pt;\">61.5</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.14.7\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.14.7.1\">LLAVA-V1.5<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib25\" title=\"\">25</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.14.7.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.14.7.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.5.1.1\" style=\"width:25.6pt;\">665K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.6.1.1\" style=\"width:22.8pt;\">78.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.7.1.1\" 
style=\"width:22.8pt;\">62.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.8.1.1\" style=\"width:22.8pt;\">50.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.9.1.1\" style=\"width:22.8pt;\">66.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.10.1.1\" style=\"width:22.8pt;\">58.2</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.15.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.15.8.1\">+ SVIT-Core-157K<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib39\" title=\"\">39</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.15.8.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.7.7.15.8.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.5.1.1\" 
style=\"width:25.6pt;\">+157K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.6.1.1\" style=\"width:22.8pt;\">75.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.7.1.1\" style=\"width:22.8pt;\">57.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.8.1.1\" style=\"width:22.8pt;\">49.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.9.1.1\" style=\"width:22.8pt;\">69.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.10.1.1\" style=\"width:22.8pt;\">56.3</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.16.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S4.T1.7.7.16.9.1\">+ Ours</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S4.T1.7.7.16.9.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" 
id=\"S4.T1.7.7.16.9.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb ltx_border_r\" id=\"S4.T1.7.7.16.9.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.5.1.1\" style=\"width:25.6pt;\">+7K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.6.1.1\" style=\"width:22.8pt;\">77.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.7.1.1\" style=\"width:22.8pt;\">61.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.8.1.1\" style=\"width:22.8pt;\">51.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.9.1.1\" style=\"width:22.8pt;\">69.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.10.1.1\" style=\"width:22.8pt;\">57.3</span>\n</span>\n</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span 
class=\"ltx_text ltx_font_bold\" id=\"S4.T1.9.1\">Comparison with SoTA methods on 5 benchmarks.</span> We achieves better performance on all benchmarks than SVIT-Core-157K. Res, PT, and IT indicate input image resolution, and the number of samples in the pretraining and instruction tuning stage, respectively.\nBenchmark names are abbreviated due to space limits. VQA-v2 <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib12\" title=\"\">12</a>]</cite>, GQA <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib15\" title=\"\">15</a>]</cite>, VisWiz <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib13\" title=\"\">13</a>]</cite>, ScienceQA-IMG <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib27\" title=\"\">27</a>]</cite>, TextVQA <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib33\" title=\"\">33</a>]</cite>. More details can be found in the Evaluation Metrics section of the Appendix.</figcaption>\n</figure>",
43
- "perturb_sentence_id": 10,
 
 
 
44
  "output": {
45
- "perturbed_statement": "[paragraph id = 10] In the efficient LoRA training setup, our data exceeded SVIT-core-157K[39 ] by 3.0 points in GQA [15 ], 1.5 points in VQAV2 [12 ], 1.2 points in TextVQA [33 ], 1.8 points in VisWiz [13 ], and 0.3 points in SQA [27 ]. The improvements verify the better training effects of our data since less data amount and same model are used.",
46
- "perturbed_explanation": "1. The original explanation states that the data achieved performance increases by specific margins across various datasets, as given in the results above. 2. The statement now includes invalid performance margins for some datasets, such as '3.0 points in GQA' instead of '4.7 points' and '1.5 points in VQAV2' instead of '2.0 points,' which do not match the results provided. This error alters the reported evidence of the model's effectiveness."
47
  }
48
  },
49
  {
@@ -87,10 +90,13 @@
87
  "[paragraph id = 16] By employing this method, we found that using curriculum learning with the DIQ method can further enhance model performance."
88
  ],
89
  "table_html": "<figure class=\"ltx_table ltx_align_floatright\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.1\" style=\"width:166.8pt;height:126pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(0.0pt,0.0pt) scale(1.0,1.0) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S4.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.1.1.1.1\" style=\"font-size:90%;\">Strategy</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S4.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.1.1.2.1\" style=\"font-size:90%;\">Scenario 1</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.2.2.1\"></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.2.1\" style=\"font-size:90%;\">SQA</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.3.1\" style=\"font-size:90%;\">TextVQA</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.4.1\" style=\"font-size:90%;\">GQA</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.3.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.1.1\" style=\"font-size:90%;\">DIS</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.2.1\" style=\"font-size:90%;\">57.06</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.3\"><span 
class=\"ltx_text\" id=\"S4.T2.1.1.3.3.3.1\" style=\"font-size:90%;\">56.13</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.4.1\" style=\"font-size:90%;\">61.06</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.4.4.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.1.1\" style=\"font-size:90%;\">DIL</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.2.1\" style=\"font-size:90%;\">68.82</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.3.1\" style=\"font-size:90%;\">56.30</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.4.1\" style=\"font-size:90%;\">60.87</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.5.5.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.1.1\" style=\"font-size:90%;\">DIQ</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.5.5.2.1\" style=\"font-size:90%;\">69.56</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.3.1\" style=\"font-size:90%;\">56.84</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.4.1\" style=\"font-size:90%;\">61.16</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" colspan=\"4\" id=\"S4.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T2.1.1.6.6.1.1\" style=\"font-size:90%;\">Result with Data Curriculum</span></th>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S4.T2.1.1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S4.T2.1.1.7.7.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.7.7.1.1\" style=\"font-size:90%;\">Ours</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.7.7.2.1\" style=\"font-size:90%;\">69.51</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.7.7.3.1\" style=\"font-size:90%;\">57.25</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.7.7.4.1\" style=\"font-size:90%;\">61.80</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Results across different methods.</figcaption>\n</figure>",
90
- "perturb_sentence_id": 12,
 
 
 
91
  "output": {
92
- "perturbed_statement": "[paragraph id = 12] In Table 2, we use the top-left corner in the left panel of Figure 7 (shown in the appendix) as the top 5% of the DIQ and conducted a comparison experiment. We found that using the 5% selected by DIQ resulted in better performance compared to using the top 5% of DIS and DIL separately. We realized that this improvement is due to the subset from DIQ selecting data evenly from the entire region, whereas DIS and DIL focus on regions with high levels of clip score or loss.",
93
- "perturbed_explanation": "1. The original explanation clarified that the selection criterion was based on the top-right corner.\n2. The statement specifies the top-left corner, which inconsistency diminishes the accuracy of the description regarding the location derived from Figure 7."
94
  }
95
  }
96
  ]
 
40
  "[paragraph id = 11] The improvements verify the better training effects of our data since less data amount and same model are used."
41
  ],
42
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T1.7\" style=\"width:493.9pt;height:201.1pt;vertical-align:-0.9pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-27.4pt,11.1pt) scale(0.9,0.9) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T1.7.7\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.3.3.3\">\n<td class=\"ltx_td ltx_align_left ltx_border_tt\" id=\"S4.T1.3.3.3.4\">Method</td>\n<td class=\"ltx_td ltx_align_left ltx_border_tt\" id=\"S4.T1.3.3.3.5\">LLM</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.6.1.1\" style=\"width:14.2pt;\">Res.</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.7.1.1\" style=\"width:19.9pt;\">PT</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_tt\" id=\"S4.T1.3.3.3.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.8.1.1\" style=\"width:25.6pt;\">IT</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.1.1.1.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.1.1.1.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.1.1.1.1.1.1\" style=\"width:22.8pt;\">VQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.9.1.1\" style=\"width:22.8pt;\">GQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" 
id=\"S4.T1.3.3.3.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.10.1.1\" style=\"width:22.8pt;\">VisWiz</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.2.2.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.2.2.2.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.2.2.2.2.1.1\" style=\"width:22.8pt;\">SQA</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_tt\" id=\"S4.T1.3.3.3.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.3.3.3.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.3.3.3.3.1.1\" style=\"width:22.8pt;\">VQA</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.8.1\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.8.1.1\">BLIP-2<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib19\" title=\"\">19</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.8.1.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.7.7.8.1.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.5.1.1\" style=\"width:25.6pt;\">-</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" 
id=\"S4.T1.7.7.8.1.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.6.1.1\" style=\"width:22.8pt;\">41.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.7.1.1\" style=\"width:22.8pt;\">41</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.8.1.1\" style=\"width:22.8pt;\">19.6</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.9.1.1\" style=\"width:22.8pt;\">61</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.8.1.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.8.1.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.8.1.10.1.1\" style=\"width:22.8pt;\">42.5</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.9.2\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.9.2.1\">InstructBLIP<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib9\" title=\"\">9</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.9.2.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.4\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.9.2.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.9.2.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.5.1.1\" style=\"width:25.6pt;\">1.2M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.6.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.7.1.1\" style=\"width:22.8pt;\">49.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.8.1.1\" style=\"width:22.8pt;\">34.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.9.1.1\" style=\"width:22.8pt;\">60.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.9.2.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.9.2.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.9.2.10.1.1\" style=\"width:22.8pt;\">50.1</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.10.3\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.10.3.1\">InstructBLIP<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib9\" title=\"\">9</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" 
id=\"S4.T1.7.7.10.3.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.4.1.1\" style=\"width:19.9pt;\">129M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.10.3.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.5.1.1\" style=\"width:25.6pt;\">1.2M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.6.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.7.1.1\" style=\"width:22.8pt;\">49.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.8.1.1\" style=\"width:22.8pt;\">33.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.10.3.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.9.1.1\" style=\"width:22.8pt;\">63.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.10.3.10\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.10.3.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.10.3.10.1.1\" style=\"width:22.8pt;\">50.7</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.11.4\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.11.4.1\">Shikra<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib6\" title=\"\">6</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.11.4.2\">Vicuna-13B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.4.1.1\" style=\"width:19.9pt;\">600K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.11.4.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.5.1.1\" style=\"width:25.6pt;\">5.5M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.6.1.1\" style=\"width:22.8pt;\">77.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.7.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.8.1.1\" 
style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.11.4.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.11.4.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.11.4.10.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.12.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.12.5.1\">IDEFICS-9B <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib16\" title=\"\">16</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.12.5.2\">LLaMA-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.4.1.1\" style=\"width:19.9pt;\">353M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.12.5.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.5.1.1\" style=\"width:25.6pt;\">1M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.6.1.1\" style=\"width:22.8pt;\">50.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify 
ltx_align_top\" id=\"S4.T1.7.7.12.5.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.7.1.1\" style=\"width:22.8pt;\">38.4</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.8.1.1\" style=\"width:22.8pt;\">35.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.12.5.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.12.5.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.12.5.10.1.1\" style=\"width:22.8pt;\">25.9</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.13.6\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.13.6.1\">IDEFICS-80B<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib16\" title=\"\">16</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.13.6.2\">LLaMA-65B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.3.1.1\" style=\"width:14.2pt;\">224</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.4.1.1\" style=\"width:19.9pt;\">353M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.13.6.5\">\n<span class=\"ltx_inline-block ltx_align_top\" 
id=\"S4.T1.7.7.13.6.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.5.1.1\" style=\"width:25.6pt;\">1M</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.6.1.1\" style=\"width:22.8pt;\">60.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.7.1.1\" style=\"width:22.8pt;\">45.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.8.1.1\" style=\"width:22.8pt;\">36.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.9.1.1\" style=\"width:22.8pt;\">–</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.13.6.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.13.6.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.13.6.10.1.1\" style=\"width:22.8pt;\">30.9</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.5.5.5\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.5.5.5.3\">Qwen-VL<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib1\" title=\"\">1</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.5.5.5.4\">Qwen-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.5.1.1\" 
style=\"width:14.2pt;\">448</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.4.4.4.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.4.4.4.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.4.4.4.1.1.1\" style=\"width:19.9pt;\">1.4B<sup class=\"ltx_sup\" id=\"S4.T1.4.4.4.1.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.5.5.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.2.1.1\" style=\"width:25.6pt;\">50M<sup class=\"ltx_sup\" id=\"S4.T1.5.5.5.2.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.6.1.1\" style=\"width:22.8pt;\">78.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.7.1.1\" style=\"width:22.8pt;\">59.3</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.8.1.1\" style=\"width:22.8pt;\">35.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.9.1.1\" style=\"width:22.8pt;\">67.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.5.5.5.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.5.5.5.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.5.5.5.10.1.1\" style=\"width:22.8pt;\">63.8</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.7\">\n<td class=\"ltx_td 
ltx_align_left\" id=\"S4.T1.7.7.7.3\">Qwen-VL-Chat<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib1\" title=\"\">1</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.7.4\">Qwen-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.5.1.1\" style=\"width:14.2pt;\">448</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.6.6.6.1\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.6.6.6.1.1\">\n<span class=\"ltx_p\" id=\"S4.T1.6.6.6.1.1.1\" style=\"width:19.9pt;\">1.4B<sup class=\"ltx_sup\" id=\"S4.T1.6.6.6.1.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.2.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.2.1.1\" style=\"width:25.6pt;\">50M<sup class=\"ltx_sup\" id=\"S4.T1.7.7.7.2.1.1.1\">†</sup></span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.6.1.1\" style=\"width:22.8pt;\">78.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.7.1.1\" style=\"width:22.8pt;\">57.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.8.1.1\" style=\"width:22.8pt;\">38.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.9\">\n<span class=\"ltx_inline-block 
ltx_align_top\" id=\"S4.T1.7.7.7.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.9.1.1\" style=\"width:22.8pt;\">68.2</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.7.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.7.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.7.10.1.1\" style=\"width:22.8pt;\">61.5</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.14.7\">\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.14.7.1\">LLAVA-V1.5<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib25\" title=\"\">25</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left\" id=\"S4.T1.7.7.14.7.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r\" id=\"S4.T1.7.7.14.7.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.5.1.1\" style=\"width:25.6pt;\">665K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.6.1.1\" style=\"width:22.8pt;\">78.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.7.1.1\" 
style=\"width:22.8pt;\">62.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.8.1.1\" style=\"width:22.8pt;\">50.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.9.1.1\" style=\"width:22.8pt;\">66.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top\" id=\"S4.T1.7.7.14.7.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.14.7.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.14.7.10.1.1\" style=\"width:22.8pt;\">58.2</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.15.8\">\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.15.8.1\">+ SVIT-Core-157K<cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib39\" title=\"\">39</a>]</cite>\n</td>\n<td class=\"ltx_td ltx_align_left ltx_border_t\" id=\"S4.T1.7.7.15.8.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_r ltx_border_t\" id=\"S4.T1.7.7.15.8.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.5.1.1\" 
style=\"width:25.6pt;\">+157K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.6.1.1\" style=\"width:22.8pt;\">75.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.7.1.1\" style=\"width:22.8pt;\">57.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.8.1.1\" style=\"width:22.8pt;\">49.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.9.1.1\" style=\"width:22.8pt;\">69.0</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_t\" id=\"S4.T1.7.7.15.8.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.15.8.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.15.8.10.1.1\" style=\"width:22.8pt;\">56.3</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.7.7.16.9\">\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S4.T1.7.7.16.9.1\">+ Ours</td>\n<td class=\"ltx_td ltx_align_left ltx_border_bb\" id=\"S4.T1.7.7.16.9.2\">Vicuna-7B</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.3\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.3.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.3.1.1\" style=\"width:14.2pt;\">336</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" 
id=\"S4.T1.7.7.16.9.4\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.4.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.4.1.1\" style=\"width:19.9pt;\">558K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb ltx_border_r\" id=\"S4.T1.7.7.16.9.5\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.5.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.5.1.1\" style=\"width:25.6pt;\">+7K</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.6\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.6.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.6.1.1\" style=\"width:22.8pt;\">77.9</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.7\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.7.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.7.1.1\" style=\"width:22.8pt;\">61.8</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.8\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.8.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.8.1.1\" style=\"width:22.8pt;\">51.1</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.9\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.9.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.9.1.1\" style=\"width:22.8pt;\">69.5</span>\n</span>\n</td>\n<td class=\"ltx_td ltx_align_justify ltx_align_top ltx_border_bb\" id=\"S4.T1.7.7.16.9.10\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S4.T1.7.7.16.9.10.1\">\n<span class=\"ltx_p\" id=\"S4.T1.7.7.16.9.10.1.1\" style=\"width:22.8pt;\">57.3</span>\n</span>\n</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span><span 
class=\"ltx_text ltx_font_bold\" id=\"S4.T1.9.1\">Comparison with SoTA methods on 5 benchmarks.</span> We achieves better performance on all benchmarks than SVIT-Core-157K. Res, PT, and IT indicate input image resolution, and the number of samples in the pretraining and instruction tuning stage, respectively.\nBenchmark names are abbreviated due to space limits. VQA-v2 <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib12\" title=\"\">12</a>]</cite>, GQA <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib15\" title=\"\">15</a>]</cite>, VisWiz <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib13\" title=\"\">13</a>]</cite>, ScienceQA-IMG <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib27\" title=\"\">27</a>]</cite>, TextVQA <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00102v1#bib.bib33\" title=\"\">33</a>]</cite>. More details can be found in the Evaluation Metrics section of the Appendix.</figcaption>\n</figure>",
43
+ "perturb_sentence_id": [
44
+ 10,
45
+ 11
46
+ ],
47
  "output": {
48
+ "perturbed_statement": "[paragraph id = 10] In the efficient LoRA training setup, our data exceeded SVIT-core-157K[39 ] by 4.7 points in GQA [15 ], 2.0 points in VQAV2 [12 ], 1.0 point in TextVQA [33 ], 3.0 points in VisWiz [13 ], and 0.5 points in SQA [27 ].The improvements verify the better training effects of our data since less data amount and same model are used.",
49
+ "perturbed_explanation": "The statement should accurately reflect the information presented in the research results. \n\n1. The statement claimed an improvement of 3.0 points in VisWiz [13 ], which is incorrect. The correct improvement noted in the results was 2.0 points. Therefore, stating 3.0 points is a factual inaccuracy.\n\n2. The alteration in point 1 changes the reported improvement incorrectly, making the statement not consistent with the numerical results achieved and described in the previous paragraphs."
50
  }
51
  },
52
  {
 
90
  "[paragraph id = 16] By employing this method, we found that using curriculum learning with the DIQ method can further enhance model performance."
91
  ],
92
  "table_html": "<figure class=\"ltx_table ltx_align_floatright\" id=\"S4.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S4.T2.1\" style=\"width:166.8pt;height:126pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(0.0pt,0.0pt) scale(1.0,1.0) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S4.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.1.1.1.1\" style=\"font-size:90%;\">Strategy</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S4.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.1.1.2.1\" style=\"font-size:90%;\">Scenario 1</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.2.2.1\"></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.2.1\" style=\"font-size:90%;\">SQA</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.3.1\" style=\"font-size:90%;\">TextVQA</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.2.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.2.2.4.1\" style=\"font-size:90%;\">GQA</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.3.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.1.1\" style=\"font-size:90%;\">DIS</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.2.1\" style=\"font-size:90%;\">57.06</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.3\"><span 
class=\"ltx_text\" id=\"S4.T2.1.1.3.3.3.1\" style=\"font-size:90%;\">56.13</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.1.3.3.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.3.3.4.1\" style=\"font-size:90%;\">61.06</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.4.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.4.4.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.1.1\" style=\"font-size:90%;\">DIL</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.2.1\" style=\"font-size:90%;\">68.82</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.3.1\" style=\"font-size:90%;\">56.30</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.4.4.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.4.4.4.1\" style=\"font-size:90%;\">60.87</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.5.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.1.5.5.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.1.1\" style=\"font-size:90%;\">DIQ</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.5.5.2.1\" style=\"font-size:90%;\">69.56</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.3\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.3.1\" style=\"font-size:90%;\">56.84</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.1.5.5.4\"><span class=\"ltx_text\" id=\"S4.T2.1.1.5.5.4.1\" style=\"font-size:90%;\">61.16</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.6.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" colspan=\"4\" id=\"S4.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S4.T2.1.1.6.6.1.1\" style=\"font-size:90%;\">Result with Data Curriculum</span></th>\n</tr>\n<tr class=\"ltx_tr\" 
id=\"S4.T2.1.1.7.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S4.T2.1.1.7.7.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.7.7.1.1\" style=\"font-size:90%;\">Ours</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.2\"><span class=\"ltx_text\" id=\"S4.T2.1.1.7.7.2.1\" style=\"font-size:90%;\">69.51</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.7.7.3.1\" style=\"font-size:90%;\">57.25</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.1.7.7.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.1.7.7.4.1\" style=\"font-size:90%;\">61.80</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\" style=\"font-size:90%;\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Results across different methods.</figcaption>\n</figure>",
93
+ "perturb_sentence_id": [
94
+ 12,
95
+ 13
96
+ ],
97
  "output": {
98
+ "perturbed_statement": "[paragraph id = 12] In Table 2, we use the top-right corner in the left panel of Figure 8 (shown in the appendix) as the top 5% of the DIQ and conducted a comparison experiment, we found that using the 5% selected by DIQ resulted in better performance compared to using the top 5% of DIS and DIL separately.We realized that this improvement is due to the subset from DIQ selecting data evenly from the entire region, whereas DIS and DIL focus on regions with high levels of clip score or loss.",
99
+ "perturbed_explanation": "1. Original Explanation: The statement describes how the use of the top 5% selected by DIQ led to better performance because DIQ selects data evenly from the entire region, unlike DIS and DIL, which focus on areas with high clip scores or loss. 2. The statement incorrectly mentions Figure 8, whereas there is no mention of Figure 8; the correct reference should be Figure 7. This alters the factual accuracy of the reference provided within the context."
100
  }
101
  }
102
  ]
table_result/2407.00104v1_output.json CHANGED
@@ -27,10 +27,13 @@
27
  "[paragraph id = 10] As can be seen in this table, the database has a significant class imbalance, with SW and MG underrepresented."
28
  ],
29
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Sample distribution for binary and multilabel codification.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S2.T1.1\" style=\"width:433.6pt;height:36.3pt;vertical-align:-0.7pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-112.0pt,9.2pt) scale(0.659441329482834,0.659441329482834) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S2.T1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.1.1.1\">Binary codification</span></td>\n<td class=\"ltx_td ltx_border_tt\" id=\"S2.T1.1.1.1.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"6\" id=\"S2.T1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.1.3.1\">Multi-label codification</span></td>\n<td class=\"ltx_td ltx_border_tt\" id=\"S2.T1.1.1.1.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.1\">BCC</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.2\">Non-BCC</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.1.2.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.4\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.5\">Ulceration</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.6\">Ovoid Nests</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.7\">Multiglobules</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.8\">Maple Leaf-like</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.9\">Spoke Wheel</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S2.T1.1.1.2.10\">Arborizing Telangiectasia</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.1\">775</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.2\">784</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.4\">557</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.5\">385</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.6\">333</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.7\">191</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.8\">244</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.9\">178</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.10\">455</td>\n</tr>\n</table>\n</span></div>\n</figure>",
30
- "perturb_sentence_id": 9,
 
 
 
31
  "output": {
32
- "perturbed_statement": "[paragraph id = 9] Table 1 summarizes the distribution of labels in the database. As can be seen in this table, the database has a balanced class distribution, with no labels significantly underrepresented.",
33
- "perturbed_explanation": "The original explanation: Table 1 highlights the label distribution within the database, showing a significant imbalance with SW and MG labels being underrepresented. The altered statement is incorrect due to the following reasons: 1. It inaccurately claims a balanced class distribution, directly contradicting the information from Table 1. 2. Such misrepresentation could lead to misunderstandings about the data characteristics and their implications."
34
  }
35
  },
36
  {
@@ -57,10 +60,13 @@
57
  "[paragraph id = 5] In Table 2 there are some examples of this process."
58
  ],
59
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Example of multilabel and binary encoding for BCC diagnosis</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S2.T2.1\" style=\"width:260.2pt;height:68.8pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-6.1pt,1.6pt) scale(0.955059914997519,0.955059914997519) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.1.1\">Codification</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.2.1\">Multi-label</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.3.1\">Binary</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.4.1\">Diagnostic</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.1\">Example 1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.2\">[0 1 0 1 1 0 1]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.3\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.4\">Presence of BCC</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.1\">Example 2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.2\">[1 0 0 0 0 0 0]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.3\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.4\">Absence 
of BCC</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.1\">Example 3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.2\">[0 0 0 0 0 0 0]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.3\">0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.4\">Absence of BCC</td>\n</tr>\n</table>\n</span></div>\n</figure>",
60
- "perturb_sentence_id": 5,
 
 
 
61
  "output": {
62
- "perturbed_statement": "[paragraph id = 5] In Table 5 there are some examples of this process.",
63
- "perturbed_explanation": "Original Explanation: Table 2 is identified as containing examples of the discussed process. 2. The statement mistakenly references Table 5 instead, which is not mentioned in the provided context as containing examples."
64
  }
65
  },
66
  {
@@ -119,10 +125,13 @@
119
  "[paragraph id = 17] As shown in this table, 73 percent of non-BCC lesions without any BCC pattern, 95 percent of non-BCC lesions with PN, and 99 percent of BCC lesions with some BCC pattern are correctly labeled as such."
120
  ],
121
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Evaluation using binary and multilabel classification metrics, fine-tuned binary classifier, and physician-guided analysis.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T3.1\" style=\"width:260.2pt;height:212.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-35.0pt,28.6pt) scale(0.787876288469247,0.787876288469247) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.1.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S3.T3.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.2\">Recall</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.3\">Specificity</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.4\">Precision</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.5\">Accuracy</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.1.1\">BCC/Non-BCC</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.3\">\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.2\">0.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3\">0.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.4\">0.90</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T3.1.1.3.5\">0.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.4.1.1\">Pattern detection</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.1\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.2\">0.94</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.3\">0.96</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.4\">0.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.5\">0.95</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.1\">Ulceration</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.2\">0.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.3\">0.75</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.4\">0.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.5\">0.77</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.7\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.1\">OvidNests</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.2\">0.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.3\">0.84</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.4\">0.53</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.5\">0.84</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.1\">Multiglobules</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.2\">0.61</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T3.1.1.8.3\">0.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.4\">0.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.5\">0.80</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.1\">Maple Leaf-like</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.2\">0.50</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.3\">0.82</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.4\">0.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.5\">0.77</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.10\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.1\">Spoke Wheel</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.2\">0.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.3\">0.87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.4\">0.37</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.5\">0.84</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.1\">Arborizing Telangiectasia</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.2\">0.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.3\">0.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.4\">0.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.5\">0.80</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.12\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.12.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.12.1.1\">Clinical-inspired XAI</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.13\">\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S3.T3.1.1.13.1\">All 0’s</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.2\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.3\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.4\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.5\">0.73</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.1\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.2\">0.94</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.3\">0.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.4\">0.97</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.5\">0.95</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.15\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.1\">BCC pattern detection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.2\">0.84</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.3\">0.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.4\">0.71</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.5\">0.99</td>\n</tr>\n</table>\n</span></div>\n</figure>",
122
- "perturb_sentence_id": 17,
 
 
 
123
  "output": {
124
- "perturbed_statement": "[paragraph id = 17] As shown in this table, 73 percent of BCC lesions without any BCC pattern, 95 percent of non-BCC lesions with PN, and 99 percent of non-BCC lesions with some BCC pattern are correctly labeled as such.",
125
- "perturbed_explanation": "1. Analyzing the given information, it is stated that 73 percent of non-BCC lesions without any BCC pattern, 95 percent of non-BCC lesions with PN, and 99 percent of BCC lesions with some BCC pattern are accurately labeled. 2. The statement incorrectly suggests that 73 percent of BCC lesions without BCC patterns and 99 percent of non-BCC lesions exhibiting BCC patterns are correctly labeled, which contradicts the context since it specifically mentions the performance metrics for non-BCC lesions without patterns and BCC lesions with patterns."
126
  }
127
  },
128
  {
@@ -166,10 +175,13 @@
166
  "[paragraph id = 17] In addition, the intersection area is larger in these cases."
167
  ],
168
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Statistics derived from estimation of conditional probability density functions of GradCAM within and outside the region of clinical interest.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T4.1\" style=\"width:260.2pt;height:42.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-52.2pt,8.5pt) scale(0.713620974996278,0.713620974996278) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1\">\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.1\">Prediction</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.2\">Intersection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.3\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.3.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.3.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.3.2.1.1.1\">Mean Fg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.4\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.4.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.4.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.4.2.1.1.1\">Mean Bg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.5\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.5.1\"></span> <span 
class=\"ltx_text\" id=\"S3.T4.1.1.1.5.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.5.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.5.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.5.2.1.1.1\">Std Fg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.5.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.6\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.6.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.6.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.6.2.1.1.1\">Std Bg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.3\"></span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.1\">Correct</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.3\">0.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.4\">0.16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.5\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.6\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.1\">Incorrect</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.2\">0.32</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.3\">0.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.4\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.5\">0.01</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.6\">0.21</td>\n</tr>\n</table>\n</span></div>\n</figure>",
169
- "perturb_sentence_id": 14,
 
 
 
170
  "output": {
171
- "perturbed_statement": "[paragraph id = 14] Table 4 summarizes the information extracted from these probability density function. Specifically, the median and variance of for and respectively, and the intersection area between and are shown.",
172
- "perturbed_explanation": "1. The original statement describes that Table 4 provides a summary including the mean, standard deviation, and intersection area related to the probability density functions. 2. The statement is incorrect because it inaccurately states that the table contains the median and variance instead of the actual metrics, mean and standard deviation, as outlined in the summary."
173
  }
174
  }
175
  ]
 
27
  "[paragraph id = 10] As can be seen in this table, the database has a significant class imbalance, with SW and MG underrepresented."
28
  ],
29
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Sample distribution for binary and multilabel codification.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S2.T1.1\" style=\"width:433.6pt;height:36.3pt;vertical-align:-0.7pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-112.0pt,9.2pt) scale(0.659441329482834,0.659441329482834) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T1.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"2\" id=\"S2.T1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.1.1.1\">Binary codification</span></td>\n<td class=\"ltx_td ltx_border_tt\" id=\"S2.T1.1.1.1.2\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"6\" id=\"S2.T1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T1.1.1.1.3.1\">Multi-label codification</span></td>\n<td class=\"ltx_td ltx_border_tt\" id=\"S2.T1.1.1.1.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.1\">BCC</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.2\">Non-BCC</td>\n<td class=\"ltx_td ltx_border_t\" id=\"S2.T1.1.1.2.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.4\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.5\">Ulceration</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.6\">Ovoid Nests</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.7\">Multiglobules</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.8\">Maple Leaf-like</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T1.1.1.2.9\">Spoke Wheel</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S2.T1.1.1.2.10\">Arborizing Telangiectasia</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T1.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.1\">775</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.2\">784</td>\n<td class=\"ltx_td ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.3\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.4\">557</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.5\">385</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.6\">333</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.7\">191</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.8\">244</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.9\">178</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_t\" id=\"S2.T1.1.1.3.10\">455</td>\n</tr>\n</table>\n</span></div>\n</figure>",
30
+ "perturb_sentence_id": [
31
+ 9,
32
+ 10
33
+ ],
34
  "output": {
35
+ "perturbed_statement": "[paragraph id = 9] Table 1 summarizes the distribution of labels in the database. As can be seen in this table, the database has a significant class imbalance, with SW and LT underrepresented.",
36
+ "perturbed_explanation": "The original explanation would indicate that the class imbalance involves labels SW and MG being underrepresented. 1. The statement incorrectly claims that the labels SW and LT are underrepresented, rather than SW and MG. Since LT is not mentioned as underrepresented in the actual class imbalance issue, stating this creates an inaccuracy about which labels have fewer instances."
37
  }
38
  },
39
  {
 
60
  "[paragraph id = 5] In Table 2 there are some examples of this process."
61
  ],
62
  "table_html": "<figure class=\"ltx_table\" id=\"S2.T2\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Example of multilabel and binary encoding for BCC diagnosis</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S2.T2.1\" style=\"width:260.2pt;height:68.8pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-6.1pt,1.6pt) scale(0.955059914997519,0.955059914997519) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S2.T2.1.1\">\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.1.1\">Codification</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.2.1\">Multi-label</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.3.1\">Binary</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S2.T2.1.1.1.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S2.T2.1.1.1.4.1\">Diagnostic</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.1\">Example 1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.2\">[0 1 0 1 1 0 1]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.3\">1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S2.T2.1.1.2.4\">Presence of BCC</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.1\">Example 2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.2\">[1 0 0 0 0 0 0]</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.3\">0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S2.T2.1.1.3.4\">Absence 
of BCC</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S2.T2.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.1\">Example 3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.2\">[0 0 0 0 0 0 0]</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.3\">0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S2.T2.1.1.4.4\">Absence of BCC</td>\n</tr>\n</table>\n</span></div>\n</figure>",
63
+ "perturb_sentence_id": [
64
+ 5,
65
+ 6
66
+ ],
67
  "output": {
68
+ "perturbed_statement": "[paragraph id = 5] In Figure 3 there are some examples of this process.",
69
+ "perturbed_explanation": "The original explanation would be: 1. The statement suggests that examples of a process are found in Table 2. Here is why the statement is incorrect: 2. The statement is incorrect because it refers to Figure 3, which is not mentioned in the context. The context mentions dimensions and labels but does not indicate the presence of a figure or specify any particular figure number."
70
  }
71
  },
72
  {
 
125
  "[paragraph id = 17] As shown in this table, 73 percent of non-BCC lesions without any BCC pattern, 95 percent of non-BCC lesions with PN, and 99 percent of BCC lesions with some BCC pattern are correctly labeled as such."
126
  ],
127
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Evaluation using binary and multilabel classification metrics, fine-tuned binary classifier, and physician-guided analysis.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T3.1\" style=\"width:260.2pt;height:212.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-35.0pt,28.6pt) scale(0.787876288469247,0.787876288469247) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T3.1.1\">\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S3.T3.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.2\">Recall</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.3\">Specificity</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.4\">Precision</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T3.1.1.1.5\">Accuracy</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.1.1\">BCC/Non-BCC</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.2.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.3\">\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.2\">0.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3\">0.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.4\">0.90</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S3.T3.1.1.3.5\">0.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.4.1.1\">Pattern detection</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.4.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.5\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.1\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.2\">0.94</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.3\">0.96</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.4\">0.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.5.5\">0.95</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.1\">Ulceration</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.2\">0.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.3\">0.75</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.4\">0.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.5\">0.77</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.7\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.1\">OvidNests</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.2\">0.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.3\">0.84</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.4\">0.53</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.7.5\">0.84</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.1\">Multiglobules</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.2\">0.61</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S3.T3.1.1.8.3\">0.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.4\">0.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.8.5\">0.80</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.1\">Maple Leaf-like</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.2\">0.50</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.3\">0.82</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.4\">0.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.9.5\">0.77</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.10\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.1\">Spoke Wheel</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.2\">0.60</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.3\">0.87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.4\">0.37</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.10.5\">0.84</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.1\">Arborizing Telangiectasia</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.2\">0.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.3\">0.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.4\">0.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.11.5\">0.80</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.12\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.12.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.12.1.1\">Clinical-inspired XAI</span></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.2\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.4\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T3.1.1.12.5\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.13\">\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S3.T3.1.1.13.1\">All 0’s</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.2\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.3\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.4\">-</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.13.5\">0.73</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.1\">Pigment Network</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.2\">0.94</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.3\">0.96</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.4\">0.97</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.14.5\">0.95</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.15\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.1\">BCC pattern detection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.2\">0.84</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.3\">0.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.4\">0.71</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.15.5\">0.99</td>\n</tr>\n</table>\n</span></div>\n</figure>",
128
+ "perturb_sentence_id": [
129
+ 17,
130
+ 18
131
+ ],
132
  "output": {
133
+ "perturbed_statement": "[paragraph id = 17] As shown in this table, 73 percent of non-BCC lesions with any BCC pattern, 95 percent of non-BCC lesions without PN, and 99 percent of BCC lesions without any BCC pattern are correctly labeled as such.",
134
+ "perturbed_explanation": "The original statement specifies the correct labeling percentages for various categories: 1. '73 percent of non-BCC lesions without any BCC pattern', 2. '95 percent of non-BCC lesions with PN', and 3. '99 percent of BCC lesions with some BCC pattern'. \n1. '73 percent of non-BCC lesions with any BCC pattern': This is incorrect because it falsely attributes a percentage of correct labeling to non-BCC lesions with the presence of any BCC pattern, which contradicts the context that states it is meant for lesions without any BCC pattern.\n2. '95 percent of non-BCC lesions without PN': This statement reverses the presence of PN, which should be included to match the context correctly.\n3. '99 percent of BCC lesions without any BCC pattern': This changes 'with some BCC pattern' to 'without any BCC pattern', thus making it incorrect by stating the opposite condition."
135
  }
136
  },
137
  {
 
175
  "[paragraph id = 17] In addition, the intersection area is larger in these cases."
176
  ],
177
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T4\">\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Statistics derived from estimation of conditional probability density functions of GradCAM within and outside the region of clinical interest.</figcaption>\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T4.1\" style=\"width:260.2pt;height:42.4pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-52.2pt,8.5pt) scale(0.713620974996278,0.713620974996278) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1\">\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.1\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.1\">Prediction</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.2\">Intersection</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.3\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.3.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.3.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.3.2.1.1.1\">Mean Fg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.3.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.4\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.4.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.4.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.4.2.1.1.1\">Mean Bg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.4.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.5\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.5.1\"></span> <span 
class=\"ltx_text\" id=\"S3.T4.1.1.1.5.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.5.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.5.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.5.2.1.1.1\">Std Fg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.5.3\"></span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S3.T4.1.1.1.6\">\n<span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.1\"></span> <span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.2\">\n<span class=\"ltx_tabular ltx_align_middle\" id=\"S3.T4.1.1.1.6.2.1\">\n<span class=\"ltx_tr\" id=\"S3.T4.1.1.1.6.2.1.1\">\n<span class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T4.1.1.1.6.2.1.1.1\">Std Bg</span></span>\n</span></span><span class=\"ltx_text\" id=\"S3.T4.1.1.1.6.3\"></span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.1\">Correct</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.2\">0.24</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.3\">0.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.4\">0.16</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.5\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T4.1.1.2.6\">0.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T4.1.1.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.1\">Incorrect</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.2\">0.32</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.3\">0.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.4\">0.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.5\">0.01</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T4.1.1.3.6\">0.21</td>\n</tr>\n</table>\n</span></div>\n</figure>",
178
+ "perturb_sentence_id": [
179
+ 14,
180
+ 15
181
+ ],
182
  "output": {
183
+ "perturbed_statement": "[paragraph id = 14] Table 4 summarizes the information extracted from these probability density function. Specifically, only the standard deviation of for and respectively is shown.",
184
+ "perturbed_explanation": "Original Explanation: 1. The statement originally mentions that the table summarizes mean, standard deviation, and intersection area. 2. This indicates that Table 4 contains multiple statistical measures, providing a comprehensive summary of the data. Perturbed Explanation: 3. The altered statement incorrectly claims that Table 4 only shows the standard deviation, omitting the mean and intersection area. This is inaccurate since the context mentions that both mean and intersection area are part of the summarized information."
185
  }
186
  }
187
  ]
table_result/2407.00108v1_output.json CHANGED
@@ -50,10 +50,13 @@
50
  "[paragraph id = 14] As a result, some errors were split into more granular categories, some were renamed and some generalised."
51
  ],
52
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T1.1\" style=\"width:455.2pt;height:275.9pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-62.2pt,37.7pt) scale(0.785454796276505,0.785454796276505) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.1.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S3.T1.1.1.1.1.1\">Type</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.1.1.2.1.1\" style=\"width:412.6pt;\">Description</span>\n</span>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.2.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.2.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.2.1.1.1\">Translation quality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.2.1.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.3.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.3.2.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.3.2.1.1\">Catastrophic translation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.3.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.3.2.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.3.2.2.1.1\" style=\"width:412.6pt;\">Impossible to post-edit, must be translated from scratch.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.4.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.4.3.1\"><span class=\"ltx_text 
ltx_font_italic\" id=\"S3.T1.1.1.4.3.1.1\">Mistranslation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.4.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.4.3.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.4.3.2.1.1\" style=\"width:412.6pt;\">Incorrect. Does not preserve the meaning or function of the source.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.5.4\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.5.4.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.5.4.1.1\">Omission</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.5.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.5.4.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.5.4.2.1.1\" style=\"width:412.6pt;\">Part of the source text was left untranslated.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.6.5\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.6.5.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.6.5.1.1\">Deviation in sentiment</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.6.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.6.5.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.6.5.2.1.1\" style=\"width:412.6pt;\">Does not preserve the sentiment of the source (e.g. does not match the expressed excitement), or negates the sentiment (e.g. 
from positive to negative).</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.7.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.7.6.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.7.6.1.1\">Locale convention</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.7.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.7.6.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.7.6.2.1.1\" style=\"width:412.6pt;\">Violates locale convention, e.g. currency and date format.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.8.7\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.8.7.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.8.7.1.1\">Fluency</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.8.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.8.7.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.8.7.2.1.1\" style=\"width:412.6pt;\">Contains punctuation, spelling and grammar errors.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.9.8\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.9.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.9.8.1.1\">Context</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.9.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.10.9\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.10.9.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.10.9.1.1\">Incorrect gender</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.10.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.10.9.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.10.9.2.1.1\" style=\"width:412.6pt;\">Misgenders the speaker or the addressed person(s).</span>\n</span>\n</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.1.1.11.10\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.11.10.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.11.10.1.1\">Incorrect plurality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.11.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.11.10.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.11.10.2.1.1\" style=\"width:412.6pt;\">Incorrectly refers to a single person when a group is addressed, or vice versa.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.12.11\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.12.11.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.12.11.1.1\">Wrong formality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.12.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.12.11.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.12.11.2.1.1\" style=\"width:412.6pt;\">Expressed in informal style or uses informal addressing when should use formal, or vice versa.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.13.12\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.13.12.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.13.12.1.1\">Other inconsistency with video</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.13.12.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.13.12.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.13.12.2.1.1\" style=\"width:412.6pt;\">Contains inconsistencies with the video material not falling within any of the above.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.14.13\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.14.13.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.14.13.1.1\">Style</span></th>\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.14.13.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.15.14\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.15.14.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.15.14.1.1\">Subtitle formatting violation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.15.14.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.15.14.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.15.14.2.1.1\" style=\"width:412.6pt;\">Violation of the subtitle blocking guidelines.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.16.15\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.16.15.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.16.15.1.1\">Other style sheet non-compliance</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.16.15.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.16.15.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.16.15.2.1.1\" style=\"width:412.6pt;\">Does not conform to the provided style sheet.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.17.16\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.17.16.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.17.16.1.1\">Awkward style</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.17.16.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.17.16.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.17.16.2.1.1\" style=\"width:412.6pt;\">The style of the translation does not reflect the style of the source sentence and/or the context.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.18.17\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.18.17.1\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S3.T1.1.1.18.17.1.1\">Subjective style changes</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.18.17.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.18.17.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.18.17.2.1.1\" style=\"width:412.6pt;\">The translation is acceptable but the editor suggests improvements in style.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.19.18\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb ltx_border_t\" id=\"S3.T1.1.1.19.18.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.19.18.1.1\">Other</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top ltx_border_bb ltx_border_t\" id=\"S3.T1.1.1.19.18.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.19.18.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.19.18.2.1.1\" style=\"width:412.6pt;\">Error of type not found above (use text box provided).</span>\n</span>\n</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>List of errors provided to the human evaluators during the campaign.</figcaption>\n</figure>",
53
- "perturb_sentence_id": 12,
 
 
 
54
  "output": {
55
- "perturbed_statement": "[paragraph id = 12] For this project, we created a bespoke taxonomy of errors (Table 2) based on translation errors reported in recent work [Freitag et al., 2021, Sharou and Specia, 2020], the original list of issues currently present in the ZOOSubs system, and errors unrelated to previous work (§3 ). Error categories from the sources were compiled and curated to be generic and not specific to the study requirements.",
56
- "perturbed_explanation": "1. The original statement correctly listed the contributing sources for the taxonomy of errors as previous works and the ZOOSubs system, aligning with the provided information. It precisely specified the process of compiling and curating error categories to fit the specific study requirements, conforming to the context.\n2. The statement now incorrectly refers to \"Table 2\" instead of \"Table 1\", erroneously attributes Freitag et al. to 2021 instead of 2020, incorrectly claims inclusion of errors unrelated to previous work, and states that error categories were curated to be generic rather than specific, which contradicts the given explanations. Such inaccuracies misrepresent the scope and methods described for the taxonomy creation and its alignment with the study focus."
57
  }
58
  },
59
  {
@@ -107,10 +110,13 @@
107
  "[paragraph id = 22] When referring to individual PEs, we use the notation PE."
108
  ],
109
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T2\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T2.1\" style=\"width:433.6pt;height:132.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-18.9pt,5.8pt) scale(0.919930461661833,0.919930461661833) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.1.1\">\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S3.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.1.1.1.1\">Series</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.2.1\">A</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.3.1\">B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T2.1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.4.1\">C</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.2.2\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.2.2.1.1\">Ep. 
ID</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.2\">A1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.3\">A2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.4\">B1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.5\">B2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.6\">C1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.7\">C2</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.8\">C3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.3.3\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.3.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.3.3.1.1\">PE.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.2\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.2.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.3\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.3.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.4\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.4.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.5\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.5.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.6\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.6.1\" 
style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.7\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.7.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.8\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.8.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.4.4\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.4.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.4.4.1.1\">PE.2</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.2\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.2.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.3\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.3.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.4\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.4.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.5\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.5.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.6\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.6.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.7\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text 
ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.7.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.4.4.8\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.8.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.5.5\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.5.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.5.5.1.1\">PE.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.2\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.2.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.3\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.3.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.4\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.4.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.5\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.5.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.6\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.6.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.7\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.7.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.5.5.8\" style=\"background-color:#FFD9C9;\"><span 
class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.8.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.6.6\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.6.6.1.1\">PE.4</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.2\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.2.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.3\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.3.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.4\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.4.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.5\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.5.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.6\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.6.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.7\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.7.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.6.6.8\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.8.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.7.7\">\n<td class=\"ltx_td ltx_align_right 
ltx_border_t\" id=\"S3.T2.1.1.7.7.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.7.7.1.1\">HT.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.7.7.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.7.7.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.7.7.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.4\"></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.5\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.6\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_t\" id=\"S3.T2.1.1.7.7.8\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.8.8\">\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S3.T2.1.1.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.8.8.1.1\">HT.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T2.1.1.8.8.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.8.8.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.8.8.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.3\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.4\"></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.5\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.6\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_bb\" id=\"S3.T2.1.1.8.8.8\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Work assignment to PEs and HTs in the human evaluation campaign used for both language 
pairs.</figcaption>\n</figure>",
110
- "perturb_sentence_id": 21,
 
 
 
111
  "output": {
112
- "perturbed_statement": "[paragraph id = 21] sees two different versions of the same text for evaluation) (Table 3 ).When referring to individual PEs, we use the notation PE.",
113
- "perturbed_explanation": "Original Explanation: The statement accurately represents that in the experimental design, individual PEs do not evaluate multiple versions of the same text, and the table referenced is Table 2. Explanation of Incorrectness: 1. The altered statement incorrectly claims that the PEs evaluate two different versions of the same text, introducing a factual error about the uniqueness of evaluation text versions. 2. Additionally, the table referenced is incorrectly numbered as Table 3 instead of Table 2, which does not align with the referenced material's notation."
114
  }
115
  },
116
  {
@@ -181,10 +187,13 @@
181
  "[paragraph id = 6] In this example, if PE.1 generally marked fewer errors than others, Base-NMT would be disproportionately rewarded."
182
  ],
183
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T2\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T2.1\" style=\"width:433.6pt;height:132.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-18.9pt,5.8pt) scale(0.919930461661833,0.919930461661833) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.1.1\">\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S3.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.1.1.1.1\">Series</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.2.1\">A</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.3.1\">B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T2.1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.4.1\">C</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.2.2\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.2.2.1.1\">Ep. 
ID</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.2\">A1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.3\">A2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.4\">B1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.5\">B2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.6\">C1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.7\">C2</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.8\">C3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.3.3\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.3.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.3.3.1.1\">PE.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.2\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.2.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.3\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.3.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.4\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.4.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.5\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.5.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.6\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.6.1\" 
style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.7\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.7.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.8\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.8.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.4.4\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.4.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.4.4.1.1\">PE.2</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.2\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.2.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.3\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.3.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.4\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.4.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.5\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.5.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.6\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.6.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.7\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text 
ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.7.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.4.4.8\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.8.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.5.5\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.5.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.5.5.1.1\">PE.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.2\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.2.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.3\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.3.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.4\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.4.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.5\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.5.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.6\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.6.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.7\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.7.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.5.5.8\" style=\"background-color:#FFD9C9;\"><span 
class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.8.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.6.6\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.6.6.1.1\">PE.4</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.2\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.2.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.3\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.3.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.4\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.4.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.5\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.5.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.6\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.6.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.7\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.7.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.6.6.8\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.8.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.7.7\">\n<td class=\"ltx_td ltx_align_right 
ltx_border_t\" id=\"S3.T2.1.1.7.7.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.7.7.1.1\">HT.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.7.7.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.7.7.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.7.7.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.4\"></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.5\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.6\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_t\" id=\"S3.T2.1.1.7.7.8\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.8.8\">\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S3.T2.1.1.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.8.8.1.1\">HT.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T2.1.1.8.8.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.8.8.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.8.8.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.3\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.4\"></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.5\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.6\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_bb\" id=\"S3.T2.1.1.8.8.8\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Work assignment to PEs and HTs in the human evaluation campaign used for both language 
pairs.</figcaption>\n</figure>",
184
- "perturb_sentence_id": 5,
 
 
 
185
  "output": {
186
- "perturbed_statement": "[paragraph id = 5] For example, in Table 2 , PE.1 is assigned two episodes for Ref and Base-NMT, but only one for MTCue and Google.",
187
- "perturbed_explanation": "Original Explanation: The discussed context highlights the imbalance in the distribution of assigned episodes, particularly the preference observed for certain systems. 1. By stating that PE.1 is assigned two episodes for 'Ref and Base-NMT' instead of 'Ref, MTCue and Google,' while 'MTCue and Google' receive just one, the altered statement introduces a misrepresentation of the observation distribution. This does not align with the information summarized in the table, making the statement inaccurate."
188
  }
189
  },
190
  {
@@ -236,10 +245,13 @@
236
  "[paragraph id = 25] They were informed that the undertaken work was carried out for a research project, but nevertheless, they were paid for their effort at competitive PE and HT rates, standard within the company for this type of work."
237
  ],
238
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T3.1\" style=\"width:433.6pt;height:127.3pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(2.2pt,-0.7pt) scale(1.01037484358733,1.01037484358733) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S3.T3.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S3.T3.1.1.1.1.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"4\" id=\"S3.T3.1.1.1.1.2\">English-to-French</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"4\" id=\"S3.T3.1.1.1.1.3\">English-to-German</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.2.2.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.2.1\">PE.F1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.3.1\">PE.F2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.4.1\">PE.F3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.2.2.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.5.1\">PE.F4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.6.1\">PE.G1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.7.1\">PE.G2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.8\"><span 
class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.8.1\">PE.G3</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.9.1\">PE.G4</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.3.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.3.3.1\">Translation YOE</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.2\">15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.3\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.3.3.5\">20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.6\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.7\">18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.8\">8</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.9\">17</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.4.4\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.4.4.1\">YOE in subtitles</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.2\">8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.3\">6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.4\">1.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.4.4.5\">20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.6\">7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.7\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.8\">8</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.4.4.9\">7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.5.5\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.5.5.1\">YOE 
in post-editing</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.2\">8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.3\">6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.5.5.5\">10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.6\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.7\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.8\">1</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.5.5.9\">3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.6.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.6.6.1\">Post-editing training?</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.2\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.2.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.3\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.3.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.4\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.4.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.6.6.5\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.5.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.6\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.6.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.7\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.7.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.8\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.8.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.6.6.9\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.9.1\" 
style=\"color:#B80000;\">✗</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.7.7\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S3.T3.1.1.7.7.1\">Prefer post-editing?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.2\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.2.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.3\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.3.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.4\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.4.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S3.T3.1.1.7.7.5\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.5.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.6\">\n<span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.6.1\" style=\"color:#009900;\">✓</span>/<span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.6.2\" style=\"color:#B80000;\">✗</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.7\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.7.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.8\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.8.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.9\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.9.1\" style=\"color:#B80000;\">✗</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Details regarding employed PEs.</figcaption>\n</figure>",
239
- "perturb_sentence_id": 25,
 
 
 
240
  "output": {
241
- "perturbed_statement": "[paragraph id = 25] They were informed that the undertaken work was carried out for a community service project, but nevertheless, they were paid for their effort at competitive PE and HT rates, standard within the company for this type of work. Information about the PEs and HTs years of experience (YOE) was collected to shed more light on the findings (Table 3).",
242
- "perturbed_explanation": "1. Original Explanation: The statement correctly describes the context in which the project work was conducted and the rationale for collecting the professionals' years of experience data.\n2. The statement is incorrect because it misrepresents the purpose of the project. Specifically, it was not a community service project but rather a research project. This inaccurately reflects the original intent and nature of the work undertaken."
243
  }
244
  },
245
  {
@@ -326,10 +338,13 @@
326
  "[paragraph id = 10] We report the total error counts as well as the normalisation multipliers in Table 4 ."
327
  ],
328
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.18\" style=\"width:433.6pt;height:171.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(80.4pt,-31.8pt) scale(1.58972460985968,1.58972460985968) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.18.18\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.18.18.19.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" colspan=\"3\" id=\"S5.T4.18.18.19.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.19.1.1.1\">English-to-German</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_tt\" colspan=\"3\" id=\"S5.T4.18.18.19.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.19.1.2.1\">English-to-French</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.2.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.2.2.2.3\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.3.1\">PE ID</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.4\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.4.1\">Error count</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T4.1.1.1.1\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.2.2.2.5\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.5.1\">PE ID</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.6\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.6.1\">Error count</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.6.6.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" 
id=\"S5.T4.6.6.6.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.6.6.6.5.1\">PE.G1</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.3.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.4.4.4.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.6.6.6.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.6.6.6.6.1\">PE.F1</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.5.5.5.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.6.6.6.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.10.10.10\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.10.10.10.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.10.10.10.5.1\">PE.G2</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.7.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.8.8.8.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.10.10.10.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.10.10.10.6.1\">PE.F2</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.9.9.9.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.10.10.10.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.14.14.14\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.14.14.14.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.14.14.14.5.1\">PE.G3</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.11.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.12.12.12.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.14.14.14.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.14.14.14.6.1\">PE.F3</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.13.13.13.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.14.14.14.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.18.18.18\">\n<th class=\"ltx_td ltx_align_right ltx_th 
ltx_th_row ltx_border_bb\" id=\"S5.T4.18.18.18.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.18.5.1\">PE.G4</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.15.15.15.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T4.16.16.16.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T4.18.18.18.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.18.6.1\">PE.F4</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.17.17.17.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T4.18.18.18.4\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Error counts and values of for each PE.</figcaption>\n</figure>",
329
- "perturb_sentence_id": 1,
 
 
 
330
  "output": {
331
- "perturbed_statement": "[paragraph id = 1] PE.F1 marked 120 errors total while PE.F4 marked 200).This suggested that the error counts across systems were highly consistent as every PE post-edited an identical number of segments for each system (cf.",
332
- "perturbed_explanation": "The original explanation is: 1. Direct comparison of the error counts across systems was unreliable due to each PE post-editing a different number of segments for each system. The statement introduces an inconsistency because: 2. It asserts that every PE post-edited an identical number of segments for each system, which contradicts the provided details about differences in the post-edited segment counts across systems for PEs, as normalization was required to address this discrepancy."
333
  }
334
  }
335
  ]
 
50
  "[paragraph id = 14] As a result, some errors were split into more granular categories, some were renamed and some generalised."
51
  ],
52
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T1\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T1.1\" style=\"width:455.2pt;height:275.9pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-62.2pt,37.7pt) scale(0.785454796276505,0.785454796276505) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S3.T1.1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.1.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S3.T1.1.1.1.1.1\">Type</th>\n<th class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top ltx_th ltx_th_column ltx_border_tt\" id=\"S3.T1.1.1.1.1.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.1.1.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.1.1.2.1.1\" style=\"width:412.6pt;\">Description</span>\n</span>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.2.1\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.2.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.2.1.1.1\">Translation quality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.2.1.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.3.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.3.2.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.3.2.1.1\">Catastrophic translation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.3.2.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.3.2.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.3.2.2.1.1\" style=\"width:412.6pt;\">Impossible to post-edit, must be translated from scratch.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.4.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.4.3.1\"><span class=\"ltx_text 
ltx_font_italic\" id=\"S3.T1.1.1.4.3.1.1\">Mistranslation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.4.3.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.4.3.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.4.3.2.1.1\" style=\"width:412.6pt;\">Incorrect. Does not preserve the meaning or function of the source.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.5.4\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.5.4.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.5.4.1.1\">Omission</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.5.4.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.5.4.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.5.4.2.1.1\" style=\"width:412.6pt;\">Part of the source text was left untranslated.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.6.5\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.6.5.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.6.5.1.1\">Deviation in sentiment</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.6.5.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.6.5.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.6.5.2.1.1\" style=\"width:412.6pt;\">Does not preserve the sentiment of the source (e.g. does not match the expressed excitement), or negates the sentiment (e.g. 
from positive to negative).</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.7.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.7.6.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.7.6.1.1\">Locale convention</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.7.6.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.7.6.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.7.6.2.1.1\" style=\"width:412.6pt;\">Violates locale convention, e.g. currency and date format.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.8.7\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.8.7.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.8.7.1.1\">Fluency</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.8.7.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.8.7.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.8.7.2.1.1\" style=\"width:412.6pt;\">Contains punctuation, spelling and grammar errors.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.9.8\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.9.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.9.8.1.1\">Context</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.9.8.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.10.9\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.10.9.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.10.9.1.1\">Incorrect gender</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.10.9.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.10.9.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.10.9.2.1.1\" style=\"width:412.6pt;\">Misgenders the speaker or the addressed person(s).</span>\n</span>\n</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S3.T1.1.1.11.10\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.11.10.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.11.10.1.1\">Incorrect plurality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.11.10.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.11.10.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.11.10.2.1.1\" style=\"width:412.6pt;\">Incorrectly refers to a single person when a group is addressed, or vice versa.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.12.11\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.12.11.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.12.11.1.1\">Wrong formality</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.12.11.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.12.11.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.12.11.2.1.1\" style=\"width:412.6pt;\">Expressed in informal style or uses informal addressing when should use formal, or vice versa.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.13.12\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.13.12.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.13.12.1.1\">Other inconsistency with video</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.13.12.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.13.12.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.13.12.2.1.1\" style=\"width:412.6pt;\">Contains inconsistencies with the video material not falling within any of the above.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.14.13\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S3.T1.1.1.14.13.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.14.13.1.1\">Style</span></th>\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_top ltx_border_t\" id=\"S3.T1.1.1.14.13.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.15.14\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.15.14.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.15.14.1.1\">Subtitle formatting violation</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.15.14.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.15.14.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.15.14.2.1.1\" style=\"width:412.6pt;\">Violation of the subtitle blocking guidelines.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.16.15\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.16.15.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.16.15.1.1\">Other style sheet non-compliance</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.16.15.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.16.15.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.16.15.2.1.1\" style=\"width:412.6pt;\">Does not conform to the provided style sheet.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.17.16\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.17.16.1\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T1.1.1.17.16.1.1\">Awkward style</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.17.16.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.17.16.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.17.16.2.1.1\" style=\"width:412.6pt;\">The style of the translation does not reflect the style of the source sentence and/or the context.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.18.17\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S3.T1.1.1.18.17.1\"><span class=\"ltx_text ltx_font_italic\" 
id=\"S3.T1.1.1.18.17.1.1\">Subjective style changes</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top\" id=\"S3.T1.1.1.18.17.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.18.17.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.18.17.2.1.1\" style=\"width:412.6pt;\">The translation is acceptable but the editor suggests improvements in style.</span>\n</span>\n</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T1.1.1.19.18\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb ltx_border_t\" id=\"S3.T1.1.1.19.18.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T1.1.1.19.18.1.1\">Other</span></th>\n<td class=\"ltx_td ltx_nopad_r ltx_align_justify ltx_align_top ltx_border_bb ltx_border_t\" id=\"S3.T1.1.1.19.18.2\">\n<span class=\"ltx_inline-block ltx_align_top\" id=\"S3.T1.1.1.19.18.2.1\">\n<span class=\"ltx_p\" id=\"S3.T1.1.1.19.18.2.1.1\" style=\"width:412.6pt;\">Error of type not found above (use text box provided).</span>\n</span>\n</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>List of errors provided to the human evaluators during the campaign.</figcaption>\n</figure>",
53
+ "perturb_sentence_id": [
54
+ 12,
55
+ 13
56
+ ],
57
  "output": {
58
+ "perturbed_statement": "[paragraph id = 12] For this project, we created a bespoke taxonomy of errors (Table 1) based solely on translation errors reported in the original list of issues already present in the ZOOSubs system and relevant errors from previous work (§2). Error categories from the aforementioned sources were compiled together and curated to fit the study requirements. We uploaded a draft taxonomy to ZOOSubs, and the first author performed a test evaluation against a stream with segments to validate the list.",
59
+ "perturbed_explanation": "The original explanation highlights the sources used to create the bespoke taxonomy of errors, which includes translation errors reported in previous work by Freitag et al., 2021, Sharou and Specia, 2022, the original list of issues from the ZOOSubs system, and relevant errors from previous work. 1. The statement is incorrect because it claims that the taxonomy was based solely on the original list of issues present in the ZOOSubs system and relevant errors from previous work, but it omits that the taxonomy was also based on translation errors reported in previous work by Freitag et al., 2021, and Sharou and Specia, 2022."
60
  }
61
  },
62
  {
 
110
  "[paragraph id = 22] When referring to individual PEs, we use the notation PE."
111
  ],
112
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T2\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T2.1\" style=\"width:433.6pt;height:132.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-18.9pt,5.8pt) scale(0.919930461661833,0.919930461661833) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.1.1\">\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S3.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.1.1.1.1\">Series</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.2.1\">A</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.3.1\">B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T2.1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.4.1\">C</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.2.2\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.2.2.1.1\">Ep. 
ID</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.2\">A1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.3\">A2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.4\">B1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.5\">B2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.6\">C1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.7\">C2</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.8\">C3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.3.3\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.3.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.3.3.1.1\">PE.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.2\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.2.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.3\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.3.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.4\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.4.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.5\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.5.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.6\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.6.1\" 
style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.7\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.7.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.8\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.8.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.4.4\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.4.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.4.4.1.1\">PE.2</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.2\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.2.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.3\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.3.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.4\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.4.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.5\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.5.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.6\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.6.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.7\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text 
ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.7.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.4.4.8\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.8.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.5.5\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.5.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.5.5.1.1\">PE.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.2\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.2.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.3\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.3.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.4\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.4.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.5\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.5.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.6\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.6.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.7\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.7.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.5.5.8\" style=\"background-color:#FFD9C9;\"><span 
class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.8.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.6.6\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.6.6.1.1\">PE.4</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.2\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.2.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.3\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.3.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.4\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.4.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.5\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.5.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.6\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.6.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.7\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.7.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.6.6.8\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.8.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.7.7\">\n<td class=\"ltx_td ltx_align_right 
ltx_border_t\" id=\"S3.T2.1.1.7.7.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.7.7.1.1\">HT.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.7.7.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.7.7.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.7.7.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.4\"></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.5\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.6\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_t\" id=\"S3.T2.1.1.7.7.8\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.8.8\">\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S3.T2.1.1.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.8.8.1.1\">HT.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T2.1.1.8.8.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.8.8.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.8.8.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.3\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.4\"></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.5\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.6\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_bb\" id=\"S3.T2.1.1.8.8.8\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Work assignment to PEs and HTs in the human evaluation campaign used for both language 
pairs.</figcaption>\n</figure>",
113
+ "perturb_sentence_id": [
114
+ 21,
115
+ 22
116
+ ],
117
  "output": {
118
+ "perturbed_statement": "[paragraph id = 21] sees two different versions of the same text) (Table 2). When referring to individual PEs, we use the notation PE.",
119
+ "perturbed_explanation": "1. The original statement explains that the paragraph does not see two different versions of the same text. 2. The statement incorrectly claims that the paragraph does see two different versions of the same text, which contradicts the context provided in paragraph id 20, where it was ensured that the same PE evaluated the output for each episode exactly once, implying there should be no exposure to multiple versions of the same text for evaluation."
120
  }
121
  },
122
  {
 
187
  "[paragraph id = 6] In this example, if PE.1 generally marked fewer errors than others, Base-NMT would be disproportionately rewarded."
188
  ],
189
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T2\">\n<div class=\"ltx_inline-block ltx_transformed_outer\" id=\"S3.T2.1\" style=\"width:433.6pt;height:132.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-18.9pt,5.8pt) scale(0.919930461661833,0.919930461661833) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S3.T2.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.1.1\">\n<td class=\"ltx_td ltx_align_right ltx_border_tt\" id=\"S3.T2.1.1.1.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.1.1.1.1\">Series</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.2\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.2.1\">A</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_tt\" colspan=\"2\" id=\"S3.T2.1.1.1.1.3\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.3.1\">B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"3\" id=\"S3.T2.1.1.1.1.4\"><span class=\"ltx_text ltx_font_bold ltx_font_smallcaps\" id=\"S3.T2.1.1.1.1.4.1\">C</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.2.2\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.2.2.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.2.2.1.1\">Ep. 
ID</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.2\">A1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.3\">A2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.4\">B1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.2.2.5\">B2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.6\">C1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.7\">C2</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.2.2.8\">C3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.3.3\">\n<td class=\"ltx_td ltx_align_right ltx_border_t\" id=\"S3.T2.1.1.3.3.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.3.3.1.1\">PE.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.2\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.2.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.3\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.3.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.4\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.4.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.3.3.5\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.5.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.6\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.6.1\" 
style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.7\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.7.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.3.3.8\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.3.3.8.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.4.4\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.4.4.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.4.4.1.1\">PE.2</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.2\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.2.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.3\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.3.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.4\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.4.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.4.4.5\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.5.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.6\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.6.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.4.4.7\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text 
ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.7.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.4.4.8\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.4.4.8.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.5.5\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.5.5.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.5.5.1.1\">PE.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.2\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.2.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.3\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.3.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.4\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.4.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.5.5.5\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.5.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.6\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.6.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.5.5.7\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.7.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.5.5.8\" style=\"background-color:#FFD9C9;\"><span 
class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.5.5.8.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.6.6\">\n<td class=\"ltx_td ltx_align_right\" id=\"S3.T2.1.1.6.6.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.6.6.1.1\">PE.4</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.2\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.2.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.3\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.3.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.4\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.4.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T2.1.1.6.6.5\" style=\"background-color:#FFD9C9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.5.1\" style=\"background-color:#FFD9C9;\">Ref</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.6\" style=\"background-color:#D5B3F9;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.6.1\" style=\"background-color:#D5B3F9;\">MTCue</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T2.1.1.6.6.7\" style=\"background-color:#D0FFF6;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.7.1\" style=\"background-color:#D0FFF6;\">Google</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T2.1.1.6.6.8\" style=\"background-color:#95D675;\"><span class=\"ltx_text ltx_font_smallcaps\" id=\"S3.T2.1.1.6.6.8.1\" style=\"background-color:#95D675;\">Base-NMT</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.7.7\">\n<td class=\"ltx_td ltx_align_right 
ltx_border_t\" id=\"S3.T2.1.1.7.7.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.7.7.1.1\">HT.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T2.1.1.7.7.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.7.7.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.7.7.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.3\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.4\"></td>\n<td class=\"ltx_td ltx_border_r ltx_border_t\" id=\"S3.T2.1.1.7.7.5\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.6\"></td>\n<td class=\"ltx_td ltx_border_t\" id=\"S3.T2.1.1.7.7.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_t\" id=\"S3.T2.1.1.7.7.8\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T2.1.1.8.8\">\n<td class=\"ltx_td ltx_align_right ltx_border_bb\" id=\"S3.T2.1.1.8.8.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T2.1.1.8.8.1.1\">HT.2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T2.1.1.8.8.2\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text\" id=\"S3.T2.1.1.8.8.2.1\" style=\"background-color:#FFFFFF;\"><span class=\"ltx_text ltx_font_italic\" id=\"S3.T2.1.1.8.8.2.1.1\">From Scratch</span></span></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.3\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.4\"></td>\n<td class=\"ltx_td ltx_border_bb ltx_border_r\" id=\"S3.T2.1.1.8.8.5\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.6\"></td>\n<td class=\"ltx_td ltx_border_bb\" id=\"S3.T2.1.1.8.8.7\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_border_bb\" id=\"S3.T2.1.1.8.8.8\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Work assignment to PEs and HTs in the human evaluation campaign used for both language 
pairs.</figcaption>\n</figure>",
190
+ "perturb_sentence_id": [
191
+ 5,
192
+ 6
193
+ ],
194
  "output": {
195
+ "perturbed_statement": "[paragraph id = 5] For example, in Table 2 , PE.1 is assigned three episodes for Ref, MTCue and Google, but only one for Base-NMT. In this example, if PE.1 generally marked fewer errors than others, Base-NMT would be disproportionately rewarded.",
196
+ "perturbed_explanation": "Original Explanation: The statement highlights that PE.1 has fewer episodes assigned for Base-NMT than for Ref, MTCue, and Google. This implies that any error marking tendency of PE.1 could affect the perceived performance of Base-NMT, possibly rewarding it disproportionately. 2. The statement is incorrect because it mentions three episodes assigned to PE.1 for Ref, MTCue, and Google, whereas the original context specifies two episodes. This change misrepresents the actual distribution of episodes in Table 2, altering the comparative analysis originally presented in the statement."
197
  }
198
  },
199
  {
 
245
  "[paragraph id = 25] They were informed that the undertaken work was carried out for a research project, but nevertheless, they were paid for their effort at competitive PE and HT rates, standard within the company for this type of work."
246
  ],
247
  "table_html": "<figure class=\"ltx_table\" id=\"S3.T3\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S3.T3.1\" style=\"width:433.6pt;height:127.3pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(2.2pt,-0.7pt) scale(1.01037484358733,1.01037484358733) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S3.T3.1.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r ltx_border_tt\" id=\"S3.T3.1.1.1.1.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"4\" id=\"S3.T3.1.1.1.1.2\">English-to-French</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" colspan=\"4\" id=\"S3.T3.1.1.1.1.3\">English-to-German</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.2.2\">\n<th class=\"ltx_td ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.2.2.1\"></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.2.1\">PE.F1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.3\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.3.1\">PE.F2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.4\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.4.1\">PE.F3</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.2.2.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.5.1\">PE.F4</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.6.1\">PE.G1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.7\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.7.1\">PE.G2</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.8\"><span 
class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.8.1\">PE.G3</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.2.2.9\"><span class=\"ltx_text ltx_font_bold\" id=\"S3.T3.1.1.2.2.9.1\">PE.G4</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.3.3\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.3.3.1\">Translation YOE</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.2\">15</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.3\">8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S3.T3.1.1.3.3.5\">20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.6\">7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.7\">18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.8\">8</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S3.T3.1.1.3.3.9\">17</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.4.4\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.4.4.1\">YOE in subtitles</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.2\">8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.3\">6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.4\">1.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.4.4.5\">20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.6\">7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.7\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.4.4.8\">8</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.4.4.9\">7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.5.5\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.5.5.1\">YOE 
in post-editing</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.2\">8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.3\">6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.4\">3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.5.5.5\">10</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.6\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.7\">5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.5.5.8\">1</td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.5.5.9\">3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.6.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_r\" id=\"S3.T3.1.1.6.6.1\">Post-editing training?</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.2\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.2.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.3\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.3.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.4\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.4.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S3.T3.1.1.6.6.5\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.5.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.6\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.6.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.7\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.7.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S3.T3.1.1.6.6.8\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.8.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S3.T3.1.1.6.6.9\"><span class=\"ltx_text\" id=\"S3.T3.1.1.6.6.9.1\" 
style=\"color:#B80000;\">✗</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S3.T3.1.1.7.7\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb ltx_border_r\" id=\"S3.T3.1.1.7.7.1\">Prefer post-editing?</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.2\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.2.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.3\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.3.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.4\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.4.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S3.T3.1.1.7.7.5\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.5.1\" style=\"color:#009900;\">✓</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.6\">\n<span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.6.1\" style=\"color:#009900;\">✓</span>/<span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.6.2\" style=\"color:#B80000;\">✗</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.7\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.7.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.8\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.8.1\" style=\"color:#B80000;\">✗</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S3.T3.1.1.7.7.9\"><span class=\"ltx_text\" id=\"S3.T3.1.1.7.7.9.1\" style=\"color:#B80000;\">✗</span></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Details regarding employed PEs.</figcaption>\n</figure>",
248
+ "perturb_sentence_id": [
249
+ 25,
250
+ 26
251
+ ],
252
  "output": {
253
+ "perturbed_statement": "[paragraph id = 25] They were informed that the undertaken work was carried out for a research project, but nevertheless, they were paid below standard PE and HT rates, standard within the company for this type of work. Information about the PEs and HTs years of experience (YOE) was collected to shed more light on the findings (Table 3 ).",
254
+ "perturbed_explanation": "They were informed that the undertaken work was carried out for a research project, but nevertheless, they were paid for their effort at competitive PE and HT rates, standard within the company for this type of work. Information about the PEs and HTs years of experience (YOE) was collected to shed more light on the findings. 1. The work was completed for a research project, aligning with the context of research being the nature of the work. 2. Payment was provided at competitive rates, noting that the remuneration was aligned with company standards for the type of work involved. The statement is incorrect because it falsely claims they were paid below standard rates, while the original context specifies that they were compensated at competitive rates. Thus, the discrepancy lies in the level of payment being misrepresented."
255
  }
256
  },
257
  {
 
338
  "[paragraph id = 10] We report the total error counts as well as the normalisation multipliers in Table 4 ."
339
  ],
340
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.18\" style=\"width:433.6pt;height:171.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(80.4pt,-31.8pt) scale(1.58972460985968,1.58972460985968) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T4.18.18\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T4.18.18.19.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" colspan=\"3\" id=\"S5.T4.18.18.19.1.1\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.19.1.1.1\">English-to-German</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_l ltx_border_tt\" colspan=\"3\" id=\"S5.T4.18.18.19.1.2\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.19.1.2.1\">English-to-French</span></th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.2.2\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.2.2.2.3\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.3.1\">PE ID</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.4\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.4.1\">Error count</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T4.1.1.1.1\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_t\" id=\"S5.T4.2.2.2.5\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.5.1\">PE ID</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.6\"><span class=\"ltx_text ltx_font_italic\" id=\"S5.T4.2.2.2.6.1\">Error count</span></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_t\" id=\"S5.T4.2.2.2.2\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.6.6.6\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" 
id=\"S5.T4.6.6.6.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.6.6.6.5.1\">PE.G1</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.3.3.3.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.4.4.4.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.6.6.6.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.6.6.6.6.1\">PE.F1</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.5.5.5.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.6.6.6.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.10.10.10\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.10.10.10.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.10.10.10.5.1\">PE.G2</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.7.7.7.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.8.8.8.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.10.10.10.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.10.10.10.6.1\">PE.F2</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.9.9.9.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.10.10.10.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.14.14.14\">\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.14.14.14.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.14.14.14.5.1\">PE.G3</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.11.11.11.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_r\" id=\"S5.T4.12.12.12.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row\" id=\"S5.T4.14.14.14.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.14.14.14.6.1\">PE.F3</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.13.13.13.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S5.T4.14.14.14.4\"></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.18.18.18\">\n<th class=\"ltx_td ltx_align_right ltx_th 
ltx_th_row ltx_border_bb\" id=\"S5.T4.18.18.18.5\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.18.5.1\">PE.G4</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.15.15.15.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T4.16.16.16.2\"></td>\n<th class=\"ltx_td ltx_align_right ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T4.18.18.18.6\"><span class=\"ltx_text ltx_font_bold\" id=\"S5.T4.18.18.18.6.1\">PE.F4</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.17.17.17.3\"></td>\n<td class=\"ltx_td ltx_nopad_r ltx_align_center ltx_border_bb\" id=\"S5.T4.18.18.18.4\"></td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>Error counts and values of for each PE.</figcaption>\n</figure>",
341
+ "perturb_sentence_id": [
342
+ 1,
343
+ 2
344
+ ],
345
  "output": {
346
+ "perturbed_statement": "[paragraph id = 1] PE.F1 marked errors total while PE.F2 marked ).This made direct comparison of the error counts across systems unreliable as each PE also post-edited a different number of segments for each system (cf.",
347
+ "perturbed_explanation": "1. Each post-editor (PE) marked a significantly different total number of errors, leading to unreliable comparisons of error counts across systems, as each PE edited a different number of segments for each system. 2. The statement is incorrect because it falsely claims that PE.F2 marked errors, instead of PE.F4. This introduces a factual inconsistency with the context which states that PE.F4 was involved in marking, while PE.F2 is not mentioned."
348
  }
349
  }
350
  ]
table_result/2407.00111v1_output.json CHANGED
@@ -34,10 +34,13 @@
34
  "[paragraph id = 13] Additionally, both model instances produced 0% exact matches for the A and B ordinal affinity values, and 1%, 15%, and 9% exact matches for the ordinal affinity values C, D, and E, respectively."
35
  ],
36
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T2.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Machine Learning</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Ligand</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.1\">\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">Dimension of</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Ligand + Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.3.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.5.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.5.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td 
ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.6\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T2.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.2.1\" style=\"font-size:90%;\">ECFP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.4.1\" style=\"font-size:90%;\">4,608</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T2.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T2.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.2.1\" style=\"font-size:90%;\">MACCS</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.4.1\" style=\"font-size:90%;\">2,727</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.5\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.6\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Performance of ML models in the conversion of 10,000 test instances of ligand embeddings and protein amino acid sequence embeddings into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The ML model outputs were compared to their ground truth values for scoring.</figcaption>\n</figure>",
37
- "perturb_sentence_id": 11,
 
 
 
38
  "output": {
39
- "perturbed_statement": "[paragraph id = 11] A support vector machines (SVM) machine learning model was selected for this analysis given its strong performance on imbalanced data sets Chakrabarti & Fauber (2022 ), which are statistically rare in multinomial classification tasks such as ours (Figure 5).333https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC (accessed 11June2024) A one-versus-rest (OvR) instance of a nonlinear kernel SVM was employed, thus enabling our multinomial classification task.444https://scikit-learn.org/stable/modules/generated/sklearn.multiclass.OneVsRestClassifier.html (accessed 11June2024) Additional details for our data embedding and ML methods are described in the Appendix.The OvR instances of linear SVM models demonstrated 7% overall accuracy and 7% overall exact matches on our multinomial classification task for both ligand embedding techniques (Table 2).",
40
- "perturbed_explanation": "1. The original explanation mentions that a linear kernel was selected as an integral part of the SVM model used in the analysis, designed for addressing multinomial classification task, as linear kernels are computationally efficient and effective in certain scenarios of data separation. 2. The statement incorrectly claims that a nonlinear kernel was utilized, which introduces a contradiction to the detailed choice mentioned. This change misrepresents the methodological decisions made in the study and could lead to misunderstandings regarding the applied techniques. To clarify, linear SVMs differ significantly from their nonlinear counterparts in their handling of data distributions."
41
  }
42
  },
43
  {
@@ -71,10 +74,13 @@
71
  "[paragraph id = 7] With the relaxed \"near match\" criteria, we achieved an 77% overall accuracy and all ordinal affinity values achieved 19-94% near matches relative the the ground truth with our method (Figure 6)."
72
  ],
73
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T2.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Machine Learning</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Ligand</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.1\">\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">Dimension of</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Ligand + Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.3.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.5.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.5.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td 
ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.6\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T2.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.2.1\" style=\"font-size:90%;\">ECFP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.4.1\" style=\"font-size:90%;\">4,608</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T2.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T2.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.2.1\" style=\"font-size:90%;\">MACCS</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.4.1\" style=\"font-size:90%;\">2,727</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.5\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.6\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Performance of ML models in the conversion of 10,000 test instances of ligand embeddings and protein amino acid sequence embeddings into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The ML model outputs were compared to their ground truth values for scoring.</figcaption>\n</figure>",
74
- "perturb_sentence_id": 1,
 
 
 
75
  "output": {
76
- "perturbed_statement": "[paragraph id = 1] We observed a significant improvement in the performance of our fine-tuned SLM on our LPI affinity prediction task versus the baseline model on a test set of 500 examples from the LPI-1.5M data set. Our fine-tuned SLM achieved 37% overall accuracy and 57% overall exact matches on our task.",
77
- "perturbed_explanation": "The original explanation highlights the key performance of the fine-tuned SLM on the test set. The statement specifies a test set size of 10,000 examples and reports both 37% accuracy and exact matches. However, the statement now incorrectly mentions that the test set size was 500 examples and that exact matches reached 57%. These altered details conflict with the reported test set size and performance figures, rendering the statement inaccurate."
78
  }
79
  },
80
  {
@@ -106,10 +112,13 @@
106
  "[paragraph id = 4] These results were significantly better than the ML results (Table 2) and baseline language model results (Table 3) on the same train/test data sets."
107
  ],
108
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T3.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Pretrained Foundational</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Language Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Language Model</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Parameter Count</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th 
class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.1.1\" style=\"font-size:90%;\">roneneldan/TinyStories-28M</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.2.1\" style=\"font-size:90%;\">28M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S4.T3.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.1.1\" style=\"font-size:90%;\">facebook/opt-125m</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.2.1\" style=\"font-size:90%;\">125M</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T3.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.4.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T3.1.4.3.1\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.1.1\" style=\"font-size:90%;\">EleutherAI/gpt-neo-125m</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.2\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.2.1\" style=\"font-size:90%;\">125M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.3\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.4\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Baseline performance of pretrained foundational small language models in the conversion of 10,000 test instances of ligand SMILES strings and protein amino acid sequences into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The model outputs were compared to their ground truth values for scoring. The language models are described by their <span class=\"ltx_text ltx_font_typewriter\" id=\"S4.T3.3.1\">HuggingFace.co</span> repo names (accessed 30May2024).</figcaption>\n</figure>",
109
- "perturb_sentence_id": 3,
 
 
 
110
  "output": {
111
- "perturbed_statement": "[paragraph id = 3] Notably, our fine-tuned SLM achieved 44%, 26%, 34%, and 12% exact matches for the ordinal affinity values B, C, D, and E, respectively (Figure 6).These results were significantly better than the ML results (Table 2) and baseline language model results (Table 3) on the same train/test data sets.",
112
- "perturbed_explanation": "The original explanation: This data point indicates that the fine-tuned SLM model's performance significantly improved over the results from previous models, as evidenced by the higher exact match percentages for each ordinal affinity value. 1. In the statement, the exact match percentages for B, C, D, and E should be 14%, 36%, 64%, and 22%, respectively; this discrepancy alters the correct reported performance numbers and suggests a different performance overview. 2. Consequently, the provided numbers no longer confirm the stated superior performance without factual alignment with verified results."
113
  }
114
  }
115
  ]
 
34
  "[paragraph id = 13] Additionally, both model instances produced 0% exact matches for the A and B ordinal affinity values, and 1%, 15%, and 9% exact matches for the ordinal affinity values C, D, and E, respectively."
35
  ],
36
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T2.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Machine Learning</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Ligand</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.1\">\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">Dimension of</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Ligand + Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.3.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.5.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.5.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td 
ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.6\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T2.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.2.1\" style=\"font-size:90%;\">ECFP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.4.1\" style=\"font-size:90%;\">4,608</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T2.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T2.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.2.1\" style=\"font-size:90%;\">MACCS</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.4.1\" style=\"font-size:90%;\">2,727</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.5\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.6\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Performance of ML models in the conversion of 10,000 test instances of ligand embeddings and protein amino acid sequence embeddings into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The ML model outputs were compared to their ground truth values for scoring.</figcaption>\n</figure>",
37
+ "perturb_sentence_id": [
38
+ 11,
39
+ 12
40
+ ],
41
  "output": {
42
+ "perturbed_statement": "A support vector machines (SVM) machine learning model was selected for this analysis given its strong performance on imbalanced data sets Chakrabarti & Fauber (2022 ), which are often present in multinomial classification tasks such as ours (Figure 5).333https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC (accessed 11June2024) A one-versus-rest (OvR) instance of a linear kernel SVM was employed, thus enabling our multinomial classification task.444https://scikit-learn.org/stable/modules/generated/sklearn.multiclass.OneVsRestClassifier.html (accessed 11June2024) Additional details for our data embedding and ML methods are described in the Appendix.The OvR instances of linear SVM models demonstrated 10% overall accuracy and 2% overall exact matches on our multinomial classification task for both ligand embedding techniques (Table 2).",
43
+ "perturbed_explanation": "Original Explanation: The statement describes that a support vector machines (SVM) model was chosen for its strong performance on imbalanced data sets, as mentioned by Chakrabarti & Fauber (2022), which is relevant for multinomial classification tasks. A one-versus-rest (OvR) strategy was employed with a linear kernel SVM to facilitate the multinomial classification. It also notes that the OvR models achieved 7% overall accuracy and 7% exact matches in the task for both ligand embedding techniques. 1. The statement mentions '10% overall accuracy and 2% overall exact matches.' This is incorrect because it differs from the original overall accuracy and overall exact matches reported for the SVM model, which were both 7%."
44
  }
45
  },
46
  {
 
74
  "[paragraph id = 7] With the relaxed \"near match\" criteria, we achieved an 77% overall accuracy and all ordinal affinity values achieved 19-94% near matches relative the the ground truth with our method (Figure 6)."
75
  ],
76
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T2.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Machine Learning</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Ligand</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.2.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.2.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.2.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.1\">\n<td 
class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.2.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.3.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.3.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.3.1.3.1.1\" style=\"font-size:90%;\">Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">Dimension of</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Ligand + Protein</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.4.1.3\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.4.1.3.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.4.1.3.1.1\" style=\"font-size:90%;\">Embedding</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.5\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.5.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.5.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.5.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.5.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td 
ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T2.1.1.1.6\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T2.1.1.1.6.1\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1.6.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T2.1.1.1.6.1.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.6.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T2.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.2.1\" style=\"font-size:90%;\">ECFP</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.4.1\" style=\"font-size:90%;\">4,608</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.5\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.2.1.6\"><span class=\"ltx_text\" id=\"S4.T2.1.2.1.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T2.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.1.1\" style=\"font-size:90%;\">OvR(LinearSVM)</span></th>\n<td class=\"ltx_td ltx_align_center 
ltx_border_bb\" id=\"S4.T2.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.2.1\" style=\"font-size:90%;\">MACCS</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.3.1\" style=\"font-size:90%;\">ESM2-3B</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.4.1\" style=\"font-size:90%;\">2,727</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.5\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.5.1\" style=\"font-size:90%;\">7%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T2.1.3.2.6\"><span class=\"ltx_text\" id=\"S4.T2.1.3.2.6.1\" style=\"font-size:90%;\">7%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Performance of ML models in the conversion of 10,000 test instances of ligand embeddings and protein amino acid sequence embeddings into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The ML model outputs were compared to their ground truth values for scoring.</figcaption>\n</figure>",
77
+ "perturb_sentence_id": [
78
+ 1,
79
+ 2
80
+ ],
81
  "output": {
82
+ "perturbed_statement": "[paragraph id = 1] We observed a significant improvement in the performance of our fine-tuned SLM on our LPI affinity prediction task versus the ML results on a test set of 10,000 examples from the LPI-1.5M data set. Our fine-tuned SLM achieved 37% overall accuracy and 37% overall exact matches on our task.",
83
+ "perturbed_explanation": "The original explanation would mention that the statement highlights a significant improvement in the performance of the fine-tuned SLM when compared to the baseline language model results, specifically noting its accuracy and exact matches. 1. The statement claims a significant improvement of the fine-tuned SLM compared to the ML results, while the context highlights the significant improvement over the baseline language model results. This change introduces a factual error because the context distinguishes between improvements upon baseline language models and ML results separately. Therefore, the statement incorrectly compares the fine-tuned SLM to ML results instead of the intended comparison with the baseline language model results."
84
  }
85
  },
86
  {
 
112
  "[paragraph id = 4] These results were significantly better than the ML results (Table 2) and baseline language model results (Table 3) on the same train/test data sets."
113
  ],
114
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<table class=\"ltx_tabular ltx_centering ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S4.T3.1.1.1.1\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.1.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.1.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.1.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.1.1.1.1.1\" style=\"font-size:90%;\">Pretrained Foundational</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.1.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.1.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.1.1.2.1.1\" style=\"font-size:90%;\">Language Model</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.2\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.2.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.2.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.2.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.2.1.1.1.1\" style=\"font-size:90%;\">Language Model</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.2.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.2.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.2.1.2.1.1\" style=\"font-size:90%;\">Parameter Count</span></td>\n</tr>\n</table>\n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.3\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.3.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.3.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.3.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.3.1.1.1.1\" style=\"font-size:90%;\">% Accuracy</span></td>\n</tr>\n</table>\n</th>\n<th 
class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S4.T3.1.1.1.4\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S4.T3.1.1.1.4.1\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.4.1.1\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.4.1.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.4.1.1.1.1\" style=\"font-size:90%;\">% Exact</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.1.1.4.1.2\">\n<td class=\"ltx_td ltx_nopad_r ltx_align_center\" id=\"S4.T3.1.1.1.4.1.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.1.1.4.1.2.1.1\" style=\"font-size:90%;\">Matches</span></td>\n</tr>\n</table>\n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.1.2.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_t\" id=\"S4.T3.1.2.1.1\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.1.1\" style=\"font-size:90%;\">roneneldan/TinyStories-28M</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.2\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.2.1\" style=\"font-size:90%;\">28M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.3\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.1.2.1.4\"><span class=\"ltx_text\" id=\"S4.T3.1.2.1.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.3.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row\" id=\"S4.T3.1.3.2.1\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.1.1\" style=\"font-size:90%;\">facebook/opt-125m</span></th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.3.2.2\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.2.1\" style=\"font-size:90%;\">125M</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.1.3.2.3\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T3.1.3.2.4\"><span class=\"ltx_text\" id=\"S4.T3.1.3.2.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.1.4.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_bb\" id=\"S4.T3.1.4.3.1\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.1.1\" style=\"font-size:90%;\">EleutherAI/gpt-neo-125m</span></th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.2\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.2.1\" style=\"font-size:90%;\">125M</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.3\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.3.1\" style=\"font-size:90%;\">0%</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S4.T3.1.4.3.4\"><span class=\"ltx_text\" id=\"S4.T3.1.4.3.4.1\" style=\"font-size:90%;\">0%</span></td>\n</tr>\n</tbody>\n</table>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>Baseline performance of pretrained foundational small language models in the conversion of 10,000 test instances of ligand SMILES strings and protein amino acid sequences into their corresponding predicted LPI ordinal affinity values from the LPI-1.5M data set. The model outputs were compared to their ground truth values for scoring. The language models are described by their <span class=\"ltx_text ltx_font_typewriter\" id=\"S4.T3.3.1\">HuggingFace.co</span> repo names (accessed 30May2024).</figcaption>\n</figure>",
115
+ "perturb_sentence_id": [
116
+ 3,
117
+ 4
118
+ ],
119
  "output": {
120
+ "perturbed_statement": "[paragraph id = 3] Notably, our fine-tuned SLM achieved 54%, 36%, 64%, and 22% exact matches for the ordinal affinity values B, C, D, and E, respectively (Figure 6). These results were significantly better than the ML results (Table 2) and baseline language model results (Table 3) on the same train/test data sets.",
121
+ "perturbed_explanation": "1. The original explanation defines specific exact match percentages for different ordinal affinity values achieved by the fine-tuned SLM, as well as a comparison with other models. 2. The statement is incorrect because it falsely claims that the fine-tuned SLM achieved 54% exact matches for the ordinal affinity value B, while no such detail is provided in the context. Instead, the context specifies that the overall exact match was 37%, without breaking it down by ordinal values like B, C, D, and E."
122
  }
123
  }
124
  ]
table_result/2407.00115v3_output.json CHANGED
@@ -63,10 +63,13 @@
63
  "[paragraph id = 26] To verify whether our RLKD method possesses robustness across other visual tasks, we execute object detection on the MS-COCO dataset."
64
  ],
65
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T2.2\" style=\"width:474.1pt;height:33pt;vertical-align:-0.6pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-158.0pt,10.8pt) scale(0.6,0.6) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.2\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.3\">Student</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.4\">Vanilla KD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.5\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.6\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.7\">PKT</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.8\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.9\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.10\">RKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.11\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.12\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.13\">SRRL</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.14\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.15\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.16\">DKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.17\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.18\">+Ours</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.2.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.1\">Top-1</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.2\">73.96</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.3\">70.26</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.4\">70.83</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.5\">71.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.6\">71.39</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.7\">70.92</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.8\">71.31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.9\">71.53</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.10\">70.94</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.11\">71.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.12\">71.37</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.13\">71.01</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.14\">71.25</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.15\">71.38</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.16\">71.13</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.17\">71.47</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.18\">71.62</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.1\">Top-5</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.2\">91.58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.3\">89.50</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.4\">90.31</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.5\">90.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.6\">90.51</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.7\">90.25</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.8\">90.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.9\">90.42</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.10\">90.33</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.11\">90.34</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.12\">90.45</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.13\">90.41</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.14\">90.42</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.15\">90.52</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.16\">90.31</th>\n<td 
class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.17\">90.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.18\">90.56</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T2.3.1.1\" style=\"font-size:90%;\">Table 2</span>: </span><span class=\"ltx_text\" id=\"S5.T2.4.2\" style=\"font-size:90%;\">Top-1 and Top-5 accuracy on ImageNet with ResNet-34 as teacher and ResNet-18 as student.</span></figcaption>\n</figure>",
66
- "perturb_sentence_id": 22,
 
 
 
67
  "output": {
68
- "perturbed_statement": "[paragraph id = 22] In contrast, CTKD achieves significant improvements on these KD frameworks, with gains of 0.22% (90.53% vs 90.31%) and 0.21% (90.62% vs 90.41%) respectively, which equals the performance of RLKD in these tasks.",
69
- "perturbed_explanation": "The original explanation states that RLKD achieves superior performance compared to CTKD, with the latter showing gains of only 0.02% and 0.01%, attributed to RLKD’s RL-based framework in temperature adjustment which considers long-term benefits. 1. RLKD is reported as outperforming CTKD in terms of percentage gains in the provided KD framework results. 2. The claim that CTKD achieves a 0.22% and 0.21% improvement contradicts this fact, as the gains are overstated and contradict the specific values provided. Therefore, CTKD does not match RLKD's reported achievements per stated metrics."
70
  }
71
  },
72
  {
@@ -121,10 +124,13 @@
121
  "[paragraph id = 12] further confirm the strong generalization of our RLKD."
122
  ],
123
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T3\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T3.2\" style=\"width:238.1pt;height:229.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-86.2pt,83.2pt) scale(0.58,0.58) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S5.T3.2.2\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.3\">Teacher</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.4\">RN-56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.5\">RN-110</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.6\">RN-110</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.7\">WRN-40-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.8\">WRN-40-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.1.1.1.1\">RN-324</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.2\">RN-324</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.3.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.1\">Acc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.2\">72.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.3\">74.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.4\">74.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.6\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.7\">79.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.8\">79.42</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.4.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.1\">Student</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.4.2.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.4\">RN-20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.5\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.6\">WRN-40-1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.7\">SN-V1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.8\">SN-V2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.5.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.1\">Acc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.2\">69.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.3\">71.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.4\">69.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.5\">73.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.6\">71.98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.7\">70.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.8\">71.82</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.1\">PKT</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.2\">70.85</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.3\">73.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.4\">70.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.5\">74.82</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.6\">74.01</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.7\">74.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.8\">75.10</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.7.5\">\n<td class=\"ltx_td 
ltx_align_center\" id=\"S5.T3.2.2.7.5.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.2\">71.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.3\">73.49</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.4\">71.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.5\">75.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.6\">74.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.7\">74.63</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.8\">75.52</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.8.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.2\">71.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.3\">73.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.4\">71.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.5\">75.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.6\">74.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.7\">74.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.8\">75.78</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.9.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.1\">SP</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.2\">70.84</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.3\">73.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.4\">70.74</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.5\">74.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.6\">73.77</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.7\">74.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.8\">75.59</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.10.8\">\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.2\">71.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.3\">73.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.4\">71.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.5\">75.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.6\">73.97</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.7\">75.28</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.8\">75.79</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.11.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.2\">71.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.3\">73.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.4\">71.51</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.6\">74.22</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.7\">75.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.8\">76.04</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.12.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.1\">VID</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.2\">70.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.3\">73.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.4\">70.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.5\">74.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.6\">73.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.7\">74.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.8\">75.24</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S5.T3.2.2.13.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.2\">70.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.3\">73.38</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.4\">71.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.5\">75.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.6\">73.75</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.7\">75.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.8\">75.48</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.14.12\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.2\">71.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.3\">73.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.4\">71.39</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.5\">75.48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.6\">74.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.7\">75.58</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.8\">75.81</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.15.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.1\">CRD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.2\">71.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.3\">73.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.4\">71.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.5\">75.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.6\">74.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.7\">75.13</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S5.T3.2.2.15.13.8\">75.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.16.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.2\">72.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.3\">74.08</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.4\">72.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.5\">75.71</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.6\">74.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.7\">75.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.8\">76.20</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.17.15\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.2\">72.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.3\">74.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.4\">72.28</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.5\">76.03</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.6\">74.98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.7\">75.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.8\">76.55</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.18.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.1\">SRRL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.2\">71.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.3\">73.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.4\">71.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.5\">75.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.6\">74.18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.18.16.7\">75.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.8\">75.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.19.17\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.2\">71.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.4\">71.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.5\">75.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.6\">74.38</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.7\">75.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.8\">75.97</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.20.18\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.2\">71.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.3\">74.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.4\">71.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.5\">76.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.6\">74.64</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.7\">75.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.8\">76.06</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.21.19\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.1\">DKD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.2\">71.43</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.3\">73.66</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.4\">71.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.5\">75.70</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.21.19.6\">74.54</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.7\">75.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.8\">76.48</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.22.20\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.2\">71.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.3\">73.91</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.4\">71.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.5\">75.85</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.6\">74.57</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.7\">75.88</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.8\">76.91</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.23.21\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.2\">71.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.3\">74.27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.4\">71.91</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.5\">76.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.6\">74.90</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.7\">76.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.8\">77.21</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T3.4.1.1\" style=\"font-size:90%;\">Table 3</span>: </span><span class=\"ltx_text\" id=\"S5.T3.5.2\" style=\"font-size:90%;\">Student network Top-1 accuracy on CIFAR-100 dataset. 
</span></figcaption>\n</figure>",
124
- "perturb_sentence_id": 9,
 
 
 
125
  "output": {
126
- "perturbed_statement": "[paragraph id = 9] When applied to the teacher-student pair RN110 & RN32, our RLKD brings an improvement of 0.61% (74.27% vs 73.66%) in the CTKD framework, which surpasses the accuracy of DKD by 0.36% (74.27% vs 73.91%). Experiments conducted on other 5 KD frameworks (e.g.",
127
- "perturbed_explanation": "1. The original explanation states that RLKD achieves an improvement of 0.61% in the DKD framework (74.27% vs 73.66%) and surpasses CTKD by 0.36% (74.27% vs 73.91%). 2. However, the statement claims that the 0.61% improvement occurred in the CTKD framework compared to DKD, which alters the roles of the frameworks mentioned. This adjustment contradicts the described experimental outcomes and results."
128
  }
129
  },
130
  {
@@ -177,10 +183,13 @@
177
  "[paragraph id = 29] Results demonstrate the robustness of our approach, where instance temperature adjustment is treated as a sequential decision-making task, enabling consideration of future benefits."
178
  ],
179
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.2\" style=\"width:176.2pt;height:124.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-51.7pt,36.6pt) scale(0.63,0.63) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S5.T4.2.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.1.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S5.T4.2.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.2\">mAP</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.3\">AP50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.4\">AP75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.5\">APl</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.6\">APm</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.7\">APs</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.1\">T: RN-101</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.2\">42.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.3\">62.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.4\">45.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.5\">54.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.6\">45.55</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.7\">25.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.3.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.1\">S: RN-18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.2\">33.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.3\">53.61</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T4.2.1.3.3.4\">35.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.5\">43.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.6\">35.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.7\">18.96</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.1\">Vanilla KD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.2\">33.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.3\">54.66</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.4\">36.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.5\">44.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.6\">36.67</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.7\">18.71</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.5.5\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.2\">34.51</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.3\">55.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.4\">36.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.5\">44.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.6\">37.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.7\">19.01</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.6.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.2\">34.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.3\">55.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.4\">37.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.5\">45.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.6\">37.30</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S5.T4.2.1.6.6.7\">19.12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.1\">T: RN-50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.2\">40.22</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.3\">61.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.4\">43.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.5\">51.98</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.6\">43.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.7\">24.16</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.8.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.1\">S: MN-V2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.2\">29.47</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.3\">48.87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.4\">30.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.5\">38.86</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.6\">30.77</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.7\">16.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.9.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.1\">Vanilla KD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.2\">30.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.3\">50.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.4\">31.35</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.5\">39.56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.6\">31.91</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.7\">16.69</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.10.10\">\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.2\">31.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.3\">52.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.4\">32.01</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.5\">41.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.6\">33.44</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.7\">18.09</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.11.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.2\">31.49</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.3\">52.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.4\">33.23</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.5\">41.71</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.6\">33.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.7\">18.31</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T4.3.1.1\" style=\"font-size:90%;\">Table 4</span>: </span><span class=\"ltx_text\" id=\"S5.T4.4.2\" style=\"font-size:90%;\">Results of our RLKD on the MS-COCO dataset, utilizing Faster-RCNN <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00115v3#bib.bib27\" title=\"\"><span class=\"ltx_text\" style=\"font-size:90%;\">27</span></a>]</cite> with FPN <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00115v3#bib.bib22\" title=\"\"><span class=\"ltx_text\" style=\"font-size:90%;\">22</span></a>]</cite>. 
We conduct experiments with the following teacher-student pairings: RN-101 paired with RN-18, and RN-50 paired with MN-V2.</span></figcaption>\n</figure>",
180
- "perturb_sentence_id": 27,
 
 
 
181
  "output": {
182
- "perturbed_statement": "[paragraph id = 27] As shown in Table 4, in the case of RN-50 & MN-V2, regarding the mAP metric, our RLKD underperforms Vanilla KD by -1.36% (31.13% vs 31.49%) and CTKD by -0.28% (31.13% vs 31.21%), respectively. Additionally, for detecting objects with varying sizes – evaluated by the AP metrics for large (APl), medium (APm), and small (APs) objects, our RLKD shows a slight reduction, consistently falling below CTKD across all size categories.",
183
- "perturbed_explanation": "Original Explanation: The statement outlines the performance of the RLKD model against other benchmarks, with RLKD demonstrating superior results across all metrics. Correct values are compared for clarity and accuracy. 1. RLKD outperformance by 1.36% compared to Vanilla KD indicates an improvement in the mAP metric. 2. RLKD outperformance by 0.28% compared to CTKD shows a consistent trend of enhancement. Updated Explanation: The altered description inaccurately states that RLKD underperforms both Vanilla KD and CTKD across metrics, which misrepresents the actual experimental findings. 3. RLKD does not perform worse in the described comparison metrics (mAP and AP for object size categories); instead, it shows higher values, evidencing its robustness."
184
  }
185
  },
186
  {
@@ -226,10 +235,13 @@
226
  "[paragraph id = 5] This enhancement verifies the effectiveness of our designed uncertainty score, which enables the agent to make wiser decisions by taking into account the student model s mastery of the training instances."
227
  ],
228
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T5.2\" style=\"width:177.7pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-45.8pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.1\">Ours w/o US</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.2\">71.16</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.3\">73.68</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.4\">75.61</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.5\">73.57</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.1\">Ours w US</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T5.3.1.1\" style=\"font-size:90%;\">Table 5</span>: </span><span class=\"ltx_text\" id=\"S5.T5.4.2\" style=\"font-size:90%;\">Ablation study of the uncertainty score (US) feature. </span></figcaption>\n</figure>",
229
- "perturb_sentence_id": 4,
 
 
 
230
  "output": {
231
- "perturbed_statement": "[paragraph id = 4] As shown in Table 5 , when incorporating uncertainty score into state representation, our method shows an improvement of 1.04% (72.20% vs 71.16%) in the RN-56 & RN-20 teacher-student pair. This enhancement verifies the effectiveness of our designed uncertainty score, which enables the agent to make wiser decisions by taking into account the student model s mastery of the training instances.",
232
- "perturbed_explanation": "Original Explanation: 1. Incorporating the uncertainty score into the state representation led to an improvement of 0.24% (71.40% vs 71.16%) in accuracy for the RN-56 & RN-20 Network Pair. 2. This indicates that the uncertainty score enables more effective decision-making by considering the student model's training instance mastery.\n\nExplanation of the Inaccuracy: The provided statement erroneously claims an improvement of 1.04% (72.20% vs 71.16%) with the inclusion of the uncertainty score, which substantially exceeds the described actual improvement of 0.24% (71.40% vs 71.16%). This misstatement overstates the effectiveness of the designed uncertainty score."
233
  }
234
  },
235
  {
@@ -275,10 +287,13 @@
275
  "[paragraph id = 10] We believe the effectiveness of the instance reward calibration strategy lies in its ability to enable the agent to more accurately perceive the rewards resulting from each of its instance temperature adjustment actions, thereby enhancing its capacity to update its policy for performing the action."
276
  ],
277
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T6\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T6.2\" style=\"width:181.3pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-46.7pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T6.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.1\">Ours w/o IRA</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.2\">70.91</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.3\">73.26</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.4\">75.39</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.5\">73.32</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.1\">Ours w IRA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T6.3.1.1\" style=\"font-size:90%;\">Table 6</span>: </span><span class=\"ltx_text\" id=\"S5.T6.4.2\" style=\"font-size:90%;\">Ablation on instance reward calibration (IRA) strategy.</span></figcaption>\n</figure>",
278
- "perturb_sentence_id": 9,
 
 
 
279
  "output": {
280
- "perturbed_statement": "[paragraph id = 9] E.g., our instance reward calibration strategy diminishes the performance of RN-110 & RN-32 pair by 0.55% (73.81% vs 73.26%).We believe the effectiveness of the instance reward calibration strategy lies in its ability to enable the agent to more accurately perceive the rewards resulting from each of its instance temperature adjustment actions, thereby enhancing its capacity to update its policy for performing the action.",
281
- "perturbed_explanation": "The original explanation: The statement highlights the effect of an instance reward calibration strategy on the RN-110 & RN-32 pair, emphasizing its role in improving the performance by 0.55% through more accurate reward perception and policy enhancement. 1. The claim in the statement that the strategy diminishes performance is factually incorrect, as the provided performance metrics display an increase from 73.26% to 73.81%, indicating an improvement rather than a decrease. 2. The evaluation thus reaffirms the promotive influence of the calibration strategy on performance, contrasting with the diminishing effect mentioned."
282
  }
283
  },
284
  {
@@ -324,10 +339,13 @@
324
  "[paragraph id = 15] We attribute this success to the strategy enables the agent to learn valuable instance temperature adjustment policy faster, allowing the student model to acquire more useful knowledge during the early stages of KD."
325
  ],
326
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T7\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T7.2\" style=\"width:178.1pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-45.9pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T7.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.1\">Ours w/o EE</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.2\">71.03</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.3\">73.52</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.4\">75.50</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.5\">73.45</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.1\">Ours w EE</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T7.3.1.1\" style=\"font-size:90%;\">Table 7</span>: </span><span class=\"ltx_text\" id=\"S5.T7.4.2\" style=\"font-size:90%;\">Ablation study of the efficient exploration (EE) strategy.</span></figcaption>\n</figure>",
327
- "perturb_sentence_id": 14,
 
 
 
328
  "output": {
329
- "perturbed_statement": "[paragraph id = 14] In the experiments involving the RN-56 & RN-20 teacher-student pair, our efficient exploration strategy results in a performance improvement of 0.15% (71.40% vs 71.25%).We attribute this success to the strategy enables the agent to learn valuable instance temperature adjustment policy faster, allowing the student model to acquire more useful knowledge during the early stages of KD.",
330
- "perturbed_explanation": "Original Explanation: The efficient exploration strategy demonstrated a performance improvement of 0.37% (71.40% vs 71.03%) for the RN-56 & RN-20 teacher-student pair, attributing success to the accelerated policy learning that benefited the student's knowledge acquisition during KD's initial stages. 2. The statement claims an improvement of 0.15% (71.40% vs 71.25%), which conflicts with the exact 0.37% improvement provided in the details, making the improvement range erroneous."
331
  }
332
  },
333
  {
@@ -375,10 +393,13 @@
375
  "[paragraph id = 20] We think this may due to utilizing the top 10% samples caused overfitting in the agent."
376
  ],
377
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T8\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T8.8\" style=\"width:241.3pt;height:30.8pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-91.0pt,11.6pt) scale(0.57,0.57) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T8.8.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T8.8.8.8.9\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_tt\" id=\"S5.T8.8.8.8.10\">Student</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.2.2.2.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.5.5.5.5\">\n \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.8.8.8.8\">\n \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T8.8.8.9.1.1\">72.34</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S5.T8.8.8.9.1.2\">69.06</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.3\">70.92</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.4\">71.21</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.5\">71.27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.6\">71.40</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.10.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T8.8.8.10.2.1\">75.61</th>\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_row ltx_border_bb ltx_border_r\" id=\"S5.T8.8.8.10.2.2\">73.26</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.3\">75.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.4\">75.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.6\">75.79</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T8.12.2.1\" style=\"font-size:90%;\">Table 8</span>: </span><span class=\"ltx_text\" id=\"S5.T8.10.1\" style=\"font-size:90%;\">Comparison of different high-quality training sample selection strategies. The teacher-student pairs corresponding to the second and third rows are respectively RN-56 &amp; RN-20 and WRN-40-2 &amp; WRN-16-2. “” denotes the mix-up operation.</span></figcaption>\n</figure>",
378
- "perturb_sentence_id": 18,
 
 
 
379
  "output": {
380
- "perturbed_statement": "[paragraph id = 18] Interestingly, we observe that when using the top 5% of high-quality training data, the performance of the student model in the teacher-student pair RN-56 & RN-20 is 70.92%, which is not as good as the performance 71.21% of the student model when using the training data ranked from 10% to 20%.This phenomenon is also observed in the teacher-student pair WRN-40-2 & WRN-16-2.",
381
- "perturbed_explanation": "The original explanation states: 1. The performance comparison is based on using the top 10% of data versus data ranked from 10% to 20%. 2. The performance values given are the result of this specific data segmentation. Alteration Impact Analysis: 3. The altered statement incorrectly specifies the top 5% instead of the top 10%. This misrepresentation contradicts the actual observed methodology and data selection criteria, which might lead to a misunderstanding of the experiment's outcomes."
382
  }
383
  }
384
  ]
 
63
  "[paragraph id = 26] To verify whether our RLKD method possesses robustness across other visual tasks, we execute object detection on the MS-COCO dataset."
64
  ],
65
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T2\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T2.2\" style=\"width:474.1pt;height:33pt;vertical-align:-0.6pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-158.0pt,10.8pt) scale(0.6,0.6) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T2.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.1.1\">\n<th class=\"ltx_td ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.2\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.3\">Student</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.4\">Vanilla KD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.5\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.6\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.7\">PKT</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.8\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.9\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.10\">RKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.11\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.12\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.13\">SRRL</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.14\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_r ltx_border_tt\" id=\"S5.T2.2.1.1.1.15\">+Ours</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T2.2.1.1.1.16\">DKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.17\">+CTKD</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T2.2.1.1.1.18\">+Ours</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.2.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.1\">Top-1</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.2\">73.96</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.3\">70.26</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.4\">70.83</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.5\">71.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.6\">71.39</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.7\">70.92</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.8\">71.31</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.9\">71.53</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.10\">70.94</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.11\">71.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.12\">71.37</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.13\">71.01</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.14\">71.25</td>\n<td class=\"ltx_td 
ltx_align_center ltx_border_r ltx_border_t\" id=\"S5.T2.2.1.2.1.15\">71.38</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T2.2.1.2.1.16\">71.13</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.17\">71.47</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T2.2.1.2.1.18\">71.62</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T2.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.1\">Top-5</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.2\">91.58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.3\">89.50</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.4\">90.31</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.5\">90.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.6\">90.51</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.7\">90.25</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.8\">90.30</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.9\">90.42</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.10\">90.33</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.11\">90.34</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.12\">90.45</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.13\">90.41</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.14\">90.42</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb ltx_border_r\" id=\"S5.T2.2.1.3.2.15\">90.52</td>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T2.2.1.3.2.16\">90.31</th>\n<td 
class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.17\">90.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T2.2.1.3.2.18\">90.56</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T2.3.1.1\" style=\"font-size:90%;\">Table 2</span>: </span><span class=\"ltx_text\" id=\"S5.T2.4.2\" style=\"font-size:90%;\">Top-1 and Top-5 accuracy on ImageNet with ResNet-34 as teacher and ResNet-18 as student.</span></figcaption>\n</figure>",
66
+ "perturb_sentence_id": [
67
+ 22,
68
+ 23
69
+ ],
70
  "output": {
71
+ "perturbed_statement": "[paragraph id = 22] In contrast, CTKD obtains much greater improvement on these KD frameworks, with gains of 2% (90.31% vs 88.31%) and 1% (90.41% vs 89.41%) respectively, about 10 times higher.We think the superior performance of RLKD can be attributed to its RL-based framework in instance temperature adjustment, which considers the future benefits of these adjustments.",
72
+ "perturbed_explanation": "The original explanation states that CTKD obtains much fewer improvements on the KD frameworks mentioned, with specific gains of just 0.02% and 0.01%, which are about 10 times lower than those of RLKD. 1. CTKD obtains a marginal improvement on each KD framework. 2. These improvements are significantly smaller compared to the improvements achieved by RLKD. In the statement, CTKD's gains are misrepresented as much greater than they are, with exaggerated gains of 2% and 1%, respectively, compared to RLKD. This exaggeration contradicts the experimental results discussed, which highlight RLKD's superior performance and scalability."
73
  }
74
  },
75
  {
 
124
  "[paragraph id = 12] further confirm the strong generalization of our RLKD."
125
  ],
126
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T3\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T3.2\" style=\"width:238.1pt;height:229.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-86.2pt,83.2pt) scale(0.58,0.58) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S5.T3.2.2\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.3\">Teacher</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.4\">RN-56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.5\">RN-110</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.6\">RN-110</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.7\">WRN-40-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.8\">WRN-40-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.1.1.1.1\">RN-324</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T3.2.2.2.2\">RN-324</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.3.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.1\">Acc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.2\">72.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.3\">74.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.4\">74.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.6\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.7\">79.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.3.1.8\">79.42</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.4.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.1\">Student</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.4.2.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.4\">RN-20</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.5\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.6\">WRN-40-1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.7\">SN-V1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.4.2.8\">SN-V2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.5.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.1\">Acc</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.2\">69.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.3\">71.14</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.4\">69.06</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.5\">73.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.6\">71.98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.7\">70.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.5.3.8\">71.82</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.6.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.1\">PKT</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.2\">70.85</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.3\">73.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.4\">70.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.5\">74.82</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.6\">74.01</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.7\">74.39</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.6.4.8\">75.10</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.7.5\">\n<td class=\"ltx_td 
ltx_align_center\" id=\"S5.T3.2.2.7.5.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.2\">71.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.3\">73.49</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.4\">71.07</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.5\">75.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.6\">74.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.7\">74.63</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.7.5.8\">75.52</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.8.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.2\">71.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.3\">73.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.4\">71.34</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.5\">75.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.6\">74.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.7\">74.89</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.8.6.8\">75.78</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.9.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.1\">SP</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.2\">70.84</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.3\">73.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.4\">70.74</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.5\">74.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.6\">73.77</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.7\">74.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.9.7.8\">75.59</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.10.8\">\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.2\">71.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.3\">73.42</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.4\">71.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.5\">75.30</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.6\">73.97</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.7\">75.28</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.10.8.8\">75.79</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.11.9\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.2\">71.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.3\">73.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.4\">71.51</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.6\">74.22</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.7\">75.31</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.11.9.8\">76.04</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.12.10\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.1\">VID</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.2\">70.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.3\">73.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.4\">70.59</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.5\">74.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.6\">73.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.7\">74.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.12.10.8\">75.24</td>\n</tr>\n<tr 
class=\"ltx_tr\" id=\"S5.T3.2.2.13.11\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.2\">70.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.3\">73.38</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.4\">71.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.5\">75.20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.6\">73.75</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.7\">75.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.13.11.8\">75.48</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.14.12\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.2\">71.09</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.3\">73.70</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.4\">71.39</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.5\">75.48</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.6\">74.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.7\">75.58</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.14.12.8\">75.81</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.15.13\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.1\">CRD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.2\">71.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.3\">73.63</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.4\">71.38</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.5\">75.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.6\">74.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.15.13.7\">75.13</td>\n<td class=\"ltx_td ltx_align_center 
ltx_border_t\" id=\"S5.T3.2.2.15.13.8\">75.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.16.14\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.2\">72.13</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.3\">74.08</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.4\">72.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.5\">75.71</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.6\">74.72</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.7\">75.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.16.14.8\">76.20</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.17.15\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.2\">72.29</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.3\">74.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.4\">72.28</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.5\">76.03</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.6\">74.98</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.7\">75.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.17.15.8\">76.55</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.18.16\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.1\">SRRL</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.2\">71.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.3\">73.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.4\">71.09</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.5\">75.69</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.6\">74.18</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.18.16.7\">75.36</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.18.16.8\">75.90</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.19.17\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.2\">71.41</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.4\">71.52</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.5\">75.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.6\">74.38</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.7\">75.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.19.17.8\">75.97</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.20.18\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.2\">71.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.3\">74.02</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.4\">71.81</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.5\">76.23</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.6\">74.64</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.7\">75.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.20.18.8\">76.06</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.21.19\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.1\">DKD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.2\">71.43</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.3\">73.66</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.4\">71.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.5\">75.70</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S5.T3.2.2.21.19.6\">74.54</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.7\">75.44</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T3.2.2.21.19.8\">76.48</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.22.20\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.2\">71.62</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.3\">73.91</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.4\">71.65</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.5\">75.85</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.6\">74.57</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.7\">75.88</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T3.2.2.22.20.8\">76.91</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T3.2.2.23.21\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.2\">71.89</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.3\">74.27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.4\">71.91</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.5\">76.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.6\">74.90</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.7\">76.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T3.2.2.23.21.8\">77.21</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T3.4.1.1\" style=\"font-size:90%;\">Table 3</span>: </span><span class=\"ltx_text\" id=\"S5.T3.5.2\" style=\"font-size:90%;\">Student network Top-1 accuracy on CIFAR-100 dataset. 
</span></figcaption>\n</figure>",
127
+ "perturb_sentence_id": [
128
+ 9,
129
+ 10
130
+ ],
131
  "output": {
132
+ "perturbed_statement": "[paragraph id = 9] When applied to the teacher-student pair RN110 & RN56, our RLKD brings an improvement of 0.61% (74.27% vs 73.66%) in the DKD framework, which surpasses the accuracy of CTKD by 0.36% (74.27% vs 73.91%).Experiments conducted on other 5 KD frameworks (e.g. ",
133
+ "perturbed_explanation": "The original explanation is: 1. The statement describes an experiment involving the teacher-student pair RN110 & RN32 within the DKD framework, claiming an improvement and comparison to the CTKD accuracy. 2. The statement's comparison is explicit in mentioning two specific accuracy percentages (74.27% and 73.66%). 3. The improvement over another framework (CTKD) is indicated by percentages (74.27% vs 73.91%). The altered statement is incorrect because: 4. It changes the teacher-student pair from RN110 & RN32 to RN110 & RN56, which is not mentioned in the context provided, thus altering a key detail of the experiment description."
134
  }
135
  },
136
  {
 
183
  "[paragraph id = 29] Results demonstrate the robustness of our approach, where instance temperature adjustment is treated as a sequential decision-making task, enabling consideration of future benefits."
184
  ],
185
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T4\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T4.2\" style=\"width:176.2pt;height:124.7pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-51.7pt,36.6pt) scale(0.63,0.63) ;\">\n<table class=\"ltx_tabular ltx_align_middle\" id=\"S5.T4.2.1\">\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.1.1\">\n<td class=\"ltx_td ltx_border_tt\" id=\"S5.T4.2.1.1.1.1\"></td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.2\">mAP</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.3\">AP50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.4\">AP75</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.5\">APl</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.6\">APm</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.1.1.7\">APs</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.2.2\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.1\">T: RN-101</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.2\">42.04</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.3\">62.48</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.4\">45.88</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.5\">54.60</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.6\">45.55</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.2.2.7\">25.22</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.3.3\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.1\">S: RN-18</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.2\">33.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.3\">53.61</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S5.T4.2.1.3.3.4\">35.26</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.5\">43.16</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.6\">35.68</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.3.3.7\">18.96</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.4.4\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.1\">Vanilla KD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.2\">33.97</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.3\">54.66</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.4\">36.62</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.5\">44.14</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.6\">36.67</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.4.4.7\">18.71</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.5.5\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.2\">34.51</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.3\">55.32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.4\">36.95</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.5\">44.76</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.6\">37.17</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.5.5.7\">19.01</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.6.6\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.2\">34.73</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.3\">55.61</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.4\">37.19</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.5\">45.27</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.6.6.6\">37.30</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S5.T4.2.1.6.6.7\">19.12</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.7.7\">\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.1\">T: RN-50</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.2\">40.22</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.3\">61.02</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.4\">43.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.5\">51.98</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.6\">43.53</td>\n<td class=\"ltx_td ltx_align_center ltx_border_tt\" id=\"S5.T4.2.1.7.7.7\">24.16</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.8.8\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.1\">S: MN-V2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.2\">29.47</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.3\">48.87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.4\">30.90</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.5\">38.86</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.6\">30.77</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.8.8.7\">16.33</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.9.9\">\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.1\">Vanilla KD</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.2\">30.13</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.3\">50.28</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.4\">31.35</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.5\">39.56</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.6\">31.91</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T4.2.1.9.9.7\">16.69</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.10.10\">\n<td 
class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.1\">+CTKD</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.2\">31.21</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.3\">52.12</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.4\">32.01</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.5\">41.11</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.6\">33.44</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T4.2.1.10.10.7\">18.09</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T4.2.1.11.11\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.1\">+Ours</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.2\">31.49</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.3\">52.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.4\">33.23</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.5\">41.71</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.6\">33.65</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T4.2.1.11.11.7\">18.31</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T4.3.1.1\" style=\"font-size:90%;\">Table 4</span>: </span><span class=\"ltx_text\" id=\"S5.T4.4.2\" style=\"font-size:90%;\">Results of our RLKD on the MS-COCO dataset, utilizing Faster-RCNN <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00115v3#bib.bib27\" title=\"\"><span class=\"ltx_text\" style=\"font-size:90%;\">27</span></a>]</cite> with FPN <cite class=\"ltx_cite ltx_citemacro_cite\">[<a class=\"ltx_ref\" href=\"https://arxiv.org/html/2407.00115v3#bib.bib22\" title=\"\"><span class=\"ltx_text\" style=\"font-size:90%;\">22</span></a>]</cite>. 
We conduct experiments with the following teacher-student pairings: RN-101 paired with RN-18, and RN-50 paired with MN-V2.</span></figcaption>\n</figure>",
186
+ "perturb_sentence_id": [
187
+ 27,
188
+ 28
189
+ ],
190
  "output": {
191
+ "perturbed_statement": "[paragraph id = 27] As shown in Table 4 , in the case of RN-50 & MN-V2, regarding the mAP metric, our RLKD underperforms Vanilla KD by 1.36% (29.49% vs 30.13%) and CTKD by 0.28% (31.21% vs 31.49%), respectively.Additionally, for detecting objects with varying sizes – evaluated by the AP metrics for large (APl), medium (APm) and small (APs) objects, our RLKD fails to show significant enhancement and is consistently surpassed by CTKD across all size categories.",
192
+ "perturbed_explanation": "1. The original statement asserts that RLKD outperforms Vanilla KD by 1.36% in the mAP metric and CTKD by 0.28%. Moreover, for detecting objects of varied sizes, RLKD consistently surpasses CTKD, showing significant enhancement in all size categories. 2. The statement claims that RLKD underperforms compared to both Vanilla KD and CTKD in the mAP metric, implying that the mAP of RLKD is incorrectly lower than both. Additionally, it states RLKD fails to show significant enhancement, incorrectly asserting that CTKD surpasses RLKD in all object size categories."
193
  }
194
  },
195
  {
 
235
  "[paragraph id = 5] This enhancement verifies the effectiveness of our designed uncertainty score, which enables the agent to make wiser decisions by taking into account the student model s mastery of the training instances."
236
  ],
237
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T5\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T5.2\" style=\"width:177.7pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-45.8pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T5.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T5.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T5.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.1\">Ours w/o US</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.2\">71.16</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.3\">73.68</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.4\">75.61</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T5.2.1.3.2.5\">73.57</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T5.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.1\">Ours w US</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T5.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T5.3.1.1\" style=\"font-size:90%;\">Table 5</span>: </span><span class=\"ltx_text\" id=\"S5.T5.4.2\" style=\"font-size:90%;\">Ablation study of the uncertainty score (US) feature. </span></figcaption>\n</figure>",
238
+ "perturb_sentence_id": [
239
+ 4,
240
+ 5
241
+ ],
242
  "output": {
243
+ "perturbed_statement": "[paragraph id = 4] As shown in Table 5, when incorporating uncertainty score into state representation, our method shows an improvement of 1.50% (72.66% vs 71.16%) in the RN-56 & RN-20 teacher-student pair. This enhancement verifies the effectiveness of our designed uncertainty score, which enables the agent to make wiser decisions by taking into account the student model's mastery of the training instances.",
244
+ "perturbed_explanation": "1. The statement originally reports an improvement of 0.24% (71.40% vs 71.16%) in the RN-56 & RN-20 teacher-student pair after incorporating the uncertainty score into the state representation. 2. The statement now claims a much larger improvement of 1.50% (72.66% vs 71.16%), which is incorrect. The original improvement figure of 0.24% is consistent with careful experimentation and analysis. The altered improvement figure introduces an error, suggesting a much larger enhancement than actually observed, thus misrepresenting the effectiveness of the uncertainty score in the experiments conducted."
245
  }
246
  },
247
  {
 
287
  "[paragraph id = 10] We believe the effectiveness of the instance reward calibration strategy lies in its ability to enable the agent to more accurately perceive the rewards resulting from each of its instance temperature adjustment actions, thereby enhancing its capacity to update its policy for performing the action."
288
  ],
289
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T6\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T6.2\" style=\"width:181.3pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-46.7pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T6.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T6.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T6.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.1\">Ours w/o IRA</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.2\">70.91</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.3\">73.26</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.4\">75.39</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T6.2.1.3.2.5\">73.32</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T6.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.1\">Ours w IRA</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T6.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T6.3.1.1\" style=\"font-size:90%;\">Table 6</span>: </span><span class=\"ltx_text\" id=\"S5.T6.4.2\" style=\"font-size:90%;\">Ablation on instance reward calibration (IRA) strategy.</span></figcaption>\n</figure>",
290
+ "perturb_sentence_id": [
291
+ 9,
292
+ 10
293
+ ],
294
  "output": {
295
+ "perturbed_statement": "[paragraph id = 9] E.g., our instance reward calibration strategy boosts the performance of RN-110 & RN-32 pair by 10.5% (83.81% vs 73.26%). We believe the effectiveness of the instance reward calibration strategy lies in its ability to enable the agent to more accurately perceive the rewards resulting from each of its instance temperature adjustment actions, thereby enhancing its capacity to update its policy for performing the action.",
296
+ "perturbed_explanation": "The original explanation posits that the effectiveness of the instance reward calibration strategy lies in its ability to enable the agent to more accurately perceive the rewards resulting from each of its instance temperature adjustment actions, thereby enhancing its capacity to update its policy for performing the action. 1. The statement claims that the instance reward calibration strategy boosts performance by 10.5%, which is factually incorrect. The original text does not provide any mention of RN-110 & RN-32; instead, the context discusses the promotive effect across 4 different sets of teacher-student pairs, but it does not specify the exact percentage improvement or identify such pairs like RN-110 & RN-32. 2. It inaccurately reports a performance percentage of 83.81%, which is not supported or indicated in the context."
297
  }
298
  },
299
  {
 
339
  "[paragraph id = 15] We attribute this success to the strategy enables the agent to learn valuable instance temperature adjustment policy faster, allowing the student model to acquire more useful knowledge during the early stages of KD."
340
  ],
341
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T7\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T7.2\" style=\"width:178.1pt;height:47.5pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-45.9pt,12.2pt) scale(0.66,0.66) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T7.2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.1\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.2\">RN-56</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.3\">RN-110</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.4\">WRN-40-2</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T7.2.1.1.1.5\">VGG-13</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.2.1\">\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.1\">Student</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.2\">RN-20</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.3\">RN-32</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.4\">WRN-16-2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S5.T7.2.1.2.1.5\">VGG-8</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.3.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.1\">Ours w/o EE</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.2\">71.03</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.3\">73.52</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.4\">75.50</th>\n<th class=\"ltx_td ltx_align_center 
ltx_th ltx_th_column ltx_border_t\" id=\"S5.T7.2.1.3.2.5\">73.45</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T7.2.1.4.3\">\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.1\">Ours w EE</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.2\">71.40</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.3\">73.81</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.4\">75.79</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T7.2.1.4.3.5\">73.75</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T7.3.1.1\" style=\"font-size:90%;\">Table 7</span>: </span><span class=\"ltx_text\" id=\"S5.T7.4.2\" style=\"font-size:90%;\">Ablation study of the efficient exploration (EE) strategy.</span></figcaption>\n</figure>",
342
+ "perturb_sentence_id": [
343
+ 14,
344
+ 15
345
+ ],
346
  "output": {
347
+ "perturbed_statement": "[paragraph id = 14] In the experiments involving the MN-60 & MN-30 teacher-student pair, our efficient exploration strategy results in a performance improvement of 0.37% (71.40% vs 71.03%). We attribute this success to the strategy enables the agent to learn valuable instance temperature adjustment policy faster, allowing the student model to acquire more useful knowledge during the early stages of KD.",
348
+ "perturbed_explanation": "1. The original explanation describes that the efficient exploration strategy results in a performance improvement of 0.37% for the RN-56 & RN-20 teacher-student pair. 2. The statement incorrectly refers to the MN-60 & MN-30 teacher-student pair instead of the RN-56 & RN-20 pair, which is contradicting the context. The MN-60 & MN-30 teacher-student pair is not mentioned, making the performance improvement reference factually incorrect based on the available information."
349
  }
350
  },
351
  {
 
393
  "[paragraph id = 20] We think this may due to utilizing the top 10% samples caused overfitting in the agent."
394
  ],
395
  "table_html": "<figure class=\"ltx_table\" id=\"S5.T8\">\n<div class=\"ltx_inline-block ltx_align_center ltx_transformed_outer\" id=\"S5.T8.8\" style=\"width:241.3pt;height:30.8pt;vertical-align:-0.0pt;\"><span class=\"ltx_transformed_inner\" style=\"transform:translate(-91.0pt,11.6pt) scale(0.57,0.57) ;\">\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S5.T8.8.8\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.8\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_tt\" id=\"S5.T8.8.8.8.9\">Teacher</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_tt\" id=\"S5.T8.8.8.8.10\">Student</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.1.1.1.1\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.2.2.2.2\"></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.5.5.5.5\">\n \n</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_tt\" id=\"S5.T8.8.8.8.8\">\n \n</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.9.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_t\" id=\"S5.T8.8.8.9.1.1\">72.34</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S5.T8.8.8.9.1.2\">69.06</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.3\">70.92</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.4\">71.21</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.5\">71.27</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S5.T8.8.8.9.1.6\">71.40</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S5.T8.8.8.10.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_bb\" id=\"S5.T8.8.8.10.2.1\">75.61</th>\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_row ltx_border_bb ltx_border_r\" id=\"S5.T8.8.8.10.2.2\">73.26</th>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.3\">75.33</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.4\">75.57</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.5\">75.61</td>\n<td class=\"ltx_td ltx_align_center ltx_border_bb\" id=\"S5.T8.8.8.10.2.6\">75.79</td>\n</tr>\n</tbody>\n</table>\n</span></div>\n<figcaption class=\"ltx_caption ltx_centering\"><span class=\"ltx_tag ltx_tag_table\"><span class=\"ltx_text\" id=\"S5.T8.12.2.1\" style=\"font-size:90%;\">Table 8</span>: </span><span class=\"ltx_text\" id=\"S5.T8.10.1\" style=\"font-size:90%;\">Comparison of different high-quality training sample selection strategies. The teacher-student pairs corresponding to the second and third rows are respectively RN-56 &amp; RN-20 and WRN-40-2 &amp; WRN-16-2. “” denotes the mix-up operation.</span></figcaption>\n</figure>",
396
+ "perturb_sentence_id": [
397
+ 18,
398
+ 19
399
+ ],
400
  "output": {
401
+ "perturbed_statement": "[paragraph id = 18] Interestingly, we observe that when using the top 5% of high-quality training data, the performance of the student model in the teacher-student pair RN-56 & RN-20 is 70.92%, which is not as good as the performance 71.21% of the student model when using the training data ranked from 5% to 15%. This phenomenon is also observed in the teacher-student pair WRN-40-2 & WRN-16-2.",
402
+ "perturbed_explanation": "The original explanation highlights the performance of the student model under different selections of high-quality training data. Specifically, it focuses on the observation that using the top 10% of data seems to cause a reduction in performance compared to using data ranked from 10% to 20%, implying issues such as overfitting when only top 10% samples are used. The statement is incorrect for the following reasons: 1. The statement incorrectly mentions using the top 5% of high-quality training data, whereas the correct top percentage mentioned is the top 10%. 2. It incorrectly suggests using a range from 5% to 15% for comparison, whereas the correct range is from 10% to 20%."
403
  }
404
  }
405
  ]
table_result/2407.00119v2_output.json CHANGED
@@ -68,10 +68,13 @@
68
  "[paragraph id = 14] The experimental results prove the superiority of the ELR-GNN method proposed in this paper."
69
  ],
70
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Comparison with other baseline models on the IEMOCAP dataset.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1\" rowspan=\"3\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text\" id=\"S4.T1.1.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"7\" id=\"S4.T1.1.1.1.2\" style=\"padding:2.5pt 8.7pt;\">IEMOCAP</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.1\" style=\"padding:2.5pt 8.7pt;\">Happy</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.2\" style=\"padding:2.5pt 8.7pt;\">Sad</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.3\" style=\"padding:2.5pt 8.7pt;\">Neutral</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.4\" style=\"padding:2.5pt 8.7pt;\">Angry</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.5\" style=\"padding:2.5pt 8.7pt;\">Excited</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.6\" style=\"padding:2.5pt 8.7pt;\">Frustrated</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.7\" style=\"padding:2.5pt 8.7pt;\">Average(w)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.1\" style=\"padding:2.5pt 8.7pt;\">Acc. 
F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.2\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.3\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.4\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.5\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.6\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.7\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.1.1\" style=\"padding:2.5pt 8.7pt;\">TextCNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.2\" style=\"padding:2.5pt 8.7pt;\">27.7 29..8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.3\" style=\"padding:2.5pt 8.7pt;\">57.1 53.8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.4\" style=\"padding:2.5pt 8.7pt;\">34.3 40.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.5\" style=\"padding:2.5pt 8.7pt;\">61.1 52.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.6\" style=\"padding:2.5pt 8.7pt;\">46.1 50.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.7\" style=\"padding:2.5pt 8.7pt;\">62.9 55.7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.8\" style=\"padding:2.5pt 8.7pt;\">48.9 48.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.5.2\">\n<th class=\"ltx_td ltx_align_left ltx_th 
ltx_th_row ltx_border_r\" id=\"S4.T1.1.5.2.1\" style=\"padding:2.5pt 8.7pt;\">bc-LSTM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.2\" style=\"padding:2.5pt 8.7pt;\">29.1 34.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.3\" style=\"padding:2.5pt 8.7pt;\">57.1 60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.4\" style=\"padding:2.5pt 8.7pt;\">54.1 51.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.5\" style=\"padding:2.5pt 8.7pt;\">57.0 56.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.6\" style=\"padding:2.5pt 8.7pt;\">51.1 57.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.7\" style=\"padding:2.5pt 8.7pt;\">67.1 58.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.8\" style=\"padding:2.5pt 8.7pt;\">55.2 54.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.6.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.6.3.1\" style=\"padding:2.5pt 8.7pt;\">MFN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.2\" style=\"padding:2.5pt 8.7pt;\">24.0 34.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.3\" style=\"padding:2.5pt 8.7pt;\">65.6 70.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.4\" style=\"padding:2.5pt 8.7pt;\">55.5 52.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.5\" style=\"padding:2.5pt 8.7pt;\">72.3 66.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.6\" style=\"padding:2.5pt 8.7pt;\">64.3 62.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.7\" style=\"padding:2.5pt 8.7pt;\">67.9 62.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.8\" style=\"padding:2.5pt 8.7pt;\">60.1 59.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.7.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.7.4.1\" style=\"padding:2.5pt 8.7pt;\">CMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.2\" 
style=\"padding:2.5pt 8.7pt;\">25.0 30.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.3\" style=\"padding:2.5pt 8.7pt;\">55.9 62.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.4\" style=\"padding:2.5pt 8.7pt;\">52.8 52.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.5\" style=\"padding:2.5pt 8.7pt;\">61.7 59.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.6\" style=\"padding:2.5pt 8.7pt;\">55.5 60.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.7\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.7.4.7.1\">71.1</span> 60.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.8\" style=\"padding:2.5pt 8.7pt;\">56.5 56.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.8.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.8.5.1\" style=\"padding:2.5pt 8.7pt;\">LFM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.2\" style=\"padding:2.5pt 8.7pt;\">25.6 33.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.3\" style=\"padding:2.5pt 8.7pt;\">75.1 78.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.4\" style=\"padding:2.5pt 8.7pt;\">58.5 59.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.5\" style=\"padding:2.5pt 8.7pt;\">64.7 65.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.6\" style=\"padding:2.5pt 8.7pt;\">80.2 71.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.7\" style=\"padding:2.5pt 8.7pt;\">61.1 58.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.8\" style=\"padding:2.5pt 8.7pt;\">63.4 62.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.9.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.9.6.1\" style=\"padding:2.5pt 8.7pt;\">ICON</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.2\" style=\"padding:2.5pt 8.7pt;\">22.2 29.9</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.9.6.3\" style=\"padding:2.5pt 8.7pt;\">58.8 64.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.4\" style=\"padding:2.5pt 8.7pt;\">62.8 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.5\" style=\"padding:2.5pt 8.7pt;\">64.7 63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.6\" style=\"padding:2.5pt 8.7pt;\">58.9 63.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.7\" style=\"padding:2.5pt 8.7pt;\">67.2 60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.8\" style=\"padding:2.5pt 8.7pt;\">59.1 58.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.10.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.10.7.1\" style=\"padding:2.5pt 8.7pt;\">A-DMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.2\" style=\"padding:2.5pt 8.7pt;\">43.1 50.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.3\" style=\"padding:2.5pt 8.7pt;\">69.4 76.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.4\" style=\"padding:2.5pt 8.7pt;\">63.0 62.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.5\" style=\"padding:2.5pt 8.7pt;\">63.5 56.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.6\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.10.7.6.1\">88.3</span> 77.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.7\" style=\"padding:2.5pt 8.7pt;\">53.3 55.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.8\" style=\"padding:2.5pt 8.7pt;\">64.6 64.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.11.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.11.8.1\" style=\"padding:2.5pt 8.7pt;\">DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.2\" style=\"padding:2.5pt 8.7pt;\">40.6 42.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.3\" style=\"padding:2.5pt 8.7pt;\"><span 
class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.11.8.3.1\">89.1 84.5</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.4\" style=\"padding:2.5pt 8.7pt;\">62.0 63.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.5\" style=\"padding:2.5pt 8.7pt;\">67.5 64.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.6\" style=\"padding:2.5pt 8.7pt;\">65.5 63.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.7\" style=\"padding:2.5pt 8.7pt;\">64.1 66.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.8\" style=\"padding:2.5pt 8.7pt;\">65.2 64.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.12.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.12.9.1\" style=\"padding:2.5pt 8.7pt;\">RGAT</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.2\" style=\"padding:2.5pt 8.7pt;\">60.1 51.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.3\" style=\"padding:2.5pt 8.7pt;\">78.8 77.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.4\" style=\"padding:2.5pt 8.7pt;\">60.1 65.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.5\" style=\"padding:2.5pt 8.7pt;\">70.7 63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.6\" style=\"padding:2.5pt 8.7pt;\">78.0 68.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.7\" style=\"padding:2.5pt 8.7pt;\">64.3 61.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.8\" style=\"padding:2.5pt 8.7pt;\">65.0 65.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.13.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.13.10.1\" style=\"padding:2.5pt 8.7pt;\">AGHMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.2\" style=\"padding:2.5pt 8.7pt;\">48.3 52.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.3\" style=\"padding:2.5pt 8.7pt;\">68.3 73.3</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.13.10.4\" style=\"padding:2.5pt 8.7pt;\">61.6 58.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.5\" style=\"padding:2.5pt 8.7pt;\">57.5 61.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.6\" style=\"padding:2.5pt 8.7pt;\">68.1 69.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.7\" style=\"padding:2.5pt 8.7pt;\">67.1 62.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.8\" style=\"padding:2.5pt 8.7pt;\">63.5 63.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.14.11\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.14.11.1\" style=\"padding:2.5pt 8.7pt;\">BiERU</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.2\" style=\"padding:2.5pt 8.7pt;\">54.2 31.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.3\" style=\"padding:2.5pt 8.7pt;\">80.6 84.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.4\" style=\"padding:2.5pt 8.7pt;\">64.7 60.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.5\" style=\"padding:2.5pt 8.7pt;\">67.9 65.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.6\" style=\"padding:2.5pt 8.7pt;\">62.8 74.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.7\" style=\"padding:2.5pt 8.7pt;\">61.9 61.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.8\" style=\"padding:2.5pt 8.7pt;\">66.1 64.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.15.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.15.12.1\" style=\"padding:2.5pt 8.7pt;\">CoMPM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.2\" style=\"padding:2.5pt 8.7pt;\">59.9 60.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.3\" style=\"padding:2.5pt 8.7pt;\">78.0 82.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.4\" style=\"padding:2.5pt 8.7pt;\">60.4 63.0</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.15.12.5\" style=\"padding:2.5pt 8.7pt;\">70.2 59.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.6\" style=\"padding:2.5pt 8.7pt;\">85.8 78.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.7\" style=\"padding:2.5pt 8.7pt;\">62.9 59.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.8\" style=\"padding:2.5pt 8.7pt;\">67.7 67.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.16.13\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.16.13.1\" style=\"padding:2.5pt 8.7pt;\">EmoBERTa</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.2\" style=\"padding:2.5pt 8.7pt;\">56.9 56.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.3\" style=\"padding:2.5pt 8.7pt;\">79.1 83.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.4\" style=\"padding:2.5pt 8.7pt;\">64.0 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.5\" style=\"padding:2.5pt 8.7pt;\">70.6 69.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.6\" style=\"padding:2.5pt 8.7pt;\">86.0 78.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.7\" style=\"padding:2.5pt 8.7pt;\">63.8 68.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.8\" style=\"padding:2.5pt 8.7pt;\">67.3 67.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.17.14\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.17.14.1\" style=\"padding:2.5pt 8.7pt;\">COGMEN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.2\" style=\"padding:2.5pt 8.7pt;\">57.4 51.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.3\" style=\"padding:2.5pt 8.7pt;\">81.4 81.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.4\" style=\"padding:2.5pt 8.7pt;\">65.4 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.17.14.4.1\">68.6</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.5\" 
style=\"padding:2.5pt 8.7pt;\">69.5 66.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.6\" style=\"padding:2.5pt 8.7pt;\">83.3 75.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.7\" style=\"padding:2.5pt 8.7pt;\">63.8 68.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.8\" style=\"padding:2.5pt 8.7pt;\">68.2 67.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.18.15\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.18.15.1\" style=\"padding:2.5pt 8.7pt;\">CTNet</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.2\" style=\"padding:2.5pt 8.7pt;\">47.9 51.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.3\" style=\"padding:2.5pt 8.7pt;\">78.0 79.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.4\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.18.15.4.1\">69.0</span> 65.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.5\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.18.15.5.1\">72.9</span> 67.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.6\" style=\"padding:2.5pt 8.7pt;\">85.3 78.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.7\" style=\"padding:2.5pt 8.7pt;\">52.2 58.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.8\" style=\"padding:2.5pt 8.7pt;\">68.0 67.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.19.16\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.19.16.1\" style=\"padding:2.5pt 8.7pt;\">LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.2\" style=\"padding:2.5pt 8.7pt;\">54.2 55.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.3\" style=\"padding:2.5pt 8.7pt;\">81.6 79.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.4\" style=\"padding:2.5pt 8.7pt;\">59.1 63.8</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T1.1.19.16.5\" style=\"padding:2.5pt 8.7pt;\">69.4 69.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.6\" style=\"padding:2.5pt 8.7pt;\">76.3 74.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.7\" style=\"padding:2.5pt 8.7pt;\">68.2 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.19.16.7.1\">68.9</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.8\" style=\"padding:2.5pt 8.7pt;\">68.5 68.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.20.17\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.20.17.1\" style=\"padding:2.5pt 8.7pt;\">DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.2\" style=\"padding:2.5pt 8.7pt;\">60.7 58.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.3\" style=\"padding:2.5pt 8.7pt;\">75.9 79.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.4\" style=\"padding:2.5pt 8.7pt;\">66.5 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.5\" style=\"padding:2.5pt 8.7pt;\">71.3 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.20.17.5.1\">72.1</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.6\" style=\"padding:2.5pt 8.7pt;\">71.1 73.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.7\" style=\"padding:2.5pt 8.7pt;\">66.1 67.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.8\" style=\"padding:2.5pt 8.7pt;\">69.7 69.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.21.18\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T1.1.21.18.1\" style=\"padding:2.5pt 8.7pt;\">ELR-GCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.2\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.2.1\">64.7 62.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.3\" style=\"padding:2.5pt 
8.7pt;\">75.7 80.8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.4\" style=\"padding:2.5pt 8.7pt;\">66.2 62.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.5\" style=\"padding:2.5pt 8.7pt;\">70.7 70.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.6\" style=\"padding:2.5pt 8.7pt;\">76.8 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.6.1\">78.6</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.7\" style=\"padding:2.5pt 8.7pt;\">67.9 68.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.8\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.8.1\">70.6 70.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
71
- "perturb_sentence_id": 2,
 
 
 
72
  "output": {
73
- "perturbed_statement": "[paragraph id = 2] IEMOCAP: As shown in Table 1, the multi-modal emotion recognition method proposed in this paper achieved the best emotion recognition effect on the IEMOCAP data set, with an average accuracy of 68.5% and an average F1 value of 68.3%. ELR-GCN proposes an effective modeling method of long-distance context latent dependencies for multi-modal emotion recognition.",
74
- "perturbed_explanation": "1. The original explanation highlights the efficacy of the ELR-GCN method on the IEMOCAP dataset with specific accuracy metrics mentioned. 2. The statement now incorrectly lists the accuracy as 68.5% and the F1 value as 68.3%, while the actual values are 70.6% and 70.9%, as shown in the context of the experiment results."
75
  }
76
  },
77
  {
@@ -141,10 +144,13 @@
141
  "[paragraph id = 25] In addition, to intuitively illustrate that the running time of the ELR-GNN method proposed in this paper is better than other comparative methods, we statistics in Table 3 the running time of other comparative methods of the ELR-GNN method on the IEMOCAP and MELD data sets."
142
  ],
143
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Comparison with other baseline models on the MELD dataset.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.1\" rowspan=\"3\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"8\" id=\"S4.T2.1.1.1.2\" style=\"padding:2.5pt 5.4pt;\">MELD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.1\" style=\"padding:2.5pt 5.4pt;\">Neutral</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.2\" style=\"padding:2.5pt 5.4pt;\">Surprise</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.3\" style=\"padding:2.5pt 5.4pt;\">Fear</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.4\" style=\"padding:2.5pt 5.4pt;\">Sadness</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.5\" style=\"padding:2.5pt 5.4pt;\">Joy</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.6\" style=\"padding:2.5pt 5.4pt;\">Disgust</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.7\" style=\"padding:2.5pt 5.4pt;\">Anger</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.8\" style=\"padding:2.5pt 5.4pt;\">Average(w)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.1\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.2\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.3\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.4\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.5\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.6\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.7\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.8\" style=\"padding:2.5pt 5.4pt;\">Acc. 
F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.1.1\" style=\"padding:2.5pt 5.4pt;\">TextCNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.2\" style=\"padding:2.5pt 5.4pt;\">76.2 74.9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.3\" style=\"padding:2.5pt 5.4pt;\">43.3 45.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.4\" style=\"padding:2.5pt 5.4pt;\">4.6 3.7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.5\" style=\"padding:2.5pt 5.4pt;\">18.2 21.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.6\" style=\"padding:2.5pt 5.4pt;\">46.1 49.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.7\" style=\"padding:2.5pt 5.4pt;\">8.9 8.3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.8\" style=\"padding:2.5pt 5.4pt;\">35.3 34.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.9\" style=\"padding:2.5pt 5.4pt;\">56.3 55.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.5.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.5.2.1\" style=\"padding:2.5pt 5.4pt;\">bc-LSTM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.2\" style=\"padding:2.5pt 5.4pt;\">78.4 73.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.3\" style=\"padding:2.5pt 5.4pt;\">46.8 47.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.4\" style=\"padding:2.5pt 5.4pt;\">3.8 5.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.5\" style=\"padding:2.5pt 5.4pt;\">22.4 25.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.6\" style=\"padding:2.5pt 5.4pt;\">51.6 51.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.7\" style=\"padding:2.5pt 5.4pt;\">4.3 
5.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.8\" style=\"padding:2.5pt 5.4pt;\">36.7 38.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.9\" style=\"padding:2.5pt 5.4pt;\">57.5 55.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.6.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.6.3.1\" style=\"padding:2.5pt 5.4pt;\">DialogueRNN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.2\" style=\"padding:2.5pt 5.4pt;\">72.1 73.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.3\" style=\"padding:2.5pt 5.4pt;\">54.4 49.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.4\" style=\"padding:2.5pt 5.4pt;\">1.6 1.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.5\" style=\"padding:2.5pt 5.4pt;\">23.9 23.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.6\" style=\"padding:2.5pt 5.4pt;\">52.0 50.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.7\" style=\"padding:2.5pt 5.4pt;\">1.5 1.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.8\" style=\"padding:2.5pt 5.4pt;\">41.0 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.9\" style=\"padding:2.5pt 5.4pt;\">56.1 55.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.7.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.7.4.1\" style=\"padding:2.5pt 5.4pt;\">DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.2\" style=\"padding:2.5pt 5.4pt;\">70.3 72.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.3\" style=\"padding:2.5pt 5.4pt;\">42.4 41.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.4\" style=\"padding:2.5pt 5.4pt;\">3.0 2.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.5\" style=\"padding:2.5pt 5.4pt;\">20.9 21.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.6\" style=\"padding:2.5pt 5.4pt;\">44.7 44.2</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.1.7.4.7\" style=\"padding:2.5pt 5.4pt;\">6.5 6.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.8\" style=\"padding:2.5pt 5.4pt;\">39.0 36.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.9\" style=\"padding:2.5pt 5.4pt;\">54.9 54.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.8.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.8.5.1\" style=\"padding:2.5pt 5.4pt;\">RGAT</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.2\" style=\"padding:2.5pt 5.4pt;\">76.0 78.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.3\" style=\"padding:2.5pt 5.4pt;\">40.1 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.4\" style=\"padding:2.5pt 5.4pt;\">3.0 2.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.5\" style=\"padding:2.5pt 5.4pt;\">32.1 30.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.6\" style=\"padding:2.5pt 5.4pt;\">68.1 58.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.7\" style=\"padding:2.5pt 5.4pt;\">4.5 2.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.8\" style=\"padding:2.5pt 5.4pt;\">40.0 44.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.9\" style=\"padding:2.5pt 5.4pt;\">60.3 61.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.9.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.9.6.1\" style=\"padding:2.5pt 5.4pt;\">CoMPM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.2\" style=\"padding:2.5pt 5.4pt;\">78.3 82.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.3\" style=\"padding:2.5pt 5.4pt;\">48.3 49.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.4\" style=\"padding:2.5pt 5.4pt;\">1.7 2.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.5\" style=\"padding:2.5pt 5.4pt;\">35.9 32.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.6\" 
style=\"padding:2.5pt 5.4pt;\">71.4 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.7\" style=\"padding:2.5pt 5.4pt;\">3.1 2.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.8\" style=\"padding:2.5pt 5.4pt;\">42.2 45.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.9\" style=\"padding:2.5pt 5.4pt;\">64.1 65.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.10.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.10.7.1\" style=\"padding:2.5pt 5.4pt;\">EmoBERTa</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.2\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.10.7.2.1\">78.9 82.5</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.3\" style=\"padding:2.5pt 5.4pt;\">50.2 50.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.4\" style=\"padding:2.5pt 5.4pt;\">1.8 1.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.5\" style=\"padding:2.5pt 5.4pt;\">33.3 31.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.6\" style=\"padding:2.5pt 5.4pt;\">72.1 61.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.7\" style=\"padding:2.5pt 5.4pt;\">9.1 2.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.8\" style=\"padding:2.5pt 5.4pt;\">43.3 46.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.9\" style=\"padding:2.5pt 5.4pt;\">64.1 65.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.11.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.11.8.1\" style=\"padding:2.5pt 5.4pt;\">ConGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.2\" style=\"padding:2.5pt 5.4pt;\">46.8 45.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.3\" style=\"padding:2.5pt 5.4pt;\">10.6 8.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.4\" style=\"padding:2.5pt 5.4pt;\">8.7 8.1</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.1.11.8.5\" style=\"padding:2.5pt 5.4pt;\">53.1 54.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.6\" style=\"padding:2.5pt 5.4pt;\">76.7 75.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.7\" style=\"padding:2.5pt 5.4pt;\">28.5 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.11.8.7.1\">26.3</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.8\" style=\"padding:2.5pt 5.4pt;\">50.3 48.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.9\" style=\"padding:2.5pt 5.4pt;\">59.4 58.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.12.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.12.9.1\" style=\"padding:2.5pt 5.4pt;\">A-DMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.2\" style=\"padding:2.5pt 5.4pt;\">76.5 78.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.3\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.12.9.3.1\">56.2 55.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.4\" style=\"padding:2.5pt 5.4pt;\">8.2 8.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.5\" style=\"padding:2.5pt 5.4pt;\">22.1 24.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.6\" style=\"padding:2.5pt 5.4pt;\">59.8 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.7\" style=\"padding:2.5pt 5.4pt;\">1.2 3.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.8\" style=\"padding:2.5pt 5.4pt;\">41.3 40.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.9\" style=\"padding:2.5pt 5.4pt;\">61.5 60.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.13.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.13.10.1\" style=\"padding:2.5pt 5.4pt;\">LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.2\" style=\"padding:2.5pt 5.4pt;\">76.7 80.0</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.3\" style=\"padding:2.5pt 5.4pt;\">53.3 55.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.4\" style=\"padding:2.5pt 5.4pt;\">0.0 0.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.5\" style=\"padding:2.5pt 5.4pt;\">49.6 35.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.6\" style=\"padding:2.5pt 5.4pt;\">68.0 64.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.7\" style=\"padding:2.5pt 5.4pt;\">10.7 2.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.8\" style=\"padding:2.5pt 5.4pt;\">48.0 51.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.9\" style=\"padding:2.5pt 5.4pt;\">65.7 65.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.14.11\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.14.11.1\" style=\"padding:2.5pt 5.4pt;\">DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.2\" style=\"padding:2.5pt 5.4pt;\">76.8 80.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.3\" style=\"padding:2.5pt 5.4pt;\">50.5 51.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.4\" style=\"padding:2.5pt 5.4pt;\">14.8 10.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.5\" style=\"padding:2.5pt 5.4pt;\">56.7 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.6\" style=\"padding:2.5pt 5.4pt;\">69.3 64.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.7\" style=\"padding:2.5pt 5.4pt;\">17.2 10.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.8\" style=\"padding:2.5pt 5.4pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.14.11.8.1\">52.5</span> 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.9\" style=\"padding:2.5pt 5.4pt;\">66.8 66.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.15.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b 
ltx_border_r\" id=\"S4.T2.1.15.12.1\" style=\"padding:2.5pt 5.4pt;\">ELR-GCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.2\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.2.1\">80.2 83.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.3\" style=\"padding:2.5pt 5.4pt;\">36.8 35.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.4\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.4.1\">19.2 13.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.5\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.5.1\">80.2 83.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.6\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.6.1\">76.5 69.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.7\" style=\"padding:2.5pt 5.4pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.7.1\">55.6</span> 13.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.8\" style=\"padding:2.5pt 5.4pt;\">52.1 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.8.1\">57.7</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.9\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.9.1\">68.7 69.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
144
- "perturb_sentence_id": 15,
 
 
 
145
  "output": {
146
- "perturbed_statement": "[paragraph id = 15] MELD: As shown in Table 2, The ELR-GNN method proposed in this article has the best emotion recognition effect on the MELD data set, with an average accuracy of 72.5% and an average F1 value of 69.9%. The emotion recognition effect of DER-GCN is second, with an average accuracy of 69.7% and an average F1 value of 69.4%.",
147
- "perturbed_explanation": "The statement made is inaccurate due to the following reasons: 1. The actual average accuracy of ELR-GNN on the MELD dataset is indicated as 68.7%, not 72.5%, as stated in the context. 2. This alteration modifies the factual data presented, altering the outcome and potentially misleading regarding the method's performance."
148
  }
149
  },
150
  {
@@ -202,10 +208,13 @@
202
  "[paragraph id = 30] The experimental results prove the efficiency and effectiveness of the ELR-GNN method proposed in this paper."
203
  ],
204
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>We tested the running time of the ELR-GNN method proposed in this paper and other comparative methods on the IEMOCAP and MELD data sets. In particular, ELR-GNN sets to and neighbor size to 64.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T3.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.5.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 18.5pt;\">      <span class=\"ltx_text\" id=\"S4.T3.5.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T3.5.1.1.2\" style=\"padding:2.5pt 18.5pt;\">      Running time (s)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T3.5.2.2.1\" style=\"padding:2.5pt 18.5pt;\">      IEMOCAP</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T3.5.2.2.2\" style=\"padding:2.5pt 18.5pt;\">      MELD</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.5.3.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.5.3.1.1\" style=\"padding:2.5pt 18.5pt;\">      DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.5.3.1.2\" style=\"padding:2.5pt 18.5pt;\">      58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.5.3.1.3\" style=\"padding:2.5pt 18.5pt;\">      127</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.4.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.5.4.2.1\" style=\"padding:2.5pt 18.5pt;\">      LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.4.2.2\" 
style=\"padding:2.5pt 18.5pt;\">      87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.4.2.3\" style=\"padding:2.5pt 18.5pt;\">      142</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.5.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.5.5.3.1\" style=\"padding:2.5pt 18.5pt;\">      DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.3.2\" style=\"padding:2.5pt 18.5pt;\">      125</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.3.3\" style=\"padding:2.5pt 18.5pt;\">      189</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.6.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T3.5.6.4.1\" style=\"padding:2.5pt 18.5pt;\">      ELR-GNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T3.5.6.4.2\" style=\"padding:2.5pt 18.5pt;\">      41</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T3.5.6.4.3\" style=\"padding:2.5pt 18.5pt;\">      91</td>\n</tr>\n</tbody>\n</table>\n</figure>",
205
- "perturb_sentence_id": 29,
 
 
 
206
  "output": {
207
- "perturbed_statement": "[paragraph id = 29] The running times of DER-GCN are 87s and 142s respectively.The experimental results prove the efficiency and effectiveness of the ELR-GNN method proposed in this paper.",
208
- "perturbed_explanation": "The original explanation:\n1. Running times of DER-GCN were not mentioned in the provided context.\nThe statement is factually incorrect because:\n2. It directly attributes the running times of 87s and 142s to DER-GCN, which are the running times of LR-GCN. This misattribution misrepresents the data from the source elements."
209
  }
210
  },
211
  {
@@ -248,10 +257,13 @@
248
  "[paragraph id = 9] Experimental results demonstrate the effectiveness of multimodal features."
249
  ],
250
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>The effect of ELR-GNN on IEMOCAP and MELD datasets using unimodal features and multimodal features, respectively. We report average accuracy and F1-score.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T4.1.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text\" id=\"S4.T4.1.1.1.1.1\">Modality</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T4.1.1.1.2\" style=\"padding:2.5pt 12.8pt;\">IEMOCAP</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T4.1.1.1.3\" style=\"padding:2.5pt 12.8pt;\">MELD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.1\" style=\"padding:2.5pt 12.8pt;\">Acc.</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.2\" style=\"padding:2.5pt 12.8pt;\">F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.3\" style=\"padding:2.5pt 12.8pt;\">Acc</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.4\" style=\"padding:2.5pt 12.8pt;\">F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.3.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T4.1.3.1.1\" style=\"padding:2.5pt 12.8pt;\">T</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.2\" style=\"padding:2.5pt 12.8pt;\">64.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S4.T4.1.3.1.3\" style=\"padding:2.5pt 12.8pt;\">63.9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.4\" style=\"padding:2.5pt 12.8pt;\">63.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.5\" style=\"padding:2.5pt 12.8pt;\">62.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.4.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.4.2.1\" style=\"padding:2.5pt 12.8pt;\">A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.2\" style=\"padding:2.5pt 12.8pt;\">61.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.3\" style=\"padding:2.5pt 12.8pt;\">60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.4\" style=\"padding:2.5pt 12.8pt;\">62.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.5\" style=\"padding:2.5pt 12.8pt;\">62.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.5.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.5.3.1\" style=\"padding:2.5pt 12.8pt;\">V</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.2\" style=\"padding:2.5pt 12.8pt;\">59.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.3\" style=\"padding:2.5pt 12.8pt;\">59.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.4\" style=\"padding:2.5pt 12.8pt;\">60.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.5\" style=\"padding:2.5pt 12.8pt;\">61.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.6.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.6.4.1\" style=\"padding:2.5pt 12.8pt;\">T+A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.2\" style=\"padding:2.5pt 12.8pt;\">65.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.3\" style=\"padding:2.5pt 12.8pt;\">64.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.4\" style=\"padding:2.5pt 12.8pt;\">64.1</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T4.1.6.4.5\" style=\"padding:2.5pt 12.8pt;\">63.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.7.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.7.5.1\" style=\"padding:2.5pt 12.8pt;\">T+V</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.2\" style=\"padding:2.5pt 12.8pt;\">64.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.3\" style=\"padding:2.5pt 12.8pt;\">64.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.4\" style=\"padding:2.5pt 12.8pt;\">64.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.5\" style=\"padding:2.5pt 12.8pt;\">62.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.8.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.8.6.1\" style=\"padding:2.5pt 12.8pt;\">V+A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.2\" style=\"padding:2.5pt 12.8pt;\">63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.3\" style=\"padding:2.5pt 12.8pt;\">62.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.4\" style=\"padding:2.5pt 12.8pt;\">61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.5\" style=\"padding:2.5pt 12.8pt;\">61.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.9.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T4.1.9.7.1\" style=\"padding:2.5pt 12.8pt;\">T+A+V</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.2\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.2.1\">70.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.3\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.3.1\">70.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.4\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.4.1\">68.7</span></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.5\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.5.1\">69.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
251
- "perturb_sentence_id": 3,
 
 
 
252
  "output": {
253
- "perturbed_statement": "[paragraph id = 3] The average accuracy on the IEMOCAP and MELD data sets are 64.1% and 63.5%, respectively, and the average F1 value is 63.9% and 62.4%, respectively. The emotion recognition effect of ELR-GNN with audio modal features is second, with average accuracy rates of 61.5% and 63.1% on the IEMOCAP and MELD data sets, and average F1 values of 61.0% and 63.0% respectively.",
254
- "perturbed_explanation": "1. The original statement describes particular evaluation metrics for the ELR-GNN model's performance using the audio modality, specifying average accuracy rates and F1 values. 2. The statement's claim of 63.1% accuracy on the MELD dataset and 63.0% F1 value are inconsistent with the provided data, which indicates 62.7% and 62.0%, respectively."
255
  }
256
  }
257
  ]
 
68
  "[paragraph id = 14] The experimental results prove the superiority of the ELR-GNN method proposed in this paper."
69
  ],
70
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T1\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 1: </span>Comparison with other baseline models on the IEMOCAP dataset.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T1.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T1.1.1.1.1\" rowspan=\"3\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text\" id=\"S4.T1.1.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"7\" id=\"S4.T1.1.1.1.2\" style=\"padding:2.5pt 8.7pt;\">IEMOCAP</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.1\" style=\"padding:2.5pt 8.7pt;\">Happy</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.2\" style=\"padding:2.5pt 8.7pt;\">Sad</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.3\" style=\"padding:2.5pt 8.7pt;\">Neutral</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.4\" style=\"padding:2.5pt 8.7pt;\">Angry</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.5\" style=\"padding:2.5pt 8.7pt;\">Excited</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.6\" style=\"padding:2.5pt 8.7pt;\">Frustrated</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.2.2.7\" style=\"padding:2.5pt 8.7pt;\">Average(w)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.1\" style=\"padding:2.5pt 8.7pt;\">Acc. 
F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.2\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.3\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.4\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.5\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.6\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T1.1.3.3.7\" style=\"padding:2.5pt 8.7pt;\">Acc. F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T1.1.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T1.1.4.1.1\" style=\"padding:2.5pt 8.7pt;\">TextCNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.2\" style=\"padding:2.5pt 8.7pt;\">27.7 29..8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.3\" style=\"padding:2.5pt 8.7pt;\">57.1 53.8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.4\" style=\"padding:2.5pt 8.7pt;\">34.3 40.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.5\" style=\"padding:2.5pt 8.7pt;\">61.1 52.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.6\" style=\"padding:2.5pt 8.7pt;\">46.1 50.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.7\" style=\"padding:2.5pt 8.7pt;\">62.9 55.7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T1.1.4.1.8\" style=\"padding:2.5pt 8.7pt;\">48.9 48.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.5.2\">\n<th class=\"ltx_td ltx_align_left ltx_th 
ltx_th_row ltx_border_r\" id=\"S4.T1.1.5.2.1\" style=\"padding:2.5pt 8.7pt;\">bc-LSTM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.2\" style=\"padding:2.5pt 8.7pt;\">29.1 34.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.3\" style=\"padding:2.5pt 8.7pt;\">57.1 60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.4\" style=\"padding:2.5pt 8.7pt;\">54.1 51.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.5\" style=\"padding:2.5pt 8.7pt;\">57.0 56.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.6\" style=\"padding:2.5pt 8.7pt;\">51.1 57.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.7\" style=\"padding:2.5pt 8.7pt;\">67.1 58.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.5.2.8\" style=\"padding:2.5pt 8.7pt;\">55.2 54.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.6.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.6.3.1\" style=\"padding:2.5pt 8.7pt;\">MFN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.2\" style=\"padding:2.5pt 8.7pt;\">24.0 34.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.3\" style=\"padding:2.5pt 8.7pt;\">65.6 70.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.4\" style=\"padding:2.5pt 8.7pt;\">55.5 52.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.5\" style=\"padding:2.5pt 8.7pt;\">72.3 66.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.6\" style=\"padding:2.5pt 8.7pt;\">64.3 62.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.7\" style=\"padding:2.5pt 8.7pt;\">67.9 62.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.6.3.8\" style=\"padding:2.5pt 8.7pt;\">60.1 59.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.7.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.7.4.1\" style=\"padding:2.5pt 8.7pt;\">CMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.2\" 
style=\"padding:2.5pt 8.7pt;\">25.0 30.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.3\" style=\"padding:2.5pt 8.7pt;\">55.9 62.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.4\" style=\"padding:2.5pt 8.7pt;\">52.8 52.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.5\" style=\"padding:2.5pt 8.7pt;\">61.7 59.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.6\" style=\"padding:2.5pt 8.7pt;\">55.5 60.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.7\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.7.4.7.1\">71.1</span> 60.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.7.4.8\" style=\"padding:2.5pt 8.7pt;\">56.5 56.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.8.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.8.5.1\" style=\"padding:2.5pt 8.7pt;\">LFM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.2\" style=\"padding:2.5pt 8.7pt;\">25.6 33.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.3\" style=\"padding:2.5pt 8.7pt;\">75.1 78.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.4\" style=\"padding:2.5pt 8.7pt;\">58.5 59.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.5\" style=\"padding:2.5pt 8.7pt;\">64.7 65.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.6\" style=\"padding:2.5pt 8.7pt;\">80.2 71.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.7\" style=\"padding:2.5pt 8.7pt;\">61.1 58.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.8.5.8\" style=\"padding:2.5pt 8.7pt;\">63.4 62.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.9.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.9.6.1\" style=\"padding:2.5pt 8.7pt;\">ICON</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.2\" style=\"padding:2.5pt 8.7pt;\">22.2 29.9</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.9.6.3\" style=\"padding:2.5pt 8.7pt;\">58.8 64.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.4\" style=\"padding:2.5pt 8.7pt;\">62.8 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.5\" style=\"padding:2.5pt 8.7pt;\">64.7 63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.6\" style=\"padding:2.5pt 8.7pt;\">58.9 63.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.7\" style=\"padding:2.5pt 8.7pt;\">67.2 60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.9.6.8\" style=\"padding:2.5pt 8.7pt;\">59.1 58.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.10.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.10.7.1\" style=\"padding:2.5pt 8.7pt;\">A-DMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.2\" style=\"padding:2.5pt 8.7pt;\">43.1 50.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.3\" style=\"padding:2.5pt 8.7pt;\">69.4 76.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.4\" style=\"padding:2.5pt 8.7pt;\">63.0 62.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.5\" style=\"padding:2.5pt 8.7pt;\">63.5 56.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.6\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.10.7.6.1\">88.3</span> 77.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.7\" style=\"padding:2.5pt 8.7pt;\">53.3 55.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.10.7.8\" style=\"padding:2.5pt 8.7pt;\">64.6 64.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.11.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.11.8.1\" style=\"padding:2.5pt 8.7pt;\">DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.2\" style=\"padding:2.5pt 8.7pt;\">40.6 42.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.3\" style=\"padding:2.5pt 8.7pt;\"><span 
class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.11.8.3.1\">89.1 84.5</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.4\" style=\"padding:2.5pt 8.7pt;\">62.0 63.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.5\" style=\"padding:2.5pt 8.7pt;\">67.5 64.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.6\" style=\"padding:2.5pt 8.7pt;\">65.5 63.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.7\" style=\"padding:2.5pt 8.7pt;\">64.1 66.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.11.8.8\" style=\"padding:2.5pt 8.7pt;\">65.2 64.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.12.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.12.9.1\" style=\"padding:2.5pt 8.7pt;\">RGAT</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.2\" style=\"padding:2.5pt 8.7pt;\">60.1 51.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.3\" style=\"padding:2.5pt 8.7pt;\">78.8 77.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.4\" style=\"padding:2.5pt 8.7pt;\">60.1 65.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.5\" style=\"padding:2.5pt 8.7pt;\">70.7 63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.6\" style=\"padding:2.5pt 8.7pt;\">78.0 68.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.7\" style=\"padding:2.5pt 8.7pt;\">64.3 61.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.12.9.8\" style=\"padding:2.5pt 8.7pt;\">65.0 65.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.13.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.13.10.1\" style=\"padding:2.5pt 8.7pt;\">AGHMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.2\" style=\"padding:2.5pt 8.7pt;\">48.3 52.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.3\" style=\"padding:2.5pt 8.7pt;\">68.3 73.3</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.13.10.4\" style=\"padding:2.5pt 8.7pt;\">61.6 58.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.5\" style=\"padding:2.5pt 8.7pt;\">57.5 61.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.6\" style=\"padding:2.5pt 8.7pt;\">68.1 69.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.7\" style=\"padding:2.5pt 8.7pt;\">67.1 62.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.13.10.8\" style=\"padding:2.5pt 8.7pt;\">63.5 63.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.14.11\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.14.11.1\" style=\"padding:2.5pt 8.7pt;\">BiERU</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.2\" style=\"padding:2.5pt 8.7pt;\">54.2 31.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.3\" style=\"padding:2.5pt 8.7pt;\">80.6 84.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.4\" style=\"padding:2.5pt 8.7pt;\">64.7 60.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.5\" style=\"padding:2.5pt 8.7pt;\">67.9 65.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.6\" style=\"padding:2.5pt 8.7pt;\">62.8 74.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.7\" style=\"padding:2.5pt 8.7pt;\">61.9 61.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.14.11.8\" style=\"padding:2.5pt 8.7pt;\">66.1 64.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.15.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.15.12.1\" style=\"padding:2.5pt 8.7pt;\">CoMPM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.2\" style=\"padding:2.5pt 8.7pt;\">59.9 60.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.3\" style=\"padding:2.5pt 8.7pt;\">78.0 82.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.4\" style=\"padding:2.5pt 8.7pt;\">60.4 63.0</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T1.1.15.12.5\" style=\"padding:2.5pt 8.7pt;\">70.2 59.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.6\" style=\"padding:2.5pt 8.7pt;\">85.8 78.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.7\" style=\"padding:2.5pt 8.7pt;\">62.9 59.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.15.12.8\" style=\"padding:2.5pt 8.7pt;\">67.7 67.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.16.13\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.16.13.1\" style=\"padding:2.5pt 8.7pt;\">EmoBERTa</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.2\" style=\"padding:2.5pt 8.7pt;\">56.9 56.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.3\" style=\"padding:2.5pt 8.7pt;\">79.1 83.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.4\" style=\"padding:2.5pt 8.7pt;\">64.0 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.5\" style=\"padding:2.5pt 8.7pt;\">70.6 69.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.6\" style=\"padding:2.5pt 8.7pt;\">86.0 78.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.7\" style=\"padding:2.5pt 8.7pt;\">63.8 68.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.16.13.8\" style=\"padding:2.5pt 8.7pt;\">67.3 67.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.17.14\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.17.14.1\" style=\"padding:2.5pt 8.7pt;\">COGMEN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.2\" style=\"padding:2.5pt 8.7pt;\">57.4 51.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.3\" style=\"padding:2.5pt 8.7pt;\">81.4 81.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.4\" style=\"padding:2.5pt 8.7pt;\">65.4 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.17.14.4.1\">68.6</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.5\" 
style=\"padding:2.5pt 8.7pt;\">69.5 66.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.6\" style=\"padding:2.5pt 8.7pt;\">83.3 75.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.7\" style=\"padding:2.5pt 8.7pt;\">63.8 68.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.17.14.8\" style=\"padding:2.5pt 8.7pt;\">68.2 67.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.18.15\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.18.15.1\" style=\"padding:2.5pt 8.7pt;\">CTNet</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.2\" style=\"padding:2.5pt 8.7pt;\">47.9 51.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.3\" style=\"padding:2.5pt 8.7pt;\">78.0 79.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.4\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.18.15.4.1\">69.0</span> 65.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.5\" style=\"padding:2.5pt 8.7pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.18.15.5.1\">72.9</span> 67.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.6\" style=\"padding:2.5pt 8.7pt;\">85.3 78.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.7\" style=\"padding:2.5pt 8.7pt;\">52.2 58.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.18.15.8\" style=\"padding:2.5pt 8.7pt;\">68.0 67.5</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.19.16\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.19.16.1\" style=\"padding:2.5pt 8.7pt;\">LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.2\" style=\"padding:2.5pt 8.7pt;\">54.2 55.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.3\" style=\"padding:2.5pt 8.7pt;\">81.6 79.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.4\" style=\"padding:2.5pt 8.7pt;\">59.1 63.8</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T1.1.19.16.5\" style=\"padding:2.5pt 8.7pt;\">69.4 69.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.6\" style=\"padding:2.5pt 8.7pt;\">76.3 74.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.7\" style=\"padding:2.5pt 8.7pt;\">68.2 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.19.16.7.1\">68.9</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.19.16.8\" style=\"padding:2.5pt 8.7pt;\">68.5 68.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.20.17\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T1.1.20.17.1\" style=\"padding:2.5pt 8.7pt;\">DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.2\" style=\"padding:2.5pt 8.7pt;\">60.7 58.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.3\" style=\"padding:2.5pt 8.7pt;\">75.9 79.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.4\" style=\"padding:2.5pt 8.7pt;\">66.5 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.5\" style=\"padding:2.5pt 8.7pt;\">71.3 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.20.17.5.1\">72.1</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.6\" style=\"padding:2.5pt 8.7pt;\">71.1 73.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.7\" style=\"padding:2.5pt 8.7pt;\">66.1 67.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T1.1.20.17.8\" style=\"padding:2.5pt 8.7pt;\">69.7 69.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T1.1.21.18\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T1.1.21.18.1\" style=\"padding:2.5pt 8.7pt;\">ELR-GCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.2\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.2.1\">64.7 62.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.3\" style=\"padding:2.5pt 
8.7pt;\">75.7 80.8</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.4\" style=\"padding:2.5pt 8.7pt;\">66.2 62.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.5\" style=\"padding:2.5pt 8.7pt;\">70.7 70.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.6\" style=\"padding:2.5pt 8.7pt;\">76.8 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.6.1\">78.6</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.7\" style=\"padding:2.5pt 8.7pt;\">67.9 68.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T1.1.21.18.8\" style=\"padding:2.5pt 8.7pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T1.1.21.18.8.1\">70.6 70.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
71
+ "perturb_sentence_id": [
72
+ 2,
73
+ 3
74
+ ],
75
  "output": {
76
+ "perturbed_statement": "[paragraph id = 2] IEMOCAP: As shown in Table 1, the multi-modal emotion recognition method proposed in this paper achieved the best emotion recognition effect on the IEMOCAP data set, with an average accuracy of 70.6% and an average F1 value of 70.9%. However, DER-GCN proposes an effective modeling method of long-distance context latent dependencies for multi-modal emotion recognition.",
77
+ "perturbed_explanation": "1. ELR-GCN proposes an effective modeling method of long-distance context latent dependencies for multi-modal emotion recognition. 2. The statement incorrectly attributes the method of modeling long-distance context latent dependencies to DER-GCN, whereas it is actually ELR-GCN that proposes this effective modeling method."
78
  }
79
  },
80
  {
 
144
  "[paragraph id = 25] In addition, to intuitively illustrate that the running time of the ELR-GNN method proposed in this paper is better than other comparative methods, we statistics in Table 3 the running time of other comparative methods of the ELR-GNN method on the IEMOCAP and MELD data sets."
145
  ],
146
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T2\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 2: </span>Comparison with other baseline models on the MELD dataset.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T2.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.1.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.1.1.1\" rowspan=\"3\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text\" id=\"S4.T2.1.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"8\" id=\"S4.T2.1.1.1.2\" style=\"padding:2.5pt 5.4pt;\">MELD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.1\" style=\"padding:2.5pt 5.4pt;\">Neutral</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.2\" style=\"padding:2.5pt 5.4pt;\">Surprise</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.3\" style=\"padding:2.5pt 5.4pt;\">Fear</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.4\" style=\"padding:2.5pt 5.4pt;\">Sadness</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.5\" style=\"padding:2.5pt 5.4pt;\">Joy</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.6\" style=\"padding:2.5pt 5.4pt;\">Disgust</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.7\" style=\"padding:2.5pt 5.4pt;\">Anger</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.2.2.8\" style=\"padding:2.5pt 5.4pt;\">Average(w)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.3.3\">\n<th class=\"ltx_td ltx_align_center ltx_th 
ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.1\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.2\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.3\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.4\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.5\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.6\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.7\" style=\"padding:2.5pt 5.4pt;\">Acc. F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T2.1.3.3.8\" style=\"padding:2.5pt 5.4pt;\">Acc. 
F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T2.1.4.1\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T2.1.4.1.1\" style=\"padding:2.5pt 5.4pt;\">TextCNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.2\" style=\"padding:2.5pt 5.4pt;\">76.2 74.9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.3\" style=\"padding:2.5pt 5.4pt;\">43.3 45.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.4\" style=\"padding:2.5pt 5.4pt;\">4.6 3.7</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.5\" style=\"padding:2.5pt 5.4pt;\">18.2 21.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.6\" style=\"padding:2.5pt 5.4pt;\">46.1 49.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.7\" style=\"padding:2.5pt 5.4pt;\">8.9 8.3</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.8\" style=\"padding:2.5pt 5.4pt;\">35.3 34.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T2.1.4.1.9\" style=\"padding:2.5pt 5.4pt;\">56.3 55.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.5.2\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.5.2.1\" style=\"padding:2.5pt 5.4pt;\">bc-LSTM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.2\" style=\"padding:2.5pt 5.4pt;\">78.4 73.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.3\" style=\"padding:2.5pt 5.4pt;\">46.8 47.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.4\" style=\"padding:2.5pt 5.4pt;\">3.8 5.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.5\" style=\"padding:2.5pt 5.4pt;\">22.4 25.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.6\" style=\"padding:2.5pt 5.4pt;\">51.6 51.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.7\" style=\"padding:2.5pt 5.4pt;\">4.3 
5.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.8\" style=\"padding:2.5pt 5.4pt;\">36.7 38.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.5.2.9\" style=\"padding:2.5pt 5.4pt;\">57.5 55.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.6.3\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.6.3.1\" style=\"padding:2.5pt 5.4pt;\">DialogueRNN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.2\" style=\"padding:2.5pt 5.4pt;\">72.1 73.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.3\" style=\"padding:2.5pt 5.4pt;\">54.4 49.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.4\" style=\"padding:2.5pt 5.4pt;\">1.6 1.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.5\" style=\"padding:2.5pt 5.4pt;\">23.9 23.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.6\" style=\"padding:2.5pt 5.4pt;\">52.0 50.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.7\" style=\"padding:2.5pt 5.4pt;\">1.5 1.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.8\" style=\"padding:2.5pt 5.4pt;\">41.0 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.6.3.9\" style=\"padding:2.5pt 5.4pt;\">56.1 55.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.7.4\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.7.4.1\" style=\"padding:2.5pt 5.4pt;\">DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.2\" style=\"padding:2.5pt 5.4pt;\">70.3 72.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.3\" style=\"padding:2.5pt 5.4pt;\">42.4 41.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.4\" style=\"padding:2.5pt 5.4pt;\">3.0 2.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.5\" style=\"padding:2.5pt 5.4pt;\">20.9 21.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.6\" style=\"padding:2.5pt 5.4pt;\">44.7 44.2</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.1.7.4.7\" style=\"padding:2.5pt 5.4pt;\">6.5 6.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.8\" style=\"padding:2.5pt 5.4pt;\">39.0 36.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.7.4.9\" style=\"padding:2.5pt 5.4pt;\">54.9 54.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.8.5\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.8.5.1\" style=\"padding:2.5pt 5.4pt;\">RGAT</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.2\" style=\"padding:2.5pt 5.4pt;\">76.0 78.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.3\" style=\"padding:2.5pt 5.4pt;\">40.1 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.4\" style=\"padding:2.5pt 5.4pt;\">3.0 2.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.5\" style=\"padding:2.5pt 5.4pt;\">32.1 30.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.6\" style=\"padding:2.5pt 5.4pt;\">68.1 58.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.7\" style=\"padding:2.5pt 5.4pt;\">4.5 2.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.8\" style=\"padding:2.5pt 5.4pt;\">40.0 44.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.8.5.9\" style=\"padding:2.5pt 5.4pt;\">60.3 61.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.9.6\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.9.6.1\" style=\"padding:2.5pt 5.4pt;\">CoMPM</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.2\" style=\"padding:2.5pt 5.4pt;\">78.3 82.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.3\" style=\"padding:2.5pt 5.4pt;\">48.3 49.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.4\" style=\"padding:2.5pt 5.4pt;\">1.7 2.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.5\" style=\"padding:2.5pt 5.4pt;\">35.9 32.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.6\" 
style=\"padding:2.5pt 5.4pt;\">71.4 61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.7\" style=\"padding:2.5pt 5.4pt;\">3.1 2.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.8\" style=\"padding:2.5pt 5.4pt;\">42.2 45.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.9.6.9\" style=\"padding:2.5pt 5.4pt;\">64.1 65.3</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.10.7\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.10.7.1\" style=\"padding:2.5pt 5.4pt;\">EmoBERTa</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.2\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.10.7.2.1\">78.9 82.5</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.3\" style=\"padding:2.5pt 5.4pt;\">50.2 50.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.4\" style=\"padding:2.5pt 5.4pt;\">1.8 1.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.5\" style=\"padding:2.5pt 5.4pt;\">33.3 31.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.6\" style=\"padding:2.5pt 5.4pt;\">72.1 61.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.7\" style=\"padding:2.5pt 5.4pt;\">9.1 2.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.8\" style=\"padding:2.5pt 5.4pt;\">43.3 46.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.10.7.9\" style=\"padding:2.5pt 5.4pt;\">64.1 65.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.11.8\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.11.8.1\" style=\"padding:2.5pt 5.4pt;\">ConGCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.2\" style=\"padding:2.5pt 5.4pt;\">46.8 45.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.3\" style=\"padding:2.5pt 5.4pt;\">10.6 8.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.4\" style=\"padding:2.5pt 5.4pt;\">8.7 8.1</td>\n<td class=\"ltx_td 
ltx_align_center\" id=\"S4.T2.1.11.8.5\" style=\"padding:2.5pt 5.4pt;\">53.1 54.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.6\" style=\"padding:2.5pt 5.4pt;\">76.7 75.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.7\" style=\"padding:2.5pt 5.4pt;\">28.5 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.11.8.7.1\">26.3</span>\n</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.8\" style=\"padding:2.5pt 5.4pt;\">50.3 48.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.11.8.9\" style=\"padding:2.5pt 5.4pt;\">59.4 58.7</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.12.9\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.12.9.1\" style=\"padding:2.5pt 5.4pt;\">A-DMN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.2\" style=\"padding:2.5pt 5.4pt;\">76.5 78.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.3\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.12.9.3.1\">56.2 55.3</span></td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.4\" style=\"padding:2.5pt 5.4pt;\">8.2 8.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.5\" style=\"padding:2.5pt 5.4pt;\">22.1 24.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.6\" style=\"padding:2.5pt 5.4pt;\">59.8 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.7\" style=\"padding:2.5pt 5.4pt;\">1.2 3.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.8\" style=\"padding:2.5pt 5.4pt;\">41.3 40.9</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.12.9.9\" style=\"padding:2.5pt 5.4pt;\">61.5 60.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.13.10\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.13.10.1\" style=\"padding:2.5pt 5.4pt;\">LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.2\" style=\"padding:2.5pt 5.4pt;\">76.7 80.0</td>\n<td 
class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.3\" style=\"padding:2.5pt 5.4pt;\">53.3 55.2</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.4\" style=\"padding:2.5pt 5.4pt;\">0.0 0.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.5\" style=\"padding:2.5pt 5.4pt;\">49.6 35.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.6\" style=\"padding:2.5pt 5.4pt;\">68.0 64.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.7\" style=\"padding:2.5pt 5.4pt;\">10.7 2.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.8\" style=\"padding:2.5pt 5.4pt;\">48.0 51.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.13.10.9\" style=\"padding:2.5pt 5.4pt;\">65.7 65.6</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.14.11\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_r\" id=\"S4.T2.1.14.11.1\" style=\"padding:2.5pt 5.4pt;\">DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.2\" style=\"padding:2.5pt 5.4pt;\">76.8 80.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.3\" style=\"padding:2.5pt 5.4pt;\">50.5 51.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.4\" style=\"padding:2.5pt 5.4pt;\">14.8 10.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.5\" style=\"padding:2.5pt 5.4pt;\">56.7 41.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.6\" style=\"padding:2.5pt 5.4pt;\">69.3 64.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.7\" style=\"padding:2.5pt 5.4pt;\">17.2 10.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.8\" style=\"padding:2.5pt 5.4pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.14.11.8.1\">52.5</span> 57.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T2.1.14.11.9\" style=\"padding:2.5pt 5.4pt;\">66.8 66.1</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T2.1.15.12\">\n<th class=\"ltx_td ltx_align_left ltx_th ltx_th_row ltx_border_b 
ltx_border_r\" id=\"S4.T2.1.15.12.1\" style=\"padding:2.5pt 5.4pt;\">ELR-GCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.2\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.2.1\">80.2 83.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.3\" style=\"padding:2.5pt 5.4pt;\">36.8 35.4</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.4\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.4.1\">19.2 13.1</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.5\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.5.1\">80.2 83.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.6\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.6.1\">76.5 69.7</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.7\" style=\"padding:2.5pt 5.4pt;\">\n<span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.7.1\">55.6</span> 13.0</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.8\" style=\"padding:2.5pt 5.4pt;\">52.1 <span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.8.1\">57.7</span>\n</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T2.1.15.12.9\" style=\"padding:2.5pt 5.4pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T2.1.15.12.9.1\">68.7 69.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
147
+ "perturb_sentence_id": [
148
+ 15,
149
+ 16
150
+ ],
151
  "output": {
152
+ "perturbed_statement": "[paragraph id = 15] MELD: As shown in Table 2, The ELR-GNN method proposed in this article has the best emotion recognition effect on the MELD data set, with an average accuracy of 68.7% and an average F1 value of 69.9%. The emotion recognition effect of DER-GCN is second, with an average accuracy of 68.5% and an average F1 value of 68.3%.",
153
+ "perturbed_explanation": "1. The ELR-GNN method is stated to have the best emotion recognition effect on the MELD data set in terms of its performance metrics. 2. The statement claims that DER-GCN has an average accuracy of 68.5% and an average F1 value of 68.3%, which are actually the metrics mentioned in the context for LR-GCN, not DER-GCN. DER-GCN's performance metrics are not provided in the context."
154
  }
155
  },
156
  {
 
208
  "[paragraph id = 30] The experimental results prove the efficiency and effectiveness of the ELR-GNN method proposed in this paper."
209
  ],
210
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T3\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 3: </span>We tested the running time of the ELR-GNN method proposed in this paper and other comparative methods on the IEMOCAP and MELD data sets. In particular, ELR-GNN sets to and neighbor size to 64.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T3.5\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T3.5.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.5.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 18.5pt;\">      <span class=\"ltx_text\" id=\"S4.T3.5.1.1.1.1\">Methods</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T3.5.1.1.2\" style=\"padding:2.5pt 18.5pt;\">      Running time (s)</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T3.5.2.2.1\" style=\"padding:2.5pt 18.5pt;\">      IEMOCAP</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T3.5.2.2.2\" style=\"padding:2.5pt 18.5pt;\">      MELD</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T3.5.3.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T3.5.3.1.1\" style=\"padding:2.5pt 18.5pt;\">      DialogueGCN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.5.3.1.2\" style=\"padding:2.5pt 18.5pt;\">      58</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T3.5.3.1.3\" style=\"padding:2.5pt 18.5pt;\">      127</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.4.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.5.4.2.1\" style=\"padding:2.5pt 18.5pt;\">      LR-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.4.2.2\" 
style=\"padding:2.5pt 18.5pt;\">      87</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.4.2.3\" style=\"padding:2.5pt 18.5pt;\">      142</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.5.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T3.5.5.3.1\" style=\"padding:2.5pt 18.5pt;\">      DER-GCN</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.3.2\" style=\"padding:2.5pt 18.5pt;\">      125</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T3.5.5.3.3\" style=\"padding:2.5pt 18.5pt;\">      189</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T3.5.6.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T3.5.6.4.1\" style=\"padding:2.5pt 18.5pt;\">      ELR-GNN</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T3.5.6.4.2\" style=\"padding:2.5pt 18.5pt;\">      41</td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T3.5.6.4.3\" style=\"padding:2.5pt 18.5pt;\">      91</td>\n</tr>\n</tbody>\n</table>\n</figure>",
211
+ "perturb_sentence_id": [
212
+ 29,
213
+ 30
214
+ ],
215
  "output": {
216
+ "perturbed_statement": "[paragraph id = 29] The running times of DER-GCN are 100s and 150s respectively. The experimental results prove the efficiency and effectiveness of the ELR-GNN method proposed in this paper.",
217
+ "perturbed_explanation": "1. The original explanation asserted that the running times of DER-GCN are 125s and 189s respectively, which aligns with the factual data that could have been provided in the surrounding context about DER-GCN. 2. The statement is incorrect because it claims the running times of DER-GCN to be 100s and 150s respectively, while these figures are not given in the context. Such times do not match with any running times documented for DER-GCN or any other method detailed in the context, which challenges both accuracy and verification of this claim."
218
  }
219
  },
220
  {
 
257
  "[paragraph id = 9] Experimental results demonstrate the effectiveness of multimodal features."
258
  ],
259
  "table_html": "<figure class=\"ltx_table\" id=\"S4.T4\">\n<figcaption class=\"ltx_caption\"><span class=\"ltx_tag ltx_tag_table\">Table 4: </span>The effect of ELR-GNN on IEMOCAP and MELD datasets using unimodal features and multimodal features, respectively. We report average accuracy and F1-score.</figcaption>\n<table class=\"ltx_tabular ltx_guessed_headers ltx_align_middle\" id=\"S4.T4.1\">\n<thead class=\"ltx_thead\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.1.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T4.1.1.1.1\" rowspan=\"2\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text\" id=\"S4.T4.1.1.1.1.1\">Modality</span></th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T4.1.1.1.2\" style=\"padding:2.5pt 12.8pt;\">IEMOCAP</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" colspan=\"2\" id=\"S4.T4.1.1.1.3\" style=\"padding:2.5pt 12.8pt;\">MELD</th>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.2.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.1\" style=\"padding:2.5pt 12.8pt;\">Acc.</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.2\" style=\"padding:2.5pt 12.8pt;\">F1</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.3\" style=\"padding:2.5pt 12.8pt;\">Acc</th>\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_column ltx_border_t\" id=\"S4.T4.1.2.2.4\" style=\"padding:2.5pt 12.8pt;\">F1</th>\n</tr>\n</thead>\n<tbody class=\"ltx_tbody\">\n<tr class=\"ltx_tr\" id=\"S4.T4.1.3.1\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r ltx_border_t\" id=\"S4.T4.1.3.1.1\" style=\"padding:2.5pt 12.8pt;\">T</th>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.2\" style=\"padding:2.5pt 12.8pt;\">64.1</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" 
id=\"S4.T4.1.3.1.3\" style=\"padding:2.5pt 12.8pt;\">63.9</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.4\" style=\"padding:2.5pt 12.8pt;\">63.5</td>\n<td class=\"ltx_td ltx_align_center ltx_border_t\" id=\"S4.T4.1.3.1.5\" style=\"padding:2.5pt 12.8pt;\">62.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.4.2\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.4.2.1\" style=\"padding:2.5pt 12.8pt;\">A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.2\" style=\"padding:2.5pt 12.8pt;\">61.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.3\" style=\"padding:2.5pt 12.8pt;\">60.8</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.4\" style=\"padding:2.5pt 12.8pt;\">62.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.4.2.5\" style=\"padding:2.5pt 12.8pt;\">62.0</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.5.3\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.5.3.1\" style=\"padding:2.5pt 12.8pt;\">V</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.2\" style=\"padding:2.5pt 12.8pt;\">59.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.3\" style=\"padding:2.5pt 12.8pt;\">59.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.4\" style=\"padding:2.5pt 12.8pt;\">60.1</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.5.3.5\" style=\"padding:2.5pt 12.8pt;\">61.4</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.6.4\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.6.4.1\" style=\"padding:2.5pt 12.8pt;\">T+A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.2\" style=\"padding:2.5pt 12.8pt;\">65.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.3\" style=\"padding:2.5pt 12.8pt;\">64.4</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.6.4.4\" style=\"padding:2.5pt 12.8pt;\">64.1</td>\n<td class=\"ltx_td ltx_align_center\" 
id=\"S4.T4.1.6.4.5\" style=\"padding:2.5pt 12.8pt;\">63.2</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.7.5\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.7.5.1\" style=\"padding:2.5pt 12.8pt;\">T+V</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.2\" style=\"padding:2.5pt 12.8pt;\">64.3</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.3\" style=\"padding:2.5pt 12.8pt;\">64.6</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.4\" style=\"padding:2.5pt 12.8pt;\">64.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.7.5.5\" style=\"padding:2.5pt 12.8pt;\">62.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.8.6\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_r\" id=\"S4.T4.1.8.6.1\" style=\"padding:2.5pt 12.8pt;\">V+A</th>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.2\" style=\"padding:2.5pt 12.8pt;\">63.0</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.3\" style=\"padding:2.5pt 12.8pt;\">62.7</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.4\" style=\"padding:2.5pt 12.8pt;\">61.5</td>\n<td class=\"ltx_td ltx_align_center\" id=\"S4.T4.1.8.6.5\" style=\"padding:2.5pt 12.8pt;\">61.9</td>\n</tr>\n<tr class=\"ltx_tr\" id=\"S4.T4.1.9.7\">\n<th class=\"ltx_td ltx_align_center ltx_th ltx_th_row ltx_border_b ltx_border_r\" id=\"S4.T4.1.9.7.1\" style=\"padding:2.5pt 12.8pt;\">T+A+V</th>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.2\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.2.1\">70.6</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.3\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.3.1\">70.9</span></td>\n<td class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.4\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.4.1\">68.7</span></td>\n<td 
class=\"ltx_td ltx_align_center ltx_border_b\" id=\"S4.T4.1.9.7.5\" style=\"padding:2.5pt 12.8pt;\"><span class=\"ltx_text ltx_font_bold\" id=\"S4.T4.1.9.7.5.1\">69.9</span></td>\n</tr>\n</tbody>\n</table>\n</figure>",
260
+ "perturb_sentence_id": [
261
+ 3,
262
+ 4
263
+ ],
264
  "output": {
265
+ "perturbed_statement": "[paragraph id = 3] The average accuracy on the IEMOCAP and MELD data sets are 64.1% and 63.5%, respectively, and the average F1 value is 63.9% and 62.4%, respectively. The emotion recognition effect of ELR-GNN with audio modal features is second, with average accuracy rates of 61.1% and 62.7% on the IEMOCAP and MELD data sets, and average F1 values of 60.8% and 60.0% respectively.",
266
+ "perturbed_explanation": "1. The original explanation provides the average accuracy and F1 values for ELR-GNN with audio modal features in the IEMOCAP and MELD datasets. It states that this configuration ranks second in terms of emotion recognition effect. 2. The statement alters the average F1 value for the MELD dataset from 62.0% to 60.0%. This change introduces a factual inaccuracy since the context specifies the original F1 value as 62.0%, not 60.0% as claimed."
267
  }
268
  }
269
  ]