wentaochen committed on
Commit
949310d
·
1 Parent(s): 5c7b47a
.DS_Store ADDED
Binary file (8.2 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.so filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ RUN apt-get update && \
5
+ apt-get install -y --no-install-recommends wget ca-certificates && \
6
+ wget -q https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_3/NsightSystems-linux-cli-public-2025.3.1.90-3582212.deb -O /tmp/nsys.deb && \
7
+ apt-get install -y /tmp/nsys.deb && \
8
+ rm -f /tmp/nsys.deb && \
9
+ apt-get install -y python3.11 python3.11-distutils python3.11-dev python3-pip && \
10
+ update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
11
+ python -m pip install --upgrade pip && \
12
+ rm -rf /var/lib/apt/lists/*
13
+
14
+ RUN apt-get update && apt-get install -y gcc g++ build-essential && \
15
+ python -m pip install --no-cache-dir nuitka
16
+
17
+ # 可选:创建非 root 用户
18
+ RUN useradd -m -u 1000 user
19
+ USER user
20
+ ENV PATH="/home/user/.local/bin:$PATH"
21
+
22
+ WORKDIR /app
23
+
24
+ # 安装 Python 依赖
25
+ COPY --chown=user requirements.txt .
26
+ RUN pip install --no-cache-dir -r requirements.txt
27
+
28
+ # 复制源码
29
+ COPY --chown=user app.py .
30
+ COPY --chown=user tool /app/tool
31
+ RUN chown -R user:user /app
32
+
33
+ # Gradio 默认监听 7860
34
+ EXPOSE 7860
35
+
36
+ # 直接启动 Python 脚本
37
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, tempfile, time
2
+ import gradio as gr
3
+ from tool.test import run_autotune_pipeline, DATA_DIR
4
+
5
+ # ---------- Core callback ----------
6
+
7
def _payload_to_text(payload):
    """Return *payload* as text, decoding bytes-like content as UTF-8."""
    return payload if isinstance(payload, str) else payload.decode("utf-8")


def get_test_text(test_file, test_data_input):
    """Return the test-data text, preferring an uploaded file over the textbox.

    Args:
        test_file: The uploaded file, or None. Supported shapes are a
            filesystem path (``str`` / ``os.PathLike``), an object with a
            ``read()`` method, an object with a ``data`` attribute, or an
            object whose ``name`` attribute is an existing path.
        test_data_input: Text typed into the textbox; used when no usable
            upload is available. May be None.

    Returns:
        The file content as a string, or ``test_data_input`` (``""`` when it
        is None or empty) if the upload is missing or cannot be resolved.
    """
    if test_file is not None:
        if isinstance(test_file, (str, os.PathLike)):
            # A bare path previously matched none of the attribute checks, so
            # the upload was silently ignored in favour of the textbox.
            if os.path.isfile(test_file):
                with open(test_file, "r", encoding="utf-8") as f:
                    return f.read()
        elif hasattr(test_file, "read"):
            # read() yields str for text-mode handles and bytes for binary ones.
            return _payload_to_text(test_file.read())
        elif hasattr(test_file, "data"):
            return _payload_to_text(test_file.data)
        elif hasattr(test_file, "name") and os.path.exists(test_file.name):
            with open(test_file.name, "r", encoding="utf-8") as f:
                return f.read()
    # Fall back to the textbox content.
    return test_data_input or ""
18
+
19
def generate_kernel(text_input, test_data_input, test_file, n_iters, progress=gr.Progress()):
    """Run the autotune pipeline and return the most recent kernel code.

    Args:
        text_input: String from the input textbox (natural-language
            description or baseline CUDA code).
        test_data_input: Test data typed into the textbox.
        test_file: Uploaded test-data file object or path (or None); takes
            precedence over ``test_data_input`` when it can be read.
        n_iters: Maximum number of pipeline iterations; converted to ``int``.
        progress: Gradio progress tracker, updated once per reported iteration.

    Returns:
        A single string: a warning message when the input code or the test
        data is missing, otherwise the last non-empty ``code`` value yielded
        by ``run_autotune_pipeline`` (``""`` if none was produced).
    """
    # Convert once so the progress total and the pipeline limit agree.
    total_iters = int(n_iters)
    progress((0, total_iters), desc="Initializing...")

    # Treat a missing textbox value like an empty one instead of crashing.
    if not (text_input or "").strip():
        return "⚠️ Please paste a description or baseline CUDA code."

    test_text = get_test_text(test_file, test_data_input)
    if not test_text.strip():
        return "⚠️ Test data required."

    best_code = ""
    for info in run_autotune_pipeline(
        input_code=text_input,
        test_data_input=test_text,
        test_file=None,
        bin_dir=DATA_DIR,
        max_iterations=total_iters,
    ):
        # Update the progress bar only when the iteration number is known.
        if info["iteration"] is not None:
            progress((info["iteration"], total_iters), desc=info["message"])

        # Keep the newest kernel; updates without code leave it unchanged.
        if info["code"]:
            best_code = info["code"]

    # TODO: also expose the result through a download button.
    return best_code
81
+
82
+
83
+ # ---------- Gradio UI ----------
84
# Font fallback chain for the UI theme.
_UI_FONTS = [
    "system-ui",
    "-apple-system",
    "BlinkMacSystemFont",
    "Segoe UI",
    "Roboto",
    "Helvetica Neue",
    "Arial",
    "Noto Sans",
    "sans-serif",
]

with gr.Blocks(
    title="KernelPilot",
    theme=gr.themes.Soft(text_size="lg", font=_UI_FONTS),
) as demo:
    gr.Markdown(
        "# 🚀 KernelPilot Optimizer\n"
        "Enter the code and test data, then click **Generate** to obtain "
        "the optimized kernel function."
    )

    # Row 1: source code input and the optimization level.
    with gr.Row():
        txt_input = gr.Textbox(
            label="📝 Input",
            lines=10,
            placeholder="Enter the code",
            scale=3,
        )
        # The level is passed to generate_kernel as n_iters.
        level = gr.Number(
            label="Optimization Level",
            minimum=1,
            maximum=5,
            value=5,
            step=1,
            scale=1,
        )

    # Row 2: test data, typed in or uploaded as a text file.
    with gr.Row():
        test_data_input = gr.Textbox(
            label="Test Data Input",
            lines=10,
            placeholder="<number_of_test_cases>\n<number_of_variables>\n\n<variable_1_name>\n<variable_1_testcase_1_data>\n<variable_1_testcase_2_data>\n...\n<variable_1_testcase_N_data>\n\n<variable_2_name>\n<variable_2_testcase_1_data>\n...\n<variable_2_testcase_N_data>\n\n...",
            scale=2,
        )
        test_file = gr.File(
            label="Upload Test Data (.txt)",
            file_types=["text"],
            scale=1,
        )

    gen_btn = gr.Button("⚡ Generate")

    kernel_output = gr.Code(
        label="🎯 Tuned CUDA Kernel",
        language="cpp",
    )

    gen_btn.click(
        fn=generate_kernel,
        inputs=[txt_input, test_data_input, test_file, level],
        outputs=[kernel_output],
        queue=True,  # keep requests queued
        show_progress=True,  # show the progress bar
        show_progress_on=kernel_output,  # render progress on the code output
    )

if __name__ == "__main__":
    # One job at a time, with up to 50 requests waiting in the queue.
    demo.queue(default_concurrency_limit=1, max_size=50)
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.30
2
+ openai>=1.30.5
3
+ httpx>=0.27.0
4
+ numpy>=1.26.0
tool/.DS_Store ADDED
Binary file (8.2 kB). View file
 
tool/README.md ADDED
File without changes
tool/__init__.py ADDED
File without changes
tool/formatted_file_input.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2
2
+ 3
3
+
4
+ mse_preds
5
+ [1.0, 2.0, 3.0, 4.0]
6
+ [10.0, 20.0, 30.0]
7
+
8
+ mse_targets
9
+ [1.5, 2.5, 3.5, 4.5]
10
+ [12.0, 18.0, 33.0]
11
+
12
+ mse_ref
13
+ 0.25
14
+ 5.67
tool/input.txt ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <cuda_runtime.h>
3
+ #include <fstream>
4
+ #include <cmath>
5
+ #include <vector>
6
+ #include <string>
7
+
8
+ __global__ void mseKernel(const float* predictions, const float* targets, size_t numElements, float* sum) {
9
+ size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
10
+ if (idx < numElements) {
11
+ float diff = predictions[idx] - targets[idx];
12
+ float sq_diff = diff * diff;
13
+ atomicAdd(sum, sq_diff);
14
+ }
15
+ }
16
+
17
+ void read_binary(const std::string& filename, float* data, size_t size) {
18
+ std::ifstream in(filename, std::ios::binary);
19
+ if (!in) {
20
+ std::cerr << "Cannot open file: " << filename << std::endl;
21
+ exit(1);
22
+ }
23
+ in.read(reinterpret_cast<char*>(data), size * sizeof(float));
24
+ in.close();
25
+ }
26
+
27
+ // test
28
+ bool compare_scalar(float a, float b, float tol = 1e-1f) {
29
+ return std::fabs(a - b) < tol;
30
+ }
31
+
32
+ int main() {
33
+ std::vector<size_t> sizes = {1 << 10, 1 << 12, 1 << 14, 1 << 16, 1 << 18};
34
+ bool all_passed = true;
35
+
36
+ for (int t = 0; t < sizes.size(); ++t) {
37
+ size_t N = sizes[t];
38
+ size_t input_size = N * sizeof(float);
39
+
40
+ // test
41
+ std::string pred_file = "data/mse_preds_" + std::to_string(t + 1) + ".bin";
42
+ std::string target_file = "data/mse_targets_" + std::to_string(t + 1) + ".bin";
43
+ std::string ref_file = "data/mse_ref_" + std::to_string(t + 1) + ".bin";
44
+
45
+ float* h_preds = (float*)malloc(input_size);
46
+ float* h_targets = (float*)malloc(input_size);
47
+ float h_mse_ref;
48
+
49
+ read_binary(pred_file, h_preds, N);
50
+ read_binary(target_file, h_targets, N);
51
+ read_binary(ref_file, &h_mse_ref, 1);
52
+
53
+ float *d_preds, *d_targets, *d_sum;
54
+ cudaMalloc(&d_preds, input_size);
55
+ cudaMalloc(&d_targets, input_size);
56
+ cudaMalloc(&d_sum, sizeof(float));
57
+ cudaMemcpy(d_preds, h_preds, input_size, cudaMemcpyHostToDevice);
58
+ cudaMemcpy(d_targets, h_targets, input_size, cudaMemcpyHostToDevice);
59
+ cudaMemset(d_sum, 0, sizeof(float));
60
+
61
+ int threads = 256;
62
+ int blocks = (N + threads - 1) / threads;
63
+ mseKernel<<<blocks, threads>>>(d_preds, d_targets, N, d_sum);
64
+
65
+ float h_sum = 0.0f;
66
+ cudaMemcpy(&h_sum, d_sum, sizeof(float), cudaMemcpyDeviceToHost);
67
+ float mse = h_sum / N;
68
+
69
+ if (!compare_scalar(mse, h_mse_ref)) {
70
+ std::cout << "F" << std::endl;
71
+ all_passed = false;
72
+
73
+ cudaFree(d_preds); cudaFree(d_targets); cudaFree(d_sum);
74
+ free(h_preds); free(h_targets);
75
+ break;
76
+ }
77
+
78
+ cudaFree(d_preds); cudaFree(d_targets); cudaFree(d_sum);
79
+ free(h_preds); free(h_targets);
80
+ }
81
+
82
+ if (all_passed) std::cout << "T" << std::endl;
83
+ return 0;
84
+ }
tool/prompt_a.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The task is to write a CUDA kernel function on GPU, and we have the benchmark code for this task:
2
+ [code]
3
+
4
+ Optimize the kernel function for less execution time on GPU.
5
+ The output should be the content of whole .cu file containing ONE kernel function.
6
+ Do not modify the test part. Note the test data contains exactly four input sets. The generated .cu file must ensure that for each input set, the kernel function is called exactly once, resulting in a total of four kernel invocations. Do not include any extra timing logic, profiling wrappers, or repeat kernel calls that could cause each input to trigger multiple kernel launches.
7
+
8
+ When generating CUDA code, you must produce a complete, standalone, compilable program, not just a kernel or code fragment.
9
+ The program should include headers, data structures, the kernel definition, a main() function that allocates memory, launches the kernel, and prints "T" or "F" based on correctness.
10
+ Follow these strict rules:
11
+ 1. Always make the code fully self-contained and directly compilable with nvcc file.cu -o file. No missing functions, dependencies, or external headers.
12
+ 2. Do not use std::max, std::min, or std::abs in device code. Always use fmaxf, fminf, and fabsf instead.
13
+ 3. Do not use INFINITY; use CUDART_INF_F or a large constant (e.g., 1e30f) instead.
14
+ 4. Include all required headers: <vector>, <cmath>, <cstdint>, and <cuda_runtime.h>.
15
+ 5. Avoid duplicate includes and never mark __global__ functions as inline.
16
+
17
+ Please focus your modifications only in the CUDA kernel function part. Avoid changing any other parts of the program, including:
18
+ - data loading, I/O, or test logic
19
+ - host-side function definitions
20
+ - main() function or CUDA memory allocation logic
21
+
22
+ Do not rename variables or structs defined outside the kernel region.
tool/prompt_b.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Fix the code according to the execution error output shown below.
2
+ The output should be the content of whole .cu file containing ONE kernel function.
3
+ Do not modify the test part. Note the test data contains exactly four input sets. The generated .cu file must ensure that for each input set, the kernel function is called exactly once, resulting in a total of four kernel invocations. Do not include any extra timing logic, profiling wrappers, or repeat kernel calls that could cause each input to trigger multiple kernel launches.
4
+
5
+ The execution output is:
tool/prompt_c.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Optimize the kernel function for less execution time on GPU.
2
+ The output should be the content of whole .cu file containing ONE kernel function.
3
+ Do not modify the test part. Note the test data contains exactly four input sets. The generated .cu file must ensure that for each input set, the kernel function is called exactly once, resulting in a total of four kernel invocations. Do not include any extra timing logic, profiling wrappers, or repeat kernel calls that could cause each input to trigger multiple kernel launches.
tool/prompt_e.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ The code failed to launch the kernel. Modify the code.
2
+ The output should be the content of whole .cu file containing ONE kernel function.
3
+ Do not modify the test part. Note the test data contains exactly four input sets. The generated .cu file must ensure that for each input set, the kernel function is called exactly once, resulting in a total of four kernel invocations. Do not include any extra timing logic, profiling wrappers, or repeat kernel calls that could cause each input to trigger multiple kernel launches.
tool/prompt_f.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ The result does not match the reference output. Modify the code.
2
+ The output should be the content of whole .cu file containing ONE kernel function.
3
+ Do not modify the test part. Note the test data contains exactly four input sets. The generated .cu file must ensure that for each input set, the kernel function is called exactly once, resulting in a total of four kernel invocations. Do not include any extra timing logic, profiling wrappers, or repeat kernel calls that could cause each input to trigger multiple kernel launches.
tool/prompt_norm.txt ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert CUDA engineer.
2
+
3
+ Your task is:
4
+ 1. Completely ignore all input-loading code, file paths, main() logic, correctness checks,
5
+ device allocations, free(), test loops, or anything unrelated to the CUDA kernel.
6
+ 2. Extract ONLY the CUDA kernel from the provided code:
7
+ - The kernel must keep the original function name.
8
+ - The kernel must keep the original parameter list.
9
+ - The kernel body may be reformatted but not logically changed.
10
+ 3. Rebuild a complete, compilable CUDA program using the fixed template provided below.
11
+ 4. Insert the extracted kernel EXACTLY into the placeholder {{KERNEL_CODE}}.
12
+ 5. Do NOT modify the test harness structure.
13
+ The test harness will load .bin files automatically and compare results.
14
+ 6. Use the following information:
15
+ - Binary directory: {{BIN_DIR}}
16
+ - Variables to load: {{VARNAMES}}
17
+ - Number of test cases: {{NUM_TESTS}}
18
+ - The test data files follow the format: data/{variable_name}_{case_idx}.bin, where {variable_name} is one of: {{VARNAMES}}
19
+
20
+ The output **must be a COMPLETE, COMPILABLE .cu file**.
21
+ It must:
22
+ - include exactly one main() from the template,
23
+ - print 'T' if all tests pass,
24
+ - print 'F' otherwise,
25
+ - compile with nvcc.
26
+
27
+ Below is the CUDA test harness template you must strictly follow.
28
+ You MUST follow the template's structure exactly. All sections in the template (read_bin_f32_dyn, CUDA kernel, correctness check, main(), variable loading, allocation, launch, correctness, free) must appear exactly once.
29
+ Insert your kernel where {{KERNEL_CODE}} is located.
30
+
31
+ ```cpp
32
+ #include <cuda_runtime.h>
33
+ #include <iostream>
34
+ #include <fstream>
35
+ #include <vector>
36
+ #include <string>
37
+ #include <cmath>
38
+
39
+ using std::vector;
40
+ using std::string;
41
+
42
+ #define TOLERANCE 1e-3f // The TOLERANCE can be modified according to different tasks
43
+ #define NUM_TESTS {{NUM_TESTS}}
44
+
45
+ // --------------------------
46
+ // read_bin_f32_dyn: read .bin function
47
+ // --------------------------
48
+ vector<float> read_bin_f32_dyn(const string& filename) {
49
+ FILE* fp = fopen(filename.c_str(), "rb");
50
+ if (!fp) {
51
+ std::cerr << "Cannot open bin file: " << filename << std::endl;
52
+ exit(1);
53
+ }
54
+
55
+ fseek(fp, 0, SEEK_END);
56
+ long fsize = ftell(fp);
57
+ rewind(fp);
58
+
59
+ size_t n_elem = fsize / sizeof(float);
60
+ vector<float> data(n_elem);
61
+ fread(data.data(), sizeof(float), n_elem, fp);
62
+ fclose(fp);
63
+ return data;
64
+ }
65
+
66
+ // --------------------------
67
+ // CUDA kernel (inserted here)
68
+ // --------------------------
69
+ {{KERNEL_CODE}}
70
+
71
+ // --------------------------
72
+ // correctness check
73
+ // --------------------------
74
+ bool check_equal(const vector<float>& a, const vector<float>& b) {
75
+ if (a.size() != b.size()) return false;
76
+ for (size_t i = 0; i < a.size(); ++i) {
77
+ if (fabs(a[i] - b[i]) > TOLERANCE) return false;
78
+ }
79
+ return true;
80
+ }
81
+
82
+ // --------------------------
83
+ // main test harness
84
+ // --------------------------
85
+ int main() {
86
+ bool all_pass = true;
87
+
88
+ for (int case_id = 1; case_id <= NUM_TESTS; ++case_id) {
89
+
90
+ // --------------------------
91
+ // load all variables
92
+ // --------------------------
93
+ {{VAR_LOADING_BLOCK}}
94
+
95
+ // --------------------------
96
+ // allocate GPU buffers
97
+ // --------------------------
98
+ {{ALLOCATE_BLOCK}}
99
+
100
+ // --------------------------
101
+ // kernel launch
102
+ // --------------------------
103
+ {{KERNEL_LAUNCH_BLOCK}}
104
+
105
+ // --------------------------
106
+ // correctness check
107
+ // --------------------------
108
+ {{CORRECTNESS_BLOCK}}
109
+
110
+ // --------------------------
111
+ // free GPU memory
112
+ // --------------------------
113
+ {{FREE_BLOCK}}
114
+ }
115
+
116
+ std::cout << (all_pass ? "T" : "F") << std::endl;
117
+ return 0;
118
+ }
+ ```
119
+
120
+ Below is the original CUDA source code.
121
+ Extract ONLY the kernel and rebuild the program using the above template.
122
+
123
+ ======== ORIGINAL CUDA CODE START ========
124
+ {{CODE}}
125
+ ======== ORIGINAL CUDA CODE END ==========
126
+
127
+ IMPORTANT RULES (strict):
128
+ 1. Remove or replace any undefined types (e.g., BlockMessage). If the structure definition is missing, expand it using equivalent primitive variables, e.g., int region1_index = blockMessage_region1_index[blockID];
129
+ 2. Do not generate any non-ASCII characters or Chinese comments. All output must be strictly ASCII-only.
130
+ 3. If the kernel uses external macros (such as BlockSizeInEval, MaxIntervalCount), detect them from the input code and insert default definitions at the top of the normalized code.
131
+ 4. Remove the inline qualifier from any __global__ kernel function. (inline is meaningless and should not be applied to __global__ functions.)
132
+ 5. Ensure that all parameters referenced inside the __global__ kernel are actually defined within the function scope. No undefined external dependencies are allowed.
133
+ 6. If many numerical mismatches occur in fabs(a[i] - b[i]), allow increasing the tolerance in the template (e.g., from 1e-3f to 1e-1f).
134
+ 7. Ignore warnings about unused local variables. These do not affect correctness or compilation and should not be treated as errors.
tool/test_data_input.txt ADDED
File without changes
tool/test_data_txt_input.txt ADDED
File without changes