JohannesGaessler committed on
Commit
e7722cb
·
1 Parent(s): 4427ede

tests: add tests for GGUF (llama/10830)

Browse files
Files changed (2) hide show
  1. ggml/src/ggml-impl.h +16 -0
  2. ggml/src/ggml.c +24 -41
ggml/src/ggml-impl.h CHANGED
@@ -551,6 +551,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
551
  #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
552
  #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  #ifdef __cplusplus
555
  }
556
  #endif
 
551
  #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
552
  #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
553
 
554
+ // expose GGUF internals for test code
555
+
556
+ GGML_API size_t gguf_type_size(enum gguf_type type);
557
+
558
+ GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
559
+
560
+ struct gguf_buf {
561
+ void * data;
562
+ size_t size;
563
+ size_t offset;
564
+ };
565
+ GGML_API struct gguf_buf gguf_buf_init(size_t size);
566
+ GGML_API void gguf_buf_free(struct gguf_buf buf);
567
+
568
+ GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
569
+
570
  #ifdef __cplusplus
571
  }
572
  #endif
ggml/src/ggml.c CHANGED
@@ -6489,7 +6489,7 @@ struct gguf_context {
6489
  void * data;
6490
  };
6491
 
6492
- static size_t gguf_type_size(enum gguf_type type) {
6493
  GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
6494
  return GGUF_TYPE_SIZE[type];
6495
  }
@@ -6617,13 +6617,7 @@ struct gguf_context * gguf_init_empty(void) {
6617
  return ctx;
6618
  }
6619
 
6620
- struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
6621
- FILE * file = ggml_fopen(fname, "rb");
6622
- if (!file) {
6623
- fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
6624
- return NULL;
6625
- }
6626
-
6627
  // offset from start of file
6628
  size_t offset = 0;
6629
 
@@ -6636,7 +6630,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6636
  for (uint32_t i = 0; i < sizeof(magic); i++) {
6637
  if (magic[i] != GGUF_MAGIC[i]) {
6638
  fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
6639
- fclose(file);
6640
  return NULL;
6641
  }
6642
  }
@@ -6647,7 +6640,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6647
  struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
6648
  if (!ctx) {
6649
  fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
6650
- fclose(file);
6651
  return NULL;
6652
  }
6653
 
@@ -6665,7 +6657,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6665
 
6666
  if (ctx->header.version == 1) {
6667
  fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
6668
- fclose(file);
6669
  gguf_free(ctx);
6670
  return NULL;
6671
  }
@@ -6678,7 +6669,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6678
 
6679
  if (!ok) {
6680
  fprintf(stderr, "%s: failed to read header\n", __func__);
6681
- fclose(file);
6682
  gguf_free(ctx);
6683
  return NULL;
6684
  }
@@ -6688,12 +6678,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6688
  {
6689
  const uint64_t n_kv = ctx->header.n_kv;
6690
 
6691
- ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
6692
- if (!ctx->kv) {
6693
- fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
6694
- fclose(file);
6695
- gguf_free(ctx);
6696
- return NULL;
 
6697
  }
6698
 
6699
  for (uint64_t i = 0; i < n_kv; ++i) {
@@ -6740,7 +6731,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6740
  // prevent from integer overflow in the malloc below
6741
  if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
6742
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6743
- fclose(file);
6744
  gguf_free(ctx);
6745
  return NULL;
6746
  }
@@ -6748,7 +6738,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6748
  kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
6749
  if (!kv->value.arr.data) {
6750
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6751
- fclose(file);
6752
  gguf_free(ctx);
6753
  return NULL;
6754
  }
@@ -6760,7 +6749,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6760
  // prevent from integer overflow in the malloc below
6761
  if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
6762
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6763
- fclose(file);
6764
  gguf_free(ctx);
6765
  return NULL;
6766
  }
@@ -6768,7 +6756,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6768
  kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
6769
  if (!kv->value.arr.data) {
6770
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6771
- fclose(file);
6772
  gguf_free(ctx);
6773
  return NULL;
6774
  }
@@ -6799,7 +6786,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6799
 
6800
  if (!ok) {
6801
  fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
6802
- fclose(file);
6803
  gguf_free(ctx);
6804
  return NULL;
6805
  }
@@ -6810,7 +6796,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6810
  ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
6811
  if (!ctx->infos) {
6812
  fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
6813
- fclose(file);
6814
  gguf_free(ctx);
6815
  return NULL;
6816
  }
@@ -6846,7 +6831,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6846
 
6847
  if (!ok) {
6848
  fprintf(stderr, "%s: failed to read tensor info\n", __func__);
6849
- fclose(file);
6850
  gguf_free(ctx);
6851
  return NULL;
6852
  }
@@ -6889,7 +6873,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6889
  // this tensor type support have been removed:
6890
  fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
6891
  __func__, info->name.data, (int) info->type, ggml_type_name(info->type));
6892
- fclose(file);
6893
  gguf_free(ctx);
6894
  return NULL;
6895
  }
@@ -6897,7 +6880,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6897
  if (ne % ggml_blck_size(info->type) != 0) {
6898
  fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
6899
  __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
6900
- fclose(file);
6901
  gguf_free(ctx);
6902
  return NULL;
6903
  }
@@ -6929,7 +6911,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6929
  *params.ctx = ggml_init(pdata);
6930
  if (*params.ctx == NULL) {
6931
  fprintf(stderr, "%s: failed to initialize context\n", __func__);
6932
- fclose(file);
6933
  gguf_free(ctx);
6934
  return NULL;
6935
  }
@@ -6948,7 +6929,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6948
 
6949
  if (!ok) {
6950
  fprintf(stderr, "%s: failed to read tensor data\n", __func__);
6951
- fclose(file);
6952
  ggml_free(ctx_data);
6953
  gguf_free(ctx);
6954
  return NULL;
@@ -6987,7 +6967,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6987
 
6988
  if (!ok) {
6989
  fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
6990
- fclose(file);
6991
  ggml_free(ctx_data);
6992
  gguf_free(ctx);
6993
  return NULL;
@@ -6996,11 +6975,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
6996
  ggml_set_no_alloc(ctx_data, params.no_alloc);
6997
  }
6998
 
6999
- fclose(file);
7000
-
7001
  return ctx;
7002
  }
7003
 
 
 
 
 
 
 
 
 
 
 
 
 
7004
  void gguf_free(struct gguf_context * ctx) {
7005
  if (ctx == NULL) {
7006
  return;
@@ -7460,13 +7449,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
7460
  // fwrite(val, sizeof(char), size, file);
7461
  //}
7462
 
7463
- struct gguf_buf {
7464
- void * data;
7465
- size_t size;
7466
- size_t offset;
7467
- };
7468
-
7469
- static struct gguf_buf gguf_buf_init(size_t size) {
7470
  struct gguf_buf buf = {
7471
  /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
7472
  /*buf.size =*/ size,
@@ -7476,7 +7459,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
7476
  return buf;
7477
  }
7478
 
7479
- static void gguf_buf_free(struct gguf_buf buf) {
7480
  if (buf.data) {
7481
  GGML_FREE(buf.data);
7482
  }
@@ -7514,7 +7497,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
7514
  buf->offset += el_size;
7515
  }
7516
 
7517
- static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
7518
  // write header
7519
  gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
7520
  gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
 
6489
  void * data;
6490
  };
6491
 
6492
+ size_t gguf_type_size(enum gguf_type type) {
6493
  GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
6494
  return GGUF_TYPE_SIZE[type];
6495
  }
 
6617
  return ctx;
6618
  }
6619
 
6620
+ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
 
 
 
 
 
 
6621
  // offset from start of file
6622
  size_t offset = 0;
6623
 
 
6630
  for (uint32_t i = 0; i < sizeof(magic); i++) {
6631
  if (magic[i] != GGUF_MAGIC[i]) {
6632
  fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
 
6633
  return NULL;
6634
  }
6635
  }
 
6640
  struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
6641
  if (!ctx) {
6642
  fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
 
6643
  return NULL;
6644
  }
6645
 
 
6657
 
6658
  if (ctx->header.version == 1) {
6659
  fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
 
6660
  gguf_free(ctx);
6661
  return NULL;
6662
  }
 
6669
 
6670
  if (!ok) {
6671
  fprintf(stderr, "%s: failed to read header\n", __func__);
 
6672
  gguf_free(ctx);
6673
  return NULL;
6674
  }
 
6678
  {
6679
  const uint64_t n_kv = ctx->header.n_kv;
6680
 
6681
+ if (n_kv > 0) {
6682
+ ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
6683
+ if (!ctx->kv) {
6684
+ fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
6685
+ gguf_free(ctx);
6686
+ return NULL;
6687
+ }
6688
  }
6689
 
6690
  for (uint64_t i = 0; i < n_kv; ++i) {
 
6731
  // prevent from integer overflow in the malloc below
6732
  if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
6733
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
 
6734
  gguf_free(ctx);
6735
  return NULL;
6736
  }
 
6738
  kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
6739
  if (!kv->value.arr.data) {
6740
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
 
6741
  gguf_free(ctx);
6742
  return NULL;
6743
  }
 
6749
  // prevent from integer overflow in the malloc below
6750
  if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
6751
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
 
6752
  gguf_free(ctx);
6753
  return NULL;
6754
  }
 
6756
  kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
6757
  if (!kv->value.arr.data) {
6758
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
 
6759
  gguf_free(ctx);
6760
  return NULL;
6761
  }
 
6786
 
6787
  if (!ok) {
6788
  fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
 
6789
  gguf_free(ctx);
6790
  return NULL;
6791
  }
 
6796
  ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
6797
  if (!ctx->infos) {
6798
  fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
 
6799
  gguf_free(ctx);
6800
  return NULL;
6801
  }
 
6831
 
6832
  if (!ok) {
6833
  fprintf(stderr, "%s: failed to read tensor info\n", __func__);
 
6834
  gguf_free(ctx);
6835
  return NULL;
6836
  }
 
6873
  // this tensor type support have been removed:
6874
  fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
6875
  __func__, info->name.data, (int) info->type, ggml_type_name(info->type));
 
6876
  gguf_free(ctx);
6877
  return NULL;
6878
  }
 
6880
  if (ne % ggml_blck_size(info->type) != 0) {
6881
  fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
6882
  __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
 
6883
  gguf_free(ctx);
6884
  return NULL;
6885
  }
 
6911
  *params.ctx = ggml_init(pdata);
6912
  if (*params.ctx == NULL) {
6913
  fprintf(stderr, "%s: failed to initialize context\n", __func__);
 
6914
  gguf_free(ctx);
6915
  return NULL;
6916
  }
 
6929
 
6930
  if (!ok) {
6931
  fprintf(stderr, "%s: failed to read tensor data\n", __func__);
 
6932
  ggml_free(ctx_data);
6933
  gguf_free(ctx);
6934
  return NULL;
 
6967
 
6968
  if (!ok) {
6969
  fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
 
6970
  ggml_free(ctx_data);
6971
  gguf_free(ctx);
6972
  return NULL;
 
6975
  ggml_set_no_alloc(ctx_data, params.no_alloc);
6976
  }
6977
 
 
 
6978
  return ctx;
6979
  }
6980
 
6981
+ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
6982
+ FILE * file = ggml_fopen(fname, "rb");
6983
+ if (!file) {
6984
+ fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
6985
+ return NULL;
6986
+ }
6987
+
6988
+ struct gguf_context * result = gguf_init_from_file_impl(file, params);
6989
+ fclose(file);
6990
+ return result;
6991
+ }
6992
+
6993
  void gguf_free(struct gguf_context * ctx) {
6994
  if (ctx == NULL) {
6995
  return;
 
7449
  // fwrite(val, sizeof(char), size, file);
7450
  //}
7451
 
7452
+ struct gguf_buf gguf_buf_init(size_t size) {
 
 
 
 
 
 
7453
  struct gguf_buf buf = {
7454
  /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
7455
  /*buf.size =*/ size,
 
7459
  return buf;
7460
  }
7461
 
7462
+ void gguf_buf_free(struct gguf_buf buf) {
7463
  if (buf.data) {
7464
  GGML_FREE(buf.data);
7465
  }
 
7497
  buf->offset += el_size;
7498
  }
7499
 
7500
+ void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
7501
  // write header
7502
  gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
7503
  gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));