cmdr2 committed on
Commit 09f2f18 · 1 Parent(s): 4be7f68

cpu: de-duplicate some of the operators and refactor (ggml/1144)


* cpu: de-duplicate some of the operators and refactor

* Fix PR comments

* Fix PR comments
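In short, the refactor moves the element-wise CPU operators out of ggml-cpu.c into dedicated translation units, replacing near-identical per-operator, per-type loops with one templated kernel per operator family: the scalar math is passed as a compile-time function pointer, and source/destination types are converted through f32 via a small conversion table. The sketch below is illustrative only (not code from this PR; the names are hypothetical) and shows that pattern in isolation:

```cpp
// Standalone illustration of the de-duplication pattern used by this change:
// one templated loop, parameterized by a scalar operator chosen at compile time.
// 'scalar_add', 'scalar_mul', 'apply_elementwise' are hypothetical names.
#include <cstdint>
#include <cstdio>

static inline float scalar_add(float a, float b) { return a + b; }
static inline float scalar_mul(float a, float b) { return a * b; }

template <float (*op)(float, float)>
static void apply_elementwise(int64_t n, float * dst, const float * a, const float * b) {
    for (int64_t i = 0; i < n; i++) {
        dst[i] = op(a[i], b[i]);   // 'op' is known at compile time, so the call can be inlined
    }
}

int main() {
    float a[4] = {1, 2, 3, 4}, b[4] = {5, 6, 7, 8}, out[4];
    apply_elementwise<scalar_add>(4, out, a, b);   // one instantiation per operator
    apply_elementwise<scalar_mul>(4, out, a, b);
    std::printf("%f\n", out[0]);
    return 0;
}
```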

ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -23,6 +23,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     ggml-cpu/amx/mmq.cpp
     ggml-cpu/amx/mmq.h
     ggml-cpu/ggml-cpu-impl.h
+    ggml-cpu/common.h
+    ggml-cpu/binary-ops.h
+    ggml-cpu/binary-ops.cpp
+    ggml-cpu/unary-ops.h
+    ggml-cpu/unary-ops.cpp
     )

     target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
ggml/src/ggml-cpu/binary-ops.cpp ADDED
@@ -0,0 +1,158 @@
+#include "binary-ops.h"
+
+#if defined(GGML_USE_ACCELERATE)
+#include <Accelerate/Accelerate.h>
+
+using vDSP_fn_t = void (*)(const float *, vDSP_Stride, const float *, vDSP_Stride, float *, vDSP_Stride, vDSP_Length);
+#endif
+
+static inline float op_add(float a, float b) {
+    return a + b;
+}
+
+static inline float op_sub(float a, float b) {
+    return a - b;
+}
+
+static inline float op_mul(float a, float b) {
+    return a * b;
+}
+
+static inline float op_div(float a, float b) {
+    return a / b;
+}
+
+template <float (*op)(float, float), typename src0_t, typename src1_t, typename dst_t>
+static inline void vec_binary_op_contiguous(const int64_t n, dst_t * z, const src0_t * x, const src1_t * y) {
+    constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
+    constexpr auto src1_to_f32 = type_conversion_table<src1_t>::to_f32;
+    constexpr auto f32_to_dst  = type_conversion_table<dst_t >::from_f32;
+
+    for (int i = 0; i < n; i++) {
+        z[i] = f32_to_dst(op(src0_to_f32(x[i]), src1_to_f32(y[i])));
+    }
+}
+
+template <float (*op)(float, float), typename src0_t, typename src1_t, typename dst_t>
+static inline void vec_binary_op_non_contiguous(const int64_t n, const int64_t ne10, const int64_t nb10, dst_t * z, const src0_t * x, const src1_t * y) {
+    constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
+    constexpr auto src1_to_f32 = type_conversion_table<src1_t>::to_f32;
+    constexpr auto f32_to_dst  = type_conversion_table<dst_t >::from_f32;
+
+    for (int i = 0; i < n; i++) {
+        int i10 = i % ne10;
+        const src1_t * y_ptr = (const src1_t *)((const char *)y + i10*nb10);
+        z[i] = f32_to_dst(op(src0_to_f32(x[i]), src1_to_f32(*y_ptr)));
+    }
+}
+
+template <float (*op)(float, float), typename src0_t, typename src1_t, typename dst_t>
+static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    GGML_ASSERT( nb0 == sizeof(dst_t));
+    GGML_ASSERT(nb00 == sizeof(src0_t));
+
+    const auto [ir0, ir1] = get_thread_range(params, src0);
+    const bool is_src1_contiguous = (nb10 == sizeof(src1_t));
+
+    if (!is_src1_contiguous) { // broadcast not implemented yet for non-contiguous
+        GGML_ASSERT(ggml_are_same_shape(src0, src1));
+    }
+
+#ifdef GGML_USE_ACCELERATE
+    vDSP_fn_t vDSP_op = nullptr;
+    // TODO - avoid the f32-only check using type 'trait' lookup tables and row-based src-to-float conversion functions
+    if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+        if (op == op_add) {
+            vDSP_op = vDSP_vadd;
+        } else if (op == op_sub) {
+            vDSP_op = vDSP_vsub;
+        } else if (op == op_mul) {
+            vDSP_op = vDSP_vmul;
+        } else if (op == op_div) {
+            vDSP_op = vDSP_vdiv;
+        }
+    }
+#endif
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne02*ne01);
+        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+        const int64_t i13 = i03 % ne13;
+        const int64_t i12 = i02 % ne12;
+        const int64_t i11 = i01 % ne11;
+
+        dst_t * dst_ptr = (dst_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+        const src0_t * src0_ptr = (const src0_t *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+        const src1_t * src1_ptr = (const src1_t *) ((const char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
+
+        if (is_src1_contiguous) {
+            // src1 is broadcastable across src0 and dst in i1, i2, i3
+            const int64_t nr0 = ne00 / ne10;
+
+            for (int64_t r = 0; r < nr0; ++r) {
+#ifdef GGML_USE_ACCELERATE
+                if constexpr (std::is_same_v<src0_t, float> && std::is_same_v<src1_t, float> && std::is_same_v<dst_t, float>) {
+                    if (vDSP_op != nullptr) {
+                        vDSP_op(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10);
+                        continue;
+                    }
+                }
+#endif
+                vec_binary_op_contiguous<op>(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
+            }
+        } else {
+            vec_binary_op_non_contiguous<op>(ne0, ne10, nb10, dst_ptr, src0_ptr, src1_ptr);
+        }
+    }
+}
+
+// TODO: Use the 'traits' lookup table (for type conversion fns), instead of a mass of 'if' conditions with long templates
+template <float (*op)(float, float)>
+static void binary_op(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    /* */ if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
+        apply_binary_op<op, float, float, float>(params, dst);
+    } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
+        apply_binary_op<op, ggml_fp16_t, ggml_fp16_t, ggml_fp16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
+        apply_binary_op<op, ggml_bf16_t, ggml_bf16_t, ggml_bf16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_BF16) {
+        apply_binary_op<op, ggml_bf16_t, float, ggml_bf16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+        apply_binary_op<op, ggml_bf16_t, float, float>(params, dst);
+    } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16) {
+        apply_binary_op<op, ggml_fp16_t, float, ggml_fp16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+        apply_binary_op<op, ggml_fp16_t, float, float>(params, dst);
+    } else {
+        GGML_ABORT("%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
+            ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
+    }
+}
+
+void ggml_compute_forward_add_non_quantized(const ggml_compute_params * params, ggml_tensor * dst) {
+    binary_op<op_add>(params, dst);
+}
+
+void ggml_compute_forward_sub(const ggml_compute_params * params, ggml_tensor * dst) {
+    binary_op<op_sub>(params, dst);
+}
+
+void ggml_compute_forward_mul(const ggml_compute_params * params, ggml_tensor * dst) {
+    binary_op<op_mul>(params, dst);
+}
+
+void ggml_compute_forward_div(const ggml_compute_params * params, ggml_tensor * dst) {
+    binary_op<op_div>(params, dst);
+}
ggml/src/ggml-cpu/binary-ops.h ADDED
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ggml_compute_forward_add_non_quantized(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sub(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_mul(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_div(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+
+#ifdef __cplusplus
+}
+#endif
ggml/src/ggml-cpu/common.h ADDED
@@ -0,0 +1,72 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-cpu-traits.h"
+#include "ggml-cpu-impl.h"
+#include "ggml-impl.h"
+
+#ifdef __cplusplus
+
+#include <utility>
+
+// convenience functions/macros for use in template calls
+// note: these won't be required after the 'traits' lookup table is used.
+static inline ggml_fp16_t f32_to_f16(float x) {
+    return GGML_FP32_TO_FP16(x);
+}
+
+static inline float f16_to_f32(ggml_fp16_t x) {
+    return GGML_FP16_TO_FP32(x);
+}
+
+static inline ggml_bf16_t f32_to_bf16(float x) {
+    return GGML_FP32_TO_BF16(x);
+}
+
+static inline float bf16_to_f32(ggml_bf16_t x) {
+    return GGML_BF16_TO_FP32(x);
+}
+
+static inline float f32_to_f32(float x) {
+    return x;
+}
+
+// TODO - merge this into the traits table, after using row-based conversions
+template <class T>
+struct type_conversion_table;
+
+template <>
+struct type_conversion_table<ggml_fp16_t> {
+    static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
+    static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
+};
+
+template <>
+struct type_conversion_table<float> {
+    static constexpr float (*to_f32)(float) = f32_to_f32;
+    static constexpr float (*from_f32)(float) = f32_to_f32;
+};
+
+template <>
+struct type_conversion_table<ggml_bf16_t> {
+    static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
+    static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
+};
+
+static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
+    const int64_t ith = params->ith;
+    const int64_t nth = params->nth;
+
+    const int64_t nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int64_t dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int64_t ir0 = dr*ith;
+    const int64_t ir1 = MIN(ir0 + dr, nr);
+
+    return {ir0, ir1};
+}
+
+#endif
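The conversion table and get_thread_range are what let the kernels in binary-ops.cpp and unary-ops.cpp stay type-agnostic: each element is converted to f32, operated on, and converted back, while each thread handles a contiguous block of rows. A minimal sketch of using the table on its own, assuming common.h is on the include path (convert_row is a hypothetical helper, not part of the PR):

```cpp
// Illustrative only: converting a row of src_t to dst_t through f32 using the
// compile-time conversion table from ggml-cpu/common.h.
#include "common.h"

template <typename src_t, typename dst_t>
static void convert_row(int64_t n, dst_t * dst, const src_t * src) {
    constexpr auto to_f32   = type_conversion_table<src_t>::to_f32;
    constexpr auto from_f32 = type_conversion_table<dst_t>::from_f32;

    for (int64_t i = 0; i < n; i++) {
        dst[i] = from_f32(to_f32(src[i]));   // e.g. ggml_fp16_t -> float -> ggml_bf16_t
    }
}
```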
ggml/src/ggml-cpu/ggml-cpu.c CHANGED
The diff for this file is too large to render. See raw diff
 
ggml/src/ggml-cpu/unary-ops.cpp ADDED
@@ -0,0 +1,186 @@
+#include "unary-ops.h"
+
+static inline float op_abs(float x) {
+    return fabsf(x);
+}
+
+static inline float op_sgn(float x) {
+    return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
+}
+
+static inline float op_neg(float x) {
+    return -x;
+}
+
+static inline float op_step(float x) {
+    return (x > 0.f) ? 1.f : 0.f;
+}
+
+static inline float op_tanh(float x) {
+    return tanhf(x);
+}
+
+static inline float op_elu(float x) {
+    return (x > 0.f) ? x : expm1f(x);
+}
+
+static inline float op_relu(float x) {
+    return (x > 0.f) ? x : 0.f;
+}
+
+static inline float op_sigmoid(float x) {
+    return 1.f / (1.f + expf(-x));
+}
+
+static inline float op_hardsigmoid(float x) {
+    return fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
+}
+
+static inline float op_exp(float x) {
+    return expf(x);
+}
+
+static inline float op_hardswish(float x) {
+    return x * fminf(1.0f, fmaxf(0.0f, (x + 3.0f) / 6.0f));
+}
+
+static inline float op_sqr(float x) {
+    return x * x;
+}
+
+static inline float op_sqrt(float x) {
+    return sqrtf(x);
+}
+
+static inline float op_sin(float x) {
+    return sinf(x);
+}
+
+static inline float op_cos(float x) {
+    return cosf(x);
+}
+
+static inline float op_log(float x) {
+    return logf(x);
+}
+
+template <float (*op)(float), typename src0_t, typename dst_t>
+static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
+    constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
+    constexpr auto f32_to_dst  = type_conversion_table<dst_t >::from_f32;
+
+    for (int i = 0; i < n; i++) {
+        y[i] = f32_to_dst(op(src0_to_f32(x[i])));
+    }
+}
+
+template <float (*op)(float), typename src0_t, typename dst_t>
+static void apply_unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0) && ggml_is_contiguous_1(dst) && ggml_are_same_shape(src0, dst));
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    GGML_ASSERT( nb0 == sizeof(dst_t));
+    GGML_ASSERT(nb00 == sizeof(src0_t));
+
+    const auto [ir0, ir1] = get_thread_range(params, src0);
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne02*ne01);
+        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+        dst_t * dst_ptr = (dst_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+        const src0_t * src0_ptr = (const src0_t *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+
+        vec_unary_op<op>(ne0, dst_ptr, src0_ptr);
+    }
+}
+
+// TODO: Use the 'traits' lookup table (for type conversion fns), instead of a mass of 'if' conditions with long templates
+template <float (*op)(float)>
+static void unary_op(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+
+    /* */ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { // all f32
+        apply_unary_op<op, float, float>(params, dst);
+    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) { // all f16
+        apply_unary_op<op, ggml_fp16_t, ggml_fp16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) { // all bf16
+        apply_unary_op<op, ggml_bf16_t, ggml_bf16_t>(params, dst);
+    } else if (src0->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_F32) {
+        apply_unary_op<op, ggml_bf16_t, float>(params, dst);
+    } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
+        apply_unary_op<op, ggml_fp16_t, float>(params, dst);
+    } else {
+        fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s\n", __func__,
+            ggml_type_name(dst->type), ggml_type_name(src0->type));
+        GGML_ABORT("fatal error");
+    }
+}
+
+void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_abs>(params, dst);
+}
+
+void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_sgn>(params, dst);
+}
+
+void ggml_compute_forward_neg(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_neg>(params, dst);
+}
+
+void ggml_compute_forward_step(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_step>(params, dst);
+}
+
+void ggml_compute_forward_tanh(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_tanh>(params, dst);
+}
+
+void ggml_compute_forward_elu(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_elu>(params, dst);
+}
+
+void ggml_compute_forward_relu(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_relu>(params, dst);
+}
+
+void ggml_compute_forward_sigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_sigmoid>(params, dst);
+}
+
+void ggml_compute_forward_hardsigmoid(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_hardsigmoid>(params, dst);
+}
+
+void ggml_compute_forward_exp(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_exp>(params, dst);
+}
+
+void ggml_compute_forward_hardswish(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_hardswish>(params, dst);
+}
+
+void ggml_compute_forward_sqr(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_sqr>(params, dst);
+}
+
+void ggml_compute_forward_sqrt(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_sqrt>(params, dst);
+}
+
+void ggml_compute_forward_sin(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_sin>(params, dst);
+}
+
+void ggml_compute_forward_cos(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_cos>(params, dst);
+}
+
+void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_log>(params, dst);
+}
ggml/src/ggml-cpu/unary-ops.h ADDED
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_tanh(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_elu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_relu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sigmoid(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_hardsigmoid(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_exp(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_hardswish(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sqr(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+
+#ifdef __cplusplus
+}
+#endif
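For context, these kernels are not called directly by users; they sit behind the regular graph ops dispatched from ggml-cpu.c. The sketch below (illustrative only, assuming the public API in ggml.h and ggml-cpu.h; tensor data initialization and error handling omitted) shows a graph whose CPU execution ends up in the new files:

```cpp
// Minimal sketch: GGML_OP_MUL dispatches to ggml_compute_forward_mul (binary-ops.cpp),
// GGML_OP_SQRT to ggml_compute_forward_sqrt (unary-ops.cpp) on the CPU backend.
#include "ggml.h"
#include "ggml-cpu.h"

int main() {
    struct ggml_init_params ip = {
        /* .mem_size   = */ 16*1024*1024,
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    struct ggml_tensor * c = ggml_sqrt(ctx, ggml_mul(ctx, a, b));

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);
    ggml_graph_compute_with_ctx(ctx, gf, /* n_threads = */ 4);

    ggml_free(ctx);
    return 0;
}
```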