#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main(int argc, char **argv) {
    printf("GGML LBFGS example\n");

    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
    };
    struct ggml_context* ctx = ggml_init(params);

    // Simulate a sequence of 6 tokens, each with an embedding size of 4096
    // (32 heads * 128 dimensions per head). n_ctx_orig is the original
    // (training) context length and here it also doubles as the full
    // embedding size.
    int n_ctx_orig = 4096;
    int embd_dim = 128;
    int n_head = 32;
    int n_tokens = 6;

    // The Query matrix in this case holds 6 tokens, each with a dimension
    // of 4096.
    struct ggml_tensor* query = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ctx_orig, n_tokens);
    // Will trigger: GGML_ASSERT(false && "backwards pass not implemented") failed
    ggml_set_param(ctx, query);

    // We reshape the query matrix's embedding dimension to account for the
    // number of heads (32), each of which has a dimension of 128 (128 * 32 = 4096).
    struct ggml_tensor* a = ggml_reshape_3d(ctx, query, embd_dim, n_head, n_tokens);
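    // Note: the reshape is only a view of the same data (no copy); the shape
    // of a is now [128, 32, 6], that is [dims per head, heads, tokens].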
    ggml_set_name(a, "a");
    // Will trigger: GGML_ASSERT(false && "backwards pass not implemented") failed
    //ggml_set_param(ctx, a);

    // These are the token positions.
    struct ggml_tensor* pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
    ggml_set_name(pos, "pos");
    ggml_set_param(ctx, pos);

    // Set some made up values for the tensor to be rotated.
    // Loop over the tokens (6).
    for (int i = 0; i < a->ne[2]; i++) {
        // Loop over the heads (32).
        for (int j = 0; j < a->ne[1]; j++) {
            // Loop over the embedding dimensions per head (128).
            for (int k = 0; k < a->ne[0]; k++) {
                float value = 0.0f + k;
                ggml_set_f32_nd(a, k, j, i, 0, value);
            }
        }
    }

    // Print the first few dimensions so we can later see that a rotation is
    // being performed. In this case we print the first 10 embedding values for
    // the 2nd token (index 1). Token 0 is not used because position 0 has a
    // cosine value of 1 and a sine value of 0, so no rotation is performed for
    // that position.
    for (int i = 0; i < 10; i++) {
        printf("embedding for token 1, embedding dim %d: %f\n", i, ggml_get_f32_nd(a, i, 0, 1, 0));
    }

    // Set the positions manually (the b tensor parameter to ggml_rope_ext).
    for (int i = 0; i < pos->ne[0]; i++) {
        ggml_set_i32_1d(pos, i, i);
    }

    int mode = 0; // rope type 0 = normal (not NeoX)

    // The RoPE base frequency, the 10000 in theta_j = 10000^(-2j/d).
    float freq_base = 10000.0f;
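    // As a rough illustration (values not computed by this program), with
    // d = 128 the per-pair angles are:
    //   theta_0  = 10000^(-0/128)   = 1.0
    //   theta_1  = 10000^(-2/128)   ~ 0.866
    //   theta_63 = 10000^(-126/128) ~ 0.000115
    // and a token at position p is rotated by p * theta_j in each 2D pair.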

    // The RoPE frequency scale (1.0 = no context scaling).
    float freq_scale = 1.0f;

    // Attention scaling factor; this corresponds to mscale (magnitude scale)
    // in the YaRN paper.
    float attn_factor = 1.0f;

    // Extrapolation (mix) factor. If this is 0.0 then beta_fast and beta_slow
    // are not used.
    float ext_factor = 1.0f;

    // This is a YaRN parameter, called β (beta) in the YaRN paper. Dimensions
    // that complete at least this many rotations (32) over the original
    // context length are left uninterpolated (pure extrapolation).
    float beta_fast = 32.0f;

    // This is a YaRN parameter, called α (alpha) in the YaRN paper. Dimensions
    // that complete at most this many rotations (1) are fully interpolated.
    float beta_slow = 1.0f;
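    // Rough sketch of how beta_fast/beta_slow are used (my understanding of
    // YaRN's "NTK-by-parts" scheme): for dimension pair j the number of full
    // rotations over the original context is
    //   r(j) = n_ctx_orig / (2*pi * freq_base^(2j/d))
    // beta_fast and beta_slow are turned into a start/end correction dimension
    // (see ggml_rope_yarn_corr_dims) and ext_factor ramps between the
    // interpolated and extrapolated angles across that range.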

    // LongRoPE frequency factors (freq_factors/rope_scaling) are used with
    // certain models like Phi-3-mini-128k-instruct
    // (https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/blob/main/config.json#L27).
    struct ggml_tensor* freq_factors = NULL;
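    // As far as I can tell, when freq_factors is non-NULL it holds one value
    // per dimension pair and the rotation angle for pair j is divided by
    // freq_factors[j]. Here it is NULL so no per-dimension scaling is applied.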

    struct ggml_tensor* s = ggml_rope_ext(ctx,
            a,
            pos,
            freq_factors,
            embd_dim,
            mode,
            n_ctx_orig,
            freq_base,
            freq_scale,
            ext_factor,
            attn_factor,
            beta_fast,
            beta_slow);
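    // s has the same shape as a, [128, 32, 6]. With mode 0 (normal RoPE), each
    // consecutive pair of values along dimension 0 is rotated by an angle of
    // pos[token] * theta_j (this is my reading of the normal mode).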

    // Use the default parameters for the L-BFGS optimizer.
    struct ggml_opt_params opts = ggml_opt_default_params(GGML_OPT_TYPE_LBFGS);
    ggml_set_param(ctx, s);

    // Build a computation graph (with gradient tracking enabled) for s.
    struct ggml_cgraph * cgraph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
    ggml_build_forward_expand(cgraph, s);

    // Run the optimizer with s as the function to be minimized.
    ggml_opt(ctx, opts, s);

    // Run the forward pass using a single thread.
    ggml_graph_compute_with_ctx(ctx, cgraph, 1);
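    // Print the same slice of s after the computation so the rotated values
    // can be compared with the ones printed above. This is an illustrative
    // addition; it assumes the optimizer/backward step above did not assert.
    for (int i = 0; i < 10; i++) {
        printf("rotated embedding for token 1, embedding dim %d: %f\n", i, ggml_get_f32_nd(s, i, 0, 1, 0));
    }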

    //printf("a: n_elements: %ld\n", ggml_nelements(s));

    ggml_free(ctx);
    return 0;
}