opt-lbfgs.c
#include <stdio.h>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
int main(int argc, char **argv) {
    printf("GGML LBFGS example\n");

    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
    };
    struct ggml_context* ctx = ggml_init(params);
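    // Guard against a failed context allocation before using ctx.
    if (ctx == NULL) {
        fprintf(stderr, "failed to initialize ggml context\n");
        return 1;
    }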
    /* TODO: revisit this code after reading up on the new optimizer API

    // Simulate a sequence of 6 tokens, each with a full embedding size of
    // 4096 (32 heads, each with a head dimension of 128), and an original
    // training context length of 4096.
    int n_ctx_orig = 4096;
    int embd_dim = 128;
    int n_head = 32;
    int n_tokens = 6;

    // The Query matrix in this case holds the 6 tokens, each with a
    // dimension of 4096 (n_ctx_orig doubles as the embedding size here
    // since both happen to be 4096).
    struct ggml_tensor* query = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ctx_orig, n_tokens);
    // Will trigger: GGML_ASSERT(false && "backwards pass not implemented") failed
    ggml_set_param(ctx, query);

    // Reshape the query matrix embedding dimension to account for the number
    // of heads (32), each of which has a dimension of 128 (128 * 32 = 4096).
    struct ggml_tensor* a = ggml_reshape_3d(ctx, query, embd_dim, n_head, n_tokens);
    ggml_set_name(a, "a");
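    // The reshape only changes the view of the data, not the data itself:
    //   query: ne = [4096, 6]      (embedding dim, tokens)
    //   a:     ne = [128, 32, 6]   (head dim, heads, tokens)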
    // Will trigger: GGML_ASSERT(false && "backwards pass not implemented") failed
    //ggml_set_param(ctx, a);

    // These are the positions.
    struct ggml_tensor* pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
    ggml_set_name(pos, "pos");
    ggml_set_param(ctx, pos);
    // Set some made up values for the tensor to be rotated.
    for (int i = 0; i < a->ne[2]; i++) {
        // Loop over the embedding heads (32).
        for (int j = 0; j < a->ne[1]; j++) {
            // Loop over the embedding dimensions (128).
            for (int k = 0; k < a->ne[0]; k++) {
                float value = 0.0f + k;
                ggml_set_f32_nd(a, k, j, i, 0, value);
            }
        }
    }
    // Print a few of the first dimensions so we can see that there is a
    // rotation being performed. In this case we are printing the first 10
    // embedding dimensions for the 2nd token. I'm not using token 0 as its
    // position is 0, which gives a rotation angle of 0 (cosine 1, sine 0),
    // so no rotation is performed for that token's position embeddings.
    for (int i = 0; i < 10; i++) {
        printf("embedding for token 1, embedding dim %d: %f\n", i, ggml_get_f32_nd(a, i, 0, 1, 0));
    }
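    // Since every dimension k was filled with the value k above, this prints
    // the unrotated values 0.0 through 9.0.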
    // Set the positions manually (the b tensor parameter to ggml_rope_ext).
    for (int i = 0; i < pos->ne[0]; i++) {
        ggml_set_i32_1d(pos, i, i);
    }
    int mode = 0; // RoPE type 0 = normal

    // The RoPE base frequency b in the per-dimension frequency:
    //   θ_j = b^(-2j/d) = 10000^(-2j/d)
    float freq_base = 10000.0f;
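    // A worked example of the frequencies this gives (my arithmetic, values
    // approximate): with a head dimension of d = 128 the dimension pairs
    // j = 0..63 rotate at
    //   θ_0  = 10000^(-0/128)   = 1.0        (fastest rotation)
    //   θ_63 = 10000^(-126/128) ≈ 0.000116   (slowest rotation)
    // and the rotation angle for position p and pair j is p * θ_j.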
    // The RoPE frequency scale.
    float freq_scale = 1.0f;

    // TODO: What is this? It looks like this is mscale (magnitude scale).
    float attn_factor = 1.0f;

    // Extrapolation factor. If this is 0.0 then beta_fast and beta_slow
    // are not used.
    float ext_factor = 1.0f;
    // This is a YaRN parameter, named α (alpha) in the YaRN paper. It
    // specifies the number of rotations (32) that locates the embedding
    // dimension at which the interpolation ramp starts; dimensions that
    // rotate faster than this are extrapolated.
    float beta_fast = 32.0f;

    // This is a YaRN parameter which I think is named β (beta) in the YaRN
    // paper; it marks the other end of the ramp (1 rotation).
    float beta_slow = 1.0f;
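    // To see where that ramp lands for these settings, ggml provides
    // ggml_rope_yarn_corr_dims(). From my reading of the ggml source the
    // underlying formula is roughly
    //   dim(n_rot) = d * ln(n_ctx_orig / (n_rot * 2π)) / (2 * ln(freq_base))
    // which with d = 128, n_ctx_orig = 4096 and freq_base = 10000 gives
    // (my arithmetic, approximate):
    //   beta_fast = 32 -> dim ≈ 21 (dims below this are fully extrapolated)
    //   beta_slow = 1  -> dim ≈ 45 (dims above this are fully interpolated)
    float corr_dims[2];
    ggml_rope_yarn_corr_dims(embd_dim, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
    printf("YaRN correction dims: [%.1f, %.1f]\n", corr_dims[0], corr_dims[1]);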
    // LongRoPE frequency factors (freq_factors/rope_scaling) are used with
    // certain models like Phi-3-mini-128k-instruct
    // (https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/blob/main/config.json#L27).
    struct ggml_tensor* freq_factors = NULL;

    struct ggml_tensor* s = ggml_rope_ext(ctx,
        a,
        pos,
        freq_factors,
        embd_dim,
        mode,
        n_ctx_orig,
        freq_base,
        freq_scale,
        ext_factor,
        attn_factor,
        beta_fast,
        beta_slow);
    struct ggml_opt_params opts = ggml_opt_default_params(GGML_OPT_TYPE_LBFGS);
    ggml_set_param(ctx, s);

    struct ggml_cgraph* cgraph = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
    ggml_build_forward_expand(cgraph, s);

    ggml_opt(ctx, opts, s);
    ggml_graph_compute_with_ctx(ctx, cgraph, 1);
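    // A sketch of how the rotated values could be inspected after the graph
    // has been computed, mirroring the pre-rotation print above:
    for (int i = 0; i < 10; i++) {
        printf("rotated embedding for token 1, embedding dim %d: %f\n", i, ggml_get_f32_nd(s, i, 0, 1, 0));
    }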
    //printf("a: n_elements: %ld\n", ggml_nelements(s));
    */
    ggml_free(ctx);
    return 0;
}