-
Notifications
You must be signed in to change notification settings - Fork 885
Expand file tree
/
Copy pathsram_bench.m
More file actions
148 lines (127 loc) · 8.05 KB
/
sram_bench.m
File metadata and controls
148 lines (127 loc) · 8.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#import <Foundation/Foundation.h>
#import <objc/runtime.h>
#import <objc/message.h>
#import <dlfcn.h>
#import <mach/mach_time.h>
#import <IOSurface/IOSurface.h>
// Mach timebase ratio (numer/denom) used to scale absolute-time ticks.
// It is filled in by mach_timebase_info() before any timing is done.
static mach_timebase_info_data_t g_tb;

/// Converts a span of mach_absolute_time() ticks into milliseconds.
/// @param t Elapsed ticks.
/// @return The elapsed time in milliseconds, scaled by the ratio in g_tb.
static double ticksToMs(uint64_t t) {
    // Scale ticks by numer, then denom, then convert to milliseconds.
    double scaled = (double)t * g_tb.numer;
    scaled = scaled / g_tb.denom;
    return scaled / 1e6;
}
/// Builds an in-memory weight file holding a ch x ch matrix of random FP16 values.
///
/// Layout of the returned data:
///   - bytes   0..63   file header (byte 0 = 0x01, byte 4 = 0x02, rest zero)
///   - bytes  64..127  chunk header (EF BE AD DE, byte 4 = 0x01, byte 10 = 0x08, rest zero)
///   - bytes 128..     ch * ch 16-bit weights
///
/// Every weight has its exponent field fixed to 0x2000 and a random 10-bit mantissa,
/// i.e. a positive FP16 value in [2^-7, 2^-6). The sign bit is always clear.
///
/// @param ch Number of input and output channels.
/// @return The blob, or nil if the buffer could not be allocated.
static NSData *buildWeightBlob(int ch) {
    const NSUInteger headerBytes = 64;
    const NSUInteger chunkHeaderBytes = 64;
    NSUInteger count = (NSUInteger)ch * ch;
    NSUInteger wsize = count * 2;
    NSUInteger total = headerBytes + chunkHeaderBytes + wsize;

    uint8_t *buf = calloc(total, 1);
    if (buf == NULL) {
        // The largest configurations need well over 100 MB; fail cleanly
        // instead of writing through a NULL pointer.
        return nil;
    }

    buf[0] = 0x01;
    buf[4] = 0x02;

    uint8_t *chunk = buf + headerBytes;
    chunk[0] = 0xEF; chunk[1] = 0xBE; chunk[2] = 0xAD; chunk[3] = 0xDE;
    chunk[4] = 0x01;
    // NOTE(review): this byte is the same for every ch. If it belongs to a size
    // field it would only match one channel count — confirm against the blob format.
    chunk[10] = 0x08;

    // Fill the payload with random bits in one call, then force each element into
    // the fixed-exponent FP16 pattern.
    uint16_t *fp16 = (uint16_t *)(chunk + chunkHeaderBytes);
    arc4random_buf(fp16, wsize);
    for (NSUInteger j = 0; j < count; j++) {
        fp16[j] = (uint16_t)((fp16[j] & 0x03FF) | 0x2000);
    }

    // NSData takes ownership of buf and frees it on deallocation.
    return [NSData dataWithBytesNoCopy:buf length:total freeWhenDone:YES];
}
/// Produces the MIL program text for a single 1x1 convolution.
///
/// The generated `main` takes an fp32 tensor of shape [1, ch, 1, sp], casts it to
/// fp16, convolves it with a [ch, ch, 1, 1] weight read from
/// "@model_path/weights/weight.bin" at offset 64, and casts the result back to fp32.
///
/// @param ch Channel count (both input and output channels of the conv).
/// @param sp Size of the last tensor dimension.
/// @return The complete program text.
static NSString *genMIL(int ch, int sp) {
    // The same two shapes recur throughout the program; format them once.
    NSString *actShape = [NSString stringWithFormat:@"[1, %d, 1, %d]", ch, sp];
    NSString *weightShape = [NSString stringWithFormat:@"[%d, %d, 1, 1]", ch, ch];

    NSMutableString *mil = [NSMutableString string];

    // Program header with build metadata, then the function signature.
    [mil appendString:@"program(1.3)\n[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, {\"coremlc-version\", \"3505.4.1\"}, {\"coremltools-component-milinternal\", \"\"}, {\"coremltools-version\", \"9.0\"}})]\n{\n"];
    [mil appendFormat:@" func main<ios18>(tensor<fp32, %@> x) {\n", actShape];

    // Shape-independent constants consumed by the conv and the input cast.
    NSArray<NSString *> *constants = @[
        @" string c_pad_type = const()[name = string(\"c_pad_type\"), val = string(\"valid\")];\n",
        @" tensor<int32, [2]> c_strides = const()[name = string(\"c_strides\"), val = tensor<int32, [2]>([1, 1])];\n",
        @" tensor<int32, [4]> c_pad = const()[name = string(\"c_pad\"), val = tensor<int32, [4]>([0, 0, 0, 0])];\n",
        @" tensor<int32, [2]> c_dilations = const()[name = string(\"c_dilations\"), val = tensor<int32, [2]>([1, 1])];\n",
        @" int32 c_groups = const()[name = string(\"c_groups\"), val = int32(1)];\n",
        @" string to_fp16 = const()[name = string(\"to_fp16\"), val = string(\"fp16\")];\n",
    ];
    for (NSString *line in constants) {
        [mil appendString:line];
    }

    // fp32 -> fp16 cast, weight constant, conv, fp16 -> fp32 cast.
    [mil appendFormat:@" tensor<fp16, %@> x16 = cast(dtype = to_fp16, x = x)[name = string(\"cast_in\")];\n", actShape];
    [mil appendFormat:@" tensor<fp16, %@> W = const()[name = string(\"W\"), val = tensor<fp16, %@>(BLOBFILE(path = string(\"@model_path/weights/weight.bin\"), offset = uint64(64)))];\n", weightShape, weightShape];
    [mil appendFormat:@" tensor<fp16, %@> y16 = conv(dilations = c_dilations, groups = c_groups, pad = c_pad, pad_type = c_pad_type, strides = c_strides, weight = W, x = x16)[name = string(\"conv\")];\n", actShape];
    [mil appendString:@" string to_fp32 = const()[name = string(\"to_fp32\"), val = string(\"fp32\")];\n"];
    [mil appendFormat:@" tensor<fp32, %@> y = cast(dtype = to_fp32, x = y16)[name = string(\"cast_out\")];\n", actShape];

    [mil appendString:@" } -> (y);\n}\n"];
    return mil;
}
// Reports which stage of a benchmark run failed, with the NSError if one was returned.
static void benchLogFailure(const char *stage, int ch, int sp, NSError *err) {
    NSString *detail = err ? err.description : @"no error details";
    fprintf(stderr, "bench(%d, %d): %s failed: %s\n", ch, sp, stage, detail.UTF8String);
}

// Creates a one-row IOSurface of `bytes` bytes (1 byte per element); NULL on failure.
static IOSurfaceRef benchCreateSurface(NSUInteger bytes) {
    return IOSurfaceCreate((__bridge CFDictionaryRef)@{
        (id)kIOSurfaceWidth: @(bytes),
        (id)kIOSurfaceHeight: @1,
        (id)kIOSurfaceBytesPerElement: @1,
        (id)kIOSurfaceBytesPerRow: @(bytes),
        (id)kIOSurfaceAllocSize: @(bytes),
        (id)kIOSurfacePixelFormat: @0});
}

// Sends one evaluateWithQoS:options:request:error: to the model and returns its result.
static BOOL benchEvaluate(id model, unsigned int qos, id request, NSError **error) {
    return ((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
        model, @selector(evaluateWithQoS:options:request:error:), qos, @{}, request, error);
}

/// Compiles, loads and times a ch-channel 1x1 convolution over sp positions.
///
/// The model is built from genMIL() and buildWeightBlob(), evaluated 5 times as a
/// warm-up and then 30 times under the timer.
///
/// @param ch Channel count of the convolution.
/// @param sp Size of the last tensor dimension.
/// @return Mean milliseconds per evaluation, or a negative code on failure:
///   -1 invalid dimensions or the model inputs could not be built,
///   -2 descriptor creation failed,
///   -3 model creation failed (or it has no usable identifier),
///   -4 compile failed,
///   -5 load failed,
///   -6 IOSurface / request creation failed,
///   -7 an evaluation reported failure.
double bench(int ch, int sp) {
    @autoreleasepool {
        if (ch <= 0 || sp <= 0) return -1;

        // NOTE(review): 21 (0x15) is numerically QOS_CLASS_DEFAULT; presumably that is
        // what these private methods expect — confirm.
        const unsigned int qos = 21;
        NSError *e = nil;

        NSData *milData = [genMIL(ch, sp) dataUsingEncoding:NSUTF8StringEncoding];
        NSData *wb = buildWeightBlob(ch);
        // A nil value inside the dictionary literal below would throw, so stop here.
        if (!milData || !wb) return -1;

        // These classes are looked up at runtime; if they are missing the message
        // sends below return nil and are reported as -2 / -3.
        Class D = NSClassFromString(@"_ANEInMemoryModelDescriptor");
        Class I = NSClassFromString(@"_ANEInMemoryModel");
        Class AR = NSClassFromString(@"_ANERequest");
        Class AIO = NSClassFromString(@"_ANEIOSurfaceObject");

        id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
            D, @selector(modelWithMILText:weights:optionsPlist:),
            milData, @{@"@model_path/weights/weight.bin": @{@"offset": @0, @"data": wb}}, nil);
        if (!desc) return -2;

        id model = ((id(*)(Class,SEL,id))objc_msgSend)(
            I, @selector(inMemoryModelWithDescriptor:), desc);
        if (!model) return -3;

        // An empty identifier would make tmpDir the temporary directory itself, which
        // the cleanup at the end removes — refuse to continue in that case.
        NSString *hexId = ((id(*)(id,SEL))objc_msgSend)(model, @selector(hexStringIdentifier));
        if (hexId.length == 0) return -3;

        // Write the model files under a per-model temp directory (best effort).
        // Presumably the compile step reads them from here — TODO confirm.
        NSString *tmpDir = [NSTemporaryDirectory() stringByAppendingPathComponent:hexId];
        NSFileManager *fm = [NSFileManager defaultManager];
        [fm createDirectoryAtPath:[tmpDir stringByAppendingPathComponent:@"weights"]
            withIntermediateDirectories:YES attributes:nil error:nil];
        [milData writeToFile:[tmpDir stringByAppendingPathComponent:@"model.mil"] atomically:YES];
        [wb writeToFile:[tmpDir stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];

        if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                model, @selector(compileWithQoS:options:error:), qos, @{}, &e)) {
            benchLogFailure("compile", ch, sp, e);
            [fm removeItemAtPath:tmpDir error:nil];
            return -4;
        }
        if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
                model, @selector(loadWithQoS:options:error:), qos, @{}, &e)) {
            benchLogFailure("load", ch, sp, e);
            [fm removeItemAtPath:tmpDir error:nil];
            return -5;
        }

        // From here on the model is loaded: every path must fall through to the
        // unload / release / cleanup at the bottom.
        double result = -6;

        // One fp32 value per (channel, position); computed in NSUInteger to avoid
        // int overflow.
        NSUInteger bytes = (NSUInteger)ch * (NSUInteger)sp * 4;
        IOSurfaceRef ioIn = benchCreateSurface(bytes);
        IOSurfaceRef ioOut = benchCreateSurface(bytes);

        id wIn = ioIn ? ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                            AIO, @selector(objectWithIOSurface:), ioIn) : nil;
        id wOut = ioOut ? ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
                              AIO, @selector(objectWithIOSurface:), ioOut) : nil;
        id req = nil;
        if (wIn && wOut) {
            req = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(AR,
                @selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
                @[wIn], @[@0], @[wOut], @[@0], nil, nil, @0);
        }

        if (!req) {
            benchLogFailure("IOSurface/request setup", ch, sp, nil);
        } else {
            // Warm-up runs; stop at the first failure since timing would be meaningless.
            BOOL ok = YES;
            for (int i = 0; i < 5 && ok; i++) {
                ok = benchEvaluate(model, qos, req, &e);
            }

            if (ok) {
                const int iters = 30;
                uint64_t t0 = mach_absolute_time();
                for (int i = 0; i < iters; i++) {
                    // The call is on the left so it always runs; failures only
                    // latch `ok`.
                    ok = benchEvaluate(model, qos, req, &e) && ok;
                }
                double ms = ticksToMs(mach_absolute_time() - t0) / iters;
                result = ok ? ms : -7;
            } else {
                result = -7;
            }
            if (!ok) benchLogFailure("evaluate", ch, sp, e);
        }

        ((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
            model, @selector(unloadWithQoS:error:), qos, &e);
        if (ioIn) CFRelease(ioIn);
        if (ioOut) CFRelease(ioOut);
        [fm removeItemAtPath:tmpDir error:nil];
        return result;
    }
}
/// Runs the 1x1-conv benchmark over a fixed list of (channels, spatial) sizes and
/// prints one table row per configuration: weight size, activation size, total
/// working set, time per evaluation and throughput.
int main() {
    mach_timebase_info(&g_tb);

    // The benchmark looks its classes up by name at runtime, so the framework must be
    // loaded first. A failed load is only reported here; each row then shows FAIL
    // through the normal path.
    if (!dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW)) {
        const char *why = dlerror();
        fprintf(stderr, "warning: could not load AppleNeuralEngine.framework: %s\n",
                why ? why : "unknown error");
    }

    printf("=== ANE SRAM Probe: 1x1 Conv with Increasing Weight Size ===\n\n");
    printf("%-25s %8s %8s %8s %10s %8s\n", "Config", "W (MB)", "Act(MB)", "Tot(MB)", "ms/eval", "TFLOPS");
    printf("--------------------------------------------------------------------------\n");

    typedef struct { int ch; int sp; } S;
    S sizes[] = {{256,64},{512,64},{1024,64},{2048,64},{3072,64},{4096,64},{5120,64},{6144,64},{8192,32}};
    // Derive the count from the table so adding or removing a row cannot desync the loop.
    const size_t count = sizeof(sizes) / sizeof(sizes[0]);

    for (size_t i = 0; i < count; i++) {
        int ch = sizes[i].ch, sp = sizes[i].sp;
        double w_mb = (double)ch * ch * 2 / 1024 / 1024; // FP16 weights
        double a_mb = (double)ch * sp * 2 / 1024 / 1024; // FP16 activations
        double tot = w_mb + 2 * a_mb;                    // weights + input + output
        double gflop = 2.0 * ch * ch * sp / 1e9;         // 2 ops per multiply-accumulate
        double ms = bench(ch, sp);

        char label[64];
        snprintf(label, sizeof(label), "%dch x %dsp", ch, sp);
        if (ms > 0) {
            // GFLOP per millisecond is numerically TFLOP per second.
            double tflops = gflop / ms;
            printf("%-25s %7.1f %7.2f %7.1f %8.3f ms %7.2f\n", label, w_mb, a_mb, tot, ms, tflops);
        } else {
            // bench() reports failures as non-positive codes.
            printf("%-25s %7.1f %7.2f %7.1f FAIL(%.0f)\n", label, w_mb, a_mb, tot, ms);
        }
    }
    printf("\nLook for the performance cliff to estimate SRAM size.\n");
    return 0;
}