diff --git a/docs/llamacpp_ops.md b/docs/llamacpp_ops.md index 0cce6279d..49dac2b7a 100644 --- a/docs/llamacpp_ops.md +++ b/docs/llamacpp_ops.md @@ -51,6 +51,7 @@ | SOFTCAP | ✅ | | SQR | ✅ | | SQRT | ✅ | +| SSM_CONV | ✅ | | STEP | ✅ | | SUB | ✅ | | SUM_ROWS | ✅ | @@ -70,7 +71,6 @@ | MUL_MAT | 🟡 | | MUL_MAT_ID | 🟡 | | OPT_STEP_ADAMW | 🟡 | -| OUT_PROD | 🟡 | | PAD | 🟡 | | POOL_2D | 🟡 | | REPEAT_BACK | 🟡 | @@ -85,10 +85,10 @@ | SILU_BACK | 🟡 | | SOFT_MAX | 🟡 | | SOFT_MAX_BACK | 🟡 | -| SSM_CONV | 🟡 | | SSM_SCAN | 🟡 | | SUM | 🟡 | | UPSCALE | 🟡 | +| OUT_PROD | ❌ | Tips: ✅ supported, 🟡 partially supported, ❌ fail, ❓ unsupported, 🔍 unknown @@ -96,125 +96,72 @@ Tips: ✅ supported, 🟡 partially supported, ❌ fail, ❓ unsupported, 🔍 u | Operator | Previous | Current | | --- | --- | --- | -| CONV_TRANSPOSE_1D | partial (🟡) | supported (✅) | +| OUT_PROD | partial (🟡) | fail (❌) | +| SSM_CONV | partial (🟡) | supported (✅) | -#### CONV_TRANSPOSE_1D log (partial -> supported) +#### OUT_PROD log (partial -> fail) ```text - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1): OK - CONV_TRANSPOSE_1D(ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1): OK - 116/116 tests passed +new_pool_for_device: device 0 use vmm pool +Testing 2 devices + +Backend 1/2: CANN0 + Device description: Ascend910B1 + Device memory: 62420 MB (62055 MB free) + +/__w/Ascend-CI/Ascend-CI/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp:3723: GGML_ASSERT(dst->ne[0] == nr) failed +libggml-base.so.0(+0x151a4)[0xffff9e6951a4] +libggml-base.so.0(ggml_print_backtrace+0x21c)[0xffff9e69565c] +libggml-base.so.0(ggml_abort+0x134)[0xffff9e695824] +libggml-cann.so.0(_Z18ggml_cann_ssm_convR25ggml_backend_cann_contextP11ggml_tensor+0x414)[0xffff9e051a84] +libggml-cann.so.0(+0x26820)[0xffff9e056820] +libggml-cann.so.0(+0x27358)[0xffff9e057358] +libggml-base.so.0(ggml_backend_graph_compute+0x14)[0xffff9e6ab184] +libggml-base.so.0(ggml_backend_compare_graph_backend+0x170)[0xffff9e6afc70] +./test-backend-ops(+0x80f7c)[0xaaaac70c0f7c] +./test-backend-ops(+0x36618)[0xaaaac7076618] +./test-backend-ops(+0x16a10)[0xaaaac7056a10] +/lib/aarch64-linux-gnu/libc.so.6(+0x273fc)[0xffff9e1f73fc] +/lib/aarch64-linux-gnu/libc.so.6(__libc_start_main+0x98)[0xffff9e1f74cc] +./test-backend-ops(+0x180f0)[0xaaaac70580f0] +``` + +#### SSM_CONV log (partial -> supported) +```text +new_pool_for_device: device 0 use vmm pool +Testing 2 devices + +Backend 1/2: CANN0 + Device description: Ascend910B1 + Device memory: 62420 MB (62051 MB free) + + SSM_CONV(type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]): OK + SSM_CONV(type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]): OK + 27/27 tests passed Backend CANN0: OK Backend 2/2: CPU Skipping CPU backend