|
| 1 | +extern crate ndarray; |
| 2 | + |
| 3 | +use ndarray::prelude::*; |
| 4 | + |
| 5 | +use blas_mock_tests::CALL_COUNT; |
| 6 | +use ndarray::linalg::general_mat_mul; |
| 7 | +use ndarray::Order; |
| 8 | +use ndarray_gen::array_builder::ArrayBuilder; |
| 9 | + |
| 10 | +use itertools::iproduct; |
| 11 | + |
/// Checks that `general_mat_mul` goes through BLAS exactly when the strides of
/// its operands allow it.
///
/// For every combination of matrix size `(m, k, n)`, slicing steps `(s1, s2)`
/// and memory order of the three arrays, the test computes `c = a * b` on
/// (possibly sliced) views and compares the change in `CALL_COUNT` with the
/// expectation derived from the strides of the views actually passed in.
#[test]
fn test_gen_mat_mul_uses_blas()
{
    /// Returns `true` when every stride is positive and at least one axis has
    /// unit stride; this is the layout condition the test expects to lead to a
    /// BLAS call.
    fn blas_compatible(strides: &[isize]) -> bool
    {
        strides.iter().all(|&s| s > 0) && strides.iter().any(|&s| s == 1)
    }

    let alpha = 1.0;
    let beta = 0.0;

    // (m, k, n) triples: square, axes of length 1, and assorted uneven shapes.
    // A plain array is enough here; the table is only iterated by reference.
    let sizes = [
        (8, 8, 8),
        (10, 10, 10),
        (8, 8, 1),
        (1, 10, 10),
        (10, 1, 10),
        (10, 10, 1),
        (1, 10, 1),
        (10, 1, 1),
        (1, 1, 10),
        (4, 17, 3),
        (17, 3, 22),
        (19, 18, 2),
        (16, 17, 15),
        (15, 16, 17),
        (67, 63, 62),
    ];
    // Steps used when slicing the operands (includes negative steps).
    let strides = &[1, 2, -1, -2];
    let cf_order = [Order::C, Order::F];

    // test different strides and memory orders
    for &(m, k, n) in &sizes {
        for (&s1, &s2) in iproduct!(strides, strides) {
            for (ord1, ord2, ord3) in iproduct!(cf_order, cf_order, cf_order) {
                println!("Case s1={}, s2={}, orders={:?}, {:?}, {:?}", s1, s2, ord1, ord2, ord3);

                let a = ArrayBuilder::new((m, k)).memory_order(ord1).build();
                let b = ArrayBuilder::new((k, n)).memory_order(ord2).build();
                let mut c = ArrayBuilder::new((m, n)).memory_order(ord3).build();

                let (av, bv, mut cv) = if s1 != 1 || s2 != 1 {
                    // The shared inner axis (k) is stepped by s2 in both `a` and `b`, and the
                    // output axes reuse s1 / s2, so the sliced shapes stay compatible.
                    (
                        a.slice(s![..;s1, ..;s2]),
                        b.slice(s![..;s2, ..;s2]),
                        c.slice_mut(s![..;s1, ..;s2]),
                    )
                } else {
                    // different stride cases for slicing versus not sliced (for axes of
                    // len=1); so test not sliced here.
                    (a.view(), b.view(), c.view_mut())
                };

                // Count BLAS calls made by this one multiplication.
                let pre_count = CALL_COUNT.with(|ctx| *ctx.borrow());
                general_mat_mul(alpha, &av, &bv, beta, &mut cv);
                let after_count = CALL_COUNT.with(|ctx| *ctx.borrow());
                let ncalls = after_count - pre_count;
                // `assert!` rather than `debug_assert!`, so the check also runs when the
                // tests are built with optimizations.
                assert!(ncalls <= 1, "Expected at most one blas call per product, got {}", ncalls);

                let always_uses_blas = s1 == 1 && s2 == 1;

                if always_uses_blas {
                    assert_eq!(ncalls, 1, "Contiguous arrays should use blas, orders={:?}", (ord1, ord2, ord3));
                }

                let should_use_blas =
                    blas_compatible(av.strides()) && blas_compatible(bv.strides()) && blas_compatible(cv.strides());
                assert_eq!(
                    should_use_blas,
                    ncalls > 0,
                    "Unexpected blas usage for sizes={:?}, s1={}, s2={}, orders={:?}",
                    (m, k, n),
                    s1,
                    s2,
                    (ord1, ord2, ord3)
                );
            }
        }
    }
}
0 commit comments