diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..851d8878 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ + +stenglib/Fast/fsetop.mexw64 +stenglib/Fast/fsparse.mexw64 +stenglib/Tensor/tprod.mexw64 +stenglib/Tensor/tsum.mexw64 diff --git a/startup.m b/startup.m index 0f64f255..8c3b0716 100644 --- a/startup.m +++ b/startup.m @@ -8,4 +8,9 @@ % path to urdme/ folders addpath(genpath([link 'urdme/'])); -addpath(genpath([link 'workflows/'])); \ No newline at end of file +addpath(genpath([link 'workflows/'])); + +% Johannes Dufva 2020-10-30 +% path to other functions used +run('stenglib/make_all.m') +run('stenglib/startup.m') \ No newline at end of file diff --git a/stenglib/Fast/clenshaw.m b/stenglib/Fast/clenshaw.m new file mode 100644 index 00000000..6aaafbac --- /dev/null +++ b/stenglib/Fast/clenshaw.m @@ -0,0 +1,43 @@ +function Y = clenshaw(A,B,C) +%CLENSHAW Evaluation of 3-term recurrences. +% Y = CLENSHAW(A,B) evaluates the 3-term recurrence as defined by the +% matrices A and B. +% +% Let [M,N] be MAX(SIZE(A),SIZE(B)). Then the output Y is M-by-N and, for +% 1 <= j <= N, the recurrence is defined by Y(1,j) = A(1,j)+B(1,j), Y(2,j) +% = A(2,j)*Y(1,j)+B(2,j) and, for i > 2, Y(i,j) = +% A(i,j)Y(i-1,j)+B(i,j)Y(i-2,j). +% +% The dimensions of the matrices A and B must either match or be +% singletons. Singleton dimensions are as usual silently repeated. +% +% Y = CLENSHAW(A,B,C) evaluates the 3-term recurrence defined by A and B, +% multiplies it by the coefficients C and computes the sum of the result +% by means of Clenshaws summation formula. C is M-by-K-by-N and the result +% is K-by-N. The first dimension of C must be M while the second and third +% dimensions of C can be singletons. +% +% Examples: +% % Fibonacci numbers +% n = 10; clenshaw(1,[0; 0; ones(n-2,1)])' +% +% % Legendre polynomials +% x = linspace(-1,1); n = 4; j = [0.5 1:n]'; +% A = (2*j-1)./j*x; +% B = (1-j)./j; +% y = clenshaw(A,B); +% figure, plot(x,y); +% +% % two functions defined by Chebyshev coefficients +% x = linspace(-1,1,50); +% n = 4; j = [0; 1; 2*ones(n-1,1)]; +% A = j*x; B = [1; 0; -ones(n-1,1)]; +% C = [[1 0.75 0.5 0.25 0.125]' [1 -1.75 1.5 -1.25 2.125]']; +% figure, plot(x,clenshaw(A,B,C)); +% +% See also FILTER. + +% S. Engblom 2007-04-19 (Major revision) +% S. Engblom 2005-07-28 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/clenshaw.mexw64 b/stenglib/Fast/clenshaw.mexw64 new file mode 100644 index 00000000..6a81209f Binary files /dev/null and b/stenglib/Fast/clenshaw.mexw64 differ diff --git a/stenglib/Fast/frepmat.m b/stenglib/Fast/frepmat.m new file mode 100644 index 00000000..78c01692 --- /dev/null +++ b/stenglib/Fast/frepmat.m @@ -0,0 +1,61 @@ +function B = frepmat(A,rep) +%FREPMAT Fast replication of array. +% FREPMAT does the same job as the MATLAB-function REPMAT but runs +% faster since the main part of the code is written in C. +% +% B = FREPMAT(A,[M N]) creates a matrix B consisting of M-by-N +% copies of A. +% +% In general, B = FREPMAT(A,REP) repeats A in the first dimension +% REP(1) times, in the second dimension REP(2) times and so on for +% all of the sizes in REP. +% +% Unlike REPMAT, missing sizes in REP are consistently defined to be +% 1. This means that if m is a scalar, then FREPMAT(A,m) does not +% produce the same result as REPMAT(A,m) (which is defined to be +% REPMAT(A,[m m])). Instead, FREPMAT(A,m) produces the same result +% as would REPMAT(A,[m 1]). Also, the traditional syntax +% FREPMAT(A,M,N,...) is not supported. +% +% Examples: +% a = frepmat(1,1000); % same as ones(1000,1) +% b = frepmat(int8(1),[100 100]); % same as ones(100,100,'int8') +% +% s = sprand(10,10,0.5); +% a1 = frepmat(s,[100 10]); [nnz(a1) nzmax(a1)] +% a2 = repmat(s,[100 10]); [nnz(a2) nzmax(a2)] % overallocation +% +% See also REPMAT. + +% - For doubles (real or complex), FREPMAT is about 15-20% faster +% than REPMAT. +% +% - For other numerical types (single precision floats, integers, +% characters and logicals), FREPMAT is about 50% faster than REPMAT. +% +% - For sparse matrices (real, complex or logical), FREPMAT is about +% 85-95% faster than REPMAT and allocates much less memory. +% +% - For other data-types, such as cell-, structure- and +% function-arrays, FREPMAT is about 10% faster than REPMAT. + +% S. Engblom 2004-10-28 + +if isnumeric(A) || ischar(A) || islogical(A) + % runs fast + B = mexfrepmat(A,rep); +else + % general code using the traditional 'indexing' style + rep = rep(:); + lenrep = size(rep,1); + len = max(lenrep,tndims(A)); + sizA = int32(tsize(A,1:len)); + rep = [rep; ones(len-lenrep,1)]; + + % while building the indices the speed of mexfrepmat is exploited + ix = cell(1,len); + for i = 1:len + ix{i} = mexfrepmat([1:sizA(i)]',rep(i)); + end + B = A(ix{:}); +end diff --git a/stenglib/Fast/fsetop.m b/stenglib/Fast/fsetop.m new file mode 100644 index 00000000..c3d61232 --- /dev/null +++ b/stenglib/Fast/fsetop.m @@ -0,0 +1,102 @@ +function [c,ia,ib] = fsetop(op,a,b) +%FSETOP Fast set operations based on hashing. +% FSETOP(OP,A,B) performs the set operation OP on the columns of the +% arrays A and B. The main differences from the set operations +% implemented in MATLAB are: +% - FSETOP runs faster because it is based on hashing instead of on +% sorting. +% +% - In FSETOP, the order of the elements in the input arrays are +% retained in the output array, in contrast to the corresponding +% MATLAB functions where the result is sorted. This means that all +% indices for new elements are strictly increasing (except for IB +% in the INTERSECT operation and for JA and JB in the UNION +% operation). Also, in case of duplications, the first element +% (lowest column number) is selected. +% +% - FSETOP always uses the columns of its inputs as elements, like the +% 'rows' switch in the MATLAB commands, but for columns instead of +% rows. In particular, column vectors are not special cases, they are +% treated as single elements. +% +% - FSETOP supports the following data types: double, single, char, +% logicals, int8, uint8, int16, uint16, int32, uint32, int64, uint64 +% and cell-arrays containing any of the above data types. +% +% Cell-arrays are treated as vectors and all output indices refer to a +% linear ordering. The output C is always a row cell-vector and cells +% containing the same data but of different types or shapes are +% considered unequal. +% +% C = FSETOP('check',A) computes hash-values for each column in A. The +% output C is a row-vector containing uint32-type integers. This is not +% a set-operation but is provided as a useful tool for computing +% check-sums for large data-sets in a uniform way. +% +% [C,IA,IB] = FSETOP('intersect',A,B) returns the columns common to +% both A and B. C = A(:,IA) = B(:,IB). +% +% [C,IA] = FSETOP('setdiff',A,B) returns the columns in A that are +% not in B. C = A(:,IA). +% +% [C,IA,IB] = FSETOP('setxor',A,B) returns the columns that are not +% in the intersection of A and B. C = [A(:,IA) B(:,IB)]. +% +% [C,IA,IB,JA,JB] = FSETOP('union',A,B) returns the combined columns +% from A and B but with no repetitions. C = [A(:,IA) B(:,IB)] and A +% = C(:,JA), B = C(:,JB). Note that the outputs JA and JB are not +% produced by the MATLAB-function UNION. +% +% [B,IA,IB] = FSETOP('unique',A) returns the same columns as in A +% but with no repetitions. B = A(:,IA) and A = B(:,IB). +% +% [IA,IB] = FSETOP('ismember',A,B) returns a logical vector IA +% containing 1 where the columns of A are also columns of B and 0 +% otherwise. IB contains the index in B of each column in A and zero if +% no such index exists. A(:,IA) = B(:,IB(IA)). +% +% Note: two elements are considered equal if and only if their +% bitwise representations are identical. This can sometimes give +% unexpected results. For example, with doubles, -0.0 ~= 0.0 and NaN +% == NaN. +% +% Examples: +% % intersection of integers +% a = int8(ceil(3*rand(2,10))), b = int8(ceil(3*rand(2,10))) +% aandb = fsetop('intersect',a,b) +% +% % unique strings in cell-array +% strs = {'foobar' 'foo' 'foobar' 'bar' 'barfoo'} +% strsunq = fsetop('unique',strs) +% +% % find indices ix in b = a(ix) +% a = randperm(6), b = ceil(6*rand(1,6)) +% [foo,ix] = fsetop('ismember',b,a) +% +% % remove indices from set of indices +% a = {[1 2 4] [2 3 5 6] [4 2 1] [2 3 5 6]' int32([1 2 4])}; +% b = {[4 2 1]}; +% c = fsetop('setdiff',a,b) +% +% % union of structs +% a = struct('foo',1,'bar',NaN,'foobar','hello') +% b = struct('faa',2,'bar',Inf,'foobar','goodbye') +% [cf,ia,ib] = fsetop('union',fieldnames(a),fieldnames(b)); +% ac = struct2cell(a); bc = struct2cell(b); +% c = cell2struct([ac(ia); bc(ib)],cf) +% +% % a single 32-bit checksum from strings +% s = {'check','intersect','setdiff','setxor', ... +% 'union','unique','ismember'}; +% c = fsetop('check',fsetop('check',s)') +% +% See also INTERSECT, SETDIFF, SETXOR, UNION, UNIQUE, ISMEMBER. + +% S. Engblom 2010-02-09 (Revision) +% S. Engblom 2006-06-13 (Major revision) +% S. Engblom 2005-06-21 +% Based on a concept by P-O Persson, COMSOL AB. The internal hash +% function used is based on a code by P. Hsieh, see +% http://www.azillionmonkeys.com/qed/hash.html. + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/fsetop.mexw64 b/stenglib/Fast/fsetop.mexw64 new file mode 100644 index 00000000..db152a9a Binary files /dev/null and b/stenglib/Fast/fsetop.mexw64 differ diff --git a/stenglib/Fast/fsparse.m b/stenglib/Fast/fsparse.m new file mode 100644 index 00000000..2c01914b --- /dev/null +++ b/stenglib/Fast/fsparse.m @@ -0,0 +1,145 @@ +function S = fsparse(ii,jj,ss,siz,flag,nthreads) +%FSPARSE Fast assembly of sparse matrix. +% FSPARSE does the same job as the MATLAB-function SPARSE but runs faster +% since the assembly is based on indirect addressing and on hashing rather +% than on sorting. FSPARSE also in general allocates the sparse matrix +% exactly, that is, the resulting sparse matrix S satisfies NNZ(S) = +% NZMAX(S). In addition, FSPARSE supports an efficient and powerful +% 'assembly' syntax which makes it easy to create sparse banded matrices +% or sparse matrices arising in finite difference computations. +% +% S = FSPARSE(X), where X is a full or sparse matrix, constructs a sparse +% matrix S by squeezing out any zero elements. +% +% S = FSPARSE(II,JJ,SS) creates a sparse matrix S from triplet data +% (II,JJ,SS). The inputs are all matrices with either matching or +% singleton dimensions according to certain rules (see below). For +% instance, II may be 4-by-10, JJ 1-by-10 and SS 4-by-1. +% +% The result is a sparse matrix formally satisfying the relation +% S(II(i,j),JJ(i,j)) = SS(i,j), except of course for singleton +% dimensions. In the example above for instance, we have the relation +% S(II(i,j),JJ(1,j)) = SS(i,1). Repeated indices are as usual summed +% together. +% +% If II is IM-by-IN, JJ JM-by-JN and SS SM-by-SN, then it is required that +% (1) IN = JN or 1, (2) JM = IM or 1, (3) SM = IM or 1 and (4) SN = JN or +% 1. +% +% S = FSPARSE(II,JJ,SS,[M N Nzmax]) also specifies the dimensions of the +% output. All arguments are optional and when they are omitted M = +% MAX(II), N = MAX(JJ) and Nzmax = NNZ(S) are used as defaults. +% +% S = FSPARSE(II,JJ,SS,SIZ,'nosort') produces a sparse matrix S +% where the columns are not sorted with respect to row-indices (if +% the input is sorted with respect to II, then the output is in +% order anyway). This runs faster and the resulting unordered matrix +% can be used in many, but not all, situations in +% MATLAB. Cautionary: on some platforms an unordered sparse matrix +% causes MATLAB to crash when performing certain operations. +% +% One or both of the index-matrices II and JJ may be integers instead of +% doubles. This is faster still since no typecast or deep copy is +% needed. An 'integer' is the type which corresponds to the C declaration +% 'int' on the platform; -- on most modern platforms this is int32. +% +% Differences between FSPARSE and SPARSE: +% - FSPARSE generally allocates the sparse matrix so that it fits +% exactly. However, zero elements resulting from cancellation or from +% zero data causes a slight over-allocation. Thus, both FSPARSE([1 1],[1 +% 1],[1 -1]) and FSPARSE(1,1,0) allocates space for one entry. +% +% - The call S = FSPARSE(X), where X is sparse, always makes a deep copy +% of the sparse matrix so that NNZ(S) = NZMAX(S). +% +% - Integer indices are not supported by SPARSE. +% +% - According to the specification above, the call FSPARSE(1:n,1,1) is +% not allowed. The corrected version of this example is simply +% FSPARSE([1:n]',1,1) where the row-index is associated with the first +% dimension of the input matrix. +% +% - Logical sparse matrices cannot be created by FSPARSE. Use logical +% operations for this purpose. +% +% - SPARSE allows the input (II,JJ,SS) itself to be sparse or even +% logical sparse. This syntax is not supported by FSPARSE. +% +% Examples: +% N = 10; +% S1 = fsparse(1:N,1:N,1); % same as speye(N) +% S2 = fsparse([1:N]',1:N,1); % same as sparse(ones(N)); +% +% % sparse matrix from triplet +% ii = ceil(4*rand(1,N)); +% jj = ceil(4*rand(1,N)); +% ss = rand(1,N); +% S3 = fsparse(ii,jj,ss); [nnz(S3) nzmax(S3)] +% s3 = sparse(ii,jj,ss); [nnz(s3) nzmax(s3)] % over-allocation +% +% % 3-point stencil +% S4 = fsparse([2:N-1]',[1:N-2; 2:N-1; 3:N]',[1 -2 1],[N N]); +% +% % 5-point stencil on an N-by-N grid +% N2 = N*N; ix = reshape(1:N2,N,N); +% ix([1 N],:) = []; ix(:,[1 N]) = []; ix = ix(:); +% S5 = fsparse(ix,[ix-N ix-1 ix ix+1 ix+N],[1 1 -4 1 1],[N2 N2]); +% +% % band-matrix +% B = rand(3,N); B(end,1) = 0; B(1,end) = 0; +% % same as spdiags(B',[-1 0 1],N,N): +% S6 = fsparse([[2:N 1]; [1:N]; [N 1:N-1]],1:N,B); +% +% % circulant matrix +% jj = 1+mod(cumsum([0:3; ones(N-1,4)]),N); +% S7 = fsparse([1:N]',jj,1:4); +% +% See also SPARSE, NNZ, NZMAX, SPDIAGS, FIND. + + +% Hidden argument nthreads and OpenMP-support +% +% The full call currently supported is actually +% S = FSPARSE(II,JJ,SS,SIZ,[] | 'sort' | 'nosort',nthreads) +% with nthreads an integer >= 1. The default value is +% omp_get_max_threads(). The value of nthreads is remembered until +% the mex-file is cleared from memory: +% S1 = fsparse(...,4); % use 4 threads +% S2 = fsparse(...); % continues to use 4 threads +% clear all; +% S3 = fsparse(...); % uses omp_get_max_threads() +% The threaded version is a beta-release and is only supported for +% a subset of the syntaxes covered by FSPARSE. Also, compilation +% must be done under OpenMP, see MAKE. + +% Hidden output timing +% +% With #define FSPARSE_TIME an extra output time vector is supported +% as follows: +% [S,t] = FSPARSE(...) +% returns an 1-by-6 vector containing timings of the internal +% phases of FSPARSE. +% t(1) getix()-function +% t(2) Part 1 +% t(3) Part 2 +% t(4) Part 3 +% t(5) Part 4 +% t(6) sparse_insert() +% The suppport for timing is a beta-release and is only supported +% for a subset of the syntaxes covered by FSPARSE. See MAKE. + +% Early performance observations for the serial version +% +% In general, when the input indices and values are of the same +% sizes, FSPARSE is about 30-70% faster than SPARSE depending on the +% number of colliding indices, the usage of integer indices and the +% 'nosort' option. When converting from full to sparse storage +% format, FSPARSE is about 50% faster than MATLAB. Finally, for the +% 'assembly' syntax, FSPARSE is about 50-60% faster than any +% equivalent construction in MATLAB. + +% S. Engblom 2013-12-03 (Revision, OpenMP added, hidden argument nthreads). +% S. Engblom 2010-01-13 (Minor revision, caution with 'nosort' added) +% S. Engblom 2004-11-12 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/fsparse.mexw64 b/stenglib/Fast/fsparse.mexw64 new file mode 100644 index 00000000..da844630 Binary files /dev/null and b/stenglib/Fast/fsparse.mexw64 differ diff --git a/stenglib/Fast/mexfrepmat.mexw64 b/stenglib/Fast/mexfrepmat.mexw64 new file mode 100644 index 00000000..b7a7de27 Binary files /dev/null and b/stenglib/Fast/mexfrepmat.mexw64 differ diff --git a/stenglib/Fast/powerseries.m b/stenglib/Fast/powerseries.m new file mode 100644 index 00000000..7ca9702b --- /dev/null +++ b/stenglib/Fast/powerseries.m @@ -0,0 +1,25 @@ +y = powerseries(c,x,tol) +%POWERSERIES Sum power series. +% Y = POWERSERIES(C,X,tol) evaluates the power series with coefficients C +% at points X where both C and X are vectors. The output Y is the same +% size as X and contains Y(i) = sum_(j >= 1) C(j)*X(i)^(j-1). The sum is +% truncated when the last contribution is relatively less than tol. +% +% Note: use +% warning('off','powerseries:w1'); +% to turn the warning for inaccurate evaluation off. As an +% alternative, you may explicitly set C(end) = 0. +% +% Example: +% % the complex exponential function +% c = 1./cumprod([1 1:10]); +% x = complex(linspace(-2,3),linspace(-4,6)); +% y = powerseries(c,x,0.5e-7); +% figure, plot(real(x),real(y),'b',real(x),real(exp(x)),'r'); +% legend('Series','Exp()'); + +% S. Engblom 2007-08-17 (Minor revision) +% S. Engblom 2007-06-13 (Minor revision) +% S. Engblom 2007-01-22 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/powerseries.mexw64 b/stenglib/Fast/powerseries.mexw64 new file mode 100644 index 00000000..178e1943 Binary files /dev/null and b/stenglib/Fast/powerseries.mexw64 differ diff --git a/stenglib/Fast/source/clenshaw.c b/stenglib/Fast/source/clenshaw.c new file mode 100644 index 00000000..a37c5015 --- /dev/null +++ b/stenglib/Fast/source/clenshaw.c @@ -0,0 +1,392 @@ +/* clenshaw.c */ +/* S. Engblom 2007-04-19 (Revision) */ +/* S. Engblom 2005-07-28 */ + +#include + +#include "mex.h" +#include "matrix.h" + +/* forward declarations */ +void SizesAndType(const mxArray **A, + int *dimM,int *dimN, + mxComplexity *type); + +void recurrence(double *prY,double *piY,int M,int N, + const mxArray *A,const mxArray *B); +void clenshaw(double *prY,double *piY,int M,int N,int KC,int NC, + const mxArray *A,const mxArray *B, + const double *prC,const double *piC); + +#define ISDOUBLEMATRIX(A) (mxIsDouble(A) && !mxIsSparse(A) && \ + mxGetNumberOfDimensions(A) == 2) +#define ISDOUBLETENSOR(A) (mxIsDouble(A) && !mxIsSparse(A)) + +/*------------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* syntax */ + if (nrhs < 2 || 3 < nrhs || nlhs > 1) + mexErrMsgIdAndTxt("clenshaw:e1","Expecting 2 or 3 inputs " + "and one output."); + + /* 'recurrence'-syntax */ + if (nrhs == 2) { + /* chech input and allocate output */ + int dimM,dimN; + mxComplexity type; + SizesAndType(prhs,&dimM,&dimN,&type); + + /* allocate and evaluate recurrence */ + plhs[0] = mxCreateDoubleMatrix(dimM,dimN,type); + recurrence(mxGetPr(plhs[0]),mxGetPi(plhs[0]), + dimM,dimN,prhs[0],prhs[1]); + } + /* 'coefficient'-syntax (Clenshaw summation) */ + else { + /* check input and allocate output */ + int dimM,dimN,KC,NC; + mxComplexity type; + SizesAndType(prhs,&dimM,&dimN,&type); + + /* coefficient matrix */ + if (!ISDOUBLETENSOR(prhs[2])) + mexErrMsgIdAndTxt("clenshaw:e2", + "Expecting a double, non-sparse matrix."); + if (mxGetM(prhs[2]) != dimM) + mexErrMsgIdAndTxt("clenshaw:e4", + "The dimensions of the coefficients must " + "match the length of each recursion."); + + if (mxGetNumberOfDimensions(prhs[2]) > 3) + mexErrMsgIdAndTxt("clenshaw:e5","Coefficient array can have at " + "most three dimensions."); + else if (mxGetNumberOfDimensions(prhs[2]) == 3) { + KC = mxGetDimensions(prhs[2])[1]; + NC = mxGetDimensions(prhs[2])[2]; + } + else { + KC = mxGetN(prhs[2]); + NC = 1; + } + + if (dimN == 1) + dimN = NC; + else if (dimN != NC && NC != 1) + mexErrMsgIdAndTxt("clenshaw:e6","The number of recursions " + "must agree."); + + if (mxIsComplex(prhs[2])) type = mxCOMPLEX; + + /* allocate and evaluate sum */ + plhs[0] = mxCreateDoubleMatrix(KC,dimN,type); + clenshaw(mxGetPr(plhs[0]),mxGetPi(plhs[0]), + dimM,dimN,KC,NC, + prhs[0],prhs[1],mxGetPr(prhs[2]),mxGetPi(prhs[2])); + } +} +/*------------------------------------------------------------------------*/ +void SizesAndType(const mxArray **A, + int *dimM,int *dimN, + mxComplexity *type) +/* Given the arrays A[0] and A[1], this routine determines the + dimensions dimM and dimN of the recursions defined by this + input. The common type (mxREAL/mxCOMPLEX) is returned in type. */ +{ + *dimM = *dimN = 1; + *type = mxREAL; + + for (int j = 0; j < 2; j++) { + const mxArray *Aj = A[j]; + if (!ISDOUBLEMATRIX(Aj)) + mexErrMsgIdAndTxt("clenshaw:e2", + "Expecting a double, non-sparse matrix."); + if (*dimM == 1) + *dimM = mxGetM(Aj); + else if (*dimM != mxGetM(Aj) && mxGetM(Aj) != 1) + mexErrMsgIdAndTxt("clenshaw:e3", + "Dimensions must either match or be singletons."); + if (*dimN == 1) + *dimN = mxGetN(Aj); + else if (*dimN != mxGetN(Aj) && mxGetN(Aj) != 1) + mexErrMsgIdAndTxt("clenshaw:e3", + "Dimensions must either match or be singletons."); + if (mxIsComplex(Aj)) *type = mxCOMPLEX; + } +} +/*------------------------------------------------------------------------*/ +void recurrence(double *prY,double *piY,int M,int N, + const mxArray *A,const mxArray *B) +/* Evaluates the recurrence as defined by the matrices A and B and + stores the result in (prY,piY), which must be allocated prior to + call. [M,N] = max(size(A),size(B)). All dimensions must either + match or be singletons. */ +{ + const double *prA = mxGetPr(A),*piA = mxGetPi(A); + const double *prB = mxGetPr(B),*piB = mxGetPi(B); + + /* increments for columns and rows */ + const int ia = mxGetM(A) == M; + const int ja = (ia ^ mxGetN(A) == N)*((mxGetN(A) == N)-mxGetM(A)); + const int ib = mxGetM(B) == M; + const int jb = (ib ^ mxGetN(B) == N)*((mxGetN(B) == N)-mxGetM(B)); + + /* evaluate recurrence */ + if (piY == NULL) + for (int j = 0; j < N; j++) { + double r1 = 1.0,r2 = 1.0,r3; + for (int i = 0; i < M; i++) { + r3 = r2; + r2 = r1; + r1 = *prA*r2+*prB*r3; + *prY++ = r1; + prA += ia; + prB += ib; + } + prA += ja; + prB += jb; + } + /* the remaining 3 complex case */ + else if (piB == NULL) + for (int j = 0; j < N; j++) { + double r1 = 1.0,r2 = 1.0,r3; + double i1 = 0.0,i2 = 0.0,i3; + for (int i = 0; i < M; i++) { + r3 = r2; i3 = i2; + r2 = r1; i2 = i1; + r1 = *prA*r2-*piA*i2+*prB*r3; + i1 = *prA*i2+*piA*r2+*prB*i3; + *prY++ = r1; *piY++ = i1; + prA += ia; piA += ia; + prB += ib; + } + prA += ja; piA += ja; + prB += jb; + } + else if (piA == NULL) + for (int j = 0; j < N; j++) { + double r1 = 1.0,r2 = 1.0,r3; + double i1 = 0.0,i2 = 0.0,i3; + for (int i = 0; i < M; i++) { + r3 = r2; i3 = i2; + r2 = r1; i2 = i1; + r1 = *prA*r2+*prB*r3-*piB*i3; + i1 = *prA*i2+*prB*i3+*piB*r3; + *prY++ = r1; *piY++ = i1; + prA += ia; + prB += ib; piB += ib; + } + prA += ja; + prB += jb; piB += jb; + } + else + for (int j = 0; j < N; j++) { + double r1 = 1.0,r2 = 1.0,r3; + double i1 = 0.0,i2 = 0.0,i3; + for (int i = 0; i < M; i++) { + r3 = r2; i3 = i2; + r2 = r1; i2 = i1; + r1 = *prA*r2-*piA*i2+*prB*r3-*piB*i3; + i1 = *prA*i2+*piA*r2+*prB*i3+*piB*r3; + *prY++ = r1; *piY++ = i1; + prA += ia; piA += ia; + prB += ib; piB += ib; + } + prA += ja; piA += ja; + prB += jb; piB += jb; + } +} +/*------------------------------------------------------------------------*/ +void clenshaw(double *prY,double *piY,int M,int N,int KC,int NC, + const mxArray *A,const mxArray *B, + const double *prC,const double *piC) +/* Evaluates the recurrence as defined by the matrices A and B, + multiplies it by the coefficients C and stores the sum of the + result in (prY,piY), which must be allocated prior to call. [M,N] = + max(size(A),size(B),size(C,[1 3])) and C is M-by-KC-by-NC (where NC + is either 1 or N). The result has the dimensions KC-by-N. All + dimensions must either match or be singletons, except for C's first + dimension which must be M. */ +{ + const double *prA = mxGetPr(A),*piA = mxGetPi(A); + const double *prB = mxGetPr(B),*piB = mxGetPi(B); + + /* start at the end since Clenshaws algorithm runs backwards */ + if (M > 2) { + prA += mxGetNumberOfElements(A)-1; + prB += mxGetNumberOfElements(B)-1; + prC += M*KC*NC-1; + prY += KC*N-1; + } + + /* increments for columns and rows */ + const int ia = mxGetM(A) == M; + const int ja = mxGetM(A)*(mxGetN(A) == N); + const int ka = ia-ia*M; + const int ib = mxGetM(B) == M; + const int jb = mxGetM(B)*(mxGetN(B) == N); + const int kb = ib-ib*M; + const int jc = -(NC != N)*M*KC; + + /* all real case */ + if (piY == NULL) { + /* two special cases: sum includes boundary values only */ + if (M == 1) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + const double R1 = prA[0]+prB[0]; + *prY++ = prC[0]*R1; prC++; + } + prA += ka+ja; + prB += kb+jb; + prC += jc; + } + else if (M == 2) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + const double R2 = prA[0]+prB[0]; + const double R1 = prA[ia]*R2+prB[ib]; + *prY++ = prC[0]*R2+prC[1]*R1; prC += 2; + } + prA += ia+ka+ja; + prB += ib+kb+jb; + prC += jc; + } + /* evaluate using Clenshaw summation */ + else if (M >= 3) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + double r1 = *prC,r2,r3; + prC--; + + /* initial boundary */ + r2 = r1; + r1 = *prA*r2+*prC; + prA -= ia; + prC--; + + /* internal formula */ + for (int i = 0; i < M-3; i++) { + r3 = r2; + r2 = r1; + r1 = *prA*r2+*prB*r3+*prC; + prA -= ia; + prB -= ib; + prC--; + } + + /* final boundary */ + r3 = r2; + r2 = r1; + r1 = *prB*r3+*prC; + + /* initial values of the recurrence */ + prA -= ia; + prB -= 2*ib; + const double R2 = prA[0]+prB[0]; + const double R1 = prA[ia]*R2+prB[ib]; + + /* accumulated sum */ + *prY-- = R2*r1+R1*r2; + + prA -= ka; + prB -= kb; + prC--; + } + prA -= ja; + prB -= jb; + prC -= jc; + } + } + else { + /* in order to avoid a messy code the remaining 7 complex cases + are handled in the same way as the pure complex case */ + const int ia2 = piA == NULL ? 0 : ia; + const int ja2 = piA == NULL ? 0 : ja; + const int ka2 = piA == NULL ? 0 : ka; + const int ib2 = piB == NULL ? 0 : ib; + const int jb2 = piB == NULL ? 0 : jb; + const int kb2 = piB == NULL ? 0 : kb; + const int ic2 = piC == NULL ? 0 : 1; + const int jc2 = piC == NULL ? 0 : jc; + + if (M > 2) { + if (piA != NULL) piA += mxGetNumberOfElements(A)-1; + if (piB != NULL) piB += mxGetNumberOfElements(B)-1; + if (piC != NULL) piC += M*KC*NC-1; + piY += KC*N-1; + } + + const double zero = 0.0; + if (piA == NULL) piA = &zero; + if (piB == NULL) piB = &zero; + if (piC == NULL) piC = &zero; + + if (M == 1) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + const double R1 = prA[0]+prB[0],I1 = piA[0]+piB[0]; + *prY++ = prC[0]*R1-piC[0]*I1; + *piY++ = prC[0]*I1+piC[0]*R1; + prC++; piC += ic2; + } + prA += ka+ja; piA += ka2+ja2; + prB += kb+jb; piB += kb2+jb2; + prC += jc; piC += jc2; + } + else if (M == 2) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + const double R2 = prA[0]+prB[0],I2 = piA[0]+piB[0]; + const double R1 = prA[ia]*R2-piA[ia2]*I2+prB[ib], + I1 = prA[ia]*I2+piA[ia2]*R2+piB[ib2]; + *prY++ = prC[0]*R2-piC[0]*I2+prC[1]*R1-piC[ic2]*I1; + *piY++ = prC[0]*I2+piC[0]*R2+prC[1]*I1+piC[ic2]*R1; + prC += 2; piC += 2*ic2; + } + prA += ia+ka+ja; piA += ia2+ka2+ja2; + prB += ib+kb+jb; piB += ib2+kb2+jb2; + prC += jc; piC += jc2; + } + else if (M >= 3) + for (int j = 0; j < N; j++) { + for (int k = 0; k < KC; k++) { + double r1 = *prC,r2,r3; + double i1 = *piC,i2,i3; + prC--; piC -= ic2; + r2 = r1; i2 = i1; + r1 = *prA*r2-*piA*i2+*prC; + i1 = *prA*i2+*piA*r2+*piC; + prA -= ia; piA -= ia2; + prC--; piC -= ic2; + for (int i = 0; i < M-3; i++) { + r3 = r2; i3 = i2; + r2 = r1; i2 = i1; + r1 = *prA*r2-*piA*i2+*prB*r3-*piB*i3+*prC; + i1 = *prA*i2+*piA*r2+*prB*i3+*piB*r3+*piC; + prA -= ia; piA -= ia2; + prB -= ib; piB -= ib2; + prC--; piC -= ic2; + } + r3 = r2; i3 = i2; + r2 = r1; i2 = i1; + r1 = *prB*r3-*piB*i3+*prC; + i1 = *prB*i3+*piB*r3+*piC; + prA -= ia; piA -= ia2; + prB -= 2*ib; piB -= 2*ib2; + const double R2 = prA[0]+prB[0],I2 = piA[0]+piB[0]; + const double R1 = prA[ia]*R2-piA[ia2]*I2+prB[ib], + I1 = prA[ia]*I2+piA[ia2]*R2+piB[ib2]; + *prY-- = R2*r1-I2*i1+R1*r2-I1*i2; + *piY-- = R2*i1+I2*r1+I1*r2+R1*i2; + prA -= ka; piA -= ka2; + prB -= kb; piB -= kb2; + prC--; piC -= ic2; + } + prA -= ja; piA -= ja2; + prB -= jb; piB -= jb2; + prC -= jc; piC -= jc2; + } + } +} +/*------------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/fsetop.c b/stenglib/Fast/source/fsetop.c new file mode 100644 index 00000000..935ed810 --- /dev/null +++ b/stenglib/Fast/source/fsetop.c @@ -0,0 +1,1058 @@ +/* fsetop.c */ +/* S. Engblom 2010-02-09 (Revision, extended union operation) */ +/* S. Engblom 2006-06-13 (Major revision) */ +/* S. Engblom 2005-06-02 */ + +#include +// temporary fix for CC under Solaris: +#ifndef NO_STDINT +#include +#endif +#include + +#include "mex.h" +#include "matrix.h" + +/*------------------------------------------------------------------------*/ + +// forward declarations +typedef enum {CHECK = 8,INTERSECT = 4,SETDIFF = 6,SETXOR = 1, + UNION = 0,UNIQUE = 7,ISMEMBER = 10,UNKNOWN = -1} HASH_OP; +HASH_OP getop(const char *str); + +// hash-table for two arrays A and B containing data +typedef struct { + uint8_t **hash_tb; // pointers to raw data + size_t hashsz; // size of table + char *hash_id; // identification of element + uint32_t *hash_ixa,*hash_ixb; // pointers from data into table + size_t na,nb,nx; // number of A's, B's and X's +} hashTable; + +// hash-table for two arrays containing mxArrays +typedef struct { + mxArray **hash_tb; + size_t hashsz; + char *hash_id; + uint32_t *hash_ixa,*hash_ixb; + uint32_t *hash_iixa,*hash_iixb; // pointers back into hash_ixa, hash_ixb + size_t na,nb,nx; +} mxhashTable; + +// hash-utility functions +size_t hashsize(size_t len); +uint32_t hash(const uint8_t *val,size_t nbytes,uint32_t offset); +uint32_t hash2nd(const uint8_t *val,size_t nbytes); +bool mx_IsEqual(const mxArray *A,const mxArray *B); + +// Venn-topology routines +void hashSort(hashTable *T, + const uint8_t *A,size_t Na, + const uint8_t *B,size_t Nb,size_t Mbytes); +void mxhashSort(mxhashTable *T, + const mxArray *A,size_t Na, + const mxArray *B,size_t Nb); + +// post-processing of hash-tables +typedef void (*fsetopOut)(hashTable,size_t,size_t,size_t, + int,mxArray **,size_t,mxClassID); + +void intersectOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); +void setdiffOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); +void setxorOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); +void unionOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); +void uniqueOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); +void ismemberOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t,mxClassID); + +// mx-versions +typedef void (*mxfsetopOut)(mxhashTable,size_t,size_t, + int,mxArray **); + +void mxintersectOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); +void mxsetdiffOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); +void mxsetxorOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); +void mxunionOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); +void mxuniqueOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); +void mxismemberOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]); + +/*------------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + // fetch first argument op (read at most 9 characters) + char strop[10]; + HASH_OP op; + + // determine the type of the set-operation + if (nrhs < 2) + mexErrMsgIdAndTxt("fsetop:e1","Expecting at least two inputs."); + if (!mxIsChar(prhs[0])) + mexErrMsgIdAndTxt("fsetop:e2","First argument must be a " + "character array."); + if (mxGetString(prhs[0],strop,10) != 0 || (op = getop(strop)) == UNKNOWN) + mexErrMsgIdAndTxt("fsetop:e3","Unknown set operation."); + + // syntax + if (op != CHECK && op != UNIQUE) { + if (nrhs != 3) + mexErrMsgIdAndTxt("fsetop:e5","Expecting exactly three inputs."); + } + else if (nrhs != 2) + mexErrMsgIdAndTxt("fsetop:e8","Expecting exactly two inputs."); + if (op == UNION) { + if (nlhs > 5) + mexErrMsgIdAndTxt("fsetop:e13","Expecting at most five outputs."); + } + else if (op != CHECK && op != SETDIFF && op != ISMEMBER) { + if (nlhs > 3) + mexErrMsgIdAndTxt("fsetop:e9","Expecting at most three outputs."); + } + else if (op != CHECK) { + if (nlhs > 2) + mexErrMsgIdAndTxt("fsetop:e10","Expecting at most two outputs."); + } + else if (nlhs > 1) + mexErrMsgIdAndTxt("fsetop:e12","Expecting at most one output."); + + // two cases + const bool isCell = mxIsCell(prhs[1]); + + // case 1: input is numeric array + if (!isCell) { + uint8_t *A,*B = NULL; + size_t Na,Nb = 0,Mbytes; + + // fetch second input A + if (!(mxIsNumeric(prhs[1]) || mxIsChar(prhs[1]) || mxIsLogical(prhs[1])) + || mxIsSparse(prhs[1]) || mxIsComplex(prhs[1]) || + mxGetNumberOfDimensions(prhs[1]) > 2) + mexErrMsgIdAndTxt("fsetop:e4","Input argument must be a 2-D real " + "and non-sparse array."); + A = (uint8_t *)mxGetData(prhs[1]); + + Mbytes = mxGetM(prhs[1])*mxGetElementSize(prhs[1]); + Na = mxGetN(prhs[1]); + + // quick return + if (op == CHECK) { + uint32_t *C; + plhs[0] = mxCreateNumericMatrix(1,Na,mxUINT32_CLASS,mxREAL); + C = mxGetData(plhs[0]); + for (size_t i = 0; i < Na; i++) + C[i] = hash(&A[i*Mbytes],Mbytes,0); + return; + } + + // fetch third input B, if any + if (op != UNIQUE) { + if (mxIsSparse(prhs[2]) || mxIsComplex(prhs[2]) || + mxGetNumberOfDimensions(prhs[2]) > 2) + mexErrMsgIdAndTxt("fsetop:e4","Input argument must be a 2-D real " + "and non-sparse array."); + if (mxGetClassID(prhs[1]) != mxGetClassID(prhs[2])) + mexErrMsgIdAndTxt("fsetop:e6","Arguments must be " + "of the same class."); + if (mxGetM(prhs[1]) != mxGetM(prhs[2])) + mexErrMsgIdAndTxt("fsetop:e7","The number of rows must match."); + + B = (uint8_t *)mxGetData(prhs[2]); + Nb = mxGetN(prhs[2]); + } + + // sort it out! + hashTable T; + if (op != ISMEMBER) + hashSort(&T,A,Na,B,Nb,Mbytes); + else + hashSort(&T,B,Nb,A,Na,Mbytes); // 2nd input must be the master array + + // produce whatever output is asked for + static const fsetopOut ftab[] = { + &unionOut,&setxorOut,NULL,NULL,&intersectOut,NULL, + &setdiffOut,&uniqueOut,NULL,NULL,&ismemberOut + }; + + (*ftab[op])(T,Na,Nb,Mbytes,nlhs,plhs, + mxGetElementSize(prhs[1]),mxGetClassID(prhs[1])); + + // dealllocate the hash table + mxFree(T.hash_ixb); + mxFree(T.hash_ixa); + mxFree(T.hash_id); + mxFree(T.hash_tb); + } + // case 2: cell-array of arrays containing real and non-sparse data + else { + mxArray *A,*B = NULL; + size_t Na,Nb = 0; + + // fetch second input A + A = (mxArray *)prhs[1]; + Na = mxGetNumberOfElements(A); + for (size_t i = 0; i < Na; i++) { + const mxArray *Ai = mxGetCell(A,i); + if (!(mxIsNumeric(Ai) || mxIsChar(Ai) || mxIsLogical(Ai)) || + mxIsComplex(Ai) || mxIsSparse(Ai)) + mexErrMsgIdAndTxt("fsetop:e11","Cell-vector must contain real, " + "non-sparse arrays."); + } + + // quick return + if (op == CHECK) { + uint32_t *C; + plhs[0] = mxCreateNumericMatrix(1,Na,mxUINT32_CLASS,mxREAL); + C = mxGetData(plhs[0]); + for (size_t i = 0; i < Na; i++) { + const mxArray *Ai = mxGetCell(A,i); + uint32_t hashkey = hash(mxGetData(Ai),mxGetNumberOfElements(Ai)* + mxGetElementSize(Ai),mxGetClassID(Ai)); + hashkey = hash((uint8_t *)mxGetDimensions(Ai), + mxGetNumberOfDimensions(Ai)*sizeof(int),hashkey); + C[i] = hashkey; + } + return; + } + + // fetch third input B, if any + if (op != UNIQUE) { + if (!mxIsCell(prhs[2])) + mexErrMsgIdAndTxt("fsetop:e6","Arguments must be " + "of the same class."); + + B = (mxArray *)prhs[2]; + Nb = mxGetNumberOfElements(B); + for (size_t i = 0; i < Nb; i++) { + const mxArray *Bi = mxGetCell(B,i); + if (!(mxIsNumeric(Bi) || mxIsChar(Bi) || mxIsLogical(Bi)) || + mxIsComplex(Bi) || mxIsSparse(Bi)) + mexErrMsgIdAndTxt("fsetop:e11","Cell-vector must contain real, " + "non-sparse arrays."); + } + } + + // sort it out! + mxhashTable T; + if (op != ISMEMBER) + mxhashSort(&T,A,Na,B,Nb); + else + mxhashSort(&T,B,Nb,A,Na); // 2nd input must be the master array + + // produce whatever output is asked for + static const mxfsetopOut ftab[] = { + &mxunionOut,&mxsetxorOut,NULL,NULL,&mxintersectOut,NULL, + &mxsetdiffOut,&mxuniqueOut,NULL,NULL,&mxismemberOut + }; + + (*ftab[op])(T,Na,Nb,nlhs,plhs); + + // deallocate the hash table + mxFree(T.hash_iixb); + mxFree(T.hash_iixa); + mxFree(T.hash_ixb); + mxFree(T.hash_ixa); + mxFree(T.hash_id); + mxFree(T.hash_tb); + } +} +/*------------------------------------------------------------------------*/ +void intersectOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [C,IA,IB] = FSETOP('intersect',A,B) returns the columns common to + both A and B. C = A(:,IA) = B(:,IB). */ +{ + uint8_t *C; + double *ia = NULL,*ib = NULL; + + // allocate output + plhs[0] = mxCreateNumericMatrix(Mbytes/siz,T.nx,id,mxREAL); + C = (uint8_t *)mxGetData(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.nx,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nx,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'X') { + memcpy(&C[j*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ia != NULL) { + ia[j] = i+1; + if (ib != NULL) + // IB follows the order determined by IA + T.hash_tb[hashkey] = &C[j*Mbytes]; + } + + // first unique element only + T.hash_id[hashkey] = 'Y'; + j++; + } + } + + if (ib != NULL) + for (size_t i = 0; i < Nb; i++) { + const uint32_t hashkey = T.hash_ixb[i]; + if (T.hash_id[hashkey] == 'Y') { + ib[(T.hash_tb[hashkey]-&C[0])/(Mbytes ? Mbytes : 1)] = i+1; + + // again, first unique element only + T.hash_id[hashkey] = 'Z'; + } + } +} +/*------------------------------------------------------------------------*/ +void mxintersectOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxArray *C; + double *ia = NULL,*ib = NULL; + + C = plhs[0] = mxCreateCellMatrix(1,T.nx); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.nx,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nx,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'X') { + mxSetCell(C,j,mxDuplicateArray(T.hash_tb[hashkey])); + if (ia != NULL) { + ia[j] = i+1; + if (ib != NULL) + ib[j] = T.hash_iixb[hashkey]+1; // new construction + } + + T.hash_id[hashkey] = 'Y'; + j++; + } + } +} +/*------------------------------------------------------------------------*/ +void setdiffOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [C,IA] = FSETOP('setdiff',A,B) returns the columns in A that are + not in B. C = A(:,IA). */ +{ + uint8_t *C; + double *ia = NULL; + + // allocate output + plhs[0] = mxCreateNumericMatrix(Mbytes/siz,T.na,id,mxREAL); + C = (uint8_t *)mxGetData(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + } + + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A') { + memcpy(&C[j*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ia != NULL) ia[j] = i+1; + + // first unique element only + T.hash_id[hashkey] = 'Y'; + j++; + } + } +} +/*------------------------------------------------------------------------*/ +void mxsetdiffOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxArray *C; + double *ia = NULL; + + C = plhs[0] = mxCreateCellMatrix(1,T.na); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + } + + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A') { + mxSetCell(C,j,mxDuplicateArray(T.hash_tb[hashkey])); + if (ia != NULL) ia[j] = i+1; + + T.hash_id[hashkey] = 'Y'; + j++; + } + } +} +/*------------------------------------------------------------------------*/ +void setxorOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [C,IA,IB] = FSETOP('setxor',A,B) returns the columns that are not + in the intersection of A and B. C = [A(:,IA) B(:,IB)]. */ +{ + uint8_t *C; + double *ia = NULL,*ib = NULL; + + // allocate output + plhs[0] = mxCreateNumericMatrix(Mbytes/siz,T.na+T.nb,id,mxREAL); + C = (uint8_t *)mxGetData(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nb,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + size_t k = 0; + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A') { + memcpy(&C[k++*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ia != NULL) ia[j++] = i+1; + + // first unique element only + T.hash_id[hashkey] = 'Y'; + } + } + + for (size_t i = 0, j = 0; i < Nb; i++) { + const uint32_t hashkey = T.hash_ixb[i]; + if (T.hash_id[hashkey] == 'B') { + memcpy(&C[k++*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ib != NULL) ib[j++] = i+1; + + T.hash_id[hashkey] = 'Y'; + } + } +} +/*------------------------------------------------------------------------*/ +void mxsetxorOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxArray *C; + double *ia = NULL,*ib = NULL; + + C = plhs[0] = mxCreateCellMatrix(1,T.na+T.nb); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nb,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + size_t k = 0; + for (size_t i = 0,j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A') { + mxSetCell(C,k++,mxDuplicateArray(T.hash_tb[hashkey])); + if (ia != NULL) ia[j++] = i+1; + + T.hash_id[hashkey] = 'Y'; + } + } + + for (size_t i = 0, j = 0; i < Nb; i++) { + const uint32_t hashkey = T.hash_ixb[i]; + if (T.hash_id[hashkey] == 'B') { + mxSetCell(C,k++,mxDuplicateArray(T.hash_tb[hashkey])); + if (ib != NULL) ib[j++] = i+1; + + T.hash_id[hashkey] = 'Y'; + } + } +} +/*------------------------------------------------------------------------*/ +void unionOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [C,IA,IB,JA,JB] = FSETOP('union',A,B) returns the combined columns + from A and B but with no repetitions. C = [A(:,IA) B(:,IB)] and A = + C(:,JA), B = C(:,JB). */ +{ + uint8_t *C; + double *ia = NULL,*ib = NULL,*ja = NULL,*jb = NULL; + + plhs[0] = mxCreateNumericMatrix(Mbytes/siz,T.na+T.nx+T.nb,id,mxREAL); + C = (uint8_t *)mxGetData(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na+T.nx,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nb,mxREAL); + ib = mxGetPr(plhs[2]); + if (nlhs > 3) { + plhs[3] = mxCreateDoubleMatrix(1,Na,mxREAL); + ja = mxGetPr(plhs[3]); + if (nlhs > 4) { + plhs[4] = mxCreateDoubleMatrix(1,Nb,mxREAL); + jb = mxGetPr(plhs[4]); + } + } + } + } + + size_t k = 0; + for (size_t i = 0, j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A' || T.hash_id[hashkey] == 'X') { + memcpy(&C[k*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ia != NULL) { + ia[j++] = i+1; + if (ja != NULL) { + ja[i] = k+1; + T.hash_tb[hashkey] = &C[k*Mbytes]; + } + } + k++; + + // first unique element + T.hash_id[hashkey] = 'Y'; + } + else if (ja != NULL && T.hash_id[hashkey] == 'Y') + ja[i] = (T.hash_tb[hashkey]-&C[0])/(Mbytes ? Mbytes : 1)+1; + } + + for (size_t i = 0, j = 0; i < Nb; i++) { + const uint32_t hashkey = T.hash_ixb[i]; + if (T.hash_id[hashkey] == 'B') { + memcpy(&C[k*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ib != NULL) { + ib[j++] = i+1; + if (jb != NULL) { + jb[i] = k+1; + T.hash_tb[hashkey] = &C[k*Mbytes]; + } + } + k++; + + T.hash_id[hashkey] = 'Y'; + } + else if (jb != NULL && T.hash_id[hashkey] == 'Y') + jb[i] = (T.hash_tb[hashkey]-&C[0])/(Mbytes ? Mbytes : 1)+1; + } +} +/*------------------------------------------------------------------------*/ +void mxunionOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxArray *C; + double *ia = NULL,*ib = NULL,*ja = NULL,*jb = NULL; + + C = plhs[0] = mxCreateCellMatrix(1,T.na+T.nx+T.nb); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na+T.nx,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,T.nb,mxREAL); + ib = mxGetPr(plhs[2]); + if (nlhs > 3) { + plhs[3] = mxCreateDoubleMatrix(1,Na,mxREAL); + ja = mxGetPr(plhs[3]); + if (nlhs > 4) { + plhs[4] = mxCreateDoubleMatrix(1,Nb,mxREAL); + jb = mxGetPr(plhs[4]); + } + } + } + } + + size_t k = 0; + for (size_t i = 0, j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + if (T.hash_id[hashkey] == 'A' || T.hash_id[hashkey] == 'X') { + mxSetCell(C,k,mxDuplicateArray(T.hash_tb[hashkey])); + if (ia != NULL) { + ia[j++] = i+1; + if (ja != NULL) { + ja[i] = k+1; + T.hash_iixa[hashkey] = k; + } + } + k++; + + T.hash_id[hashkey] = 'Y'; + } + else if (ja != NULL && T.hash_id[hashkey] == 'Y') + ja[i] = T.hash_iixa[hashkey]+1; + } + + for (size_t i = 0, j = 0; i < Nb; i++) { + const uint32_t hashkey = T.hash_ixb[i]; + if (T.hash_id[hashkey] == 'B') { + mxSetCell(C,k,mxDuplicateArray(T.hash_tb[hashkey])); + if (ib != NULL) { + ib[j++] = i+1; + if (jb != NULL) { + jb[i] = k+1; + T.hash_iixb[hashkey] = k; + } + } + k++; + + T.hash_id[hashkey] = 'Z'; + } + else if (jb != NULL) { + if (T.hash_id[hashkey] == 'Y') + jb[i] = T.hash_iixa[hashkey]+1; + else if (T.hash_id[hashkey] == 'Z') + jb[i] = T.hash_iixb[hashkey]+1; + } + } +} +/*------------------------------------------------------------------------*/ +void uniqueOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [B,IA,IB] = FSETOP('unique',A) returns the same columns as in A but + with no repetitions. B = A(:,IA) and A = B(:,IB). */ +{ + uint8_t *B; + double *ia = NULL,*ib = NULL; + + plhs[0] = mxCreateNumericMatrix(Mbytes/siz,T.na,id,mxREAL); + B = (uint8_t *)mxGetData(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,Na,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + for (size_t i = 0, j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + + if (T.hash_id[hashkey] == 'A') { + memcpy(&B[j*Mbytes],T.hash_tb[hashkey],Mbytes); + if (ia != NULL) { + ia[j] = i+1; + if (ib != NULL) + // because IB points into B + T.hash_tb[hashkey] = &B[j*Mbytes]; + } + + // first unique element only + T.hash_id[hashkey] = 'Y'; + j++; + } + + // IB does not refer to unique elements + if (ib != NULL) ib[i] = ((T.hash_tb[hashkey]-&B[0]))/ + (Mbytes ? Mbytes : 1)+1; + } +} +/*------------------------------------------------------------------------*/ +void mxuniqueOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxArray *B; + double *ia = NULL,*ib = NULL; + + B = plhs[0] = mxCreateCellMatrix(1,T.na); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,T.na,mxREAL); + ia = mxGetPr(plhs[1]); + if (nlhs > 2) { + plhs[2] = mxCreateDoubleMatrix(1,Na,mxREAL); + ib = mxGetPr(plhs[2]); + } + } + + for (size_t i = 0, j = 0; i < Na; i++) { + const uint32_t hashkey = T.hash_ixa[i]; + + if (T.hash_id[hashkey] == 'A') { + mxSetCell(B,j,mxDuplicateArray(T.hash_tb[hashkey])); + if (ia != NULL) { + ia[j] = i+1; + if (ib != NULL) + T.hash_iixa[hashkey] = j; + } + + T.hash_id[hashkey] = 'Y'; + j++; + } + + // new construction + if (ib != NULL) ib[i] = T.hash_iixa[hashkey]+1; + } +} +/*------------------------------------------------------------------------*/ +void ismemberOut(hashTable T,size_t Na,size_t Nb,size_t Mbytes, + int nlhs,mxArray *plhs[], + size_t siz,mxClassID id) +/* [IA,IB] = FSETOP('ismember',A,B) returns a logical vector IA + containing 1 where the columns of A are also columns of B and 0 + otherwise. IB contains the index in B of each column in A and zero + if no such index exists. A(:,IA) = B(:,IB(IA). */ +{ + mxLogical *ia; + double *ib = NULL; + + plhs[0] = mxCreateLogicalMatrix(1,Na); + ia = mxGetLogicals(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,Na,mxREAL); + ib = mxGetPr(plhs[1]); + } + + for (size_t j = 0; j < Na; j++) { + // the meaning of hash_ixa and hash_ixb has now switched! + const size_t hashkey = T.hash_ixb[j]; + if (T.hash_id[hashkey] == 'X') { + ia[j] = 1; + + if (ib != NULL) + ib[j] = (T.hash_tb[hashkey]-T.hash_tb[T.hash_ixa[0]])/ + (Mbytes ? Mbytes : 1)+1; + } + } +} +/*------------------------------------------------------------------------*/ +void mxismemberOut(mxhashTable T,size_t Na,size_t Nb, + int nlhs,mxArray *plhs[]) +{ + mxLogical *ia; + double *ib = NULL; + + plhs[0] = mxCreateLogicalMatrix(1,Na); + ia = mxGetLogicals(plhs[0]); + if (nlhs > 1) { + plhs[1] = mxCreateDoubleMatrix(1,Na,mxREAL); + ib = mxGetPr(plhs[1]); + } + + for (size_t j = 0; j < Na; j++) { + const size_t hashkey = T.hash_ixb[j]; + if (T.hash_id[hashkey] == 'X') { + ia[j] = 1; + + // new construction + if (ib != NULL) ib[j] = T.hash_iixa[hashkey]+1; + } + } +} +/*------------------------------------------------------------------------*/ +HASH_OP getop(const char *str) +/* Small perfect hash for the supported set operations. Note that + str[4] must be readable for this to work. Returns UNKNOWN on + failure. */ +{ + static const char *tab[] = { + "union","setxor","","","intersect","", + "setdiff","unique","check","","ismember" + }; + const unsigned hash = ((unsigned)str[4])%11; + + if (str[0] != '\0' && strcmp(str,tab[hash]) == 0) + return hash; + else + return UNKNOWN; +} +/*------------------------------------------------------------------------*/ + +// table is only allowed to become about 70% full +const double HASH_LOAD = 1.0/0.70; + +size_t hashsize(size_t len) +/* Size of hash table. This is simply + exp2(ceil(log2(HASH_LOAD*len))). */ +{ + size_t ans = 1; + + len = ceil(HASH_LOAD*len); + do ans <<= 1; while (len >>= 1); + + return ans; +} +/*------------------------------------------------------------------------*/ + +// should work on most platforms +#define get16bits(d) (*((const uint16_t *)(d))) + +uint32_t hash(const uint8_t *val,size_t nbytes,uint32_t offset) +/* Computes a hashkey from bytes. The input offset may be used to + generate a new hashkey from the same data. Use zero as the default. + + The original author of this function is Paul Hsieh. See + http://www.azillionmonkeys.com/qed/hash.html for further details. +*/ +{ + uint32_t hash = offset,temp,rem; + + // incorporate 4 bytes at a time + rem = nbytes&3; + nbytes >>= 2; + + // main loop + for ( ; nbytes > 0; nbytes--, val += 4) { + hash += get16bits(val); + temp = (get16bits(val+2) << 11)^hash; + hash = (hash << 16)^temp; + hash += hash >> 11; + } + + // handle end cases + switch (rem) { + case 3: + hash += get16bits(val); + hash ^= hash << 16; + hash ^= val[2] << 18; + hash += hash >> 11; + break; + case 2: + hash += get16bits(val); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: + hash += val[0]; + hash ^= hash << 10; + hash += hash >> 1; + } + + // force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + return hash; +} +/*------------------------------------------------------------------------*/ +uint32_t hash2nd(const uint8_t *val,size_t nbytes) +/* Computes a secondary (incremental) hashkey from bytes. */ +{ + // a fairly independent and odd hashkey + return hash(val,nbytes,0x0f0f0f0f)|1; +} +/*------------------------------------------------------------------------*/ +bool mx_IsEqual(const mxArray *A,const mxArray *B) +/* Bitwise equality for real non-sparse mxArrays containing data. This + function will not work properly for sparse arrays, cell-arrays, + struct-arrays or for arrays containing imaginary data. */ +{ + // class + if (mxGetClassID(A) != mxGetClassID(B)) return false; + + // real data + const int nelems = mxGetNumberOfElements(A); + const int sizelem = mxGetElementSize(A); + if (nelems != mxGetNumberOfElements(B)) return false; + if (memcmp(mxGetData(A),mxGetData(B),nelems*sizelem)) return false; + + // shape + int ndims = mxGetNumberOfDimensions(A); + if (ndims != mxGetNumberOfDimensions(B)) return false; + if (memcmp(mxGetDimensions(A),mxGetDimensions(B),ndims*sizeof(int))) + return false; + + return true; +} +/*------------------------------------------------------------------------*/ +void hashSort(hashTable *T, + const uint8_t *A,size_t Na, + const uint8_t *B,size_t Nb,size_t Mbytes) +/* Determines the full Venn-topology of the arrays A and B using + hashing. + + Input are the arrays A and B of sizes Mbytes-by-Na and + Mbytes-by-Nb. + + On return, hash_tb is the hash-table pointing back into the columns + of A and B, hash_id contains 'A' for pointers strictly into A, 'B' + for pointers strictly into B and 'X' for pointers into A but with + equivalent elements contained in B as well. The number of A's, B's + and X's are returned in na, nb and nx respectively. The vectors + hash_ixa and hash_ixb has the same length as A and B and points + into hash_tb/hash_id. +*/ +{ + // hash table and index into hash table + T->hashsz = hashsize(Na+Nb); + T->hash_tb = mxMalloc(T->hashsz*sizeof(uint8_t *)); + T->hash_id = mxCalloc(T->hashsz,sizeof(char)); + T->hash_ixa = mxMalloc(Na*sizeof(uint32_t)); + T->hash_ixb = mxMalloc(Nb*sizeof(uint32_t)); + + // clear counters + T->na = T->nb = T->nx = 0; + + // hash columns of A + for (size_t i = 0; i < Na; i++) { + uint32_t hashkey = hash(&A[i*Mbytes],Mbytes,0)&(T->hashsz-1); + + // collision? + if (T->hash_id[hashkey] != '\0' && + memcmp(&A[i*Mbytes],T->hash_tb[hashkey],Mbytes)) { + const uint32_t hashkey2nd = hash2nd(&A[i*Mbytes],Mbytes); + + // rehash until empty slot found + do + hashkey = (hashkey+hashkey2nd)&(T->hashsz-1); + while (T->hash_id[hashkey] != '\0' && + memcmp(&A[i*Mbytes],T->hash_tb[hashkey],Mbytes)); + } + + // fill in table + if (T->hash_id[hashkey] == '\0') { + T->hash_tb[hashkey] = &((uint8_t *)A)[i*Mbytes]; + T->hash_id[hashkey] = 'A'; + T->hash_ixa[i] = hashkey; + T->na++; + } + else + T->hash_ixa[i] = hashkey; + } + + // hash columns of B + for (size_t i = 0; i < Nb; i++) { + uint32_t hashkey = hash(&B[i*Mbytes],Mbytes,0)&(T->hashsz-1); + + if (T->hash_id[hashkey] != '\0' && + memcmp(&B[i*Mbytes],T->hash_tb[hashkey],Mbytes)) { + const uint32_t hashkey2nd = hash2nd(&B[i*Mbytes],Mbytes); + + do + hashkey = (hashkey+hashkey2nd)&(T->hashsz-1); + while (T->hash_id[hashkey] != '\0' && + memcmp(&B[i*Mbytes],T->hash_tb[hashkey],Mbytes)); + } + + if (T->hash_id[hashkey] == '\0') { + T->hash_tb[hashkey] = &((uint8_t *)B)[i*Mbytes]; + T->hash_id[hashkey] = 'B'; + T->hash_ixb[i] = hashkey; + T->nb++; + } + else { + T->hash_ixb[i] = hashkey; + + // special case here + if (T->hash_id[hashkey] == 'A') { + T->na--; + T->hash_id[hashkey] = 'X'; + T->nx++; + } + } + } +} +/*------------------------------------------------------------------------*/ +void mxhashSort(mxhashTable *T, + const mxArray *A,size_t Na, + const mxArray *B,size_t Nb) +/* Corresponding routine for two cell-arrays of mxArrays. */ +{ + T->hashsz = hashsize(Na+Nb); + T->hash_tb = mxMalloc(T->hashsz*sizeof(mxArray *)); + T->hash_id = mxCalloc(T->hashsz,sizeof(char)); + T->hash_ixa = mxMalloc(Na*sizeof(uint32_t)); + T->hash_ixb = mxMalloc(Nb*sizeof(uint32_t)); + + // inverse pointers (back into hash_ixa, hash_ixb) + T->hash_iixa = mxMalloc(T->hashsz*sizeof(uint32_t)); + T->hash_iixb = mxMalloc((Nb != 0)*T->hashsz*sizeof(uint32_t)); + + T->na = T->nb = T->nx = 0; + + for (size_t i = 0; i < Na; i++) { + // hashing over data and class only... + const mxArray *Ai = mxGetCell(A,i); + uint32_t hashkey = hash(mxGetData(Ai),mxGetNumberOfElements(Ai)* + mxGetElementSize(Ai),mxGetClassID(Ai)); + hashkey = hashkey&(T->hashsz-1); + + if (T->hash_id[hashkey] != '\0' && + !mx_IsEqual(Ai,T->hash_tb[hashkey])) { + // ...hashing over the shape is used as the secondary key + const uint32_t hashkey2nd = hash2nd((const uint8_t *) + mxGetDimensions(Ai), + mxGetNumberOfDimensions(Ai)* + sizeof(int)); + + do + hashkey = (hashkey+hashkey2nd)&(T->hashsz-1); + while (T->hash_id[hashkey] != '\0' && + !mx_IsEqual(Ai,T->hash_tb[hashkey])); + } + + if (T->hash_id[hashkey] == '\0') { + T->hash_tb[hashkey] = (mxArray *)Ai; + T->hash_id[hashkey] = 'A'; + T->hash_ixa[i] = hashkey; + T->hash_iixa[hashkey] = i; + T->na++; + } + else + T->hash_ixa[i] = hashkey; + } + + for (size_t i = 0; i < Nb; i++) { + const mxArray *Bi = mxGetCell(B,i); + uint32_t hashkey = hash(mxGetData(Bi),mxGetNumberOfElements(Bi)* + mxGetElementSize(Bi),mxGetClassID(Bi)); + hashkey = hashkey&(T->hashsz-1); + + if (T->hash_id[hashkey] != '\0' && + !mx_IsEqual(Bi,T->hash_tb[hashkey])) { + const uint32_t hashkey2nd = hash2nd((const uint8_t *) + mxGetDimensions(Bi), + mxGetNumberOfDimensions(Bi)* + sizeof(int)); + + do + hashkey = (hashkey+hashkey2nd)&(T->hashsz-1); + while (T->hash_id[hashkey] != '\0' && + !mx_IsEqual(Bi,T->hash_tb[hashkey])); + } + + if (T->hash_id[hashkey] == '\0') { + T->hash_tb[hashkey] = (mxArray *)Bi; + T->hash_id[hashkey] = 'B'; + T->hash_ixb[i] = hashkey; + T->hash_iixb[hashkey] = i; + T->nb++; + } + else { + T->hash_ixb[i] = hashkey; + + if (T->hash_id[hashkey] == 'A') { + T->na--; + T->hash_id[hashkey] = 'X'; + T->hash_iixb[hashkey] = i; + T->nx++; + } + } + } +} +/*------------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/fsparse.c b/stenglib/Fast/source/fsparse.c new file mode 100644 index 00000000..88f667ca --- /dev/null +++ b/stenglib/Fast/source/fsparse.c @@ -0,0 +1,1688 @@ +/* fsparse.c */ + +/* S. Engblom 2013-12-02 (OpenMP) */ +/* S. Engblom 2010-02-02 (Minor revision) */ +/* S. Engblom 2007-05-04 (Revision) */ +/* S. Engblom 2005-05-05 (Revision) */ +/* S. Engblom 2004-10-29 */ + +#include +#include +// temporary fix for CC under Solaris: +#ifndef NO_STDINT +#include +#endif + +#include "mex.h" +#include "matrix.h" + +#ifdef _OPENMP +#include +#endif + +#ifndef FSPARSE_TIME + +#define StartTime +#define StopTime +#define GetTime(dest) + +#else + +#include + +static double *time_vec; // global dummy for output time vector + +static struct timeval TIME_before,TIME_after; +#define StartTime gettimeofday(&TIME_before,NULL) +#define StopTime gettimeofday(&TIME_after,NULL) +#define GetTime(dest) ((dest) = (double)(TIME_after.tv_sec-TIME_before.tv_sec)+ \ + (TIME_after.tv_usec-TIME_before.tv_usec)/1000000.0) + +#endif // FSPARSE_TIME + +// print intermediate results: +#undef PRINT_INTERMEDIATE + +/*------------------------------------------------------------------------*/ + +// forward declarations +bool mx_IsInt(const mxArray *array_ptr); + +bool getix(int **ix,int M,int N,int *max,bool nocopy,const mxArray *IX); + +mxArray *sparse2sparse(const mxArray *S); +mxArray *full2sparse(const mxArray *S); + +void squeeze(mxArray *S); + +void sparse_insert(mwIndex *irS,double *prS,double *piS, + const int *irank,const int *rank,const mwSize *jrS, + const int *ii, + const double *sr,const double *si, + int smod,int sdiv,int len,int M); +void sparse_inserti(mwIndex *irS,double *prS,double *piS, + const int *irank, + const int *ii,int imod, + const double *sr,const double *si, + int smod,int sdiv,int len); + +mxArray *sparse(const int *ii,const int *jj, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax); +mxArray *sparse_nosort(const int *ii,const int *jj, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax); +mxArray *gsparse(const int *ii,int imod, + const int *jj,int jdiv, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax); +mxArray *gsparse_nosort(const int *ii,int imod, + const int *jj,int jdiv, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax); + +/*------------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ +#ifdef FSPARSE_TIME + // output time vector allocated here already + plhs[1] = mxCreateDoubleMatrix(1,6,mxREAL); + time_vec = mxGetPr(plhs[1]); + + // check of syntax + if (nrhs < 1 || nrhs == 2 || 6 < nrhs || nlhs > 2) + mexErrMsgIdAndTxt("fsparse:e1", + "Expecting 1, 3..6 inputs and one or two outputs."); +#else + if (nrhs < 1 || nrhs == 2 || 6 < nrhs || nlhs > 1) + mexErrMsgIdAndTxt("fsparse:e1", + "Expecting 1, 3..6 inputs and one output."); +#endif + + // special case for one input + if (nrhs == 1) { + if (!mxIsDouble(prhs[0])) + mexErrMsgIdAndTxt("fsparse:e2", + "Single input argument must be double."); + + if (mxIsSparse(prhs[0])) { + plhs[0] = sparse2sparse(prhs[0]); + return; + } + else { + if (mxGetNumberOfDimensions(prhs[0]) > 2) + mexErrMsgIdAndTxt("fsparse:e3", + "Single input argument must be 2-D."); + plhs[0] = full2sparse(prhs[0]); + return; + } + } + + if (!mx_IsInt(prhs[0]) && !mxIsDouble(prhs[0]) || + mxIsComplex(prhs[0]) || mxIsSparse(prhs[0]) || + !mx_IsInt(prhs[1]) && !mxIsDouble(prhs[1]) || + mxIsComplex(prhs[1]) || mxIsSparse(prhs[1])) + mexErrMsgIdAndTxt("fsparse:e4", + "Index argument must be real, double or integers " + "and non-sparse."); + + if (!mxIsDouble(prhs[2]) || mxIsSparse(prhs[2])) + mexErrMsgIdAndTxt("fsparse:e5", + "Value argument must be double and non-sparse."); + + if (mxGetNumberOfDimensions(prhs[0]) > 2 || + mxGetNumberOfDimensions(prhs[1]) > 2 || + mxGetNumberOfDimensions(prhs[2]) > 2) + mexErrMsgIdAndTxt("fsparse:e6","Input arguments must be 2-D."); + + if (nrhs > 3) { + if (!mxIsDouble(prhs[3]) || mxIsComplex(prhs[3]) || mxIsSparse(prhs[3])) + mexErrMsgIdAndTxt("fsparse:e7","Size argument must be real, " + "double and non-sparse."); + if (nrhs > 4) { + if (mxGetNumberOfElements(prhs[4]) != 0 && !mxIsChar(prhs[4])) + mexErrMsgIdAndTxt("fsparse:e8","Flag argument must be " + "character array."); + if (nrhs > 5) { + // must determine the number of threads here (before any parallel region) + if (!mxIsDouble(prhs[5]) || + mxGetNumberOfElements(prhs[5]) != 1) + mexErrMsgIdAndTxt("fsparse:e17","Number of threads must be a " + "real scalar."); + const int nthreads = (int)mxGetScalar(prhs[5]); + if (nthreads < 1) + mexErrMsgIdAndTxt("fsparse:e18","Number of threads must be >= 1."); +#ifdef _OPENMP + omp_set_num_threads(nthreads); +#endif // ignored otherwise + } + } + } + + // input + const int Mii = mxGetM(prhs[0]),Nii = mxGetN(prhs[0]); + const int Mjj = mxGetM(prhs[1]),Njj = mxGetN(prhs[1]); + const bool nocopyii = mx_IsInt(prhs[0]),nocopyjj = mx_IsInt(prhs[1]); + int *ii,*jj; + const int Mss = mxGetM(prhs[2]),Nss = mxGetN(prhs[2]); + const double *sr = mxGetPr(prhs[2]); + const double *si = mxGetPi(prhs[2]); + const int len = Mii*Njj; + int M = 0,N = 0,Nzmax = -1,sort = 1; + + // check of 'assembly' syntax + if (Nii != Njj && Nii != 1 || + Mjj != Mii && Mjj != 1 || + Mss != Mii && Mss != 1 || + Nss != Njj && Nss != 1) + mexErrMsgIdAndTxt("fsparse:e9","Sizes mismatch."); + + // input ii and jj + StartTime; + bool ok1,ok2; +#ifndef _OPENMP + ok1 = getix(&ii,Mii,Nii,&M,nocopyii,prhs[0]); + ok2 = getix(&jj,Mjj,Njj,&N,nocopyjj,prhs[1]); +#else + // independent calls +#pragma omp single nowait + ok1 = getix(&ii,Mii,Nii,&M,nocopyii,prhs[0]); +#pragma omp single nowait + ok2 = getix(&jj,Mjj,Njj,&N,nocopyjj,prhs[1]); +#pragma omp barrier +#endif // _OPENMP + if (!ok1 || !ok2) + mexErrMsgIdAndTxt("fsparse:e10","Index argument must be " + "nonnegative integers."); + StopTime; + GetTime(time_vec[0]); + + // determine the input dimensions [M N Nzmax] of the output + if (nrhs > 3) { + const int szlen = mxGetNumberOfElements(prhs[3]); + const double *szval = mxGetPr(prhs[3]); + + if (szlen > 0) { + if (szval[0] < 0.0 || szval[0] != ceil(szval[0])) + mexErrMsgIdAndTxt("fsparse:e11","Size argument must be " + "nonnegative integer."); + if (M > szval[0]) + mexErrMsgIdAndTxt("fsparse:e12","Index exceeds matrix dimensions."); + M = szval[0]; + if (szlen > 1) { + if (szval[1] < 0.0 || szval[1] != ceil(szval[1])) + mexErrMsgIdAndTxt("fsparse:e11","Size argument must be " + "nonnegative integer."); + if (N > szval[1]) + mexErrMsgIdAndTxt("fsparse:e12","Index exceeds " + "matrix dimensions."); + N = szval[1]; + if (szlen > 2) { + if (szval[2] < 0.0 || szval[2] != ceil(szval[2])) + mexErrMsgIdAndTxt("fsparse:e13","Nzmax argument must be " + "nonnegative integer."); + Nzmax = szval[2]; + if (szlen > 3) + mexErrMsgIdAndTxt("fsparse:e14","Size argument must " + "contain 3 elements or less."); + } + } + } + } + + // sorted/not sorted output + if (nrhs > 4) { + if (mxGetNumberOfElements(prhs[4]) == 0) + sort = 1; + else { + char buf[15]; // read 14 characters at most + if (mxGetString(prhs[4],buf,15) != 0) + mexErrMsgIdAndTxt("fsparse:e15","Unrecognized flag argument."); + if (strcmp(buf,"nosort") == 0) + sort = 0; + else if (strcmp(buf,"sort") != 0) + mexErrMsgIdAndTxt("fsparse:e15","Unrecognized flag argument."); + } + } + + // empty case + if (len == 0) { + plhs[0] = mxCreateSparse(M,N,Nzmax == -1 ? 0 : Nzmax, + si == NULL ? mxREAL : mxCOMPLEX); + return; + } + + if (Nii == Njj && Mjj == Mii) { + /* cases when ii and jj have the same shape but (sr,si) have one + of 4 different shapes */ + const int smod = Nss != Njj ? Mii : len; + const int sdiv = Mss != Mii ? Mii : 1; + if (sort) + plhs[0] = sparse(ii,jj,sr,si,smod,sdiv,len, + M,N,Nzmax); + else + plhs[0] = sparse_nosort(ii,jj,sr,si,smod,sdiv,len, + M,N,Nzmax); + } + else { + // fully general case + const int imod = Nii != Njj ? Mii : len; + const int jdiv = Mjj != Mii ? Mii : 1; + const int smod = Nss != Njj ? Mii : len; + const int sdiv = Mss != Mii ? Mii : 1; + if (sort) + plhs[0] = gsparse(ii,imod,jj,jdiv,sr,si,smod,sdiv,len, + M,N,Nzmax); + else + plhs[0] = gsparse_nosort(ii,imod,jj,jdiv,sr,si,smod,sdiv,len, + M,N,Nzmax); + } + + // deallocate + if (!nocopyii) mxFree(ii); + if (!nocopyjj) mxFree(jj); + + // squeeze out zero elements + squeeze(plhs[0]); +} +/*------------------------------------------------------------------------*/ +bool mx_IsInt(const mxArray *array_ptr) +/* Returns logical 1 (true) if array_ptr is a numeric array containing + integers (int8, int16, int32 or int64 depending on the platform), + and logical 0 (false) otherwise. This is useful since the test is + not provided in MEX. + + In the name of the function, an extra underscore is used in order + to avoid confusing it with true MEX-functions. */ +{ + const int id = mxGetClassID(array_ptr); + const size_t siz = mxGetElementSize(array_ptr); + + /* check that the class is an integer and that its size matches the + size of an int */ + return (id == mxINT8_CLASS || id == mxINT16_CLASS || + id == mxINT32_CLASS || id == mxINT64_CLASS) && siz == sizeof(int); +} +/*------------------------------------------------------------------------*/ +bool getix(int **ix,int M,int N,int *max,bool nocopy,const mxArray *IX) +/* Gets indices ix from mxArray IX. The dimensions are M-by-N, max is + set to the maximum index and nocopy defines the type of IX (double + or int). */ +{ + bool ok = true; + int mx = *max; + +#ifndef _OPENMP + if (nocopy) { + // no copy + const int *iix = (*ix = (int *)mxGetData(IX)); + for (int i = 0; i < M*N; i++) { + if (iix[i] < 1) + return false; + if (iix[i] > mx) mx = iix[i]; + } + } + else { + // typecast copy + const double *ival = mxGetPr(IX); + int *iix = (*ix = mxMalloc(M*N*sizeof(int))); + for (int i = 0; i < M*N; i++) { + if (ival[i] < 1.0 || ival[i] != ceil(ival[i])) + return false; + if ((iix[i] = ival[i]) > mx) mx = ival[i]; + } + } +#else // _OPENMP + if (nocopy) { + // no copy + const int *iix = (*ix = (int *)mxGetData(IX)); +#pragma omp parallel shared (mx) +{ + int mymx = mx; // local version of mx +#pragma omp for + for (int i = 0; i < M*N; i++) { + if (iix[i] > mymx) + mymx = iix[i]; + else if (iix[i] < 1) + ok = false; // no harm in continuing + } + + if (mx < mymx) +#pragma omp critical + // ensure nothing changed, then make the swap: + if (mx < mymx) mx = mymx; +} // end omp parallel + } + else { + // typecast copy + const double *ival = mxGetPr(IX); + int *iix; +#pragma omp critical + // not thread-safe: + iix = (*ix = mxMalloc(M*N*sizeof(int))); +#pragma omp parallel shared (mx) +{ + int mymx = mx; // local version of mx +#pragma omp for + for (int i = 0; i < M*N; i++) { + if (ival[i] < 1.0 || ival[i] != ceil(ival[i])) + ok = false; // no harm in continuing + else if ((iix[i] = ival[i]) > mymx) + mymx = ival[i]; + } + + if (mx < mymx) +#pragma omp critical + // ensure nothing changed, then make the swap: + if (mx < mymx) mx = mymx; +} // end omp parallel + } +#endif // _OPENMP + *max = mx; + return ok; +} +/*------------------------------------------------------------------------*/ +mxArray *sparse2sparse(const mxArray *S) +/* Returns a deep copy T of a sparse matrix S. The allocation is + exact. */ +{ + const mwSize *jcS = mxGetJc(S); + const mwIndex *irS = mxGetIr(S); + const double *prS = mxGetPr(S),*piS = mxGetPi(S); + const int N = mxGetN(S),M = mxGetM(S),Nnz = jcS[N]; + const bool real = piS == NULL; + mxArray *T; + + /* straightforward */ + T = mxCreateSparse(M,N,Nnz,real ? mxREAL : mxCOMPLEX); + memcpy(mxGetJc(T),jcS,(N+1)*sizeof(jcS[0])); + memcpy(mxGetIr(T),irS,Nnz*sizeof(irS[0])); + memcpy(mxGetPr(T),prS,Nnz*sizeof(prS[0])); + if (!real) memcpy(mxGetPi(T),piS,Nnz*sizeof(piS[0])); + + return T; +} +/*------------------------------------------------------------------------*/ +mxArray *full2sparse(const mxArray *A) +/* Constructs a sparse matrix S from a full matrix A. */ +{ + const double *prA = mxGetPr(A),*piA = mxGetPi(A); + const int N = mxGetN(A),M = mxGetM(A); + const bool real = piA == NULL; + mxArray *S; + + if (real) { + mwSize *jcS = mxCalloc(N+1,sizeof(mwSize)); + mwIndex *irS; + double *prS; + + /* determine the column pointer */ + for (int c = 1,k = 0; c <= N; c++,k += M) + for (int i = 0; i < M; i++) + if (prA[k+i] != 0.0) jcS[c]++; + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + /* allocate */ + S = mxCreateSparse(0,0,jcS[N],mxREAL); + mxSetM(S,M); + mxSetN(S,N); + irS = mxGetIr(S); + prS = mxGetPr(S); + + /* set the column pointer */ + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + /* copy data */ + for (int c = 1,k = 0,dest = 0; c <= N; c++,k += M) + for (int i = 0; i < M; i++) + if (prA[k+i] != 0.0) { + irS[dest] = i; + prS[dest++] = prA[k+i]; + } + } + else { + mwSize *jcS = mxCalloc(N+1,sizeof(mwSize)); + mwIndex *irS; + double *prS,*piS; + + for (int c = 1,k = 0; c <= N; c++,k += M) + for (int i = 0; i < M; i++) + if (prA[k+i] != 0.0 && piA[k+i] != 0.0) jcS[c]++; + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + S = mxCreateSparse(0,0,jcS[N],mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + irS = mxGetIr(S); + prS = mxGetPr(S); + piS = mxGetPi(S); + + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + for (int c = 1,k = 0,dest = 0; c <= N; c++,k += M) + for (int i = 0; i < M; i++) + if (prA[k+i] != 0.0 && piA[k+i] != 0.0) { + irS[dest] = i; + prS[dest] = prA[k+i]; + piS[dest++] = piA[k+i]; + } + } + + return S; +} +/*------------------------------------------------------------------------*/ +void squeeze(mxArray *S) +/* Removes any zero elements explicitly stored in the sparse matrix + S. No reallocation is performed. + + There is a quite complicated and potentially faster algorithm based + on memmove() that performs the same operation. However, benchmark + tests indicate that the following simple code optimizes better. */ +{ + const int N = mxGetN(S); + mwSize *jcS = mxGetJc(S); + mwIndex *irS = mxGetIr(S); + double *prS = mxGetPr(S),*piS = mxGetPi(S); + + if (piS == NULL) { + int c,i; + + /* find the first zero, if any */ + for (i = c = 0; c < N; c++) + for ( ; i < jcS[c+1]; i++) + if (prS[i] == 0.0) + goto rfound0; /* a 'double break' */ + return; + + rfound0: + /* copy in a conservative fashion */ + for (int dest = i++; c < N; c++) { + for ( ; i < jcS[c+1]; i++) + if (prS[i] != 0.0) { + irS[dest] = irS[i]; + prS[dest++] = prS[i]; + } + jcS[c+1] = dest; + } + } + else { + int c,i; + for (i = c = 0; c < N; c++) + for ( ; i < jcS[c+1]; i++) + if (prS[i] == 0.0 && piS[i] == 0.0) + goto zfound0; + return; + + zfound0: + for (int dest = i++; c < N; c++) { + for ( ; i < jcS[c+1]; i++) + if (prS[i] != 0.0 || piS[i] != 0.0) { + irS[dest] = irS[i]; + prS[dest] = prS[i]; + piS[dest++] = piS[i]; + } + jcS[c+1] = dest; + } + } +} +/*------------------------------------------------------------------------*/ +void sparse_insert(mwIndex *irS,double *prS,double *piS, + const int *irank,const int *rank,const mwIndex *jrS, + const int *ii,const double *sr,const double *si, + int smod,int sdiv,int len,int M) +/* Inserts elements into sparse matrix. Input is the sparse matrix + itself (irS,prS,piS), an index-table irank, the rowindices ii and + the values of the elements (sr,si). Four different formats of the + values are allowed as indicated by the parameters smod and sdiv. + + Currently, input rank, jrS, and M are only used #ifdef _OPENMP and + for the full case 3 below. */ +{ + const bool real = si == NULL; + + switch (2*(smod == len)+(sdiv == 1)) { + case 3 : /* full case */ +#ifndef _OPENMP + if (real) + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += sr[i]; + } + else + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += sr[i]; + piS[irank[i]] += si[i]; + } +#else // _OPENMP + if (real) { + + if (rank != NULL) { + /* needed since not all cases respond to _OPENMP and uses a + different syntax */ +#pragma omp parallel +{ + const int nThreads = omp_get_num_threads(); + const int myId = omp_get_thread_num(); + const int rstart = 1+M*myId/nThreads; + const int rend = M*(myId+1)/nThreads; + int istart; + if (rstart == 1) + istart = 0; + else + istart = jrS[rstart-1]; + + if (rend >= 1) { + for (int i = istart; i < jrS[rend]; i++) irS[irank[i]] = ii[rank[i]]-1; + for (int i = istart; i < jrS[rend]; i++) prS[irank[i]] += sr[rank[i]]; + } +} // end parallel + } + else { +#pragma omp single nowait + for (int i = 0; i < len; i++) irS[irank[i]] = ii[i]-1; +#pragma omp single nowait + for (int i = 0; i < len; i++) prS[irank[i]] += sr[i]; + } + } + else { +#pragma omp parallel +{ + if (rank != NULL) { + const int nThreads = omp_get_num_threads(); + const int myId = omp_get_thread_num(); + const int rstart = 1+M*myId/nThreads; + const int rend = M*(myId+1)/nThreads; + int istart; + if (rstart == 1) + istart = 0; + else + istart = jrS[rstart-1]; + + if (rend >= 1) + for (int i = istart; i < jrS[rend]; i++) { + irS[irank[i]] = ii[rank[i]]-1; + prS[irank[i]] += sr[rank[i]]; + piS[irank[i]] += si[rank[i]]; + } + } + else { +#pragma omp single nowait + for (int i = 0; i < len; i++) irS[irank[i]] = ii[i]-1; +#pragma omp single nowait + for (int i = 0; i < len; i++) prS[irank[i]] += sr[i]; +#pragma omp single nowait + for (int i = 0; i < len; i++) piS[irank[i]] += si[i]; + } +} // end parallel + } +#endif // _OPENMP + break; + + case 2 : /* horizontal case */ + if (real) + for (int j = 0; j < len; j += sdiv) { + const double ssr = sr[j/sdiv]; + for (int i = j; i < j+sdiv; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += ssr; + } + } + else + for (int j = 0; j < len; j += sdiv) { + const double ssr = sr[j/sdiv]; + const double ssi = si[j/sdiv]; + for (int i = j; i < j+sdiv; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += ssr; + piS[irank[i]] += ssi; + } + } + break; + + case 1 : /* vertical case */ + if (real) + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += sr[i%smod]; + } + else + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += sr[i%smod]; + piS[irank[i]] += si[i%smod]; + } + break; + + case 0 : /* scalar case */ +#ifndef _OPENMP + if (real) { + const double ssr = sr[0]; + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += ssr; + } + } + else { + const double ssr = sr[0]; + const double ssi = si[0]; + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i]-1; + prS[irank[i]] += ssr; + piS[irank[i]] += ssi; + } + } +#else // _OPENMP + if (real) { +#pragma omp parallel +{ + const double ssr = sr[0]; +#pragma omp single nowait + for (int i = 0; i < len; i++) irS[irank[i]] = ii[i]-1; +#pragma omp single nowait + for (int i = 0; i < len; i++) prS[irank[i]] += ssr; +} // end omp parallel + } + else { +#pragma omp parallel +{ + const double ssr = sr[0]; + const double ssi = si[0]; +#pragma omp single nowait + for (int i = 0; i < len; i++) irS[irank[i]] = ii[i]-1; +#pragma omp single nowait + for (int i = 0; i < len; i++) prS[irank[i]] += ssr; +#pragma omp single nowait + for (int i = 0; i < len; i++) piS[irank[i]] += ssi; +} // end omp parallel + } +#endif // _OPENMP + break; + } +} +/*------------------------------------------------------------------------*/ +void sparse_inserti(mwIndex *irS,double *prS,double *piS, + const int *irank, + const int *ii,int imod, + const double *sr,const double *si, + int smod,int sdiv,int len) +/* Same as sparse_insert() above except that ii is assumed to be + vertical. */ +{ + const bool real = si == NULL; + + switch (2*(smod == len)+(sdiv == 1)) { + case 3 : /* full case */ + if (real) + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i%imod]-1; + prS[irank[i]] += sr[i]; + } + else + for (int i = 0; i < len; i++) { + irS[irank[i]] = ii[i%imod]-1; + prS[irank[i]] += sr[i]; + piS[irank[i]] += si[i]; + } + break; + case 2 : /* horizontal case */ + if (real) + for (int j = 0; j < len; j += sdiv) { + const double ssr = sr[j/sdiv]; + for (int i = 0; i < imod; i++) { + irS[irank[j+i]] = ii[i]-1; + prS[irank[j+i]] += ssr; + } + } + else + for (int j = 0; j < len; j += sdiv) { + const double ssr = sr[j/sdiv]; + const double ssi = si[j/sdiv]; + for (int i = 0; i < imod; i++) { + irS[irank[j+i]] = ii[i]-1; + prS[irank[j+i]] += ssr; + piS[irank[j+i]] += ssi; + } + } + break; + case 1 : /* vertical case */ + if (real) + for (int i = 0; i < imod; i++) { + const double ssr = sr[i]; + const int iii = ii[i]-1; + for (int j = i; j < len; j += imod) { + irS[irank[j]] = iii; + prS[irank[j]] += ssr; + } + } + else + for (int i = 0; i < imod; i++) { + const double ssr = sr[i]; + const double ssi = si[i]; + const int iii = ii[i]-1; + for (int j = i; j < len; j += imod) { + irS[irank[j]] = iii; + prS[irank[j]] += ssr; + piS[irank[j]] += ssi; + } + } + break; + case 0 : /* scalar case */ + if (real) { + const double ssr = sr[0]; + for (int j = 0; j < len; j += imod) + for (int i = 0; i < imod; i++) { + irS[irank[j+i]] = ii[i]-1; + prS[irank[j+i]] += ssr; + } + } + else { + const double ssr = sr[0]; + const double ssi = si[0]; + for (int j = 0; j < len; j += imod) + for (int i = 0; i < imod; i++) { + irS[irank[j+i]] = ii[i]-1; + prS[irank[j+i]] += ssr; + piS[irank[j+i]] += ssi; + } + } + break; + } +} +/*------------------------------------------------------------------------*/ +#ifndef _OPENMP +mxArray *sparse(const int *ii,const int *jj, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax) +/* Constructs a sparse matrix in Compressed Column Storage (CCS) from + triplet format [ii,jj,sr(si)]. An ordinary Matlab sparse matrix is + thus constructed. + + Input (smod,sdiv) determine the shape of the value array sr(si) and + len is the length of the index arrays which both must have the same + shape. [M N Nzmax] determine the dimensions of the resulting + matrix. If Nzmax = -1, then the allocation is exact (i.e. nnz(S) = + nzmax(S)). Otherwise, Nzmax must be greater than or equal to the + number of nonzeros needed to be stored. + + The memory demand of the algorithm is (at peak and for sufficiently + large output) +int[len]+. */ +{ + // output + mxArray *S; + mwSize *jcS; // column pointer for sparse matrix S + int *irank; // inverse rank array of length len + + mwSize *jrS; // accumulated "pessimistic" row counter + int *rank; // rank-array for rows + int *hcol; // cache memory for columns + + // Part 1: count and accumulate indices to rows + StartTime; + jrS = mxCalloc(M+1,sizeof(jrS[0])); + for (int i = 0; i < len; i++) jrS[ii[i]]++; + for (int r = 2; r <= M; r++) jrS[r] += jrS[r-1]; + StopTime; + GetTime(time_vec[1]); +#ifdef PRINT_INTERMEDIATE + mexPrintf("jrS = ["); + for (int r = 0; r <= M; r++) mexPrintf("%d,",jrS[r]); + mexPrintf("]\n\n"); +#endif + + // Part 2: build rank with the active use of jrS + StartTime; + rank = mxMalloc(len*sizeof(rank[0])); + jrS--; /* (unit-offset in ii) */ + for (int i = 0; i < len; i++) rank[jrS[ii[i]]++] = i; + // rank now allows for row-wise traversal + StopTime; + GetTime(time_vec[2]); +#ifdef PRINT_INTERMEDIATE + mexPrintf("rank = ["); + for (int i = 0; i < len; i++) mexPrintf("%d,",rank[i]); + mexPrintf("]\n"); + mexPrintf("jrS = [*,"); + for (int r = 1; r <= M+1; r++) mexPrintf("%d,",jrS[r]); + mexPrintf("]\n\n"); +#endif + + /* Part 3: loop over input and make each column unique with respect + to rowindices, building both an index vector irank and the final + column pointer at the same time */ + StartTime; + jcS = mxCalloc(N+1,sizeof(jcS[0])); + hcol = mxCalloc(N,sizeof(hcol[0])); + hcol--; /* (unit-offset in jj) */ + irank = mxMalloc(len*sizeof(irank[0])); + for (int row = 1,i = 0; row <= M; row++) + for ( ; i < jrS[row]; i++) { + const int ixijs = rank[i]; // index into input data triplet (ii,jj,sr) + const int col = jj[ixijs]; // column index + + // new element? + if (hcol[col] < row) { + hcol[col] = row; // remembered by the row index + jcS[col]++; // count it + } + + // irank keeps track of where it should go + irank[ixijs] = jcS[col]-1; + } + mxFree(++hcol); + mxFree(rank); + mxFree(++jrS); + StopTime; + GetTime(time_vec[3]); +#ifdef PRINT_INTERMEDIATE + mexPrintf("irank = ["); + for (int i = 0; i < len; i++) mexPrintf("%d,",irank[i]); + mexPrintf("]\n"); + mexPrintf("jcS = ["); + for (int c = 0; c <= N; c++) mexPrintf("%d,",jcS[c]); + mexPrintf("]\n\n"); +#endif + + // Part 4: accumulate pointer to columns + StartTime; + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + // irank must account for the previous accumulation + jcS--; /* (again, unit-offset in jj) */ + for (int i = 0; i < len; i++) irank[i] += jcS[jj[i]]; + jcS++; + StopTime; + GetTime(time_vec[4]); +#ifdef PRINT_INTERMEDIATE + mexPrintf("irank = ["); + for (int i = 0; i < len; i++) mexPrintf("%d,",irank[i]); + mexPrintf("]\n"); + mexPrintf("jcS = ["); + for (int c = 0; c <= N; c++) mexPrintf("%d,",jcS[c]); + mexPrintf("]\n\n"); +#endif + + // allocate output + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + // set the column pointer + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + // insert the data + StartTime; + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + StopTime; + GetTime(time_vec[5]); + + mxFree(irank); + return S; +} +/*------------------------------------------------------------------------*/ +#else // _OPENMP +/*------------------------------------------------------------------------*/ +mxArray *sparse(const int *ii,const int *jj, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax) +/* This is the OpenMP-version of the sparse function above. */ +{ + // output + mxArray *S; + mwSize **jcS; // column pointer, one per thread + mwSize *jcS_; // final column pointer + int *irank; // inverse rank array of length len + int *irankP; // permuted version of irank + + mwSize **jrS; // accumulated "pessimistic" row counter + int *rank; // rank-array for rows + + // Part 1: count and accumulate indices to rows + StartTime; + const int nThreads = omp_get_max_threads(); + jrS = mxMalloc((nThreads+1)*sizeof(jrS[0])); + for (int k = 0; k <= nThreads; k++) { + jrS[k] = mxCalloc(M+1,sizeof(jrS[k][0])); + jrS[k]--; /* (unit-offset in ii) */ + } + +#pragma omp parallel +{ + const int myId = omp_get_thread_num(); + const int istart = len*myId/nThreads; + const int iend = len*(myId+1)/nThreads; + for (int i = istart; i < iend; i++) + jrS[myId+1][ii[i]]++; + +#pragma omp barrier + + // accumulate jrS over the threads +#pragma omp for + for (int r = 1; r <= M; r++) + for (int k = 1; k < nThreads; k++) + jrS[k+1][r] += jrS[k][r]; + + // serial accumulation in jrS[0] +#pragma omp single + for (int r = 1; r <= M; r++) + jrS[0][r+1] += jrS[0][r]+jrS[nThreads][r]; + + // determine a private jrS for each thread +#pragma omp for + for (int r = 1; r <= M; r++) + for (int k = 1; k < nThreads; k++) + jrS[k][r] += jrS[0][r]; +} // end parallel + StopTime; + GetTime(time_vec[1]); + + // Part 2: build rank with the active use of jrS + StartTime; + rank = mxMalloc(len*sizeof(rank[0])); + +#pragma omp parallel +{ + const int myId = omp_get_thread_num(); + const int istart = len*myId/nThreads; + const int iend = len*(myId+1)/nThreads; + for (int i = istart; i < iend; i++) + rank[jrS[myId][ii[i]]++] = i; + // rank now allows for row-wise traversal +} // end parallel + StopTime; + GetTime(time_vec[2]); + + /* Part 3: loop over input and make each column unique with respect + to rowindices, building both a permuted index vector irankP and + the final column pointer at the same time */ + StartTime; + jcS = mxMalloc((nThreads+1)*sizeof(jcS[0])); + for (int k = 0; k <= nThreads; k++) + jcS[k] = mxCalloc(N+1,sizeof(jcS[k][0])); + irankP = mxMalloc(len*sizeof(irankP[0])); + if (2*(smod == len)+(sdiv == 1) != 3) + // *** case not fully implemented + irank = mxMalloc(len*sizeof(irank[0])); + +#pragma omp parallel +{ + int *hcol; // cache memory for columns +#pragma omp critical + hcol = mxCalloc(N,sizeof(hcol[0])); + hcol--; /* (unit-offset in jj) */ + + const int myId = omp_get_thread_num(); + const int rstart = 1+M*myId/nThreads; + const int rend = M*(myId+1)/nThreads; + int istart; + if (rstart == 1) + istart = 0; + else + istart = jrS[nThreads-1][rstart-1]; + + // loop over segment of row indices + for (int row = rstart,i = istart; row <= rend; row++) + // loop over single row + for ( ; i < jrS[nThreads-1][row]; i++) { + const int ixijs = rank[i]; // index into input data triplet (ii,jj,sr) + const int col = jj[ixijs]; // column index + + // new element? + if (hcol[col] < row) { + hcol[col] = row; // store row index + jcS[myId+1][col]++; // count it + } + + // irankP keeps track of where it should go + irankP[i] = jcS[myId+1][col]-1; + } +#pragma omp critical + mxFree(++hcol); + +#pragma omp barrier + + // accumulate jcS over the threads +#pragma omp for + for (int c = 1; c <= N; c++) + for (int k = 1; k < nThreads; k++) + jcS[k+1][c] += jcS[k][c]; + + // serial accumulation in jcS[0] +#pragma omp single +{ + for (int c = 1; c <= N; c++) + jcS[0][c] += jcS[0][c-1]+jcS[nThreads][c]; + jcS[0]--; /* (unit-offset in jj) */ +} + + // determine a private jcS for each thread +#pragma omp for + for (int c = 1; c <= N; c++) + for (int k = 1; k < nThreads; k++) + jcS[k][c] += jcS[0][c]; + + // irankP must now account to these changes to jcS + if (rend >= 1) + for (int i = istart; i < jrS[nThreads-1][rend]; i++) + irankP[i] += jcS[myId][jj[rank[i]]]; + + if (2*(smod == len)+(sdiv == 1) != 3) { + // *** case not fully implemented + if (rend >= 1) + for (int i = istart; i < jrS[nThreads-1][rend]; i++) + irank[rank[i]] = irankP[i]; + } +} // end parallel + StopTime; + GetTime(time_vec[3]); + + // Part 4: final accumulation of jcS + StartTime; + /* (nil in this version) */ + StopTime; + GetTime(time_vec[4]); + + // allocate output + jcS[0]++; + if (Nzmax == -1) + Nzmax = jcS[0][N]; + else if (Nzmax < jcS[0][N]) + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + // set the column pointer + mxFree(mxGetJc(S)); + mxSetJc(S,jcS[0]); + // free the remaining pointers + for (int k = 1; k <= nThreads; k++) + mxFree(jcS[k]); + mxFree(jcS); + + // insert the data + StartTime; + if (2*(smod == len)+(sdiv == 1) == 3) + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irankP,rank,jrS[nThreads-1],ii,sr,si,smod,sdiv,len,M); + else + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + StopTime; + GetTime(time_vec[5]); + + // *** case not fully implemented + if (2*(smod == len)+(sdiv == 1) != 3) + mxFree(irank); + mxFree(irankP); + + return S; +} +#endif // _OPENMP +/*------------------------------------------------------------------------*/ + +/* table is only allowed to become about 70% full */ +const double HASH_LOAD = 1.0/0.70; + +size_t hashsize(size_t len) +/* Size of hash table. This is simply + exp2(ceil(log2(HASH_LOAD*len))). */ +{ + size_t ans = 1; + + len = ceil(HASH_LOAD*len); + do ans <<= 1; while (len >>= 1); + + return ans; +} +/*------------------------------------------------------------------------*/ +uint32_t hash(int ii,int jj) +/* Computes a hashkey from an integer pair (ii,jj). + + Adapted from a code by Paul Hsieh. See + http://www.azillionmonkeys.com/qed/hash.html for further details. +*/ +{ + uint32_t hash = jj,temp; + + /* main loop executed twice */ + hash += ii&0x0000FFFF; + temp = ((ii&0xFFFF0000) >> 5)^hash; + hash = (hash << 16)^temp; + hash += hash >> 11; + + hash += jj&0x0000FFFF; + temp = ((jj&0xFFFF0000) >> 5)^hash; + hash = (hash << 16)^temp; + hash += hash >> 11; + + /* "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + return hash; +} +/*------------------------------------------------------------------------*/ +mxArray *sparse_nosort(const int *ii,const int *jj, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax) +/* This function does the same job as sparse() above except that the + result is *not* sorted w.r.t. rowindices. The sparse matrix thus + constructed is therefore a sparse matrix which is valid in many, + but not all situations in Matlab. + + The allocation is still exact (i.e. nzmax(S) = nnz(S)) and the + memory demand is (at peak and for sufficiently large output) + +int[N+2]+int[len]+, where len is the size of the + input index set. */ +{ + +#define SPS1 /* new version with hashing */ +#undef SPS2 /* updated version without hashing */ + +#ifdef SPS1 + + /* output */ + mxArray *S; + mwSize *jcS; + + /* rank- and hash-table */ + int *irank,*hash_tb; + size_t hash_sz = hashsize(len); + + jcS = mxCalloc(N+1,sizeof(jcS[0])); + irank = mxMalloc(len*sizeof(irank[0])); + hash_tb = mxCalloc(hash_sz,sizeof(hash_tb[0])); + + for (int i = 0; i < len; i++) { + const int row = ii[i],col = jj[i]; + uint32_t hashkey = hash(row,col)&(hash_sz-1); + + /* collision? */ + if (hash_tb[hashkey] != 0 && + !(row == ii[hash_tb[hashkey]-1] && + col == jj[hash_tb[hashkey]-1])) { + /* a fairly independent and odd (incremental) hashkey */ + const uint32_t hashkey2nd = hash(col,row)|1; + + /* rehash until empty slot found */ + do + hashkey = (hashkey+hashkey2nd)&(hash_sz-1); + while (hash_tb[hashkey] != 0 && + !(row == ii[hash_tb[hashkey]-1] && + col == jj[hash_tb[hashkey]-1])); + } + + /* fill in table */ + if (hash_tb[hashkey] == 0) { + hash_tb[hashkey] = i+1; + irank[i] = jcS[col]++; + } + else + irank[i] = irank[hash_tb[hashkey]-1]; + } + mxFree(hash_tb); + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + /* account for the accumulation of indices */ + jcS--; + for (int i = 0; i < len; i++) irank[i] += jcS[jj[i]]; + jcS++; + + /* allocate output */ + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + /* set the column pointer */ + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + /* insert the data */ + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + mxFree(irank); + + return S; + +#elif defined(SPS2) + + /* output */ + mxArray *S; + int *jcS; + + /* rank-tables and help-pointer for rowindices */ + int *rank,*irank,*hrow; + + /* determine a "pessimistic" column pointer */ + jcS = mxCalloc(N+2,sizeof(jcS[0])); + jcS++; /* allows for a fast restore */ + for (int i = 0; i < len; i++) jcS[jj[i]]++; + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + /* produce the rank-table so that the data may be traversed in order + with respect to columns */ + rank = mxMalloc(len*sizeof(rank[0])); + jcS--; /* restore jcS and account for unit offset in jj */ + for (int i = 0; i < len; i++) rank[jcS[jj[i]]++] = i; + + /* allocate index- and help-pointers */ + irank = mxMalloc(len*sizeof(irank[0])); + hrow = mxCalloc(M,sizeof(hrow[0])); + hrow--; /* unit offset in ii */ + + /* loop over input and make each column unique with respect to + rowindices, building both an index vector irank and the final + column pointer at the same time */ + for (int col = 1,i = 0; col <= N; col++) { + const int begin = jcS[col-1],end = jcS[col]; + jcS[col] = begin; + for ( ; i < end; i++) { + const int ixijs = rank[i]; + const int row = ii[ixijs]; + + /* new element; mark and count it */ + if (hrow[row] <= begin) hrow[row] = ++jcS[col]; + + /* remember where it should go */ + irank[ixijs] = hrow[row]-1; + } + } + mxFree(++hrow); + mxFree(rank); + + /* allocate output */ + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + /* set the column pointer */ + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + /* insert the data */ + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + mxFree(irank); + + return S; + +#endif +} +/*------------------------------------------------------------------------*/ +/*- General versions -----------------------------------------------------*/ +/*------------------------------------------------------------------------*/ +mxArray *gsparse(const int *ii,int imod, + const int *jj,int jdiv, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax) +/* Derived from sparse(). */ +{ + +#define GSP1 /* new version */ +#undef GSP2 /* (very) old version */ + +#ifdef GSP1 + + mxArray *S; + mwSize *jcS; + mwIndex *irS; + + int *rank,*irank,*hcol; + + irS = mxCalloc(M+1,sizeof(irS[0])); + /* for (int i = 0; i < len; i++) irS[ii[i%imod]]++; */ + if (imod == len) + for (int i = 0; i < len; i++) irS[ii[i]]++; + else + for (int i = 0; i < imod; i++) irS[ii[i]] += len/imod; + for (int r = 2; r <= M; r++) irS[r] += irS[r-1]; + + rank = mxMalloc(len*sizeof(rank[0])); + irS--; + for (int i = 0; i < len; i++) rank[irS[ii[i%imod]]++] = i; + + jcS = mxCalloc(N+1,sizeof(jcS[0])); + hcol = mxCalloc(N,sizeof(hcol[0])); + hcol--; + irank = mxMalloc(len*sizeof(irank[0])); + for (int row = 1,i = 0; row <= M; row++) + for ( ; i < irS[row]; i++) { + const int ixijs = rank[i]; + const int col = jj[ixijs/jdiv]; + + if (hcol[col] < row) { + hcol[col] = row; + jcS[col]++; + } + + irank[ixijs] = jcS[col]-1; + } + mxFree(++irS); + mxFree(rank); + mxFree(++hcol); + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + jcS--; + /* for (int i = 0; i < len; i++) irank[i] += jcS[jj[i/jdiv]]; */ + if (jdiv == 1) + for (int i = 0; i < len; i++) irank[i] += jcS[jj[i]]; + else + for (int c = 0, i = 0; c < len/jdiv; c++) + for ( ; i < (c+1)*jdiv; i++) irank[i] += jcS[jj[c]]; + jcS++; + + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + if (imod == len) + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + else + sparse_inserti(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,ii,imod,sr,si,smod,sdiv,len); + mxFree(irank); + + return S; + +#elif defined(GSP2) + + mxArray *S; + int *jcS,*irS; + + int *rank,*irank; + + jcS = mxCalloc(N+2,sizeof(jsC[0])); + jcS++; + irS = mxCalloc(M+1,sizeof(irS[0])); + + for (int i = 0; i < len; i++) { + jcS[jj[i/jdiv]]++; + irS[ii[i%imod]]++; + } + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + for (int r = 2; r <= M; r++) irS[r] += irS[r-1]; + + irank = mxMalloc(len*sizeof(irank[0])); + irS--; + for (int i = 0; i < len; i++) + irank[irS[ii[i%imod]]++] = i; + mxFree(++irS); + + rank = mxMalloc(len*sizeof(rank[0])); + jcS--; + for (int i = 0; i < len; i++) + rank[jcS[jj[irank[i]/jdiv]]++] = irank[i]; + + for (int i = 0,col = jj[rank[0]/jdiv],row; ; ) { + const int end = jcS[col]; + + irank[rank[i]] = jcS[col-1]; + jcS[col] = jcS[col-1]+1; + row = ii[rank[i]%imod]; + + for (i++; i < end; i++) { + const int ixijs = rank[i]; + + if (row < ii[ixijs%imod]) { + row = ii[ixijs%imod]; + jcS[col]++; + } + + irank[ixijs] = jcS[col]-1; + } + + if (i < len) + for (col++; col < jj[rank[i]/jdiv]; col++) + jcS[col] = jcS[col-1]; + else { + for (col++; col <= N; col++) jcS[col] = jcS[col-1]; + break; + } + } + mxFree(rank); + + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + if (imod == len) + sparse_insert(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + else + sparse_inserti(mxGetIr(S),mxGetPr(S),mxGetPi(S), + irank,ii,imod,sr,si,smod,sdiv,len); + mxFree(irank); + + return S; + +#endif + +} +/*------------------------------------------------------------------------*/ +mxArray *gsparse_nosort(const int *ii,int imod, + const int *jj,int jdiv, + const double *sr,const double *si, + int smod,int sdiv, + int len,int M,int N,int Nzmax) +/* Derived from sparse_nosort(). */ +{ + +#define GSPS1 /* new version using hashing */ +#undef GSPS2 /* (very) old version without hashing */ + +#ifdef GSPS1 + + /* output */ + mxArray *S; + mwSize *jcS; + + /* rank- and hash-table */ + int *irank,*hash_tb; + size_t hash_sz = hashsize(len); + + jcS = mxCalloc(N+1,sizeof(jcS[0])); + irank = mxMalloc(len*sizeof(irank[0])); + hash_tb = mxCalloc(hash_sz,sizeof(hash_tb[0])); + + for (int i = 0; i < len; i++) { + const int row = ii[i%imod],col = jj[i/jdiv]; + uint32_t hashkey = hash(row,col)&(hash_sz-1); + + /* collision? */ + if (hash_tb[hashkey] != 0 && + !(row == ii[(hash_tb[hashkey]-1)%imod] && + col == jj[(hash_tb[hashkey]-1)/jdiv])) { + /* a fairly independent and odd (incremental) hashkey */ + const uint32_t hashkey2nd = hash(col,row)|1; + + /* rehash until empty slot found */ + do + hashkey = (hashkey+hashkey2nd)&(hash_sz-1); + while (hash_tb[hashkey] != 0 && + !(row == ii[(hash_tb[hashkey]-1)%imod] && + col == jj[(hash_tb[hashkey]-1)/jdiv])); + } + + /* fill in table */ + if (hash_tb[hashkey] == 0) { + hash_tb[hashkey] = i+1; + irank[i] = jcS[col]++; + } + else + irank[i] = irank[hash_tb[hashkey]-1]; + } + mxFree(hash_tb); + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + /* account for the accumulation of indices */ + jcS--; + /* for (int i = 0; i < len; i++) irank[i] += jcS[jj[i/jdiv]]; */ + if (jdiv == 1) + for (int i = 0; i < len; i++) irank[i] += jcS[jj[i]]; + else + for (int c = 0, i = 0; c < len/jdiv; c++) + for ( ; i < (c+1)*jdiv; i++) irank[i] += jcS[jj[c]]; + jcS++; + + /* allocate output */ + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + /* set the column pointer */ + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + /* insert the data */ + if (imod == len) + sparse_insert(mxGetIr(S), mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + else + sparse_inserti(mxGetIr(S), mxGetPr(S),mxGetPi(S), + irank,ii,imod,sr,si,smod,sdiv,len); + mxFree(irank); + + return S; + +#elif defined(GSPS2) + + mxArray *S; + int *jcS; + + int *rank,*irank,*hrow; + + jcS = mxCalloc(N+2,sizeof(jsC[0])); + jcS++; + for (int i = 0; i < len; i++) jcS[jj[i/jdiv]]++; + for (int c = 2; c <= N; c++) jcS[c] += jcS[c-1]; + + rank = mxMalloc(len*sizeof(rank[0])); + jcS--; + for (int i = 0; i < len; i++) rank[jcS[jj[i/jdiv]]++] = i; + + irank = mxMalloc(len*sizeof(irank[0])); + hrow = mxCalloc(M,sizeof(hrow[0])); + hrow--; + + for (int i = 0,col = jj[rank[0]/jdiv]; ; ) { + const int begin = jcS[col-1]; + const int end = jcS[col]; + for (jcS[col] = begin; i < end; i++) { + const int row = ii[rank[i]%imod]; + + if (hrow[row] <= begin) hrow[row] = ++jcS[col]; + + irank[rank[i]] = hrow[row]-1; + } + + if (i < len) + for (col++; col < jj[rank[i]/jdiv]; col++) jcS[col] = jcS[col-1]; + else { + for (col++; col <= N; col++) jcS[col] = jcS[col-1]; + break; + } + } + mxFree(++hrow); + mxFree(rank); + + if (Nzmax == -1) + Nzmax = jcS[N]; + else if (Nzmax < jcS[N]) { + mxFree(irank); + mxFree(jcS); + mexErrMsgIdAndTxt("fsparse:e16","Allocation limited by caller: " + "sparse matrix does not fit."); + } + S = mxCreateSparse(0,0,Nzmax,si == NULL ? mxREAL : mxCOMPLEX); + mxSetM(S,M); + mxSetN(S,N); + + mxFree(mxGetJc(S)); + mxSetJc(S,jcS); + + if (imod == len) + sparse_insert(mxGetIr(S), mxGetPr(S),mxGetPi(S), + irank,0,0,ii,sr,si,smod,sdiv,len,M); + else + sparse_inserti(mxGetIr(S), mxGetPr(S),mxGetPi(S), + irank,ii,imod,sr,si,smod,sdiv,len); + mxFree(irank); + + return S; + +#endif + +} +/*------------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/make.m b/stenglib/Fast/source/make.m new file mode 100644 index 00000000..2bb8f9ea --- /dev/null +++ b/stenglib/Fast/source/make.m @@ -0,0 +1,465 @@ +function make(varargin) +%MAKE Makefile for FAST. +% MAKE by itself makes FAST with default arguments. +% +% MAKE(...) accepts additional arguments as property/value-pairs. +% +% Property Value/{Default} Description +% ----------------------------------------------------------------- +% OpenMP Boolean {false} Turns OpenMP-compilation +% on/off. Only affects FSPARSE. +% +% fsparseonly Boolean {false} Only compiles FSPARSE. +% +% fsparsetime Boolean {false} Timing syntax for FSPARSE. +% +% silent Boolean {false} Turns information display on/off. + +% Johannes Dufva 2020-10-30 (mexw64, 9.7) +% S. Engblom 2019-01-23 (mexmaci64, mexa64, 9.6) +% S. Engblom 2016-11-23 (spreplace) +% S. Engblom 2015-03-23 (mexa64, 8.4) +% S. Engblom 2015-01-19 (mexmaci64, 8.4) +% S. Engblom 2013-12-02 (OpenMP, fsparse) +% S. Engblom 2012-04-16 (mexmaci64, 7.11) +% S. Engblom 2011-04-17 (mexmaci64, 7.10) +% S. Engblom 2011-03-07 (mexa64, 7.11) +% S. Engblom 2010-09-23 (mexs64, 7.7) +% S. Engblom 2010-02-02 (mexa64, 7.8) +% S. Engblom 2010-01-12 (mexmaci) +% S. Engblom 2007-05-17 (mexs64) +% S. Engblom 2006-11-09 (mexa64) +% S. Engblom 2005-03-22 (mexmac) + +% Use '-DmwIndex=int' for platforms where mex is not automatically +% linked against the library defining the mwIndex type. + +% default options +optdef.openmp = false; +optdef.fsparseonly = false; +optdef.fsparsetime = false; +optdef.silent = false; + +% merge defaults with actual inputs +if nargin > 0 + opts = struct(varargin{:}); + fn = fieldnames(opts); + for i = 1:length(fn) + optdef = setfield(optdef,fn{i},getfield(opts,fn{i})); + end +end +opts = optdef; +if opts.openmp + if ~opts.silent + fprintf(1,'Compiling FSPARSE with OpenMP.\n'); + end +end +if opts.fsparseonly + if ~opts.silent + fprintf(1,'Compiling FSPARSE only.\n'); + end +end +if opts.fsparsetime + FSPARSEDEF = '-DFSPARSE_TIME'; + if ~opts.silent + fprintf(1,'Compiling with #define FSPARSE_TIME.\n'); + end +else + FSPARSEDEF = ''; + if ~opts.silent + fprintf(1,'Compiling with #undef FSPARSE_TIME.\n'); + end +end + +% Note that the OpenMP version of fsparse is a beta-release. Not all +% cases of the code has been parallelized. Also, the OpenMP make has +% only been implemented for some platforms. The important lines are: +% +% clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/glnxa64 -lmx ' ... +% '-lmex']; +% mex('-largeArrayDims',clibs, ... +% ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... +% '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... +% '-outdir',s,[s '/source/fsparse.c']); + +s = pwd; +mx = mexext; +ver = version; + +% first is the fsparse-only compilation +if opts.fsparseonly + if strcmp(mx,'mexa64') + if ver(1) == '7' + if ~strncmp(ver,'7.2',3) && ~strncmp(ver,'7.8',3) && ... + ~strncmp(ver,'7.11',4) && ~strncmp(ver,'7.13',4) + warning(['Extension .' mexext [' tested with Matlab version(s) ' ... + '7.2, 7.8, 7.11 and 7.13 only.']]); + end + if ~strncmp(ver,'7.11',4) + if strncmp(ver,'7.2',3) + % should be an easy fix: + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/glnxa64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -O5 -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + end + end + else + % should be an easy fix: + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + % apparently, the linker path is not properly set up on 7.11: + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + end + else + if ~strncmp(ver,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 only.']); + end + + % apparently, the linker path is not properly set up on 8.4 (also a + % soft link libstdc++.so inside [matlabroot '/sys/os/glnxa64'] + % is required to point to the correct shared library, in this + % case libstdc++.so.6.0.17) + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/glnxa64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC O5 -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + end + end + elseif strcmp(mx,'mexmaci64') + if ver(1) == '7' + if ~strncmp(ver,'7.10',4) && ~strncmp(ver,'7.11',4) && ... + ~strncmp(ver,'7.14',4) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '7.10 and 7.11 only.']); + end + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/maci64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CC=gcc -std=c99 -fast ',FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + end + else + if opts.openmp, warning('Compilation of OpenMP not (yet?) supported for this platform.'); end + if ~strncmp(ver,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 and 9.6 only.']); + end + if opts.openmp + % no harm in trying (await update of Clang?) + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/maci64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-Wno-logical-op-parentheses -std=c99 ',FSPARSEDEF],'-outdir',s,[s '/source/fsparse.c']); + end + end + else + error('FSPARSE-only compilation not implemented for this platform.'); + end + return; +end + +% main compilation +if strcmp(mx,'mexglx') + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + if ~strncmp(ver,'7.5',3) && ~strncmp(ver,'7.8',3) + warning(['Extension .' mexext [' tested with Matlab version(s) ' ... + '7.5 and 7.8 only.']]); + end + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/clenshaw.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/fsetop.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/mexfrepmat.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/powerseries.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/sppmul.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/spreplace.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); +elseif strcmp(mx,'mexa64') + if ver(1) == '7' + if ~strncmp(ver,'7.2',3) && ~strncmp(ver,'7.8',3) && ... + ~strncmp(ver,'7.11',4) && ~strncmp(ver,'7.13',4) + warning(['Extension .' mexext [' tested with Matlab version(s) ' ... + '7.2, 7.8, 7.11 and 7.13 only.']]); + end + if ~strncmp(ver,'7.11',4) + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/clenshaw.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/fsetop.c']); + if strncmp(ver,'7.2',3) + % should be an easy fix: + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/mexfrepmat.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/mexfrepmat.c']); + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/glnxa64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -O5 -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + end + end + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/powerseries.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/spreplace.c']); + else + % should be an easy fix: + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + % apparently, the linker path is not properly set up on 7.11: + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/clenshaw.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsetop.c']); + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/mexfrepmat.c']); + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/powerseries.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/spreplace.c']); + end + else + if ~strncmp(ver,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 and 9.6 only.']); + end + + % apparently, the linker path is not properly set up on 8.4 (also a + % soft link libstdc++.so inside [matlabroot '/sys/os/glnxa64'] is + % required to point to the correct shared library, in this case + % libstdc++.so.6.0.17) + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/clenshaw.c']); + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsetop.c']); + mex('-largeArrayDims','CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/mexfrepmat.c']); + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/powerseries.c']); + mex('-largeArrayDims','CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims','CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/spreplace.c']); + + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/glnxa64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-fPIC -O5 -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/fsparse.c']); + end + end +elseif strcmp(mx,'mexmac') + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + if ~strncmp(ver,'7.0',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.0 only.']); + end + mex('CC=gcc -std=c99','-outdir',s,[s '/source/clenshaw.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/fsetop.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/mexfrepmat.c']); + mex(['CC=gcc -std=c99',FSPARSEDEF],'-outdir',s,[s '/source/fsparse.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/powerseries.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/sppmul.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/spreplace.c']); +elseif strcmp(mx,'mexmaci') + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + if ~strncmp(ver,'7.8',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.8 only.']); + end + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/clenshaw.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/fsetop.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/mexfrepmat.c']); + mex(['CC=gcc -std=c99 -fast',FSPARSEDEF],'-outdir',s,[s '/source/fsparse.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/powerseries.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/sppmul.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/spreplace.c']); +elseif strcmp(mx,'mexmaci64') + if ver(1) == '7' + if ~strncmp(ver,'7.10',4) && ~strncmp(ver,'7.11',4) && ... + ~strncmp(ver,'7.14',4) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '7.10 and 7.11 only.']); + end + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/clenshaw.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/fsetop.c']); + mex('-largeArrayDims', ... + 'CC=gcc -std=c99 -fast','-outdir',s,[s '/source/mexfrepmat.c']); + if opts.openmp + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/maci64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CC=gcc -std=c99 -fast',FSPARSEDEF],'-outdir',s,[s '/source/fsparse.c']); + end + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/powerseries.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims', ... + 'CC=gcc -std=c99 -fast','-outdir',s,[s '/source/spreplace.c']); + else + if opts.openmp, warning('Compilation of OpenMP not (yet?) supported for this platform.'); end + if ~strncmp(ver,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 and 9.6 only.']); + end + mex('CFLAGS=-Wno-parentheses -std=c99','-outdir',s,[s '/source/clenshaw.c']); + mex('CFLAGS= -std=c99','-outdir',s,[s '/source/fsetop.c']); + + mex('-largeArrayDims', ... + 'CFLAGS= -std=c99','-outdir',s,[s '/source/mexfrepmat.c']); + if opts.openmp + % no harm in trying (await update of Clang?) + clibs = ['CLIBS=-lgomp -lm -L' matlabroot '/bin/maci64 -lmx ' ... + '-lmex']; + mex('-largeArrayDims',clibs, ... + ['CFLAGS=-fopenmp -O5 -fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + else + mex('-largeArrayDims', ... + ['CFLAGS=-Wno-logical-op-parentheses -std=c99 ',FSPARSEDEF],'-outdir',s,[s '/source/fsparse.c']); + end + mex('CFLAGS= -std=c99','-outdir',s,[s '/source/powerseries.c']); + mex('CFLAGS= -std=c99','-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims', ... + 'CFLAGS= -std=c99','-outdir',s,[s '/source/spreplace.c']); + end +elseif strcmp(mx,'mexs64') + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + if ~strncmp(ver,'7.7',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.7 only.']); + end + mex('-outdir',s,[s '/source/clenshaw.c']); + mex('-DNO_STDINT','-outdir',s,[s '/source/fsetop.c']); + mex('-largeArrayDims','-outdir',s,[s '/source/mexfrepmat.c']); + mex('-largeArrayDims',['-DNO_STDINT ' FSPARSEDEF], ... + '-outdir',s,[s '/source/fsparse.c']); + mex('-outdir',s,[s '/source/powerseries.c']); + mex('-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims','-outdir',s,[s '/source/spreplace.c']); +elseif strcmp(mx,'mexw64') + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + mex('-outdir',s,[s '/source/clenshaw.c']); + mex('-outdir',s,[s '/source/fsetop.c']); + mex('-outdir',s,[s '/source/mexfrepmat.c']); + mex(FSPARSEDEF,'-outdir',s,[s '/source/fsparse.c']); + mex('-outdir',s,[s '/source/powerseries.c']); + mex('-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims','-outdir',s,[s '/source/spreplace.c']); +else + warning('New platform. Trying default make.'); + if opts.openmp, warning('OpenMP not implemented for this platform.'); end + mex('-outdir',s,[s '/source/clenshaw.c']); + mex('-outdir',s,[s '/source/fsetop.c']); + mex('-outdir',s,[s '/source/mexfrepmat.c']); + mex(FSPARSEDEF,'-outdir',s,[s '/source/fsparse.c']); + mex('-outdir',s,[s '/source/powerseries.c']); + mex('-outdir',s,[s '/source/sppmul.c']); + mex('-largeArrayDims','-outdir',s,[s '/source/spreplace.c']); +end diff --git a/stenglib/Fast/source/mexfrepmat.c b/stenglib/Fast/source/mexfrepmat.c new file mode 100644 index 00000000..aae1825b --- /dev/null +++ b/stenglib/Fast/source/mexfrepmat.c @@ -0,0 +1,300 @@ +/* mexfrepmat.c */ + +/* S. Engblom 2010-02-02 (Minor revision) */ +/* S. Engblom 2007-05-04 (Revision) */ +/* S. Engblom 2005-05-06 (Revision) */ +/* S. Engblom 2004-10-22 */ + +#include +#include +#include + +#include "mex.h" +#include "matrix.h" + +// rmemcpy() below assumes this: +#if CHAR_BIT != 8 +#error "Code assumes that 1 char is 1 byte." +#endif + +/*------------------------------------------------------------------------*/ + +// forward declarations +void replicate(const char *prA,const char *piA, + int ndimA,const mwSize *sizA,int lenA, + char *prB,char *piB, + int ndimB,const mwSize *sizB,int lenB,int nbytes); +void sparse_replicate(const char *prA,const char *piA, + const mwIndex *irA,const mwSize *jcA,const mwSize *sizA, + char *prB,char *piB, + mwIndex *irB,mwSize *jcB,const mwSize *sizB, + int rM,int rN,int nnzB,int nbytes); +void rmemcpy(void *s,size_t block,size_t len); +void r2memcpy(void *s1,void *s2,size_t block,size_t len); + +/*------------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + // check of syntax + if (nrhs != 2 || nlhs > 1) + mexErrMsgIdAndTxt("frepmat:e1","Expecting two inputs and one output."); + + if (!(mxIsNumeric(prhs[0]) || mxIsChar(prhs[0]) || mxIsLogical(prhs[0]))) + mexErrMsgIdAndTxt("frepmat:e2","Only numerical, character and " + "logical arrays supported."); + + if (!mxIsDouble(prhs[1]) || mxIsComplex(prhs[1]) || mxIsSparse(prhs[1])) + mexErrMsgIdAndTxt("frepmat:e3","Size argument must be real, double " + "and non-sparse."); + + // input A + const int ndimA = mxGetNumberOfDimensions(prhs[0]); + const mwSize *sizA = mxGetDimensions(prhs[0]); + const int lenA = mxGetNumberOfElements(prhs[0]); + const char *prA = mxGetData(prhs[0]); + const char *piA = mxGetImagData(prhs[0]); + const bool complx = piA != NULL; + const bool sparse = mxIsSparse(prhs[0]); + + // input REP + const int lenREP = mxGetNumberOfElements(prhs[1]); + const double *prREP = mxGetPr(prhs[1]); + + // output B + int ndimB = ndimA >= lenREP ? ndimA : lenREP; + mwSize sizB[ndimB]; + + // check input REP + for (int i = 0; i < lenREP; i++) + if (prREP[i] < 0.0 || prREP[i] != ceil(prREP[i])) + mexErrMsgIdAndTxt("frepmat:e4","Size argument must be nonnegative " + "integers."); + + if (sparse && ndimB > 2) + mexErrMsgIdAndTxt("frepmat:e5","Sparse N-dimensional matrices are " + "not supported."); + + // determine the size of the result B + if (ndimB == ndimA) { + int i; + for (i = 0; i < lenREP; i++) sizB[i] = sizA[i]*prREP[i]; + for ( ; i < ndimA; i++) sizB[i] = sizA[i]; + } + else { + int i; + for (i = 0; i < ndimA; i++) sizB[i] = prREP[i]*sizA[i]; + for ( ; i < lenREP; i++) sizB[i] = prREP[i]; + } + + // allocate and do the job + if (!sparse) { + plhs[0] = mxCreateNumericArray(ndimB,sizB, + mxGetClassID(prhs[0]), + complx ? mxCOMPLEX : mxREAL); + replicate(prA,piA,ndimA,sizA,lenA, + mxGetData(plhs[0]),mxGetImagData(plhs[0]), + mxGetNumberOfDimensions(plhs[0]), + mxGetDimensions(plhs[0]), + mxGetNumberOfElements(plhs[0]), + mxGetElementSize(plhs[0])); + } + else { + const int rM = lenREP < 1 ? 1 : prREP[0]; + const int rN = lenREP < 2 ? 1 : prREP[1]; + const int nnzB = mxGetJc(prhs[0])[sizA[1]]*rM*rN; + + if (mxGetClassID(prhs[0]) == mxLOGICAL_CLASS) + plhs[0] = mxCreateSparseLogicalMatrix(sizB[0],sizB[1],nnzB); + else + plhs[0] = mxCreateSparse(sizB[0],sizB[1],nnzB, + complx ? mxCOMPLEX : mxREAL); + + sparse_replicate(prA,piA,mxGetIr(prhs[0]),mxGetJc(prhs[0]),sizA, + mxGetData(plhs[0]),mxGetImagData(plhs[0]), + mxGetIr(plhs[0]),mxGetJc(plhs[0]), + mxGetDimensions(plhs[0]), + rM,rN,nnzB,mxGetElementSize(plhs[0])); + } +} +/*------------------------------------------------------------------------*/ +void replicate(const char *prA,const char *piA, + int ndimA,const mwSize *sizA,int lenA, + char *prB,char *piB, + int ndimB,const mwSize *sizB,int lenB,int nbytes) +/* Replicates the matrix A within the matrix B by filling B in all its + dimensions with repeated copies of A. Each element of both matrices + is assumed to occupy nbytes bytes. + + Note: using char * instead of void * as done above means that it is + implicitly assumed that sizeof(char) == 1 (byte). If this is not + true, the code won't work. */ +{ + // empty case + if (lenB == 0) return; + + // various + const bool complx = piA != NULL; + int d1,dimcount[ndimA]; + int strB[ndimB+1],strBsizA[ndimA]; + + // construct stride for B + strB[0] = 1; + for (int i = 0; i < ndimB; i++) strB[i+1] = strB[i]*sizB[i]; + + // frequently used product + for (int i = 0; i < ndimA; i++) strBsizA[i] = strB[i]*sizA[i]; + + // find first nonmatching dimension + for (d1 = 0; d1 < ndimA-1; d1++) + if (sizA[d1] != sizB[d1]) break; + + // a single initial copy of A is placed in B + memset(dimcount,0,ndimA*sizeof(dimcount[0])); + for (int i = 0, j = 0, block = strBsizA[d1]; ; ) { + // copy as much as possible + memcpy(&prB[j*nbytes],&prA[i*nbytes],block*nbytes); + if (complx) memcpy(&piB[j*nbytes],&piA[i*nbytes],block*nbytes); + + // increase pointer in A + if ((i += block) == lenA) break; + + // increase pointer in B + for (int k = d1+1; j += strB[k],++dimcount[k] == sizA[k]; k++) { + dimcount[k] = 0; + j -= strBsizA[k]; + } + } + + // replicate over the first dimensions + memset(dimcount,0,ndimA*sizeof(dimcount[0])); + for (int r = d1; r < ndimA-1; r++) { + const int block = strBsizA[r]; + for (int i = 0; i < lenB; ) { + // copy in a recursive fashion + if (complx) + r2memcpy(&prB[i*nbytes],&piB[i*nbytes], + block*nbytes,strB[r+1]*nbytes); + else + rmemcpy(&prB[i*nbytes],block*nbytes,strB[r+1]*nbytes); + + // determine next adress in B + for (int k = r+1; i += strB[k],++dimcount[k] == sizA[k]; ) { + dimcount[k] = 0; + i -= strBsizA[k]; + if (++k == ndimA) { + i = lenB; + break; + } + } + } + } + + // the final dimensions are easier + if (complx) + r2memcpy(prB,piB,strBsizA[ndimA-1]*nbytes,lenB*nbytes); + else + rmemcpy(prB,strBsizA[ndimA-1]*nbytes,lenB*nbytes); +} +/*------------------------------------------------------------------------*/ +void sparse_replicate(const char *prA,const char *piA, + const mwIndex *irA,const mwSize *jcA, + const mwSize *sizA, + char *prB,char *piB, + mwIndex *irB,mwSize *jcB,const mwSize *sizB, + int rM,int rN,int nnzB,int nbytes) +/* Sparse replication of matrix. Works as replicate() above (and with + the same limitations on sizeof(char)) but for sparse matrices A and + B instead. */ +{ + // empty case + if (nnzB == 0) return; + + const bool complx = piA != NULL; + + /* copy jcA to jcB and then replicate the copy rN-1 times in + dimension 2 */ + memcpy(jcB,jcA,(sizA[1]+1)*sizeof(jcA[0])); + for (int i = 1,jj = sizA[1]+1; i < rN; i++) { + // add new offset for each round + const int offset = jcB[jj-1]; + for (int ii = 1; ii <= sizA[1]; ii++,jj++) + jcB[jj] = jcA[ii]+offset; + } + + if (rM > 1) { + /* replicate rM times in dimension 1 by scaling the cumulative + pointer to account for added elements in each column */ + for (int jj = 1; jj <= sizB[1]; jj++) jcB[jj] *= rM; + + // construct irB columnwise and copy values at the same time + for (int j = 0; j < sizA[1]; j++) { + // copy and replicate the values + memcpy(&prB[jcB[j]*nbytes],&prA[jcA[j]*nbytes], + (jcA[j+1]-jcA[j])*nbytes); + if (complx) { + memcpy(&piB[jcB[j]*nbytes],&piA[jcA[j]*nbytes], + (jcA[j+1]-jcA[j])*nbytes); + r2memcpy(&prB[jcB[j]*nbytes],&piB[jcB[j]*nbytes], + (jcA[j+1]-jcA[j])*nbytes,(jcB[j+1]-jcB[j])*nbytes); + } + else + rmemcpy(&prB[jcB[j]*nbytes],(jcA[j+1]-jcA[j])*nbytes, + (jcB[j+1]-jcB[j])*nbytes); + + /* copy irA to irB and then replicate the copy rM-1 times in + dimension 1 */ + memcpy(&irB[jcB[j]],&irA[jcA[j]],(jcA[j+1]-jcA[j])*sizeof(irA[0])); + for (int i = 1,jj = jcB[j]+jcA[j+1]-jcA[j]; i < rM; i++) { + // add new offset for each round + const int offset = i*sizA[0]; + for (int ii = jcA[j]; ii < jcA[j+1]; ii++,jj++) + irB[jj] = irA[ii]+offset; + } + } + } + else { + // faster case, no replication in dimension 1 + memcpy(irB,irA,jcA[sizA[1]]*sizeof(irA[0])); + memcpy(prB,prA,jcA[sizA[1]]*nbytes); + if (complx) + memcpy(piB,piA,jcA[sizA[1]]*nbytes); + } + + // finally replicate the copy rN-1 times in dimension 2 + rmemcpy(irB,jcA[sizA[1]]*rM*sizeof(irB[0]),jcB[sizB[1]]*sizeof(irB[0])); + if (complx) + r2memcpy(prB,piB,jcA[sizA[1]]*rM*nbytes, + jcB[sizB[1]]*nbytes); + else + rmemcpy(prB,jcA[sizA[1]]*rM*nbytes, + jcB[sizB[1]]*nbytes); +} +/*------------------------------------------------------------------------*/ +void rmemcpy(void *s,size_t block,size_t len) +/* Formally the same as memcpy(s+block,s,len-block), except that + according to ANSI-C, memcpy() does not allow overlapping + blocks. Assumes that sizeof(char) == 1 byte. */ +{ + // the size of the block is doubled for each round + while (block < len >> 1) { + memcpy((char *)s+block,s,block); + block <<= 1; + } + + // the remainder + memcpy((char *)s+block,s,len-block); +} +/*------------------------------------------------------------------------*/ +void r2memcpy(void *s1,void *s2,size_t block,size_t len) +/* The same as rmemcpy(s1,block,len) followed by + rmemcpy(s2,block,len). Used for real and imaginary parts. */ +{ + while (block < len >> 1) { + memcpy((char *)s1+block,s1,block); + memcpy((char *)s2+block,s2,block); + block <<= 1; + } + memcpy((char *)s1+block,s1,len-block); + memcpy((char *)s2+block,s2,len-block); +} +/*------------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/powerseries.c b/stenglib/Fast/source/powerseries.c new file mode 100644 index 00000000..8e619dd5 --- /dev/null +++ b/stenglib/Fast/source/powerseries.c @@ -0,0 +1,133 @@ +/* powerseries.c */ + +/* S. Engblom 2007-08-17 (Minor revision) */ +/* S. Engblom 2007-06-13 (Minor revision) */ +/* S. Engblom 2007-01-22 */ + +#include + +#include "mex.h" +#include "matrix.h" + +/* forward declaration */ +void powerseries(double *yr,double *yi, + const double *cr,const double *ci,int Nc, + const double *xr,const double *xi,int Nx, + double tol); + +#define ISDOUBLEMATRIX(A) (mxIsDouble(A) && !mxIsSparse(A) && \ + mxGetNumberOfDimensions(A) == 2) +#define ISREALSCALAR(A) (!mxIsCell(A) && !mxIsStruct(A) && \ + !mxIsComplex(A) && mxGetNumberOfElements(A) == 1) + +/*------------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + if (1 < nlhs || nrhs != 3) + mexErrMsgTxt("Expecting one output and 3 inputs."); + + /* inputs C and X */ + if (!ISDOUBLEMATRIX(prhs[0]) || !ISDOUBLEMATRIX(prhs[1])) + mexErrMsgTxt("First two arguments must be double matrices."); + + /* input tol */ + if (!ISREALSCALAR(prhs[2])) + mexErrMsgTxt("Expecting a real scalar tolerance."); + + /* create output */ + plhs[0] = mxCreateDoubleMatrix(mxGetM(prhs[1]), + mxGetN(prhs[1]), + mxIsComplex(prhs[0]) || + mxIsComplex(prhs[1]) ? mxCOMPLEX : mxREAL); + + /* evaluate the result */ + powerseries(mxGetPr(plhs[0]),mxGetPi(plhs[0]), + mxGetPr(prhs[0]),mxGetPi(prhs[0]), + mxGetNumberOfElements(prhs[0]), + mxGetPr(prhs[1]),mxGetPi(prhs[1]), + mxGetNumberOfElements(prhs[1]), + *mxGetPr(prhs[2])); +} +/*------------------------------------------------------------------------*/ +void powerseries(double *yr,double *yi, + const double *cr,const double *ci,int Nc, + const double *xr,const double *xi,int Nx, + double tol) +/* Summation of power series. Computes y = sum_(k >= 0) c[k]*x^k where + y = (yr,yi), c = (cr,ci) and x = (xr,xi) and where any of the + imaginary parts may be NULL. The coefficient vector has length Nc + and the coordinate vector length Nx. It is required that y be + allocated and cleared before the call.*/ +{ + bool warn = false; + + if (yi == NULL) + for (int i = 0; i < Nx; i++) { + int j; + double powr = 1.0; + for (j = 0; j < Nc; j++) { + double termr = cr[j]*powr; + yr[i] += termr; + if (fabs(termr) < fabs(yr[i])*tol) break; + powr *= xr[i]; + } + warn = warn || j == Nc; + } + else if (ci == NULL) + for (int i = 0; i < Nx; i++) { + int j; + double powr = 1.0,powi = 0.0; + for (j = 0; j < Nc; j++) { + double termr = cr[j]*powr; + double termi = cr[j]*powi; + yr[i] += termr; + yi[i] += termi; + if (fabs(termr)+fabs(termi) < (fabs(yr[i])+fabs(yi[i]))*tol) + break; + double powr_ = powr*xr[i]-powi*xi[i]; + double powi_ = powr*xi[i]+powi*xr[i]; + powr = powr_; + powi = powi_; + } + warn = warn || j == Nc; + } + else if (xi == NULL) + for (int i = 0; i < Nx; i++) { + int j; + double powr = 1.0; + for (j = 0; j < Nc; j++) { + double termr = cr[j]*powr; + double termi = ci[j]*powr; + yr[i] += termr; + yi[i] += termi; + if (fabs(termr)+fabs(termi) < (fabs(yr[i])+fabs(yi[i]))*tol) + break; + powr *= xr[i]; + } + warn = warn || j == Nc; + } + else + for (int i = 0; i < Nx; i++) { + int j; + double powr = 1.0,powi = 0.0; + for (j = 0; j < Nc; j++) { + double termr = cr[j]*powr-ci[j]*powi; + double termi = cr[j]*powi+ci[j]*powr; + yr[i] += termr; + yi[i] += termi; + if (fabs(termr)+fabs(termi) < (fabs(yr[i])+fabs(yi[i]))*tol) + break; + double powr_ = powr*xr[i]-powi*xi[i]; + double powi_ = powr*xi[i]+powi*xr[i]; + powr = powr_; + powi = powi_; + } + warn = warn || j == Nc; + } + + if (warn) + mexWarnMsgIdAndTxt("powerseries:w1", + "Series did not converge to the prescribed " + "accuracy for all arguments."); +} +/*------------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/sppmul.c b/stenglib/Fast/source/sppmul.c new file mode 100644 index 00000000..9ac4dfb5 --- /dev/null +++ b/stenglib/Fast/source/sppmul.c @@ -0,0 +1,80 @@ +/* sppmul.c */ +/* S. Engblom 2005-10-27 */ + +#include "mex.h" +#include "matrix.h" + +#define MXTYPE(A,B) (mxIsComplex(A) || mxIsComplex(B) ? \ + mxCOMPLEX : mxREAL) + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* check of syntax */ + if (nrhs != 3 || nlhs > 1) + mexErrMsgTxt("Expecting three inputs and one output."); + + /* input */ + if (!mxIsSparse(prhs[0])) + mexErrMsgTxt("Expecting a sparse array as the first input."); + const int M = mxGetM(prhs[0]),N = mxGetN(prhs[0]); + const mwSize *jc = mxGetJc(prhs[0]); + const mwIndex *ir = mxGetIr(prhs[0]); + + if (!mxIsDouble(prhs[1]) || mxGetNumberOfElements(prhs[1]) != jc[N]) + mexErrMsgTxt("Expecting a matching double array as " + "the second input."); + const double *prv = mxGetPr(prhs[1]),*piv = mxGetPi(prhs[1]); + + if (!mxIsDouble(prhs[2]) || mxGetNumberOfDimensions(prhs[2]) > 2 || + mxGetM(prhs[2]) != N) + mexErrMsgTxt("Expecting a double matrix with matching first " + "dimension as the third input."); + const int K = mxGetN(prhs[2]); + const double *prX = mxGetPr(prhs[2]),*piX = mxGetPi(prhs[2]); + + /* allocate output */ + plhs[0] = mxCreateDoubleMatrix(M,K,MXTYPE(prhs[1],prhs[2])); + double *prY = mxGetPr(plhs[0]),*piY = mxGetPi(plhs[0]); + + /* evaluate the product */ + if (piY == NULL) + for (int k = 0; k < K; k++) { + for (int i = 0; i < N; i++) + for (mwSize j = jc[i]; j < jc[i+1]; j++) + prY[ir[j]] += prv[j]*prX[i]; + prY += M; + prX += N; + } + else if (piX == NULL) + for (int k = 0; k < K; k++) { + for (int i = 0; i < N; i++) + for (mwSize j = jc[i]; j < jc[i+1]; j++) { + prY[ir[j]] += prv[j]*prX[i]; + piY[ir[j]] += piv[j]*prX[i]; + } + prY += M; piY += M; + prX += N; + } + else if (piv == NULL) + for (int k = 0; k < K; k++) { + for (int i = 0; i < N; i++) + for (mwSize j = jc[i]; j < jc[i+1]; j++) { + prY[ir[j]] += prv[j]*prX[i]; + piY[ir[j]] += prv[j]*piX[i]; + } + prY += M; piY += M; + prX += N; piX += N; + } + else + for (int k = 0; k < K; k++) { + for (int i = 0; i < N; i++) + for (mwSize j = jc[i]; j < jc[i+1]; j++) { + prY[ir[j]] += prv[j]*prX[i]-piv[j]*piX[i]; + piY[ir[j]] += piv[j]*prX[i]+piv[j]*prX[i]; + } + prY += M; piY += M; + prX += N; piX += N; + } +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Fast/source/spreplace.c b/stenglib/Fast/source/spreplace.c new file mode 100644 index 00000000..85ef3277 --- /dev/null +++ b/stenglib/Fast/source/spreplace.c @@ -0,0 +1,38 @@ +/* spreplace.c */ +/* S. Engblom 2016-11-23 */ + +#include + +#include "mex.h" +#include "matrix.h" + +#define MXTYPE(A) (mxIsComplex(A) ? mxCOMPLEX : mxREAL) + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* check of syntax */ + if (nrhs != 2 || nlhs > 1) + mexErrMsgTxt("Expecting two inputs and one output."); + + /* input */ + if (!mxIsSparse(prhs[0])) + mexErrMsgTxt("Expecting a sparse array as the first input."); + const int M = mxGetM(prhs[0]),N = mxGetN(prhs[0]); + const mwSize *jc = mxGetJc(prhs[0]); + const mwIndex *ir = mxGetIr(prhs[0]); + + if (!mxIsDouble(prhs[1]) || mxGetNumberOfElements(prhs[1]) != jc[N]) + mexErrMsgTxt("Expecting a matching double array as " + "the second input."); + const double *prx = mxGetPr(prhs[1]),*pix = mxGetPi(prhs[1]); + + /* allocate and create output */ + plhs[0] = mxCreateSparse(M,N,jc[N],MXTYPE(prhs[1])); + memcpy(mxGetJc(plhs[0]),jc,(N+1)*sizeof(jc[0])); + memcpy(mxGetIr(plhs[0]),ir,jc[N]*sizeof(ir[0])); + memcpy(mxGetPr(plhs[0]),prx,jc[N]*sizeof(double)); + if (pix) + memcpy(mxGetPi(plhs[0]),pix,jc[N]*sizeof(double)); +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Fast/sppmul.m b/stenglib/Fast/sppmul.m new file mode 100644 index 00000000..6141da6a --- /dev/null +++ b/stenglib/Fast/sppmul.m @@ -0,0 +1,15 @@ +function y = sppmul(S,v,x) +%SPPMUL Sparse pattern multiply. +% Y = SPPMUL(S,V,X), where S is a sparse matrix and X is a full +% matrix, computes the product Y = S*X but with the nonzero elements +% of S replaced by those in the vector V. SPPMUL is thus formally +% equivalent to the code +% [i,j,s] = find(S); +% Y = sparse(i,j,v,size(S,1),size(S,2))*X; +% Of course, the intermediate sparse matrix is never actually +% constructed. It is required that NNZ(S) = PROD(SIZE(V)) and +% that SIZE(S,2) = SIZE(X,1). + +% S. Engblom 2005-10-27 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/sppmul.mexw64 b/stenglib/Fast/sppmul.mexw64 new file mode 100644 index 00000000..93274d7b Binary files /dev/null and b/stenglib/Fast/sppmul.mexw64 differ diff --git a/stenglib/Fast/spreplace.m b/stenglib/Fast/spreplace.m new file mode 100644 index 00000000..2af88bcc --- /dev/null +++ b/stenglib/Fast/spreplace.m @@ -0,0 +1,18 @@ +function T = spreplace(S,x) +%SPREPLACE Sparse pattern replace. +% T = SPREPLACE(S,X), where S is a sparse matrix and X a vector, +% replaces the nonzero elements of S with X. SPREPLACE is thus +% formally equivalent to the code +% [i,j,s] = find(S); +% T = sparse(i,j,x,size(S,1),size(S,2)); +% It is required that NNZ(S) = PROD(SIZE(X)). +% +% Example: +% S = sprand(30,30,0.2); +% F = spreplace(S,exp(nonzeros(S))); +% +% See also SPPMUL, FSPARSE. + +% S. Engblom 2016-11-23 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Fast/spreplace.mexw64 b/stenglib/Fast/spreplace.mexw64 new file mode 100644 index 00000000..b80c2d0f Binary files /dev/null and b/stenglib/Fast/spreplace.mexw64 differ diff --git a/stenglib/Fast/startup.m b/stenglib/Fast/startup.m new file mode 100644 index 00000000..8b778f35 --- /dev/null +++ b/stenglib/Fast/startup.m @@ -0,0 +1,9 @@ +%STARTUP Add paths to FAST stuff. + +% S. Engblom 2005-03-22 + +s = pwd; +addpath([s '/source']); +if exist('test','dir') + addpath([s '/test']); +end diff --git a/stenglib/Misc/repeat.m b/stenglib/Misc/repeat.m new file mode 100644 index 00000000..5396e87b --- /dev/null +++ b/stenglib/Misc/repeat.m @@ -0,0 +1,18 @@ +function c = repeat(a,b) +%REPEAT Repeat elements of vector. +% C = REPEAT(A,B) returns A repeated by B, see the example +% below. Both arguments are assumed to be row vectors of the same +% length and B must contain nonnegative integers only. +% +% No error-checking is performed. +% +% Example: +% a = [0 1 pi inf nan -nan]; +% b = [1 4 0 2 3 0 ]; +% c = repeat(a,b) % returns c = [0 1 1 1 1 inf inf nan nan nan] + +% S. Engblom 2005-10-14 + +% can't beat this! +ix = cumsum(sparse(1,cumsum([1 b]),1)); +c = a(ix(1:end-1)); diff --git a/stenglib/Misc/startup.m b/stenglib/Misc/startup.m new file mode 100644 index 00000000..eb2f226b --- /dev/null +++ b/stenglib/Misc/startup.m @@ -0,0 +1,6 @@ +%STARTUP Add paths to MISC stuff. + +% S. Engblom 2010-02-10 + +s = pwd; +% (do nothing) diff --git a/stenglib/Misc/sudsolve.m b/stenglib/Misc/sudsolve.m new file mode 100644 index 00000000..c1483841 --- /dev/null +++ b/stenglib/Misc/sudsolve.m @@ -0,0 +1,137 @@ +function [S,ix] = sudsolve(G) +%SUDSOLVE Solve Sudoku puzzle. +% S = SUDSOLVE(G) solves the Sudoku puzzle with initial conditions +% given by G. The algorithm used is a recursive greedy selection +% procedure. +% +% [S,IX] = SUDSOLVE(G) does the same thing but also returns the +% index IX of the Sudoku. The index is defined as the highest branch +% point taken by the algorithm and indicates the degree of +% difficulty. For most human solvable Sudokus, IX <= 2. +% +% Example: +% % a Sudoku of medium difficulty: +% G = zeros(9,9); +% G([1 2 6 10 13 14 16 18 24 25 29 36 37 41 ... +% 45 46 53 57 58 64 66 68 69 72 76 80 81]) = ... +% [7 5 8 3 4 9 2 6 1 4 6 8 5 1 3 1 2 1 6 8 6 2 9 7 7 6 1]; +% % (Published in Sydsvenska Dagbladet, Malmoe, Sweden, 2005-07-13) +% S = sudsolve(G) +% +% % a rather tricky one: +% M = zeros(9,9); +% M([2 5 8 10 18 21 25 31 33 37 41 ... +% 45 49 51 57 61 64 72 74 77 80]) = ... +% [6 8 4 2 3 4 7 8 7 3 1 4 6 5 5 6 4 2 3 6 8]; +% [S,ix] = sudsolve(M) % takes some time +% % (From Cleve's corner) + +% S. Engblom 2010-02-25 (Added Moler's example) +% S. Engblom 2005-07-15 + +% used for tracking branches +stack = cell(3,0); +kmax = 0; + +% start with all possibilities +S = true(9,9,9); + +% insert given values G +[i,j,v] = find(G); + +% check G +if any(v ~= ceil(v)) || any(v < 1) || any(9 < v) + error('Illegal initial conditions.'); +end +if ndims(G) > 2 || any(size(G) ~= 9) + error('Illegal initial conditions.'); +end + +for k = 1:size(v,1) + ii = i(k); jj = j(k); vv = v(k); + + if ~S(ii,jj,vv), error('Inconsistent initial conditions.'); end + + % set individual number + S(ii,jj,:) = false; + + % remove possibilities in same column... + S(:,jj,vv) = false; + + % ...same row... + S(ii,:,vv) = false; + + % ...and same 3-by-3 square + I = ceil(ii/3); I = 3*(I-1)+1:3*I; + J = ceil(jj/3); J = 3*(J-1)+1:3*J; + S(I,J,vv) = false; +end + +% remove possibilities in a greedy fashion +while true + % no more possibilities? + while ~any(S(:)) + % done/unsolvable/pop stack? + if all(G(:)) + % the answer is in G... + S = G; + if nargout > 1, ix = kmax; end + return; + elseif size(stack,2) == 0 + error('Sudoku is unsolvable.'); + else + S = stack{1,end}; + G = stack{2,end}; + kmax = stack{3,end}; + stack = stack(:,1:end-1); + end + end + + % find the branch with the lowest complexity + for k = 1:9 + [ii,jj] = find(sum(S,3) == k,1); + if ~isempty(ii), break; end + end % will always succeed + kmax = max(k,kmax); + + % increase stack if branch is taken + if k > 1 + v = find(S(ii,jj,:)); + for l = 1:k + vv = v(l); + + % S and G must be saved + SS = S; GG = G; + + % make the move + GG(ii,jj) = vv; + SS(ii,jj,:) = false; + SS(:,jj,vv) = false; + SS(ii,:,vv) = false; + I = ceil(ii/3); I = 3*(I-1)+1:3*I; + J = ceil(jj/3); J = 3*(J-1)+1:3*J; + SS(I,J,vv) = false; + + % increase stack + stack(:,end+1) = {SS; GG; kmax}; + end + + % continue with the front of the stack + S = stack{1,end}; + G = stack{2,end}; + kmax = stack{3,end}; + stack = stack(:,1:end-1); + else + % no need for stack + vv = find(S(ii,jj,:)); + + % make the move + G(ii,jj) = vv; + S(ii,jj,:) = false; + S(:,jj,vv) = false; + S(ii,:,vv) = false; + I = ceil(ii/3); I = 3*(I-1)+1:3*I; + J = ceil(jj/3); J = 3*(J-1)+1:3*J; + S(I,J,vv) = false; + end +end diff --git a/stenglib/README.md b/stenglib/README.md new file mode 100644 index 00000000..0ec5ba0d --- /dev/null +++ b/stenglib/README.md @@ -0,0 +1,232 @@ +# stenglib +Stefan Engblom's Matlab libraries - packages for daily use. + +## License statement for stenglib + +You may download all of **stenglib** and +use, modify and redistribute it in any way you like. A +redistributor must fully attribute the authorship and make a good +effort to cite the original location of the software. A researcher +making *critical* use of the software in research is requested to +acknowledge this in publications related to the +research. A company may use the code in software products provided +that the original location and the author is clearly cited. + +All code provided here comes with absolutely **no warranty** +and **no support** whatsoever is given. + +There are a lot of freeware available on the net. Do **not** +download unless you agree to the above license. + +## About stenglib + +**stenglib** is loosely divided into 5 sub-packages, with few dependencies in between them: +* Tensor +* Fast +* Scicomp +* Utils +* Misc + +For contact details, see [stenglib.org](http://www.stenglib.org). +I welcome bug reports and comments. Please *do not* send support requests. + +## Tensor + +Originally, I made the **Tensor** package +because I had the need to easily, efficiently and consistently +manage multi-dimensional arrays in Matlab. *Examples:* given a +matrix and a vector, how do you scale each row in the matrix by +the vector? How can you multiply a 3-D array with a matrix? The +package is useful to anyone who writes code for (pseudo-) spectral +methods, FEM, or who uses multi-dimensional arrays or tensor +notation a lot. + +* **tndims** Number of dimensions. + (Depend: 0, status: stable) + [tndims.m](Tensor/tndims.m) + [tndims.c](Tensor/source/tndims.c) + +* **tsize** Size of array. + (Depend: 0, status: stable) + [tsize.m](Tensor/tsize.m) + [tsize.c](Tensor/source/tsize.c) + +* **tsum** Tensor summation. + (Depend: 0, status: stable) + [tsum.m](Tensor/tsum.m) + [tsum.c](Tensor/source/tsum.c) + +* **tprod** Tensor product. + Based on a concept by D. Bertilsson, [COMSOL](http://www.comsol.com). + (Depend: 0, status: stable) + [tprod.m](Tensor/tprod.m) + [tprod.c](Tensor/source/tprod.c) + +There is also a [make.m](Tensor/source/make.m) available. +It will work on several, but not all, platforms. + +## Fast + +The routines in the **Fast** package exist +because some things just take too much time in Matlab. *Examples:* +replicate a data set in different dimensions (a.k.a. repmat), +assemble a sparse matrix, or evaluate set operations. These +routines should be of general interest to programmer in the +scientific computing community. + +* **frepmat** Fast replication of array. + (Depend: Tensor/{tndims,tsize} (weakly), status: stable) + [frepmat.m](Fast/frepmat.m) + [mexfrepmat.c](Fast/sourcee/mexfrepmat.c) + +* **fsparse** Fast assembly of sparse matrix. + (Depend: 0, status: stable but not completely settled) + [fsparse.m](Fast/fsparse.m) + [fsparse.c](Fast/source/fsparse.c) + +There is now a *parallel* **fsparse** version available. A +paper describing the algorithm is *S. Engblom, D. Lukarski: +Fast Matlab compatible sparse assembly on multicore computers*, +in *Parallel Comput.* 56:1--17 (2016) [(doi)](http://dx.doi.org/10.1016/j.parco.2016.04.001). +*Fact:* the **fsparse**-code has been selected as the base for the sparse assembly routines +in [PARALUTION](http://www.paralution.com). + +* **clenshaw** Evaluation of 3-term recurrences. + (Depend: 0, status: stable) + [clenshaw.m](Fast/clenshaw.m) + [clenshaw.c](Fast/source/clenshaw.c) + +* **fsetop** Fast set operations based on hashing. Based on + a concept by [P.-O. Persson](http://www.mit.edu/~persson) and a + hash-function by [P. Hsieh](http://www.azillionmonkeys.com/qed/hash.html) + (Depend: 0, status: stable) + [fsetop.m](Fast/fsetop.m) + [fsetop.c](Fast/source/fsetop.c) + +* **sppmul** Sparse pattern multiply. + (Depend: 0, status: stable) Download: + [sppmul.m](Fast/sppmul.m) + [sppmul.c](Fast/source/sppmul.c) + +* **powerseries** Sum power series. + (Depend: 0, status: stable) + [powerseries.m](Fast/powerseries.m) + [powerseries.c](Fast/source/powerseries.c) + +As before there is a [make.m](Fast/source/make.m) available which you will probably have to modify. + +## Scicomp + +In **Scicomp** I've assembled some solvers from different areas within scientific computing: two solvers for +nonlinear problems and an implementation of the Nelder-Mead simplex algorithm. I have also put three routines +for Gaussian quadratures with respect to discrete measures in this package. + +* **rtsafe** Scalar nonlinear solver. + (Depend: 0, status: stable) + [rtsafe.m](Scicomp/rtsafe.m) + +* **ainsolve** Solver for large sets of nonlinear equations. + (Depend: 0, status: experimental) + [ainsolve.m](Scicomp/ainsolve.m) + +* **nmsimplex** Nelder-Meads simplex-algorithm. + (Depend: Fast/frepmat, Utils/parseopts, status: stable) + [nmsimplex.m](Scicomp/nmsimplex.m) + +* **ode1s** ODE-solver: split-step Euler method with digital control. + (Depend: Fast/{frepmat,fsetop}, Utils/{parseopts,report}, status: stable, but not finalized) + [ode1s.m](Scicomp/ode1s.m) + +* **gaussqd** Gaussian quadrature for discrete measures. + (Depend: Tensor/tprod, Fast/fsparse, status: stable) + [gaussqd.m](Scicomp/gaussqd.m) + +* **gausspd** Gaussian polynomial for discrete measures. + (Depend: Tensor/{tsum,tprod}, Fast/clenshaw, status: stable) + [gausspd.m](Scicomp/gausspd.m) + +* **poch** Pochhammer's function. + (Depend: 0, status: stable) + [poch.m](Scicomp/poch.m) + +## Utils + +In the package **Utils** I've collected various +routines for performing everyday tasks. Examples include +generating LaTeX-arrays from matrices, .gif-animations, small +perfect hash functions and removing files ending with a tilde (!). + +* **arr2latex** LaTeX-table from matrix. +(Depend: Fast/frepmat, Utils/parseopts, status: stable) +[arr2latex.m](Utils/arr2latex.m) + +* **assignopts** Assign options. +(Depend: 0, status: stable) +[assignopts.m](Utils/assignopts.m) + +* **parseopts** Parse options. +(Depend: Fast/fsetop, status: stable) +[parseopts.m](Utils/parseopts.m) + +* **matmerge** Merge .mat-files. +(Depend: Fast/fsetop, Tensor/{tsize,tndims}, status: stable but kind of a hack) +[matmerge.m](Utils/matmerge.m) + +* **consistency** Local truncation error of linear multistep method. +(Depend: 0, status: stable) +[consistency.m](Utils/consistency.m) + +* **stability** Plot of stability region for linear multistep method. +(Depend: 0, status: stable) +[stability.m](Utils/stability.m) + +* **perfecthash** Perfect hash-function from strings. +(Depend: Tensor/tsum, Fast/fsparse, status: this is a hack, really) +[perfecthash.m](Utils/perfecthash.m) + +* **movie2gif** Create GIF animation from MATLAB movie. +(Depend: imwrite-function in Matlab, status: stable) +[movie2gif.m](Utils/movie2gif.m) + +* **report** Report progress of solver. +(Depend: 0, status: stable) +[report.m](Utils/report.m) + +* **runtest** General test facility. +(Depend: 0, status: stable) +[runtest.m](Utils/runtest.m) + +* **rmtilde** Remove files ending with a tilde ('~'). +(Depend: 0, status: stable) +[rmtilde.m](Utils/rmtilde.m) + +* **connect** Connectivity information for points. +(Depend: 0, status: stable) +[connect.m](Utils/connect.m) + +* **ndop** General N-dimensional operator. +(Depend: Tensor/{tndims,tsum,tprod}, Fast/frepmat, status: stable) +[ndop.m](Utils/ndop.m) + +* **spblock** Sparse matrix from blocks. +(Depend: Fast/fsparse, status: stable) +[spblock.m](Utils/spblock.m) + +* **spym** Visualize magnitude of elements in matrix. +(Depend: 0, status: this is a hack) +[spym.m](Utils/spym.m) + +## Misc + +I'll put amusing routines in **Misc**. For now, +this humble package contains a sudoku solver and a short function +which I personally believe is the most beautiful Matlab-code ever +written. + +* **sudsolve** Solve Sudoku puzzle. +(Depend: 0, status: stable) +[sudsolve.m](Misc/sudsolve.m) + +* **repeat** Repeat elements of vector. +(Depend: 0, status: stable) +[repeat.m](Misc/repeat.m) diff --git a/stenglib/Scicomp/ainsolve.m b/stenglib/Scicomp/ainsolve.m new file mode 100644 index 00000000..fd14bbdf --- /dev/null +++ b/stenglib/Scicomp/ainsolve.m @@ -0,0 +1,415 @@ +function [x,fx,flag,out,jac] = ainsolve(fun,x0,opts,varargin) +%AINSOLVE Solver for large sets of nonlinear equations. +% AINSOLVE solves nonlinear equations of the form F(X) = 0, where F +% and X are vectors, by using either a damped Newton method (DN) or +% an accelerated Newton method (AN) where old search directions as +% well as a preconditioned Krylov space is used together with the +% Newton direction in order to accelerate the convergence. The +% inversion of the Jacobian may be avoided using an inexact variant +% (AIN) where the Newton direction is not used. Furthermore, there +% are variants (XAN/XAIN) of both accelerated algorithms in which +% the Krylov space is recomputed for every inner iteration. This is +% more expensive but may also produce a faster convergence. +% +% X = AINSOLVE(FUN,X0) starts at the initial guess X0 and tries to +% solve the equations in FUN. FUN has the signature [Y,J] = +% FUN(X,...) and returns a vector of values Y together with a +% Jacobian J evaluated at the point X. Note that FUN will be called +% with both one and two output arguments which can be exploited in +% order to improve on the efficiency. +% +% X = AINSOLVE(FUN,X0,OPTS) solves with the default parameters +% replaced by the values in the structure OPTS. The syntax OPTDEF = +% AINSOLVE returns the default options and the syntax OPTHELP = +% AINSOLVE('opts') produces a short description of available +% options. +% +% X = AINSOLVE(FUN,X0,OPTS,P1,P2,...) passes the parameters P1, P2, +% ... directly to the function FUN; FUN(X,P1,P2,...). +% +% An important option is the use of the field OPTS.prefun which +% specify the preconditioner for the Krylov space. The syntax works +% as follows; initially let M = [] and for every assembled Jacobian +% J, initialize the preonditioner by calling [PFUN,M] = +% feval(OPTS.prefun,J,M,OPTS.prepar,P1,P2,...). Then the +% preconditioner is defined by the call X = feval(PFUN,R,M), where X +% should be an approximative solution to the equation J*X = R. +% +% [X,FX] = AINSOLVE(FUN,X0,...) returns the value of the objective +% function at X. +% +% [X,FX,FLAG] = AINSOLVE(FUN,X0,...) returns a number FLAG that +% describes the exit condition of AINSOLVE. +% If FLAG is: +% 0 then AINSOLVE converged to a solution X, +% 1 then AINSOLVE converged to a solution X that does not satisfy +% the tolerance on the norm of the residual, +% 2 then the maximum number of nonlinear iterations was reached, +% 3 then the maximum number of function evaluations was reached. +% +% The error criteria used works as follows: +% if all(abs(r) <= opts.atolf+opts.rtolf*abs(r0)) +% then exit with FLAG = 0, +% elseif all(abs(x-xold) <= opts.atolx+opts.rtolx*abs(x)) +% then exit with FLAG = 1. +% Here r0 is the initial residual and xold is the previous +% approximation. +% +% [X,FX,FLAG,OUT] = AINSOLVE(FUN,X0,...) returns a structure OUT +% with the number of iterations taken in OUT.iterations, the number +% of function (Jacobian) evaluations in OUT.funcCount (OUT.jacCount) +% and the algorithm used in OUT.algorithm. +% +% [X,FX,FLAG,OUT,JAC] = AINSOLVE(FUN,X0,...) also returns the +% Jacobian JAC of FUN at X. +% +% Cautionary: this is an experimental version with very little error +% checking. +% +% See also FSOLVE. + +% S. Engblom 2010-09-03 (Minor revision) +% S. Engblom 2004-10-11 (Revision) +% S. Engblom 2004-01-26 + +% *** Use parseopts! +% *** Option jacmul +% *** Use of numjac +% *** For some combinations, only the action of J to a vector v is +% needed. This is just (F(u+h*v)-F(u))/h for a suitable small h. +% *** Check the angle between the latest step and the one just taken +% before adding it to the trace space? Alternative: update a +% Gram-Schmidt orthogonalization? How should the LS-problem really be +% solved? GMRES? + +% special call: existing options +if nargin == 1 && ischar(fun) && strcmpi(fun,'opts') + optdef.maxnlinit = 'integer > 0 {500}'; + optdef.maxfunevals = 'integer > 0 {1000}'; + optder.report = '{''on''} | ''off'''; + optdef.atolx = 'scalar {1e-8}'; + optdef.rtolx = 'scalar {1e-6}'; + optdef.atolf = 'scalar {1e-8}'; + optdef.rtolf = 'scalar {1e-6}'; + optdef.algorithm = ['''xain'' | {''ain''} | ' ... + '''xan'' | ''an'' | ''dn''']; + optdef.linesearch = '{''bisect''} | ''poly'''; + optdef.tracedim = 'integer >= 0 {6}'; + optdef.krylovdim = 'integer >= 0 {6}'; + optdef.accmaxits = 'integer > 0 {5}'; + optdef.accminfac = 'scalar in (0,1) {0.95}'; + optdef.prefun = 'function {[]}'; + optdef.prepar = 'argument {[]}'; + x = optdef; + return; +end + +% default options +optdef.maxnlinit = 500; +optdef.maxfunevals = 1000; +optdef.report = 'on'; +optdef.atolx = 1e-8; +optdef.rtolx = 1e-6; +optdef.atolf = 1e-8; +optdef.rtolf = 1e-6; +optdef.algorithm = 'ain'; +optdef.linesearch = 'bisect'; +optdef.tracedim = 6; +optdef.krylovdim = 6; +optdef.accmaxits = 5; +optdef.accminfac = 0.95; +optdef.prefun = []; +optdef.prepar = []; + +% special call: return defaults +if nargin == 0, x = optdef; return; end + +% merge defaults with actual inputs +if nargin > 2 + if iscell(opts), opts = struct(opts{:}); end + fn = fieldnames(opts); + for i = 1:length(fn) + optdef = setfield(optdef,fn{i},getfield(opts,fn{i})); + end +end +opts = optdef; + +% size of the problem +ndof = size(x0,1); + +% algorithm +switch opts.algorithm + case 'dn', opts.alg = 1; + case 'an', opts.alg = 2; + case 'xan', opts.alg = 3; + case 'ain', opts.alg = 4; + case 'xain', opts.alg = 5; + otherwise, error('Unknown algorithm.'); +end +if opts.alg ~= 1, Vt = zeros(ndof,0); end + +% linesearch +switch opts.linesearch + case 'bisect', lines = @l_linesearch_bisect; + case 'poly', lines = @l_linesearch_poly; + otherwise, error('Unknown linesearch.'); +end + +% preconditioner +hasPrecond = ~isempty(opts.prefun); +if hasPrecond, M = []; end + +% evaluate the first residual and Jacobian +[r,J] = feval(fun,x0,varargin{:}); +nfun = 1; njac = 1; +rcmp = opts.atolf+opts.rtolf*abs(r); +normr = norm(r); + +% diagnostics +opts.report = strcmpi(opts.report,'on'); +if opts.report + disp(sprintf([' Iteration Func-count Jac-count ' ... + ' |residual| |step|\n' ... + ' %5.0f %5.0f %5.0f ' ... + ' %0.6e -'], ... + 0,nfun,njac,normr)); +end + +% early return +if all(abs(r) <= rcmp) + x = x0; + nliniter = 0; +else + % nonlinear (outer) iterations + for nliniter = 1:opts.maxnlinit + + if opts.alg == 1 % damped Newton (DN) + en = J\(-r); + [x,nfun] = l_linesearch(lines,x0,J,r,normr,en, ... + nfun,fun,varargin); + + % next residual and Jacobian + [r,J] = feval(fun,x,varargin{:}); + nfun = nfun+1; njac = njac+1; + normr = norm(r); + step = x-x0; + else % accelerated variants (AN/XAN/AIN/XAIN) + % needed for progress report + xold = x0; + + % compute the Newton direction (AN/XAN) + if opts.alg == 2 || opts.alg == 3, en = J\(-r); end + + % inner iterations + for j = 1:opts.accmaxits + if j == 1 || opts.alg == 3 || opts.alg == 5 + % create Krylov space + Vk = zeros(ndof,opts.krylovdim); + if hasPrecond + [prefun,M] = feval(opts.prefun,J,M,opts.prepar, ... + varargin{:}); + Vk(:,1) = feval(prefun,-r,M); + for j = 2:opts.krylovdim + Vk(:,j) = feval(prefun,J*Vk(:,j-1),M); + end + else + Vk(:,1) = -r; + for j = 2:opts.krylovdim + Vk(:,j) = J*Vk(:,j-1); + end + end + + if opts.alg == 2 || opts.alg == 3 % (AN/XAN) + W = orth([Vt en Vk]); % *** sloppy + else % (AIN/XAIN) + W = orth([Vt Vk]); % *** sloppy + end + end + + % restricted (local) Jacobian + U = J*W; + + % Newton direction for local problem + ea = W*(U\(-r)); % *** sloppy(?) + [x,nfun] = l_linesearch(lines,x0,J,r,normr,ea, ... + nfun,fun,varargin); + + % next residual and Jacobian + [r,J] = feval(fun,x,varargin{:}); + nfun = nfun+1; njac = njac+1; + normr0 = normr; + normr = norm(r); + + % should we stop? + if all(abs(r) <= rcmp) || ... + normr > opts.accminfac*normr0 || ... + nfun >= opts.maxfunevals + break; + end + x0 = x; + end + + % restore old solution + x0 = xold; + + % add the step thus produced to the trace space + step = x-x0; + if size(Vt,2) == opts.tracedim + Vt = [Vt(:,2:end) step]; + else + Vt = [Vt step]; + end + end + + % diagnostics + if opts.report + disp(sprintf([' %5.0f %5.0f %5.0f ' ... + ' %0.6e %0.6e'], ... + nliniter,nfun,njac,normr,norm(step))); + end + + % stopping criteria + if all(abs(r) <= rcmp) || ... + all(abs(step) <= opts.atolx+opts.rtolx*abs(x)) || ... + nfun >= opts.maxfunevals + break; + end + + % cycle solution for next round + x0 = x; + end +end + +% various outputs +if nargout > 1 + fx = r; + if nargout > 2 + flag = 0; + if nfun >= opts.maxfunevals + flag = 3; + elseif nliniter >= opts.maxnlinit + flag = 2; + elseif any(abs(r) > rcmp) + flag = 1; + end + if nargout > 3 + out = struct('algorithm',opts.algorithm, ... + 'iterations',nliniter, ... + 'funcCount',nfun, ... + 'jacCount',njac); + if nargout > 4 + jac = J; + end + end + end +end + +%-------------------------------------------------------------------------- +function [x,fc] = l_linesearch(lines,xold,J,r,normr,eo,fcold,fun,fargs) +%L_LINESEARCH Linesearch algorithm. +% [X,FC] = L_LINSEARCH(LINES,XOLD,J,R,NORMR,EO,FCOLD,FUN,FARGS) +% performs an optimal scaling of the search direction EO and then +% determines X = X0+LAMBDA*EO for which the norm of the function FUN +% (with additional arguments FARGS) has decreased +% sufficiently. LINES is a linesearch algorithm, J is the Jacobian +% at XOLD, R and NORMR the residual and norm of the residual. FCOLD +% is the function count before the call; it is increased for every +% call to FUN and the accumulated number of calls is returned in FC. + +% *** scaling really needed for AN/XAN/AIN/XAIN? +% *** factor 1/2 really needed for the norm? (kept for the sake of poly) + +% straightforward +Je = J*eo; +mrtJe = -r'*Je; +normJe = norm(Je); +eta = mrtJe/(normr*normJe); +e = (mrtJe/(normJe*normJe))*eo; +grad = -(mrtJe/normJe)^2; + +[x,fc] = feval(lines,xold,normr^2/2,grad,e, ... + fcold,fun,fargs,0.5*eta*eta); +%-------------------------------------------------------------------------- +function [x,fc] = l_linesearch_bisect(xold,fold,dfold,e, ... + fcold,fun,fargs,eta2) +%L_LINESEARCH_BISECT Linesearch by bisection. + +fc = fcold; % count the number of residuals evaluated +lambda = 1; % start by trying the full step + +% backtrack by bisection +while lambda > eps + x = xold+lambda*e; + f = norm(feval(fun,x,fargs{:}))^2/2; + fc = fc+1; + if f <= (1-lambda*eta2)*fold, return; end + lambda = 0.5*lambda; +end + +% otherwise we are probably on a local minimum -- this should be +% handled by the caller by monitoring the residual +warning('Linesearch direction does not appear to be a descent.'); +%-------------------------------------------------------------------------- +function [x,fc] = l_linesearch_poly(xold,fold,dfold,e, ... + fcold,fun,fargs,eta2) +%L_LINESEARCH_POLY Linesearch by polynomial interpolation. + +% first try the full Newton step... +lambda1 = 1; +x = xold+e; +f1 = norm(feval(fun,x,fargs{:}))^2/2; +fc = fcold+1; + +% this much decrease is sufficient +if f1 <= (1-eta2)*fold, return; end + +% ...then backtrack using a quadratic form... +lambda2 = -dfold/(2*(f1-fold-dfold)); +lambda2 = min(lambda2,0.5); +lambda2 = max(lambda2,0.1); +x = xold+lambda2*e; +f2 = norm(feval(fun,x,fargs{:}))^2/2; +fc = fc+1; +if f2 <= (1-lambda2*eta2)*fold, return; end + +% ...otherwise backtrack using a cubic form +while lambda2 > eps + % solve for certain cubic coefficients + ab = ([1 -1; -lambda1 lambda2]* ... + [(f2-dfold*lambda2-fold)/lambda2^2; ... + (f1-dfold*lambda1-fold)/lambda1^2])./(lambda2-lambda1); + + % cycle the old values + lambda1 = lambda2; + f1 = f2; + + % compute next lambda safely + if ab(1) ~= 0 + lambda2 = ab(2)^2-3*ab(1)*dfold; + if lambda2 <= 0 + lambda2 = 1; + elseif ab(2) <= 0 + lambda2 = (sqrt(lambda2)-ab(2))/(3*ab(1)); + else + lambda2 = -dfold/(sqrt(lambda2)+ab(2)); + end + elseif ab(2) ~= 0 + lambda2 = -dfold/(2*ab(2)); + else + lambda2 = -fold/dfold; + end + + % enforce bounds + lambda2 = min(lambda2,0.5*lambda1); + lambda2 = max(lambda2,0.1*lambda1); + + x = xold+lambda2*e; + f2 = norm(feval(fun,x,fargs{:}))^2/2; + fc = fc+1; + if f2 <= (1-lambda2*eta2)*fold, return; end +end + +% otherwise we are probably on a local minimum -- this should be +% handled by the caller by monitoring the residual +warning('Linesearch direction does not appear to be a descent.'); +%-------------------------------------------------------------------------- diff --git a/stenglib/Scicomp/gausspd.m b/stenglib/Scicomp/gausspd.m new file mode 100644 index 00000000..d4bcad1e --- /dev/null +++ b/stenglib/Scicomp/gausspd.m @@ -0,0 +1,96 @@ +function [A,B] = gausspd(name,x,n,p1,p2,p3) +%GAUSSPD Orthogonal polynomials generated by discrete inner products. +% [A,B] = GAUSSPD(NAME,X,N,P1,P2,...) returns suitable matrices A and B +% for usage with CLENSHAW for the polynomials with name NAME with +% parameters P1, P2, ... +% +% X is a vector with coordinates and N is the maximum order of the +% polynomials. +% +% The polynomials are normalized to unit norm for computational +% reasons. In addition, the measure has also been normalized to unity. +% +% No error-checking is performed. +% +% Name Parameter(s) Weight +% ------------------------------------------------------------- +% 'charlier' a exp(-a) a^x/x! +% 'krawtchouk' p, N bin(N,x) p^x (1-p)^(N-x) +% (x = 0..N) +% 'meixner' c, b (1-c)^b c^x (b)_x/x! +% 'chebyshev' N 1/N (x = 0..N-1) +% 'hahn' a, b, c gamma(c-a)gamma(c-b) ... +% /gamma(c)/gamma(c-a-b) ... +% (a)_x (b)_x/(c)_x/x! +% +% Here bin(N,x) is the binomial coefficients and (a)_x is Pochhammer's +% symbol. Use GAUSSQD to check the sanity of the parameters P1, P2, ... +% +% Examples: +% x = linspace(0,15); +% n = 5; a = 5; +% [A,B] = gausspd('charlier',x,n,a); +% y = clenshaw(A,B); +% figure, plot(x,y); title('Charlier polynomials') +% +% x = linspace(5,20); +% c = 0.4; b = 3; +% [A,B] = gausspd('meixner',x,n,c,b); +% y = clenshaw(A,B); +% figure, plot(x,y); title('Meixner polynomials') +% +% See also CLENSHAW, GAUSSQD. + +% S. Engblom 2006-11-06 (Revision) +% S. Engblom 2006-01-12 + +switch name + case 'charlier' + x = reshape(x,1,[]); + nn = reshape(1:n,n,1); + sn = sqrt(nn); + sa = 1./(sqrt(p1)*sn); + A = [ones(size(x)); ... + tsum(sa.*(nn+(p1-1)),-sa*x,[1],[1 2])]; + B = -[0; [0; sn(1:end-1)]./sn]; + case 'krawtchouk' + [A,B] = gausspd('meixner',x,n,-p1/(1-p1),-p2); + case 'meixner' + x = reshape(x,1,[]); + nn = reshape(1:n,n,1); + nb = sqrt((nn+p2).*(nn+1)); + sc = sqrt(p1); + A = [ones(size(x)); ... + tsum((p1*(n+p2)+n)./(sc*nb),(p1-1)/sc./nb*x,[1],[1 2])]; + B = -[0; [0; nb(1:end-1)]./nb]; + case 'chebyshev' + [A,B] = gausspd('hahn',x,n,1-p1,1,1-p1); + case 'hahn' + x = reshape(x,1,[]); + nn = reshape(0:n,n+1,1); + + w = p3-p1-p2; + ww = w-2*nn; + w1 = ww-1; + w2 = ww+1; + + An = -(ww.*w1)./((w-nn).*(p1+nn).*(p2+nn)); + % *** hopefully, the only singular special case... + if w == -1 + num = nn.*(w+p1-nn).*(w+p2-nn).*w1; + den = (w-nn).*(p1+nn).*(p2+nn).*w2; + Cn = [0; num(2:end)./den(2:end)]; + else + Cn = (nn.*(w+p1-nn).*(w+p2-nn).*w1)./((w-nn).*(p1+nn).*(p2+nn).*w2); + end + A = [ones(size(x)); ... + tsum(1+Cn(1:end-1),An(1:end-1)*x,[1],[1 2])]; + B = -[0; Cn(1:end-1)]; + % *** implementation of normalization is sub-optimal + nrm = realsqrt([1; cumprod(Cn(2:end).*An(1:end-1)./An(2:end))]); + A = tprod(A,[1; nrm(1:end-1)./nrm(2:end)],[1 2],[1]); + B = B.*[1; 1; nrm(1:end-2)./nrm(3:end)]; + + otherwise + error('Unknown polynomial.'); +end diff --git a/stenglib/Scicomp/gaussqd.m b/stenglib/Scicomp/gaussqd.m new file mode 100644 index 00000000..e5d63b66 --- /dev/null +++ b/stenglib/Scicomp/gaussqd.m @@ -0,0 +1,178 @@ +function [x,w,y] = gaussqd(name,n,p1,p2,p3) +%GAUSSQD Quadratures for discrete measures. +% [X,W] = GAUSSQD(NAME,N,P1,P2,...) returns abscissas X and weights W +% for the Nth discrete quadrature associated with the polynomial NAME +% using the parameters P1, P2, ... +% +% The resulting summation formula is +% sum_0^inf f(x) weight(x) = sum_i f(x(i))*w(i), +% where the weight-function is defined in GAUSSPD. +% +% [X,W,Y] = GAUSSQD(NAME,N,P1,P2,...) additionally returns an N-by-N +% matrix Y containing values of the normalized polynomials of order < N +% in all the quadrature points X. Each column Y(:,i) contains the values +% of the polynomials evaluated at X(i). +% +% Unlike GAUSSPD, GAUSSQD carefully checks the parameters P1, P2, ... to +% see if the corresponding set of polynomials makes sense. The +% conditions put on the coefficients are that enough moments exist and +% that the measure is strictly positive. In addition, the order of the +% polynomial must be bounded whenever the natural domain of summation is +% limited. +% +% Reference: +% [1] S. Engblom: "Gaussian quadratures with respect to discrete +% measures". Technical Report 2006-007, Dept of Information +% Technology, Uppsala University, 2006. Available at +% http://www.it.uu.se/research. +% +% Example: +% f = inline('(x.^5+2*x.^4+3*x.^3+4*x.^2+5*x)'); +% [x,w] = gaussqd('charlier',2,1); sum1 = f(x)'*w +% [x,w] = gaussqd('charlier',3,1); sum2 = f(x)'*w +% [x,w] = gaussqd('charlier',4,1); sum3 = f(x)'*w +% +% See also GAUSSPD. + +% S. Engblom 2006-01-12 + +if ~isscalar(n) || n ~= ceil(n) || n <= 0 + error('Order of quadrature must be a positive integer.'); +end + +wmsg = 'Parameter(s) out of range. Quadrature may not make sense.'; +switch name + case 'charlier' + if nargin ~= 3 || ~isscalar(p1) + error('Parameter of Charlier polynomial must be a single scalar.'); + end + ok = p1 > 0; + if ~ok, warning(wmsg); end + + % construct a symmetric positive definite and tridiagonal matrix J + nn = 1:n; + b = -sqrt(nn*p1); + b(end) = 0; + J = fsparse([[1 1:n-1]; nn; [2:n n]],nn, ... + [b([end 1:end-1]); nn+p1-1; b],[],'nosort'); + + % eigenvalues and -vectors + [y,x] = eig(full(J)); + + % the abscissas are the eigenvalues of J + x = diag(x); + + % the weights can be found from the first element of each eigenvector + w = reshape(y(1,:).^2,[],1); + + % the values of the polynomials themselves can be found from the + % eigenvectors (normalizing the first element) + if nargout > 2 + y = tprod(y,1./y(1,:),[1 2],[3 2]); + end + + case 'krawtchouk' + if nargin ~= 4 || ~isscalar(p1) || ~isscalar(p2) + error('Parameters of Krawtchouk polynomial must be two scalars.'); + end + [x,w,y] = gaussqd('meixner',n,-p1/(1-p1),-p2); + + case 'meixner' + if nargin ~= 4 || ~isscalar(p1) || ~isscalar(p2) + error('Parameters of Meixner polynomial must be two scalars.'); + end + + % Conditions on (1) convergence of the inner product, (2) the measure + % being positive, (3) when p2 is a non-positive integer (i.e. the + % measure has finite support), then n must be sufficiently small. + xmax = inf; + if p2 <= 0 && p2 == ceil(p2) + xmax = -p2; + end + pp = ceil([-p2 1-p2 -1-p2]); + pp = min(max(0,pp),xmax); + pp = sign(p1.*(p2+pp-1)).^pp; + ok = xmax < inf || abs(p1) < 1 || abs(p1) == 1 && -p2 >= 2*n; + ok = ok && all(pp > 0); + ok = ok && n <= xmax+1; + if ~ok, warning(wmsg); end + + nn = 1:n; + b = -sqrt(p1*nn.*(p2+nn-1)); + b(end) = 0; + J = fsparse([[1 1:n-1]; nn; [2:n n]],nn, ... + [b([end 1:end-1]); (p1+1)*(nn-1)+p1*p2; b],[],'nosort'); + + [y,x] = eig(full(J./(1-p1))); + + x = diag(x); + w = reshape(y(1,:).^2,[],1); + + if nargout > 2 + y = tprod(y,1./y(1,:),[1 2],[3 2]); + end + + case 'chebyshev' + if nargin ~= 3 || ~isscalar(p1) + error('Parameter of Chebyshev polynomial must be a single scalar.'); + end + [x,w,y] = gaussqd('hahn',n,1-p1,1,1-p1); + + case 'hahn' + if nargin ~= 5 || ~isscalar(p1) || ~isscalar(p2) || ~isscalar(p3) + error('Parameters of Hahn polynomial must be three scalars.'); + end + % Conditions on (1) convergence of the inner product, (2) the measure + % being positive, (3) the denominator not containing zeros and (4) when + % p1 and/or p2 are non-positive integers (i.e. the measure has finite + % support), then n must be sufficiently small. + xmax = inf; + if p1 <= 0 && p1 == ceil(p1) + xmax = -p1; + end + if p2 <= 0 && p2 == ceil(p2) + xmax = min(xmax,-p2); + end + pp = [p1 p2 p3]; + pp = ceil([-pp 1-pp -1-pp]); + pp = min(max(0,pp),xmax); + pp = sign((p1+pp-1).*(p2+pp-1).*(p3+pp-1)).^pp; + ok = xmax < inf || p3-p1-p2 >= 2*n; + ok = ok && all(pp > 0); + ok = ok && (p3 > 0 || p3 == ceil(p3) && xmax <= -p3); + ok = ok && n <= xmax+1; + if ~ok, warning(wmsg); end + + nn = 1:n; + w = p3-p1-p2; + w1 = w-2*nn+1; + if w == -1 + % Special case for essentially the Gauss-Chebyshev formula. It's a guess + % that this is the only singular case. + a = [p1*p2/(w-1) ... + polyval([1-p1-p2-p3 -(1-p1-p2-p3)*w (1+w)*p1*p2],nn(2:end)-1)./ ... + (w1(2:end).*(w1(2:end)+2))]; + else + a = polyval([1-p1-p2-p3 -(1-p1-p2-p3)*w (1+w)*p1*p2],nn-1)./(w1.*(w1+2)); + end + n2 = 1:n-1; + w2 = w-2*n2; + b = [-sqrt(n2.*(w+1-n2).* ... + (p1+n2-1)./(w2+1).* ... + (p2+n2-1)./(w2+1).* ... + (p1+w-n2)./(w2+2).* ... + (p2+w-n2)./w2) 0]; + J = fsparse([[1 1:n-1]; nn; [2:n n]],nn, ... + [b([end 1:end-1]); a; b],[],'nosort'); + + [y,x] = eig(full(J)); + + x = diag(x); + w = reshape(y(1,:).^2,[],1); + + if nargout > 2 + y = tprod(y,1./y(1,:),[1 2],[3 2]); + end + otherwise + error('Unknown type of polynomial.'); +end diff --git a/stenglib/Scicomp/nmsimplex.m b/stenglib/Scicomp/nmsimplex.m new file mode 100644 index 00000000..88eae4f4 --- /dev/null +++ b/stenglib/Scicomp/nmsimplex.m @@ -0,0 +1,329 @@ +function [x,fx,flag] = nmsimplex(fun,x0,opts,varargin) +%NMSIMPLEX Nelder-Meads simplex-algorithm. +% X = NMSIMPLEX(FUN,X0) starts at the initial guess X0 and tries to +% minimize the objective function FUN. FUN has the signature Y = +% FUN(X,...) and returns a scalar value. +% +% X = NMSIMPLEX(FUN,X0,OPTS) solves with the default parameters replaced +% by the values in the structure OPTS. The syntax OPTDEF = NMSIMPLEX +% returns the default options and the syntax OPTHELP = NMSIMPLEX('opts') +% produces a short description of available options (see also the table +% below). +% +% X = NMSIMPLEX(FUN,X0,OPTS,P1,P2,...) passes the parameters P1, P2, +% ... directly to the function FUN; FUN(X,P1,P2,...). +% +% [X,FX] = NMSIMPLEX(FUN,X0,...) returns the value of the objective +% function at X. +% +% [X,FX,FLAG] = NMSIMPLEX(FUN,X0,...) returns a number FLAG that +% describes the exit condition of NMSIMPLEX. +% If FLAG is: +% 0 then NMSIMPLEX converged to a solution X, +% 1 then NMSIMPLEX halted at a flat local minimum of the objective +% function, +% 2 then the maximum number of function evaluations was +% reached, +% 3 then the report function returned non-zero (see below). +% +% The convergence criterion used works as follows: +% if all(abs(xstep) <= opts.atolx+opts.rtolx*abs(xmin)) +% then exit with FLAG = 0, +% elseif ystep <= opts.atolf+opts.rtolf*abs(ymin) +% then exit with FLAG = 1. +% Here (xmin,ymin) is the current best approximation and (xstep,ystep) +% is the step just taken. +% +% Property Value/{Default} Description +% ----------------------------------------------------------------------- +% init Scalar {0.25} | Simplex A scalar indicating an +% estimate of the error in the +% initial guess used to produce the +% initial simplex. Alternatively, +% an ND-by-ND matrix where each +% column defines a corner in the +% initial simplex. Together with +% the initial guess X0 this must +% define a feasible simplex. +% +% maxfunevals integer > 0 {1000} Maximum number of function +% evaluations. +% +% report {'on'} | 'off' Print diagnostics. +% reportfun Function {[]} Report function, see below. +% +% atolx Scalar {1e-8} Absolute tolerance in solution. +% rtolx Scalar {1e-6} Relative tolerance in solution. +% +% atolf Scalar {1e-14} Absolute tolerance in function. +% rtolf Scalar {1e-12} Relative tolerance in function. +% +% lb Scalar | Vector {-Inf} Lower bound in each dimension. +% ub Scalar | Vector {Inf} Upper bound in each dimension. +% +% The report function is called according to the three syntaxes STATUS = +% reportfun([t(0) -1],suggested title,'init'), STATUS = +% reportfun(t,xsimplex,[],...), and, STATUS = +% reportfun(t,xsimplex,'done',...). The "time" is defined by t = +% -max(abs(xstep)./(opts.atolx+opts.rtolx*abs(xmin))). An occasional +% useful feature is that the minimization is automatically aborted +% whenever the returned value STATUS is non-zero. +% +% Example: +% % Rosenbrock function +% f = @(x)((1-x(1,:)).^2+100*(x(2,:)-x(1,:).^2).^2); +% [x,y] = meshgrid(linspace(-2,2,40),linspace(-1,3)); +% figure, contour(x,y,reshape(f([x(:)'; y(:)']),size(x)),30), +% hold on +% +% % reporter to plot convergence (could also be an .m-file function!) +% iif = @(varargin)(varargin{2*find([varargin{1:2:end}],1,'first')}()); +% simplex_report = @(t,x,s)(iif(~ischar(x), ... +% @()0*plot(x(1,[1:end 1]),x(2,[1:end 1]),'b.-'), ... +% true,@()0)); +% +% % solve +% x = nmsimplex(f,[-1 2],{'reportfun' simplex_report}) +% +% See also FMINSEARCH, FSOLVE, REPORT. + +% S. Engblom 2014-08-28 (Revision, reportfun and plot example) +% S. Engblom 2006-09-19 (Revision) +% S. Engblom 2006-02-03 + +% *** stat counter of some kind? + +% special call: existing options +if nargin == 1 & ischar(fun) & strcmpi(fun,'opts') + optdef.init = 'scalar | simplex {0.25}' + optdef.maxfunevals = 'integer > 0 {1000}'; + optdef.report = '{''on''} | ''off'''; + optdef.reportfun = 'Function {[]}'; + optdef.atolx = 'scalar {1e-8}'; + optdef.rtolx = 'scalar {1e-6}'; + optdef.atolf = 'scalar {1e-14}'; + optdef.rtolf = 'scalar {1e-12}'; + optdef.lb = 'scalar | vector {-inf}'; + optdef.ub = 'scalar | vector {inf}'; + x = optdef; + return; +end + +% default options +optdef.init = 0.25; +optdef.maxfunevals = 1000; +optdef.report = 'on'; +optdef.reportfun = []; +optdef.atolx = 1e-8; +optdef.rtolx = 1e-6; +optdef.atolf = 1e-14; +optdef.rtolf = 1e-12; +optdef.lb = -inf; +optdef.ub = inf; + +% special call: return defaults +if nargin == 0, x = optdef; return; end + +% parse options +if nargin < 3 + opts = optdef; +else + opts = parseopts(optdef,opts); +end + +% initial guess +x0 = x0(:); +nd = size(x0,1); +opts.lb = opts.lb(:); +if isscalar(opts.lb) + opts.lb = frepmat(opts.lb,nd); +end +opts.ub = opts.ub(:); +if isscalar(opts.ub) + opts.ub = frepmat(opts.ub,nd); +end +if any(x0 < opts.lb) || any(opts.ub < x0) + error('Initial guess not feasible.'); +end + +nfun = 0; +if isscalar(opts.init) + % try creating a non-degenerate simplex + r = opts.init*abs(x0)+opts.atolx; + x1 = tsum(x0,diag(r),[1],[1 2]); + y1 = zeros(1,size(x1,2)); + for i = 1:size(x1,2) + x1(:,i) = min(max(x1(:,i),opts.lb),opts.ub); + y1(i) = feval(fun,x1(:,i),varargin{:}); + end + nfun = nfun+size(x1,2); + + x2 = tsum(x0,diag(-r),[1],[1 2]); + y2 = zeros(1,size(x2,2)); + for i = 1:size(x2,2) + x2(:,i) = min(max(x2(:,i),opts.lb),opts.ub); + y2(i) = feval(fun,x2(:,i),varargin{:}); + end + nfun = nfun+size(x2,2); + + % use best corners for the initial guess + [foo,i] = min([y1; y2]); + x = [x1(:,i == 1) x2(:,i == 2)]; + y = [y1(i == 1) y2(i == 2)]; + + % add user's corner... + x = [x x0]; + y = [y feval(fun,x0,varargin{:})]; + nfun = nfun+1; +else + % intial simplex supplied by user + x = [x0 opts.init]; + y = zeros(1,size(x,2)); + for i = 1:size(x,2) + x(:,i) = min(max(x(:,i),opts.lb),opts.ub); + y(i) = feval(fun,x(:,i),varargin{:}); + end + nfun = nfun+size(x,2); +end + +if size(x,2) ~= nd+1 || rank(tsum(x(:,2:end),-x(:,1),[1 2],[1])) < nd + error(['Degenerate initial simplex. Either provide a feasible initial ' ... + 'simplex or try a smaller scalar option .init.']); +end + +opts.report = strcmpi(opts.report,'on'); +if opts.report + disp(' Iteration Func-count Func-value |step|'); +end + +% find best and worst values +[ymin,imin] = min(y); +[ymax,imax] = max(y); +xmin = x(:,imin); +xmax = x(:,imax); +ymax2 = max(y([1:imax-1 imax+1:end])); + +% setup reporter +if ischar(opts.reportfun) + reportfun = str2func(opts.reportfun); +else + reportfun = opts.reportfun; +end +if ~isempty(reportfun) + status = reportfun([-max(abs(xmax-xmin)./(opts.atolx+opts.rtolx*abs(xmin))) -1], ... + 'Solution progress...','init'); +else + % void function + reportfun = @report; + status = reportfun([],[],'none'); +end + +nit = 0; +while 1 + if status ~= 0 + fprintf('Reporter returned non-zero status = %d. Bailing out.\n',status); + status = reportfun(-max(abs(xmax-xmin)./(opts.atolx+opts.rtolx*abs(xmin))), ... + x,'done',varargin{:}); + x = xmin; + if nargout > 1 + fx = ymin; + if nargout > 2 + flag = 3; + end + end + return; + end + nit = nit+1; + + xcen = (-xmax+sum(x,2))/nd; + xref = min(max(2*xcen-xmax,opts.lb),opts.ub); + yref = feval(fun,xref,varargin{:}); + nfun = nfun+1; + + % form new simplex + if ymin > yref + % xref seems promising; try to extrapolate + xexp = min(max(2*xref-xcen,opts.lb),opts.ub); + yexp = feval(fun,xexp,varargin{:}); + nfun = nfun+1; + if yexp < yref + % successful, good decrease + x(:,imax) = xexp; + y(imax) = yexp; + else + % a fair decrease anyway... + x(:,imax) = xref; + y(imax) = yref; + end + elseif ymax2 > yref + % intermediate cost; slight improvement + x(:,imax) = xref; + y(imax) = yref; + else + % high cost; local contraction + if ymax <= yref + x(:,imax) = 0.5*(xmax+xcen); + else + x(:,imax) = 0.5*(xref+xcen); + end + + % new evaluation + y(imax) = feval(fun,x(:,imax),varargin{:}); + nfun = nfun+1; + + % still not an improvement: try multiple contraction around the + % best corner + if y(imax) > ymax + for i = [1:imin-1 imin+1:size(x,2)] + x(:,i) = 0.5*(x(:,i)+xmin); + y(i) = feval(fun,x(:,i),varargin{:}); + end + nfun = nfun+nd; + end + end + + % update best and worst values + [ymin,imin] = min(y); + [ymax,imax] = max(y); + xmin = x(:,imin); + xmax = x(:,imax); + ymax2 = max(y([1:imax-1 imax+1:end])); + + % report progress + xstep = xmax-xmin; + ystep = ymax-ymin; + if opts.report + fprintf(1,' %5.0f %5.0f %0.6e %0.6e\n', ... + nit,nfun,ymin,norm(xstep)); + end + status = reportfun(-max(abs(xstep)./(opts.atolx+opts.rtolx*abs(xmin))),x,''); + + % convergence criteria + ok = all(abs(xstep) <= opts.atolx+opts.rtolx*abs(x(:,imin))); + if ok || abs(ystep) <= opts.atolf+opts.rtolf*abs(ymin) || ... + nfun >= opts.maxfunevals + break; + end +end + +% various outputs +x = xmin; +if nargout > 1 + fx = ymin; + if nargout > 2 + flag = 0; + if nfun >= opts.maxfunevals + flag = 2; + elseif ~ok + flag = 1; + end + end +elseif nargout < 3 + if nfun >= opts.maxfunevals + warning('The maximum number of function evaluations was reached.'); + elseif ~ok + warning('Halted at a flat local minimum of the objective function.'); + end +end + +status = reportfun(-max(abs(xstep)./(opts.atolx+opts.rtolx*abs(xmin))),x,'done'); diff --git a/stenglib/Scicomp/ode1s.m b/stenglib/Scicomp/ode1s.m new file mode 100644 index 00000000..7c05ded9 --- /dev/null +++ b/stenglib/Scicomp/ode1s.m @@ -0,0 +1,654 @@ +function [t,y] = ode1s(fun,tspan,y0,opts,varargin) +%ODE1S Solve stiff and strongly nonlinear ODE. Split-step Euler method. +% [T,Y] = ODE1S(FUN,[TSTART TEND],Y0,OPTS,...) solves the ODE dY/dt +% = FUN(t,Y,...) from TSTART to TEND with initial data Y0. OPTS is a +% structure with options, see the table below. +% +% For the right-hand side, the special syntax [dYimpl,dYexpl] = +% FUN(tnew,Ynew,told,Yold,...) is used. Here Ynew(Yold) is the +% latest iterate(solution) at the next(previous) step. This syntax +% makes it easy to implement split-step semi-implicit methods. With +% only one output, dYimpl = FUN(tnew,Ynew,told,Yold,...) is +% understood. +% +% The method is thus a forward/backward Euler combination. The error +% is estimated by taking half-steps and a step-size control strategy +% based on digital filters is used. +% +% SOL = ODE1S(...) is also an allowed syntax, returning a solution +% structure instead. +% +% Field Value/{Default} Description +% ----------------------------------------------------------------------- +% atol Vector/scalar {1e-6} Absolute tolerance. +% rtol Vector/scalar {1e-4} Relative tolerance. +% +% adaptivity {'eps'} | 'epus' | Error per step, per unit step, or +% 'eq' equilibrium control. +% errnorm1 Function {@abs} Inner and outer error-norms, see +% errnorm2 Function {@norm} below. +% +% initialstep Scalar {0} The initial step, zero means that +% this is determined automatically. +% maxstep Scalar {inf} Maximum step, +% minstep Scalar {0} minimum step. +% +% nlinauto {1} | 2 | 3 Tuning of the nonlinear solver. A +% higher value means a more +% suspicious solver, prepared to take +% more drastic measures when +% poor convergence is detected. +% +% reportfun Function {[]} Report function, see below. +% info {0} | 1 | 2 Level of information. 0: +% only serious errors, 1: +% include warnings, 2: +% include final statistics +% (not yet implemented). +% +% In EPS-adaptivity, the control objective is to keep +% errnorm2(TOL./ERR) = 1, +% where in terms of the tentative solution y, +% TOL = max(rtol.*errnorm1(y),atol), +% ERR = errnorm1(error estimate). +% The default choice is errnorm1(z) = abs(z) and errnorm2(z) = +% norm(z), and is the usual component-wise control. An alternative +% is the less stringent "norm control" defined by errnorm1(z) = +% norm(z) and errnorm2(z) = abs(z). Other variants are easily +% constructed. Note that the norms are automatically normalized to +% unit measure so that errnorm2(errnorm1(ones(size(solution +% vector)))) = 1. +% +% In EPUS-adaptivity, one rather uses +% TOL = h*max(rtol.*errnorm1(FUN(y)),atol), +% while in EQ-adaptivity, +% TOL = max(rtol.*errnorm1(y-y0),atol), +% ensuring small steps near an equilibrium. +% +% The report function is called according to the three +% self-explaining syntaxes STATUS = reportfun([TSTART +% TEND],suggested title,'init'), STATUS = reportfun(t,y,[],...), +% and, STATUS = reportfun(t,y,'done',...). An occasional useful +% feature is that the integration is automatically aborted whenever +% the returned value STATUS is non-zero. +% +% See also ODE15S, ODE23S. + +% S. Engblom 2012-09-18 (info added) +% S. Engblom 2010-08-25 (Port from earlier code) +% S. Engblom 2007-03-13 + +yy = y0(:); +ndof = size(yy,1); + +% default options +optdef.atol = 1e-6; +optdef.rtol = 1e-4; +optdef.adaptivity = 'eps'; +optdef.errnorm1 = @abs; +optdef.errnorm2 = @norm; +optdef.initialstep = 0; +optdef.maxstep = inf; +optdef.minstep = 0; +optdef.nlinauto = 1; +optdef.reportfun = []; +optdef.info = 0; + +% merge defaults with actual inputs +if nargin > 3 + opts = parseopts(optdef,opts); +else + opts = optdef; +end +assignopts(opts); +info = opts.info; % (silly, but there is a Matlab-function 'info'...) + +% EPS/EPUS/EQ control +[foo,adaptivity] = fsetop('ismember',{adaptivity},{'eps' 'epus' 'eq'}); +if adaptivity == 0 + error('Unknown type of adaptivity.'); +end + +% normalize the norms... +NM = 1/feval(errnorm2,(feval(errnorm1,(ones(ndof,1))))); + +% setup reporter +if ischar(reportfun) + reportfun = str2func(reportfun); +end +if ~isempty(reportfun) + status = reportfun([tspan(1) tspan(end)],'Solution progress...','init'); +else + reportfun = @report; + status = reportfun([],[],'none'); +end +% *** stat counter of some kind? + +% step-size controller +Const = [0 0 1]; % constant step-size +Ho110 = [1 0 0]; % elementary controller (Deadbeat I) +H110 = [1/3 0 0]; % I controller (convolution filter) +PI34 = [7/10 -2/5 0]; % PI controller (Gustafsson) +PI42 = [3/5 -1/5 0]; % PI controller for nonstiff methods +H211PI = [1/6 1/6 0]; % 1st order LP filter of PI structure +gain = 1/4; % gain in [1/6 1/2] (gain = 1/2 yields Ho211) +H211b = gain*[1 1 1]; % general purpose 1st order LP filter +% controller used: +ctrl = H211b; +% *** ctrl and Mfac (below) could be options + +% (inverse of the) order of the scheme +switch adaptivity + case 1 % EPS + order = 1/2; + case {2 3} % EPUS/EQ + order = 1; +end + +% rejection logic is based on step-ratios +Mfac = 2; % rejection whenever err > Mfac*TOL + +% reject when FILTER(err) > Mfac*TOL, where FILTER is the product of +% the controller and the limiters +rhomin = l_limiter(l_lpfilter(ctrl,l_limiter(1/Mfac^order,2),1,1),1); + +% start-up +cerr = 1; +rho = 1; +% *** direct manipulation of the controller's state is done in three +% places in the code below when the nonlinear solver fails; it is not +% completely clear how this should be done in the best way + +% build options for the nonlinear solver +if nlinauto == 1 + nlinopts.kappa = 10^-1.5; % required extra precision + nlinopts.thetaREF = 0.4; % target convergence factor + nlinopts.thetaJAC = 0.4; % recompute Jacobian + nlinopts.thetaLU = 0.4; % refactorize Jacobian + nlinopts.etainit = 0.9/(1-0.9); % (total) initial convergence factor + nlinopts.etapow = 0.8; % power law for reuse of estimate + nlinopts.maxiter = 10; % maximum number of iterations + nlinopts.hfac = 0.5; % for irregular convergence +elseif nlinauto == 2 + nlinopts.kappa = 10^-2; + nlinopts.thetaREF = 0.3; + nlinopts.thetaJAC = 0.3; + nlinopts.thetaLU = 0.3; + nlinopts.etainit = 0.95/(1-0.95); + nlinopts.etapow = 0.7; + nlinopts.maxiter = 9; + nlinopts.hfac = 0.4; + % (nlinauto >= 2 also uses a more conservative initial guess) +elseif nlinauto == 3 + nlinopts.kappa = 10^-3; + nlinopts.thetaREF = 0.2; + nlinopts.thetaJAC = 0.2; + nlinopts.thetaLU = 0.2; + nlinopts.etainit = 0.95/(1-0.95); + nlinopts.etapow = 0.6; + nlinopts.maxiter = 8; + nlinopts.hfac = 0.3; +else + error('Property ''nlinauto'' must be one of {1 2 3}.'); +end + +% solution and allocation logic +tspan = tspan(:); +nchunks = 10; +nchunksmax = 10000; +t = zeros(1,nchunks); +t(1) = tspan(1); +y = zeros(ndof,nchunks); +y(:,1) = yy; +i = 1; + +% clear persistent data +l_implsolve(nlinopts); + +% initial step +tt = tspan(1); +tend = tspan(end); +[h,TOL] = l_initialstep(rtol,atol,adaptivity,order, ... + NM,errnorm1,errnorm2,tt,yy,fun,varargin{:}); +if initialstep > 0 + h = initialstep; +end +h = max(min(h,maxstep),minstep); +h2 = 0.5*h; + +while 1 + if h2 < eps(16*(1+abs(tt))) + warning(sprintf(['Unable to meet integration tolerances without ' ... + 'reducing the step-size below the smallest ' ... + 'value allowed. Failure at time t = %f.'],tt)); + t = t(1:i); + y = y(:,1:i); + break; % return solution obtained so far + end + + % First take the full step y3 = T_1(h) using zero as the initial + % approximation for the implicit part. + [y3,rhon] = l_step(true,tt,yy,h,[], ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + if isempty(y3) + h = l_limiter(rhon,1)*h; % retry with smaller h + h2 = 0.5*h; + cerr = 1; + rho = 1; + if info > 0 + disp('Nonlinear iteration converged poorly.'); + end + continue; + % *** the continue-statement implies that minstep+maxstep are + % disregarded! + end + + % Then take two half-steps T_1(h/2) and T_2(h/2) using the full step + % T_1(h) as a predictor. These steps are usually easier than the + % previous full step, so something strange is going on if this does + % not converge. + + % y1 = T_1(h/2) + if nlinauto < 3 + % interpolate using the full step + [y1,rhon] = l_step(false,tt,yy,h2,0.5*(yy+y3), ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + else + % be more suspicious... + [y1,rhon] = l_step(false,tt,yy,h2,[], ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + end + if isempty(y1) + h2 = l_limiter(rhon,1)*h2; + h = 2*h2; % (step just attempted was h2) + cerr = 1; + rho = 1; + if info > 0 + warning('Nonlinear iteration did not converge!'); + end + continue; + end + + % y2 = T_2(h/2) + if nlinauto == 1 + % predictor by combining the two previous steps + [y2,rhon] = l_step(false,tt+h2,y1,h2,y1+0.5*(y3-yy), ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + elseif nlinauto == 2 + % using only the full step + [y2,rhon] = l_step(false,tt+h2,y1,h2,y3, ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + else + % suspicious... + [y2,rhon] = l_step(false,tt+h2,y1,h2,[], ... + TOL,NM,errnorm1,errnorm2,nlinopts,fun,varargin{:}); + end + if isempty(y2) + h2 = l_limiter(rhon,1)*h2; + h = 2*h2; + cerr = 1; + rho = 1; + if info > 0 + warning('Nonlinear iteration did not converge!'); + end + continue; + end + + % Richardson's estimate... + err = errnorm1((y3-y2)/3); + + % control objective + switch adaptivity + case 1 % EPS + TOL = max(rtol.*errnorm1(y2),atol); + case 2 % EPUS + [dydt,dydtexpl] = fun(tt+h,y2,tt+h,y2,varargin{:}); + dydt = dydt+dydtexpl; + TOL = h*max(rtol.*errnorm1(dydt),atol); + case 3 % EQ + TOL = max(rtol.*errnorm1(y2-yy),atol); + end + + % digital control of step-size + cerrold = cerr; + cerr = l_limiter(1/(NM*errnorm2(err./TOL)^order),2); + rho = l_limiter(l_lpfilter(ctrl,cerr,cerrold,rho),1); + + % acceptance/rejection based on smooth rho + if rho >= rhomin + % step accepted: record the value of T_2(h/2) + tt = tt+h; + yy = y2; + i = i+1; + if i > size(t,2) + t = [t zeros(1,nchunks)]; + y = [y zeros(ndof,nchunks)]; + nchunks = min(2*nchunks,nchunksmax); + end + t(i) = tt; + y(:,i) = yy; + status = reportfun(tt,yy,[],varargin{:}); + + % normal end of algorithm is here + if tt-tend > -eps(32*abs(tt)) + status = reportfun(tt,yy,'done',varargin{:}); + t = t(1:i); + y = y(:,1:i); + break; + elseif status ~= 0 + fprintf('Reporter returned non-zero status = %d. Bailing out.\n',status); + status = reportfun(tt,yy,'done',varargin{:}); + t = t(1:i); + y = y(:,1:i); + break; + end + else + if info > 0 + disp('Failed integration tolerance.'); + fprintf(' [h = %g, err = %f, cerr = %f, rho = %f]\n', ... + h,1/(NM*errnorm2(err./TOL)^order),cerr,rho); + end + end + h = rho*h; + + % in these two cases (not the preferred regime!) we must ensure that + % the controller gets at least some kind of 'reasonable' information + if h > maxstep + rho = rho*maxstep/h; + h = maxstep; + cerr = 1; + elseif h < minstep + rho = rho*minstep/h; + h = minstep; + cerr = 1; + end + h2 = 0.5*h; +end + +% output solution structure +if nargout < 2 + t = struct('x',t,'y',y,'solver','ode1s'); +end + +%------------------------------------------------------------------------- +function [h,TOL] = l_initialstep(rtol,atol,adaptivity,order, ... + NM,errnorm1,errnorm2,t0,y0,fun,varargin) +%L_INITIALSTEP Starting step-size and tolerance. +% [H,TOL] = L_INITIALSTEP(RTOL,ATOL,...) provides a starting +% step-size whenever none is supplied. A value of the effective +% tolerance TOL is also returned. It "usually gives a good guess for +% the initial step-size... (or at least avoids a very bad choice)" +% [1, p. 169]. The algorithm is described in [1, Chap. II.4, +% p. 169]. +% +% Reference: +% [1] E. Hairer, S. P. Norsett and G. Wanner: "Solving Ordinary +% Differential Equations I, Nonstiff Problems", 2nd ed, Springer +% (1993). + +% h0 by ensuring that an explicit Euler step is small +[dydt,dydtexpl] = fun(t0,y0,t0,y0,varargin{:}); +dydt0 = dydt+dydtexpl; +% *** evaluation can be reused later + +d0 = errnorm1(y0); +TOL = max(rtol.*d0,atol); +d0 = NM*errnorm2(d0./TOL); +d1 = NM*errnorm2(errnorm1(dydt0)./TOL); +if d0 >= 1e-5 && d1 >= 1e-5 + h0 = 0.01*d0/d1; +else + h0 = 1e-6; +end + +% take step and estimate the 2nd derivative +y1 = y0+h0*dydt0; +[dydt,dydtexpl] = fun(t0+h0,y1,t0+h0,y1,varargin{:}); +dydt1 = dydt+dydtexpl; +d2 = NM*errnorm2(errnorm1(dydt1-dydt0)./(TOL*h0)); + +% update TOL (used in the implicit step) +switch adaptivity + case 2 % EPUS + TOL = h0*max(rtol.*errnorm1(dydt0),atol); + case 3 % EQ + TOL = max(rtol.*errnorm1(y1-y0),atol); +end + +% h1 from simple error estimate +d3 = max(d1,d2); +if d3 > 1e-15 + h1 = (0.01/d3)^order; +else + h1 = max(1e-6,h0*1e-3); +end + +h = min(100*h0,h1); +% or otherwise integration might be aborted immediately: +h = max(h,eps(1024*(1+abs(t0)))); + +%------------------------------------------------------------------------- +function [y,rhon] = l_step(fullstep,t0,y0,h,yest,TOL, ... + NM,errnorm1,errnorm2, ... + nlinopts,fun,varargin) +%L_STEP One step with the split-step Euler method. +% [Y,RHON] = L_STEP(FULLSTEP,T0,Y0,H,YEST,...) returns the step from +% [T0,Y0] to [T0+H,Y] with a split-step Euler method using YEST as +% an initial guess for the implicit solver. +% +% See L_IMPLSOLVE. + +if ~isempty(yest) + ydelta = yest-y0; +else + ydelta = zeros(size(y0)); +end + +% step: implicit step by solving a nonlinear system +[ydelta,rhon] = l_implsolve(fullstep,ydelta,t0,h,y0,TOL, ... + NM,errnorm1,errnorm2, ... + nlinopts,fun,varargin{:}); +if isempty(ydelta) + % nonlinear iteration did not converge, empty return + y = []; + return; +end + +% assemble step +y = y0+ydelta; + +%------------------------------------------------------------------------- +function [ydelta,rhon] = l_implsolve(fullstep,ydelta,t0,h,y0,TOL, ... + NM,errnorm1,errnorm2, ... + nlinopts,fun,varargin) +%L_IMPLSOLVE Solver for split-step Euler method. +% [YDELTA,RHON] = L_IMPLSOLVE(FULLSTEP,YDELTA,T0,H,Y0,[TOL,...], +% NLINOPTS,FUN,...) solves the equation YDELTA = +% H*F(T0,Y0)+H*G(T0+H,Y0+YDELTA) for YDELTA. FULLSTEP is true if H +% is a full step, false for half-steps. The split in +% implicit/explicit parts is input as [G,F] = +% FUN(T0+H,T0+YDELTA,T0,Y0,...). Empty YDELTA is returned whenever +% poor convergence is detected, in which case RHON < 1 is a +% suggested step-size reduction factor. +% +% The implementation follows rather closely the prescription in [2, +% Chap. IV.8, pp. 118--121], but has also been augmented with ideas +% from [1]. +% +% Reference: +% [1] K. Gustafsson and G. Söderlind: "Control Strategies for the +% Iterative Solution of Nonlinear Equations in ODE Solvers", SIAM +% J. Sci. Comput. 18(1):23--40 (1997). +% [2] E. Hairer and G. Wanner: "Solving Ordinary Differential +% Equations II, Stiff and Differential-Algebraic Problems", 2nd ed, +% Springer (1996). + +% control of Jacobian evaluations and refactorizations +persistent J wksp tJAC L U P hLU eta theta L2 U2 P2; + +if nargin == 1 + % clear persistent data + nlinopts = fullstep; + J = []; + wksp = []; + eta = nlinopts.etainit; + theta = eta/(1+eta); + return; +elseif isempty(eta) + % previous attempt did not converge well + eta = nlinopts.etainit; + theta = eta/(1+eta); +end + +% parameters of the solver +rhon = 1; % only changed in case of problems +eta = max(eta,eps)^nlinopts.etapow; % convergence factor (reused on success) +kappa = nlinopts.kappa; % extra accuracy +maxiter = nlinopts.maxiter; % max number of iterations + +% The equation solved is now +% +% ydelta = h*F(t0,y0)+h*G(t0+h,y0+ydelta,[t0 y0]), +% +% where [G,F] = fun(t0+h,y0+ydelta,t0,y0,varargin{:}) is the +% [implicit,explicit] part. + +% first residual +[dydt,dydtexpl] = fun(t0+h,y0+ydelta,t0,y0,varargin{:}); + +% full/half-step +if fullstep + % new Jacobian and/or factorization + if isempty(J) || theta*hLU-abs(h-hLU) > nlinopts.thetaJAC*hLU + % convergence was slow previously despite a not very large difference + % in step-size (or there is no saved Jacobian) + [J,wksp] = numjac(fun,t0+h,y0+ydelta,dydt,{0 1},wksp,0,[],[], ... + t0,y0,varargin{:}); + % *** user-defined Jacobian (exact and numerical) + tJAC = t0; % (formed with data available up to t = t0) + [L,U,P] = lu(eye(size(J))-h*J); + hLU = h; + P2 = []; % force refactorization at the half-step + elseif isempty(P) || abs(h-hLU) > nlinopts.thetaLU*hLU + % anticipated convergence failure due to a step-size change: + % refactorize the Jacobian (or a new factorization was explicitly + % called for) + [L,U,P] = lu(eye(size(J))-h*J); + hLU = h; + P2 = []; + end +else + % when taking a half-step, the previous full step always + % converged and hence we only care about the factorization + if isempty(P2) + [L2,U2,P2] = lu(eye(size(J))-h*J); + end + % During the iterations below we let the half-step change the + % convergence indicators theta and eta since they should normally be + % about the same. If this is not the case and the half-step is + % rejected, then the full step is rejected automatically. +end + +% first Newton-step +res = ydelta-h*dydtexpl-h*dydt; +if fullstep + delta = U\(L\(P*res)); +else + delta = U2\(L2\(P2*res)); +end +ydelta = ydelta-delta; +ndelta = NM*errnorm2(errnorm1(delta)./TOL); + +% early return +if eta*ndelta <= kappa + return; +end + +for i = 2:maxiter + % new update and residual + res = ydelta-h*dydtexpl-h*fun(t0+h,y0+ydelta,t0,y0,varargin{:}); + if fullstep + delta = U\(L\(P*res)); + else + delta = U2\(L2\(P2*res)); + end + ydelta = ydelta-delta; + + % check convergence + ndeltaold = ndelta; + ndelta = NM*errnorm2(errnorm1(delta)./TOL); + theta = ndelta/ndeltaold; + eta = theta/(1-theta); + + if theta >= 1 || theta^(maxiter-i)*ndelta > kappa*(1-theta) + % failure: diverging or not converging fast enough + break; + elseif eta*ndelta <= kappa + % converged, return solution + return; + end +end + +% iteration not terminated successfully, take action +if theta >= 1 + % diverging, suggest a smaller step + rhon = nlinopts.thetaREF/theta; +elseif tJAC == t0 + % slow convergence despite a fresh Jacobian + if theta > nlinopts.thetaREF + % suggest a smaller step + rhon = nlinopts.thetaREF/theta; + else + % rare: irregular convergence, shrink step by an arbitrary factor + rhon = nlinopts.hfac; + end +elseif ~fullstep + % rare: half-step, irregular convergence despite a successful full + % step? + rhon = nlinopts.hfac; +end + +% note: rejecting a half-step implies also a new full step and hence +% we postpone handling the Jacobian to after a smaller step has been +% tried +if fullstep + % handle the Jacobian + if tJAC ~= t0 + J = []; % force a new evaluation + else + P = []; % force refactorization + end +end + +ydelta = []; % step failed +eta = []; % don't reuse convergence indicators + +%------------------------------------------------------------------------- +function u = l_limiter(u,kappa) +%L_LIMITER Step-size ratio limiter. +% U = L_LIMITER(U,KAPPA) applies an arctangent limiter parametrized +% by KAPPA to U. +% +% Reference: +% [1] G. Söderlind, L. Wang: "Adaptive time-stepping and +% computational stability", J. Comput. Appl. Math. 185:225--243 (2006). + +u = 1+kappa*atan((u-1)/kappa); + +%------------------------------------------------------------------------- +function rho = l_lpfilter(fir,cerr,cerrold,rho) +%L_LPFILTER Low-pass 2nd order digital filter. +% RHO = L_LPFILTER(FIR,CERR,CERROLD,RHO) evaluates the digital +% filter FIR = [k*beta_1 k*beta_2 alpha_2] using scaled controls +% (CERR,CERROLD) and previous ratio RHO. +% +% The proposed next ratio is returned. +% +% Reference: +% [1] G. Söderlind: "Digital Filters in Adaptive Time-Stepping", +% ACM Trans. Math. Software, 29:1--26 (2003). + +% immediate +rho = prod([cerr cerrold 1/rho].^fir); + +%------------------------------------------------------------------------- diff --git a/stenglib/Scicomp/poch.m b/stenglib/Scicomp/poch.m new file mode 100644 index 00000000..78df397a --- /dev/null +++ b/stenglib/Scicomp/poch.m @@ -0,0 +1,40 @@ +function y = poch(a,x,b) +%POCH Pochhammer's function. +% Y = POCH(A,X), for a scalar A and a vector X, computes the value of +% GAMMA(A+X)./GAMMA(A). The output Y is a vector the same size as X. +% +% If A is a vector, the ouput is Y(i) = prod_i POCH(A(i),X). +% +% Y = POCH(A,B,X) is the same as POCH(A,X)./POCH(B,X). +% +% See also GAMMA, GAMMALN, PSI. + +% S. Engblom 2006-01-12 + +if nargin == 3 + t = x(:); + x = b(:); + a = a(:); b = t; + y = exp(sum(l_gammaln(tsum(a,x,[1],[2])),1)- ... + sum(l_gammaln(tsum(b,x,[1],[2])),1)- ... + sum(l_gammaln(a))+sum(l_gammaln(b))); +else + a = a(:); + x = x(:); + y = exp(sum(l_gammaln(tsum(a,x,[1],[2])),1)- ... + sum(l_gammaln(a))); +end + +%-------------------------------------------------------------------------- +function y = l_gammaln(x) +%L_GAMMALN Fix for bug in GAMMALN. +% Annoyingly, GAMMALN is incorrectly imlemented. This function is a +% patch for the case when there might be negative elements in +% X. Still, complex values of X are not allowed. + +isneg = find(x < 0); +x(isneg) = 1-x(isneg); +y = gammaln(x); +y(isneg) = -y(isneg)+log(pi./sin(pi*(x(isneg)))); + +%-------------------------------------------------------------------------- diff --git a/stenglib/Scicomp/rtsafe.m b/stenglib/Scicomp/rtsafe.m new file mode 100644 index 00000000..e69e35bc --- /dev/null +++ b/stenglib/Scicomp/rtsafe.m @@ -0,0 +1,104 @@ +function x = rtsafe(x1,f1,x2,f2,fun,fargs,tol,maxits) +%RTSAFE Scalar nonlinear solver. +% X = RTSAFE(X1,F1,X2,F2,FUN,FARGS,TOL,MAXITS) computes a solution X +% to the equation FUN(X) = 0. FUN has the signature [F,DF] = +% FUN(X,FARGS{:}) and should compute both function values and +% derivatives. On entry, X1 and X2 must bracket a root and the +% function values F1 and F2 must therefore satisfy F1.*F2 <= 0. Only +% the signs of F1 and F2 need to be correct in order for the +% algorithm to work properly. +% +% If there are more than one root, RTSAFE will find one of them +% (including possible singularities). If FUN is identical to zero on +% the whole interval [X1 X2], RTSAFE will converge to the midpoint. +% +% FARGS is a cell-array and should contain additional inputs to +% FUN. An empty cell is the default. +% +% TOL = [RTOL ATOL] is the absolute and relative tolerance. If TOL +% is a scalar, then RTOL = TOL and ATOL = 1e-2*TOL are used. The +% default is TOL = [10*eps 0.1*eps]. +% +% MAXITS is the maximum number of iterations allowed. The default is +% 100. +% +% The method used is a fail-safe combination of Newtons method and +% bisection. Only limited error-checking is performed. + +% S. Engblom 2004-03-03 + +if any(f1.*f2 > 0), error('Roots must be bracketed.'); end + +% defaults +if nargin < 8 + maxits = 100; + if nargin < 7 + tol = 10*eps; + if nargin < 6 + fargs = {}; + end + end +end + +% absolute/relative tolerance +rtol = tol(1); +if prod(size(tol)) == 1 + atol = 1e-2*rtol; +else + atol = tol(2); +end + +% form initial brackets according to the following rules: +% xl(f2 < 0 f1 < 0 f2 == 0 f1 == 0) = [x2 x1 x2 x1] +% xh(f1 > 0 f2 > 0 f1 == 0 f2 == 0) = [x1 x2 x1 x2] +% (the brackets has to be formed exactly as written) +xl = zeros(size(x1)); +xh = zeros(size(x2)); +ix1 = find(f1 < 0); +ix2 = find(f2 < 0); +xl(ix2) = x2(ix2); xl(ix1) = x1(ix1); +ix1 = find(f1 > 0); +ix2 = find(f2 > 0); +xh(ix1) = x1(ix1); xh(ix2) = x2(ix2); + +ix1 = find(f1 == 0); +ix2 = find(f2 == 0); +xl(ix2) = x2(ix2); xl(ix1) = x1(ix1); +xh(ix1) = x1(ix1); xh(ix2) = x2(ix2); + +% evaluation in midpoint +x = 0.5*(xl+xh); +[f,df] = feval(fun,x,fargs{:}); + +% step and 'step before last' +dxold = xh-xl; +dx = dxold; + +for i = 1:maxits + % prefer bisection whenever the Newton step jumps out of brackets or + % when the size of the interval is not decreasing fast enough + ix1 = (((x-xh).*df-f).*((x-xl).*df-f) >= 0 | ... + abs(2*f) > abs(dxold.*df)); + ix2 = find(~ix1); + ix1 = find(ix1); + + dxold = dx; + + % bisection step + dx(ix1) = 0.5*(xh(ix1)-xl(ix1)); + x(ix1) = xl(ix1)+dx(ix1); + + % Newton step + dx(ix2) = f(ix2)./df(ix2); + x(ix2) = x(ix2)-dx(ix2); + + if all(abs(dx) <= rtol*abs(x)+atol), return; end + + % new evaluation and brackets + [f,df] = feval(fun,x,fargs{:}); + ix1 = find(f <= 0); + ix2 = find(f >= 0); + xl(ix1) = x(ix1); + xh(ix2) = x(ix2); +end +error('Maximum number of iterations reached.'); diff --git a/stenglib/Scicomp/startup.m b/stenglib/Scicomp/startup.m new file mode 100644 index 00000000..3b2b4498 --- /dev/null +++ b/stenglib/Scicomp/startup.m @@ -0,0 +1,8 @@ +%STARTUP Add paths to SCICOMP stuff. + +% S. Engblom 2008-06-17 + +s = pwd; +if exist('test','dir') + addpath([s '/test']); +end diff --git a/stenglib/Tensor/source/make.m b/stenglib/Tensor/source/make.m new file mode 100644 index 00000000..0be2c8f8 --- /dev/null +++ b/stenglib/Tensor/source/make.m @@ -0,0 +1,174 @@ +function make +%MAKE Makefile for TENSOR. + +% Johannes Dufva 2020-10-30 (mexw64, 9.7) +% S. Engblom 2019-01-23 (mexmaci64, mexa64, 9.6) +% S. Engblom 2015-03-20 (mexa64, 8.4) +% S. Engblom 2015-01-19 (mexmaci64, 8.4) +% S. Engblom 2012-04-16 (mexmaci64, 7.11) +% S. Engblom 2011-04-17 (mexmaci64, 7.10) +% S. Engblom 2011-03-07 (mexa64, 7.11) +% S. Engblom 2010-09-23 (mexs64, 7.7) +% S. Engblom 2010-02-02 (mexa64, 7.8) +% S. Engblom 2010-01-12 (mexmaci) +% S. Engblom 2007-05-17 (mexs64) +% S. Engblom 2006-11-09 (mexa64) +% S. Engblom 2005-04-10 (mexmac) + +% Use '-DBLASINT=size_t' for the (bad!) platforms where the 'int' in +% the declaration of BLAS subroutines is in fact a 'size_t' and +% sizeof(size_t) > sizeof(int). + +s = pwd; +mx = mexext; +ver = version; + +if strcmp(mx,'mexglx') + if ~strncmp(version,'7.5',3) && ~strncmp(version,'7.8',3) + warning(['Extension .' mexext [' tested with Matlab version(s) ' ... + '7.5 and 7.8 only.']]); + end + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tndims.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tsize.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tsum.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,'-lmwblas',[s '/source/tprod.c']); +elseif strcmp(mx,'mexa64') + v = version; + if v(1) == '7' + if ~strncmp(version,'7.2',3) && ~strncmp(version,'7.8',3) && ... + ~strncmp(version,'7.11',4) && ~strncmp(version,'7.13',4) + warning(['Extension .' mexext [' tested with Matlab version(s) ' ... + '7.2, 7.8, 7.11 and 7.13 only.']]); + end + if ~strncmp(version,'7.11',4) + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tndims.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tsize.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tsum.c']); + if strncmp(version,'7.2',3) + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + '-outdir',s,[s '/source/tprod.c']); + else + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 ' ... + '-D_GNU_SOURCE -pthread -fexceptions -DBLASINT=size_t'], ... + '-outdir',s,'-lmwblas',[s '/source/tprod.c']); + end + else + % apparently, the linker path is not properly set up on 7.11: + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tndims.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tsize.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tsum.c']); + mex(['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions -DBLASINT=size_t'], ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,'-lmwblas',[s '/source/tprod.c']); + end + else + if ~strncmp(version,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 and 9.6 only.']); + end + + % apparently, the linker path is not properly set up on 8.4 (also a + % soft link libstdc++.so inside [matlabroot '/sys/os/glnxa64'] is + % required to point to the correct shared library, in this case + % libstdc++.so.6.0.17) + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tndims.c']); + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tsize.c']); + mex('CFLAGS=-fPIC -std=c99 -O3',['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,[s '/source/tsum.c']); + mex('CFLAGS=-fPIC -std=c99 -O3 -DBLASINT=size_t', ... + ['-L' matlabroot '/sys/os/glnxa64'], ... + '-outdir',s,'-lmwblas',[s '/source/tprod.c']); + end +elseif strcmp(mx,'mexmac') + if ~strncmp(version,'7.0',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.0 only.']); + end + mex('CC=gcc -std=c99','-outdir',s,[s '/source/tndims.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/tsize.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/tsum.c']); + mex('CC=gcc -std=c99','-outdir',s,[s '/source/tprod.c']); +elseif strcmp(mx,'mexmaci') + if ~strncmp(version,'7.8',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.8 only.']); + end + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tndims.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tsize.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tsum.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,'-lmwblas', ... + [s '/source/tprod.c']); +elseif strcmp(mx,'mexmaci64') + v = version; + if v(1) == '7' + if ~strncmp(version,'7.10',4) && ~strncmp(version,'7.11',4) && ... + ~strncmp(version,'7.14',4) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '7.10 and 7.11 only.']); + end + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tndims.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tsize.c']); + mex('CC=gcc -std=c99 -fast','-outdir',s,[s '/source/tsum.c']); + mex('CC=gcc -std=c99 -fast -DBLASINT=size_t', ... + '-outdir',s,'-lmwblas', ... + [s '/source/tprod.c']); + else + if ~strncmp(version,'8.4',3) && ~strncmp(version,'9.6',3) + warning(['Extension .' mexext ' tested with Matlab version(s) ' ... + '8.4 and 9.6 only.']); + end + mex('CFLAGS= -std=c99','-outdir',s,[s '/source/tndims.c']); + mex('CFLAGS=-Wno-logical-op-parentheses -std=c99','-outdir',s,[s '/source/tsize.c']); + mex('CFLAGS=-Wno-logical-op-parentheses -std=c99','-outdir',s,[s '/source/tsum.c']); + mex('CFLAGS= -std=c99 -DBLASINT=size_t', ... + '-outdir',s,'-lmwblas', ... + [s '/source/tprod.c']); + end +elseif strcmp(mx,'mexs64') + if ~strncmp(version,'7.7',3) + warning(['Extension .' mexext ' tested with Matlab version(s) 7.7 only.']); + end + mex('-outdir',s,[s '/source/tndims.c']); + mex('-outdir',s,[s '/source/tsize.c']); + mex('-outdir',s,[s '/source/tsum.c']); + mex('-outdir',s,[s '/source/tprod.c']); + +elseif strcmp(mx,'mexw64') + mex('-outdir',s,[s '/source/tndims.c']); cflags = 'CFLAGS= -lmwblas -std=c99 '; + mex('-outdir',s,[s '/source/tsize.c']); mex(cflags, '-outdir',s,[s '/source/tndims.c']); + mex('-outdir',s,[s '/source/tsum.c']); mex(cflags, '-outdir',s,[s '/source/tsize.c']); + mex('-lmwblas','CFLAGS= -D"dgemm_=dgemm" ','-outdir',s,[s '/source/tprod.c']); mex(cflags, '-outdir',s,[s '/source/tsum.c']); + mex([cflags '-D"dgemm_=dgemm" -D"BLASINT=size_t" '],'-outdir',s,[s '/source/tprod.c']); % '-lmwblas', + +else + warning('New platform. Trying default make.'); + mex('-outdir',s,[s '/source/tndims.c']); + mex('-outdir',s,[s '/source/tsize.c']); + mex('-outdir',s,[s '/source/tsum.c']); + mex('-outdir',s,[s '/source/tprod.c']); +end diff --git a/stenglib/Tensor/source/tndims.c b/stenglib/Tensor/source/tndims.c new file mode 100644 index 00000000..4a0daf8d --- /dev/null +++ b/stenglib/Tensor/source/tndims.c @@ -0,0 +1,25 @@ +/* tndims.c */ +/* S. Engblom 2005-04-10 */ + +#include "mex.h" +#include "matrix.h" + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* check of syntax */ + if (nrhs != 1 || nlhs > 1) + mexErrMsgIdAndTxt("tndims:e1","Expecting one input and one output."); + + /* input */ + mwSize ndimA = mxGetNumberOfDimensions(prhs[0]); + + /* adjust */ + if (ndimA == 2) + ndimA -= (mxGetN(prhs[0]) == 1)*(1+(mxGetM(prhs[0]) == 1)); + + /* output */ + plhs[0] = mxCreateDoubleMatrix(1,1,mxREAL); + *mxGetPr(plhs[0]) = (double)ndimA; +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Tensor/source/tprod.c b/stenglib/Tensor/source/tprod.c new file mode 100644 index 00000000..a8ebbbca --- /dev/null +++ b/stenglib/Tensor/source/tprod.c @@ -0,0 +1,298 @@ +/* tprod.c */ + +/* S. Engblom 2010-09-23 (Minor revision) */ +/* S. Engblom 2010-02-02 (Minor revision) */ +/* S. Engblom 2005-04-29 */ + +#include +#include + +#include "mex.h" +#include "matrix.h" + +/* Note: All integer arguments in calls to the BLAS function dgemm() + will be casted according to (int *)&BLASINT. This is to provide for + the (bad!) platforms where sizeof(int) < sizeof(size_t) and the + binary library uses size_t while the declaration is erroneously + int. For such cases you will have to manually define BLASINT as + size_t when calling the compiler. */ +#ifndef BLASINT +#define BLASINT int +#endif + +// declaration of BLAS-kernel +void dgemm_(const char *,const char *, + const int *,const int *,const int *,const double *, + const double *,const int *, + const double *,const int *, + const double *,double *,const int *); + +// forward declarations +void checkix(const double *ix,mwSize len,int *min,int *max); +void sortix(const double *ix,mwSize len,int edg,int *in); +void logicalsize(const mxArray *A,mwSize nda,mwSize *sizA); +void permute(const mxArray *A, + const int *p1,const int*p2,const int *p3,int len, + mxArray **Ap); + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + // check of syntax + if (nrhs != 4 || nlhs > 1) + mexErrMsgIdAndTxt("tprod:e9", + "Expecting four inputs and one output."); + + // check of input + if (!mxIsDouble(prhs[0]) || mxIsSparse(prhs[0]) || + !mxIsDouble(prhs[1]) || mxIsSparse(prhs[1])) + mexErrMsgIdAndTxt("tprod:e8", + "Expecting a double, non-sparse array."); + if (!mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) || + mxIsSparse(prhs[2]) || !mxIsDouble(prhs[3]) || + mxIsComplex(prhs[3]) || mxIsSparse(prhs[3])) + mexErrMsgIdAndTxt("tprod:e10", + "Index vector must be real, double and non-sparse."); + + // inputs IA and IB in *unit offset* + const double *ia = mxGetPr(prhs[2])-1; + const double *ib = mxGetPr(prhs[3])-1; + const mwSize nda = mxGetNumberOfElements(prhs[2]); + const mwSize ndb = mxGetNumberOfElements(prhs[3]); + int min = 0,max = 0; + + // perform some checks and determine the span of the indices + checkix(ia,nda,&min,&max); + checkix(ib,ndb,&min,&max); + + // sanity check + if (max-min > nda+ndb) + mexErrMsgIdAndTxt("tprod:e7","Index vector is not contiguous or " + "a summation index is missing."); + + /* Sort the indices out. The vectors ina and inb points back into ia + and ib (in *unit offset*) according to the absolute ordering. */ + const int edg = min; + const int len = max-min+1; + const int ndc = max; // the number of dimensions of the output + int ina[len],inb[len]; + + sortix(ia,nda,edg,memset(ina,0,len*sizeof(ina[0]))); + sortix(ib,ndb,edg,memset(inb,0,len*sizeof(inb[0]))); + + /* Set operations: the permutations contain indices pointing back + into ia and ib in *unit offset* for the private parts (pa1 and + pb1), the negative common parts (pa2 and pb2) and the positive + common parts (pa3 and pb3). */ + int pa1[len],pa2[len],pa3[len]; + int pb1[len],pb2[len],pb3[len]; + + memset(pa1,0,len*sizeof(pa1[0])); memset(pb1,0,len*sizeof(pb1[0])); + memset(pa2,0,len*sizeof(pa2[0])); memset(pb2,0,len*sizeof(pb2[0])); + memset(pa3,0,len*sizeof(pa3[0])); memset(pb3,0,len*sizeof(pb3[0])); + for (int i = 0,ia1 = 0,ib1 = 0,i2 = 0,i3 = 0; i < len; i++) { + const int ix = i+edg; + + // a setdiff produces the private part + if (ina[i] && !inb[i]) { + if (ix < 0) + mexErrMsgIdAndTxt("tprod:e3","Summation index is missing."); + pa1[ia1++] = ina[i]; + } + // setdiff... + else if (!ina[i] && inb[i]) { + if (ix < 0) + mexErrMsgIdAndTxt("tprod:e3","Summation index is missing."); + pb1[ib1++] = inb[i]; + } + // an intersection produces the (positive and negative) common part + else if (ina[i] && inb[i]) { + if (ix < 0) { + pa2[i2] = ina[i]; pb2[i2++] = inb[i]; + } + else { + pa3[i3] = ina[i]; pb3[i3++] = inb[i]; + } + } + else if (ix != 0) + mexErrMsgIdAndTxt("tprod:e4","Index vector must be contiguous."); + } + + // determine the logical size of the inputs A and B + mwSize sizA[nda],sizB[ndb]; + logicalsize(prhs[0],nda,sizA); + logicalsize(prhs[1],ndb,sizB); + + // check common sizes + for (int i = 0; pa2[i] != 0; i++) + if (sizA[pa2[i]-1] != sizB[pb2[i]-1]) + mexErrMsgIdAndTxt("tprod:e6","Dimensions must agree."); + for (int i = 0; pa3[i] != 0; i++) + if (sizA[pa3[i]-1] != sizB[pb3[i]-1]) + mexErrMsgIdAndTxt("tprod:e6","Dimensions must agree."); + + // permute to an order suitable for computations + mxArray *Ap,*Bp; + + permute(prhs[0],pa1,pa2,pa3,nda,&Ap); + const double *prA = mxGetPr(Ap); + const double *piA = mxGetPi(Ap); + + permute(prhs[1],pb1,pb2,pb3,ndb,&Bp); + const double *prB = mxGetPr(Bp); + const double *piB = mxGetPi(Bp); + + // allocate the (permuted) output C + mwSize sizC[ndc > 2 ? ndc : 2]; + sizC[0] = sizC[1] = 1; /* singletons */ + BLASINT m = 1,n = 1,k = 1,l = 1; + { + // the size of C, including the sizes of the various blocks + int j = 0; + for (int i = 0; pa1[i] != 0; i++) m *= (sizC[j++] = sizA[pa1[i]-1]); + for (int i = 0; pa2[i] != 0; i++) k *= sizA[pa2[i]-1]; + for (int i = 0; pb1[i] != 0; i++) n *= (sizC[j++] = sizB[pb1[i]-1]); + for (int i = 0; pa3[i] != 0; i++) l *= (sizC[j++] = sizA[pa3[i]-1]); + } + mxArray *Cp = mxCreateNumericArray(ndc > 2 ? ndc : 2,sizC, + mxDOUBLE_CLASS, + piA != NULL || piB != NULL ? + mxCOMPLEX : mxREAL); + double *prC = mxGetPr(Cp); + double *piC = mxGetPi(Cp); + + /* Evaluate the product as a series of matrix products, C(:,:,i) = + A(:,:,i).'*B(:,:,i), i = 1..l. This corresponds to the canonical + form C = tprod(A,B,[-1 1 3],[-1 2 3]) where A is k-by-m-by-l, B + is k-by-n-by-l and C is m-by-n-by-l. */ + const double one = 1.0,minusone = -1.0; + const char *t1 = "T",*t2 = "N"; + + // real x real, complex x real, real x complex, complex x complex + if (piC == NULL) + for (int jc = 0,ja = 0,jb = 0; jc < m*n*l; ) { + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&prC[jc],(int *)&m); + jc += m*n; ja += m*k; jb += k*n; + } + else if (piB == NULL) + for (int jc = 0,ja = 0,jb = 0; jc < m*n*l; ) { + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&prC[jc],(int *)&m); + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &piA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&piC[jc],(int *)&m); + jc += m*n; ja += m*k; jb += k*n; + } + else if (piA == NULL) + for (int jc = 0,ja = 0,jb = 0; jc < m*n*l; ) { + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&prC[jc],(int *)&m); + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&piB[jb],(int *)&k,&one,&piC[jc],(int *)&m); + jc += m*n; ja += m*k; jb += k*n; + } + else + for (int jc = 0,ja = 0,jb = 0; jc < m*n*l; ) { + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&prC[jc],(int *)&m); + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &piA[ja],(int *)&k,&prB[jb],(int *)&k,&one,&piC[jc],(int *)&m); + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&one, + &prA[ja],(int *)&k,&piB[jb],(int *)&k,&one,&piC[jc],(int *)&m); + dgemm_(t1,t2,(int *)&m,(int *)&n,(int *)&k,&minusone, + &piA[ja],(int *)&k,&piB[jb],(int *)&k,&one,&prC[jc],(int *)&m); + jc += m*n; ja += m*k; jb += k*n; + } + + // can deallocate permuted versions of A and B now + mxDestroyArray(Bp); + mxDestroyArray(Ap); + + // permute according to input + mxArray *prhs2[2]; + prhs2[0] = Cp; + prhs2[1] = mxCreateDoubleMatrix(1,ndc > 2 ? ndc : 2,mxREAL); + { + // note: unit offset + int j = 1; + double *p = mxGetPr(prhs2[1])-1; + p[1] = 1.0; p[2] = 2.0; // singletons + + for (int i = 0; pa1[i] != 0; i++) p[(int)ia[pa1[i]]] = j++; + for (int i = 0; pb1[i] != 0; i++) p[(int)ib[pb1[i]]] = j++; + for (int i = 0; pa3[i] != 0; i++) p[(int)ia[pa3[i]]] = j++; + } + mexCallMATLAB(1,plhs,2,prhs2,"permute"); + mxDestroyArray(prhs2[1]); + mxDestroyArray(Cp); +} +/*-----------------------------------------------------------------------*/ +void checkix(const double *ix,mwSize len,int *min,int *max) +/* Checks the indices ix[1..len] and adjusts min and max + accordingly. */ +{ + for (int i = 1; i <= len; i++) { + if (ix[i] == 0.0 || ix[i] != ceil(ix[i]) || isinf(ix[i])) + mexErrMsgIdAndTxt("tprod:e1", + "Index vector must contain nonzero integers."); + + if (ix[i] < *min) *min = (int)ix[i]; + else if (*max < ix[i]) *max = (int)ix[i]; + } +} +/*-----------------------------------------------------------------------*/ +void sortix(const double *ix,mwSize len,int edg,int *in) +/* Sorts the indices ix[1..len] and determines a rank-table in. The + smallest index is edg and in is assumed to be allocated and cleared + prior to call. */ +{ + for (int i = 1; i <= len; i++) { + const int ii = (int)ix[i]-edg; + + if (in[ii] != 0) + mexErrMsgIdAndTxt("tprod:e2","Indices must be distinct."); + in[ii] = i; + } +} +/*-----------------------------------------------------------------------*/ +void logicalsize(const mxArray *A,mwSize nda,mwSize *sizA) +/* Determines the size sizA of the array A. The sizes of all + dimensions i, 1 <= i <= nda are included in sizA. */ +{ + mwSize ndimA = mxGetNumberOfDimensions(A); + const mwSize *msizA = mxGetDimensions(A); + + // mask for Matlab's stupid convention + if (ndimA == 2) ndimA -= (msizA[1] == 1)*(1+(msizA[0] == 1)); + if (nda < ndimA) + mexErrMsgIdAndTxt("tprod:e5","Wrong size of index vector."); + + memcpy(sizA,msizA,ndimA*sizeof(sizA[0])); + for (int i = ndimA; i < nda; i++) sizA[i] = 1; +} +/*-----------------------------------------------------------------------*/ +void permute(const mxArray *A, + const int *p1,const int*p2,const int *p3,int len, + mxArray **Ap) +/* Permutes the array A according to the ordering p = [p2 p1 p3] which + must be of total length len. The result is stored in Ap. */ +{ + // input to Matlab + mxArray *prhs[2]; + prhs[0] = (mxArray *)A; + prhs[1] = mxCreateDoubleMatrix(1,len > 2 ? len : 2,mxREAL); + + int j = 0; + double *p = mxGetPr(prhs[1]); + p[0] = 1.0; p[1] = 2.0; // singletons + + // construct the permutation p + for (int i = 0; p2[i] != 0; i++) p[j++] = (double)p2[i]; + for (int i = 0; p1[i] != 0; i++) p[j++] = (double)p1[i]; + for (int i = 0; p3[i] != 0; i++) p[j++] = (double)p3[i]; + + // call Matlab and clean up + mexCallMATLAB(1,Ap,2,prhs,"permute"); + mxDestroyArray(prhs[1]); +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Tensor/source/tsize.c b/stenglib/Tensor/source/tsize.c new file mode 100644 index 00000000..7975dc74 --- /dev/null +++ b/stenglib/Tensor/source/tsize.c @@ -0,0 +1,60 @@ +/* tsize.c */ +/* S. Engblom 2005-04-10 */ + +#include + +#include "mex.h" +#include "matrix.h" + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* check of syntax */ + if (nrhs != 1 && nrhs != 2 || nlhs > 1) + mexErrMsgIdAndTxt("tsize:e1", + "Expecting one or two inputs and one output."); + + /* input */ + mwSize ndimA = mxGetNumberOfDimensions(prhs[0]); + const mwSize *sizA = mxGetDimensions(prhs[0]); + + /* adjust the number of dimensions */ + if (ndimA == 2) + ndimA -= (sizA[1] == 1)*(1+(sizA[0] == 1)); + + /* full or indexed version */ + if (nrhs == 1) { + /* output */ + plhs[0] = mxCreateDoubleMatrix(1,ndimA,mxREAL); + double *siz = mxGetPr(plhs[0]); + for (int i = 0; i < ndimA; i++) + siz[i] = (double)sizA[i]; + } + else { + if (!mxIsDouble(prhs[1]) || mxIsComplex(prhs[1]) || + mxIsSparse(prhs[1])) + mexErrMsgIdAndTxt("tsize:e2", + "Dimension argument must be real, double and non-sparse."); + + /* input DIMS */ + const mwSize lenDIMS = mxGetNumberOfElements(prhs[1]); + const double *prDIMS = mxGetPr(prhs[1]); + + /* check input DIMS */ + for (int i = 0; i < lenDIMS; i++) + if (prDIMS[i] < 1.0 || prDIMS[i] != ceil(prDIMS[i])) + mexErrMsgIdAndTxt("tsize:e3", + "Size argument must be nonnegative integers."); + + /* output */ + plhs[0] = mxCreateDoubleMatrix(1,lenDIMS,mxREAL); + double *siz = mxGetPr(plhs[0]); + for (int i = 0; i < lenDIMS; i++) { + if (prDIMS[i] <= ndimA) + siz[i] = (double)sizA[(int)prDIMS[i]-1]; + else + siz[i] = 1.0; + } + } +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Tensor/source/tsum.c b/stenglib/Tensor/source/tsum.c new file mode 100644 index 00000000..236b10ab --- /dev/null +++ b/stenglib/Tensor/source/tsum.c @@ -0,0 +1,498 @@ +/* tsum.c */ +/* S. Engblom 2005-08-26 */ + +#include +#include + +#include "mex.h" +#include "matrix.h" + +/* forward declarations */ +void checkix(const double *ix,mwSize len,int *max); +void sortix(const double *ix,mwSize len,int *in); +void logicalsize(const mxArray *A,mwSize nda,mwSize *sizA); +void permute(mxArray **Ap,const mxArray *A, + const int *p1,const int *p2,int len); +void sum(int dim,double *prB,double *piB, + double *prA,double *piA,double **prC,double **piC, + mwSize ndims,mwSize *siz,mwSize *str); + +#define MAX(a,b) ((a) >= (b) ? (a) : (b)) + +#define MXTYPE(A,B) (mxIsComplex(A) || mxIsComplex(B) ? \ + mxCOMPLEX : mxREAL) + +#define ISDOUBLETENSOR(A) (mxIsDouble(A) && !mxIsSparse(A)) +#define ISDOUBLEREALTENSOR(A) (mxIsDouble(A) && !mxIsSparse(A) && \ + !mxIsComplex(A)) + +/*-----------------------------------------------------------------------*/ +void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[]) +{ + /* check of syntax */ + if (nrhs != 2 && nrhs != 4 || nlhs > 1) + mexErrMsgIdAndTxt("tsum:e1","Expecting two or four inputs and " + "one output."); + + /* tensor-sum of two arrays */ + if (nrhs == 4) { + /* check of input */ + if (!ISDOUBLETENSOR(prhs[0]) || !ISDOUBLETENSOR(prhs[1])) + mexErrMsgIdAndTxt("tsum:e2","Expecting a double, " + "non-sparse array."); + if (!ISDOUBLEREALTENSOR(prhs[2]) || !ISDOUBLEREALTENSOR(prhs[3])) + mexErrMsgIdAndTxt("tsum:e3","Index vector must be real, " + "double and non-sparse."); + + /* inputs IA and IB in *unit offset* */ + const double *ia = mxGetPr(prhs[2])-1; + const double *ib = mxGetPr(prhs[3])-1; + const mwSize nda = mxGetNumberOfElements(prhs[2]); + const mwSize ndb = mxGetNumberOfElements(prhs[3]); + int max = 0; /* the number of dimensions of the output */ + + /* perform some checks and determine the span of the indices */ + checkix(ia,nda,&max); checkix(ib,ndb,&max); + + /* sanity check */ + if (max > nda+ndb) + mexErrMsgIdAndTxt("tsum:e5","Index vector must be contiguous."); + + /* Sort the indices out. The vectors ina and inb points back into ia + and ib (in *unit offset*) according to the absolute ordering. */ + int ina[max],inb[max]; + + sortix(ia,nda,memset(ina,0,max*sizeof(ina[0]))); + sortix(ib,ndb,memset(inb,0,max*sizeof(inb[0]))); + + /* Set operations: the permutations contain indices pointing back + into ia and ib in *unit offset* for the private parts (pa1 and + pb1) and the common parts (pa2 and pb2). */ + int pa1[max+1],pa2[max+1],pb1[max+1],pb2[max+1]; + + memset(pa1,0,(max+1)*sizeof(pa1[0])); memset(pa2,0,(max+1)*sizeof(pa2[0])); + memset(pb1,0,(max+1)*sizeof(pb1[0])); memset(pb2,0,(max+1)*sizeof(pb2[0])); + for (int i = 0,ia1 = 0,ib1 = 0,i2 = 0; i < max; i++) { + + /* a setdiff produces the private part */ + if (ina[i] && !inb[i]) + pa1[ia1++] = ina[i]; + /* setdiff... */ + else if (!ina[i] && inb[i]) + pb1[ib1++] = inb[i]; + /* an intersection produces the common part */ + else if (ina[i] && inb[i]) { + pa2[i2] = ina[i]; pb2[i2++] = inb[i]; + } + else + mexErrMsgIdAndTxt("tsum:e5","Index vector must be contiguous."); + } + + /* determine the logical size of the inputs A and B */ + mwSize sizA[nda],sizB[ndb]; + logicalsize(prhs[0],nda,sizA); logicalsize(prhs[1],ndb,sizB); + + /* permute to an order suitable for computations */ + mxArray *Ap,*Bp; + + permute(&Ap,prhs[0],pa2,pa1,nda); + const double *prA = mxGetPr(Ap),*piA = mxGetPi(Ap); + + permute(&Bp,prhs[1],pb2,pb1,ndb); + const double *prB = mxGetPr(Bp),*piB = mxGetPi(Bp); + + /* allocate the (permuted) output C (here prhs2[0] as it will be + permuted by prhs2[1] in the end) */ + mxArray *prhs2[2]; + prhs2[1] = mxCreateDoubleMatrix(1,MAX(2,max),mxREAL); + mwSize sizC[MAX(2,max)]; sizC[0] = sizC[1] = 1; /* singletons */ + int k = 1,m = 1,n = 1; + { + /* build the dimensions of C, including the sizes of the various + blocks along with the final permutation at the same time */ + int j = 0; + double *p = mxGetPr(prhs2[1])-1; /* unit offset */ + p[1] = 1.0; p[2] = 2.0; /* singletons */ + + for (int i = 0; pa2[i] != 0; i++) { + /* check common sizes */ + if (sizA[pa2[i]-1] != sizB[pb2[i]-1]) + mexErrMsgIdAndTxt("tsum:e8","Dimensions must agree."); + k *= (sizC[j++] = sizA[pa2[i]-1]); + p[(int)ia[pa2[i]]] = j; + } + for (int i = 0; pb1[i] != 0; i++) { + m *= (sizC[j++] = sizB[pb1[i]-1]); + p[(int)ib[pb1[i]]] = j; + } + for (int i = 0; pa1[i] != 0; i++) { + n *= (sizC[j++] = sizA[pa1[i]-1]); + p[(int)ia[pa1[i]]] = j; + } + } + prhs2[0] = mxCreateNumericArray(MAX(2,max),sizC,mxDOUBLE_CLASS, + MXTYPE(prhs[0],prhs[1])); + double *prC = mxGetPr(prhs2[0]),*piC = mxGetPi(prhs2[0]); + + /* Evaluate the sum on the canonical form C = tsum(A,B,[1 3],[1 + 2]), where A is k-by-n, B is k-by-m and C is k-by-m-by-n. */ + + /* real + real, complex + real, real + complex, complex + complex */ + if (piA == NULL || piB == NULL) { + for (int ja = 0; ja < n; ja++, prA += k, prB -= k*m) + for (int jb = 0; jb < m; jb++, prA -= k) { + for (int ii = 0; ii < k; ii++) + *prC++ = (*prA++)+(*prB++); + } + + /* straightforward copy of imaginary parts, if any */ + if (piA != NULL) + for (int ja = 0; ja < n; ja++, piA += k) + for (int jb = 0; jb < m; jb++, piC += k) + memcpy(piC,piA,k*sizeof(double)); + else if (piB != NULL) + for (int ja = 0; ja < n; ja++, piC += k*m) + memcpy(piC,piB,k*m*sizeof(double)); + } + else + /* same loop as in the real case, rewritten for typographical + reasons */ + for (int ja = 0; ja < n; ja++) { + for (int jb = 0; jb < m; jb++) { + for (int ii = 0; ii < k; ii++) { + *prC++ = (*prA++)+(*prB++); + *piC++ = (*piA++)+(*piB++); + } + prA -= k; piA -= k; + } + prA += k; piA += k; + prB -= k*m; piB -= k*m; + } + + /* deallocate permuted versions of A and B */ + mxDestroyArray(Bp); mxDestroyArray(Ap); + + /* permute according to input */ + mexCallMATLAB(1,plhs,2,prhs2,"permute"); + mxDestroyArray(prhs2[1]); mxDestroyArray(prhs2[0]); + } + /* tensor-sum of one array */ + else { + /* check of input */ + if (!ISDOUBLETENSOR(prhs[0])) + mexErrMsgIdAndTxt("tsum:e2","Expecting a double, " + "non-sparse array."); + if (!ISDOUBLEREALTENSOR(prhs[1])) + mexErrMsgIdAndTxt("tsum:e3","Index vector must be real, " + "double and non-sparse."); + + /* input IA */ + const double *ia = mxGetPr(prhs[1]); + const mwSize nda = mxGetNumberOfElements(prhs[1]); + + for (int i = 0; i < nda; i++) + if (ia[i] == 0.0 || ia[i] != ceil(ia[i])) + mexErrMsgIdAndTxt("tsum:e9","Index vector must contain " + "nonzero integers."); + + if (mxGetNumberOfElements(prhs[0]) == 0) { + /* empty dimensions is special since the output might be larger + than the input (a sum over an empty dimension produces a + singleton dimension) */ + const mwSize ndimA = mxGetNumberOfDimensions(prhs[0]); + mwSize sizA[ndimA]; + memcpy(sizA,mxGetDimensions(prhs[0]),ndimA*sizeof(sizA[0])); + + for (int i = 0; i < nda; i++) { + const int dim = abs((int)ia[i]); + /* fix for a GCC-bug in abs() -- see sum() below */ + if (0 < dim && dim <= ndimA) sizA[dim-1] = 1; + } + plhs[0] = mxCreateNumericArray(ndimA,sizA,mxDOUBLE_CLASS, + mxIsComplex(prhs[1]) ? mxCOMPLEX + : mxREAL); + return; + } + else if (nda == 0) { + plhs[0] = mxDuplicateArray(prhs[0]); + return; + } + + /* input and working arrays A and B */ + double *prA = mxGetPr(prhs[0]),*piA = mxGetPi(prhs[0]); + double *prB,*piB = NULL; + + /* dimensions and stride, to be updated */ + const mwSize ndimA = mxGetNumberOfDimensions(prhs[0]); + mwSize sizA[ndimA],strA[ndimA+1]; + memcpy(sizA,mxGetDimensions(prhs[0]),ndimA*sizeof(sizA[0])); + strA[0] = 1; + for (int i = 0; i < ndimA; i++) strA[i+1] = strA[i]*sizA[i]; + + /* B is A summed along the first dimension in IA */ + { + const int dim = abs((int)ia[0]); + int remove = 1; + /* fix for a GCC-bug in abs() -- see sum() below */ + if (0 < dim && dim <= ndimA) remove = sizA[dim-1]; + prB = mxMalloc(strA[ndimA]/remove*sizeof(double)); + if (piA != NULL) piB = mxMalloc(strA[ndimA]/remove*sizeof(double)); + } + + /* must be saved: */ + double *prB_ = prB,*piB_ = piB; + + /* sum over all given dimensions */ + sum((int)ia[0],prB,piB,prA,piA,NULL,NULL,ndimA,sizA,strA); + for (int i = 1; i < nda; i++) + sum((int)ia[i],NULL,NULL,prB,piB,&prB,&piB,ndimA,sizA,strA); + + /* account for negative indices and reallocate the final sum */ + memmove(prB_,prB,strA[ndimA]*sizeof(double)); prB = prB_; + prB = mxRealloc(prB,strA[ndimA]*sizeof(double)); + if (piB != NULL) { + memmove(piB_,piB,strA[ndimA]*sizeof(double)); piB = piB_; + piB = mxRealloc(piB,strA[ndimA]*sizeof(double)); + } + mxArray *B = mxCreateDoubleMatrix(0,0,piB != NULL ? mxCOMPLEX + : mxREAL); + mxFree(mxGetPr(B)); mxSetPr(B,prB); + if (piB != NULL) { + mxFree(mxGetPi(B)); mxSetPi(B,piB); + } + mxSetDimensions(B,sizA,ndimA); + plhs[0] = B; + } +} +/*-----------------------------------------------------------------------*/ +void checkix(const double *ix,mwSize len,int *max) +/* Checks the indices ix[1..len] and adjusts max accordingly. */ +{ + for (int i = 1; i <= len; i++) { + if (ix[i] <= 0.0 || ix[i] != ceil(ix[i]) || isinf(ix[i])) + mexErrMsgIdAndTxt("tsum:e4","Index vector must contain " + "positive integers."); + + if (*max < ix[i]) *max = (int)ix[i]; + } +} +/*-----------------------------------------------------------------------*/ +void sortix(const double *ix,mwSize len,int *in) +/* Sorts the indices ix[1..len] and determines a rank-table, in, which + is assumed to be allocated and cleared prior to call. */ +{ + for (int i = 1; i <= len; i++) { + const int ii = (int)ix[i]-1; + + if (in[ii] != 0) + mexErrMsgIdAndTxt("tsum:e6","Indices must be distinct."); + in[ii] = i; + } +} +/*-----------------------------------------------------------------------*/ +void logicalsize(const mxArray *A,mwSize nda,mwSize *sizA) +/* Determines the size sizA of the array A. The sizes of all + dimensions i, 1 <= i <= nda are included in sizA. */ +{ + mwSize ndimA = mxGetNumberOfDimensions(A); + const mwSize *msizA = mxGetDimensions(A); + + /* mask for Matlab's stupid convention */ + if (ndimA == 2) ndimA -= (msizA[1] == 1)*(1+(msizA[0] == 1)); + if (nda < ndimA) + mexErrMsgIdAndTxt("tsum:e7","Wrong size of index vector."); + + memcpy(sizA,msizA,ndimA*sizeof(sizA[0])); + for (int i = ndimA; i < nda; i++) sizA[i] = 1; +} +/*-----------------------------------------------------------------------*/ +void permute(mxArray **Ap,const mxArray *A, + const int *p1,const int *p2,int len) +/* Permutes the array A according to the ordering p = [p1 p2] which + must be of total length len. The result is stored in Ap. */ +{ + /* input to Matlab */ + mxArray *prhs[2]; + prhs[0] = (mxArray *)A; + prhs[1] = mxCreateDoubleMatrix(1,MAX(2,len),mxREAL); + + int j = 0; + double *p = mxGetPr(prhs[1]); + p[0] = 1.0; p[1] = 2.0; /* singletons */ + + /* construct the permutation p */ + for (int i = 0; p1[i] != 0; i++) p[j++] = (double)p1[i]; + for (int i = 0; p2[i] != 0; i++) p[j++] = (double)p2[i]; + + /* call Matlab and clean up */ + mexCallMATLAB(1,Ap,2,prhs,"permute"); + mxDestroyArray(prhs[1]); +} +/*-----------------------------------------------------------------------*/ +void sum(int dim,double *prB,double *piB, + double *prA,double *piA,double **prC,double **piC, + mwSize ndims,mwSize *siz,mwSize *str) +/* Sums the array (prA,piA) along the dimension dim (dim must be + nonzero and the corresponding dimension is not allowed to be of + vanishing width). The resulting array is placed in (prB,piB) and + contains on exit the newly created sum with siz[dim-1] = 1 (siz and + str are thus updated). Use prB = piB = NULL to perform the sum + within the original memory (prA,piA). In this case, the result + starts at (prC,piC) which is somewhere inside (prA,piA). The input + dim may be negative, indicating summation backwards. */ +{ + const bool fwd = dim >= 0; + dim = abs(dim); + + /* early returns (plus a small fix for GCC-problems with abs(the + most negative number)) */ + if (dim < 0 || ndims < dim || siz[dim-1] <= 1) { + if (prB == NULL) { + *prC = prA; + *piC = piA; + } + else { + memcpy(prB,prA,str[ndims]*sizeof(double)); + if (piA != NULL) memcpy(piB,piA,str[ndims]*sizeof(double)); + } + return; + } + + if (piA == NULL) { + if (fwd) { + /* forward summation */ + if (prB == NULL) *prC = prB = prA; + + /* summing along the first non-singleton dimension is faster */ + if (str[dim-1] == 1) + for (int i = 0; i < str[ndims]; i += str[dim], prB++) { + *prB = *prA++; + for (int j = 1; j < siz[dim-1]; j++) + *prB += *prA++; + } + else + for (int i = 0; i < str[ndims]; i += str[dim], prB += str[dim-1]) { + memmove(prB,prA,str[dim-1]*sizeof(double)); + prA += str[dim-1]; + for (int j = 1; j < siz[dim-1]; j++) + for (int k = 0; k < str[dim-1]; k++) + prB[k] += *prA++; + } + } + else { + /* backward summation */ + const bool inside = prB == NULL; + if (inside) + prB = prA += str[ndims]; + else { + prB += str[ndims]/siz[dim-1]; + prA += str[ndims]; + } + + /* it's still easier to sum along the first non-singleton... */ + if (str[dim-1] == 1) { + for (int i = 0; i < str[ndims]; i += str[dim]) { + *--prB = *--prA; + for (int j = 1; j < siz[dim-1]; j++) + *prB += *--prA; + } + /* the sum has been computed in the wrong memory area */ + if (inside) *prC = prB; + } + else { + for (int i = 0; i < str[ndims]; i += str[dim]) { + prB -= str[dim-1]; + prA -= str[dim-1]; + memmove(prB,prA,str[dim-1]*sizeof(double)); + for (int j = 1; j < siz[dim-1]; j++) + for (int k = str[dim-1]; --k >= 0; ) + prB[k] += *--prA; + } + if (inside) *prC = prB; + } + } + } + else { + /* the complex case follows analogously */ + if (fwd) { + if (prB == NULL) { + *prC = prB = prA; + *piC = piB = piA; + } + + if (str[dim-1] == 1) + for (int i = 0; i < str[ndims]; i += str[dim], prB++, piB++) { + *prB = *prA++; *piB = *piA++; + for (int j = 1; j < siz[dim-1]; j++) { + *prB += *prA++; + *piB += *piA++; + } + } + else + for (int i = 0; i < str[ndims]; i += str[dim], + prB += str[dim-1], piB += str[dim-1]) { + memmove(prB,prA,str[dim-1]*sizeof(double)); + prA += str[dim-1]; + memmove(piB,piA,str[dim-1]*sizeof(double)); + piA += str[dim-1]; + for (int j = 1; j < siz[dim-1]; j++) + for (int k = 0; k < str[dim-1]; k++) { + prB[k] += *prA++; + piB[k] += *piA++; + } + } + } + else { + const bool inside = prB == NULL; + if (inside) { + prB = prA += str[ndims]; + piB = piA += str[ndims]; + } + else { + prB += str[ndims]/siz[dim-1]; + prA += str[ndims]; + piB += str[ndims]/siz[dim-1]; + piA += str[ndims]; + } + + if (str[dim-1] == 1) { + for (int i = 0; i < str[ndims]; i += str[dim]) { + *--prB = *--prA; + *--piB = *--piA; + for (int j = 1; j < siz[dim-1]; j++) { + *prB += *--prA; + *piB += *--piA; + } + } + if (inside) { + *prC = prB; + *piC = piB; + } + } + else { + for (int i = 0; i < str[ndims]; i += str[dim]) { + prB -= str[dim-1]; + prA -= str[dim-1]; + memmove(prB,prA,str[dim-1]*sizeof(double)); + piB -= str[dim-1]; + piA -= str[dim-1]; + memmove(piB,piA,str[dim-1]*sizeof(double)); + for (int j = 1; j < siz[dim-1]; j++) + for (int k = str[dim-1]; --k >= 0; ) { + prB[k] += *--prA; + piB[k] += *--piA; + } + } + if (inside) { + *prC = prB; + *piC = piB; + } + } + } + } + + /* update the dimensions */ + siz[dim-1] = 1; + for (int i = dim-1; i < ndims; i++) str[i+1] = str[i]*siz[i]; +} +/*-----------------------------------------------------------------------*/ diff --git a/stenglib/Tensor/startup.m b/stenglib/Tensor/startup.m new file mode 100644 index 00000000..1865f967 --- /dev/null +++ b/stenglib/Tensor/startup.m @@ -0,0 +1,9 @@ +%STARTUP Add paths to TENSOR stuff. + +% S. Engblom 2005-04-10 + +s = pwd; +if exist('test','dir') + addpath([s '/test']); +end +addpath([s '/source']); diff --git a/stenglib/Tensor/tndims.m b/stenglib/Tensor/tndims.m new file mode 100644 index 00000000..daed17e1 --- /dev/null +++ b/stenglib/Tensor/tndims.m @@ -0,0 +1,17 @@ +%TNDIMS Number of dimensions. +% N = TNDIMS(A) returns the number of dimensions of the array A. TNDIMS +% works just like the MATLAB function NDIMS except that scalars and +% column vectors are detected correctly, see the examples below. This is +% useful since the equivalent construction using NDIMS and SIZE is quite +% ugly. +% +% Examples: +% [tndims(rand(2,2)) ndims(rand(2,2))] +% [tndims(rand(2,1)) ndims(rand(2,1))] +% [tndims(rand(1,1)) ndims(rand(1,1))] +% +% See also NDIMS, SIZE, TSIZE. + +% S. Engblom 2005-04-10 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Tensor/tndims.mexw64 b/stenglib/Tensor/tndims.mexw64 new file mode 100644 index 00000000..4eada8ed Binary files /dev/null and b/stenglib/Tensor/tndims.mexw64 differ diff --git a/stenglib/Tensor/tprod.m b/stenglib/Tensor/tprod.m new file mode 100644 index 00000000..a2bf528e --- /dev/null +++ b/stenglib/Tensor/tprod.m @@ -0,0 +1,80 @@ +function c = tprod(a,b,ia,ib) +%TPROD Tensor product. +% C = TPROD(A,B,IA,IB) computes a general tensor product of the arrays A +% and B. The supported tensor product can be described as a general +% multiplication of the elements in A and B where some indices may be +% equal and/or summed over. The mapping from input indices to output +% indices, as well as how to sum, is described by the vectors IA and IB. +% +% This function is best explained by an example. Let A be a +% 4-dimensional array and let B be a 3-dimensional array. Then +% C = TPROD(A,B,[2 -1 1 -2],[-2 2 -1]) +% creates a 2-dimensional array (matrix) C in the following way. +% First, the product +% D(j2,j1,i1,i2) = A(i2,j1,i1,j2)*B(j2,i2,j1) +% is formed. This is a 4-dimensional array D, where j1, j2, i1, i2 +% denote index variables. D is the tensor (outer) product of A and B, +% followed by a permutation of the indices and setting some indices +% equal. Of course, it is assumed that SIZE(A,1) = SIZE(B,2), SIZE(A,2) +% = SIZE(B,3), and SIZE(A,4) = SIZE(B,1). Second, we sum over the index +% variables corresponding to negative numbers (j1 and j2): +% C(i1,i2) = sum of D(j2,j1,i1,i2) +% where the indices j1 and j2 (independently) run through all their +% possible values. +% +% The arguments are assumed to have the following format: +% - A, B, and C are (real or complex) arrays of doubles. +% +% - IA and IB are vectors of doubles, containing nonzero integers. The +% length of IA(IB) has to be equal to the number of dimensions of +% A(B). However, since MATLAB automatically removes trailing singleton +% dimensions, A is padded with singleton dimensions if the number of +% dimensions of A is less than the length of IA (and similarly for B). +% Unlike MATLAB, TPROD supports 0-D and 1-D tensors (scalars and +% column vectors), so the index vectors may contain less than 2 +% elements. +% +% - The numbers in IA(IB) have to be distinct. +% +% - If a number occurs both in IA and IB, it is required that the +% corresponding dimensions in A and B have the same size. +% +% - If a negative number occurs in A(B), it must also occur in B(A). +% +% - It is assumed that the union of the numbers in IA and IB together +% with 0 form a contiguous sequence of integers. +% +% For optimal performance of TPROD you should try to have the vectors +% IA, IB and [IA IB] in order. +% +% Examples: +% % the tensor (outer) product of the matrices A and B +% A = rand(3,4); B = rand(2,5); +% C1 = tprod(A,B,[1 2],[3 4]); +% +% % the Kronecker product of the matrices A and B +% C2 = reshape(tprod(A,B,[2 4],[1 3]),size(A).*size(B)); +% +% % the ordinary matrix product of the matrices A and B +% A = rand(3,4); B = rand(4,2); +% C3 = tprod(A,B,[1 -1],[-1 2]); +% +% % the element-wise product of the matrices A and B +% A = rand(3,5); B = rand(3,5); +% C4 = tprod(A,B,[1 2],[1 2]); +% +% % the transpose of the matrix A +% C5 = tprod(A,1,[2 1],[]); +% +% % the sum of all entries in the matrix A +% C6 = tprod(A,ones(size(A)),[-1 -2],[-1 -2]); +% +% % the sum of all diagonal entries in the matrix A (the trace) +% C7 = tprod(A,eye(size(A)),[-1 -2],[-1 -2]); +% +% See also KRON, MTIMES, TIMES. + +% S. Engblom 2005-04-04 +% Based on a concept by D. Bertilsson, COMSOL AB. + +error('.MEX-file not found on path.'); diff --git a/stenglib/Tensor/tprod.mexw64 b/stenglib/Tensor/tprod.mexw64 new file mode 100644 index 00000000..a6b3d5a3 Binary files /dev/null and b/stenglib/Tensor/tprod.mexw64 differ diff --git a/stenglib/Tensor/tsize.m b/stenglib/Tensor/tsize.m new file mode 100644 index 00000000..625cf24c --- /dev/null +++ b/stenglib/Tensor/tsize.m @@ -0,0 +1,21 @@ +%TSIZE Size of array. +% SIZ = TSIZE(A) returns the size of the array A. Unlike the MATLAB +% function SIZE, the size of SIZ itself is 1-by-TNDIMS(A) (see this +% function), so that scalars and column vectors are treated in a +% consistent way. +% +% SIZ = TSIZE(A,DIMS) returns the size along the dimensions DIMS. Unlike +% the corresponding syntax using SIZE, DIMS may be a vector. +% +% Examples: +% [tsize(rand(2,2)) size(rand(2,2))] +% [tsize(rand(2,1)) size(rand(2,1))] +% [tsize(rand(1,1)) size(rand(1,1))] +% +% tsize(rand(4,3,2,1),1:4) +% +% See also TNDIMS, SIZE. + +% S. Engblom 2005-04-10 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Tensor/tsize.mexw64 b/stenglib/Tensor/tsize.mexw64 new file mode 100644 index 00000000..859b584a Binary files /dev/null and b/stenglib/Tensor/tsize.mexw64 differ diff --git a/stenglib/Tensor/tsum.m b/stenglib/Tensor/tsum.m new file mode 100644 index 00000000..6e4f17ab --- /dev/null +++ b/stenglib/Tensor/tsum.m @@ -0,0 +1,74 @@ +function c = tsum(a,b,ia,ib) +%TSUM Tensor summation. +% C = TSUM(A,B,IA,IB) computes a general sum of the arrays A and B in +% the order specified by the vectors IA and IB. +% +% This syntax is best explained by an example. Let A be a 3-dimensional +% array and let B be a 2-dimensional array. Then +% C = TSUM(A,B,[1 3 2],[3 1]) +% creates a 3-dimensional array C by forming the sum +% C(i1,i2,i3) = A(i1,i3,i2)+B(i3,i1). +% Of course, it is assumed that SIZE(A,1) = SIZE(B,2) and SIZE(A,2) = +% SIZE(B,1). The output C has the dimensions +% SIZE(A,1)-by-size(A,3)-by-SIZE(A,2). +% +% The arguments are assumed to have the following format: +% - A, B, and C are (real or complex) arrays of doubles. +% +% - IA and IB are vectors of doubles, containing positive +% integers. The length of IA(IB) has to be equal to the number of +% dimensions of A(B). However, since MATLAB automatically removes +% trailing singleton dimensions, A is padded with singleton dimensions +% if the number of dimensions of A is less than the length of IA (and +% similarly for B). Unlike MATLAB, TSUM supports 0-D and 1-D tensors +% (scalars and column vectors), so the index vectors may contain less +% than 2 elements. +% +% - The numbers in IA(IB) have to be distinct. +% +% - If a number occurs both in IA and IB, it is required that the +% corresponding dimensions in A and B have the same size. +% +% - It is assumed that the union of the numbers in IA and IB form a +% contiguous sequence of integers. +% +% For optimal performance of TSUM you should try to have the vectors IA, +% IB and [IA IB] in order. +% +% C = TSUM(A,IA) sums the array A along the dimensions specified in the +% ordered vector IA. This is an extension of the MATLAB-function SUM, +% which only allows for summation along one dimension. After summing, +% the dimensions specified in IA become singleton dimensions of C; you +% may efficiently remove them by using the MATLAB-function SQUEEZE. +% +% As a further extension of MATLABs SUM-function, negative numbers in IA +% may be used to force backward summation of the corresponding +% dimensions. +% +% Examples: +% % the "outer sum" of the vectors v and w +% v = rand(3,1); w = rand(4,1); +% C1 = tsum(v,w,[1],[2]); +% +% % the symmetric part of the matrix A +% A = rand(4); +% C2 = 0.5*tsum(A,A,[1 2],[2 1]); +% +% % the transpose of the matrix A +% C3 = tsum(A,0,[2 1],[]); +% +% % add i to row i of A +% C4 = tsum(A,1:4,[1 2],[3 1]); +% +% % sum dimensions 1 and 3 of B +% B = rand(3,4,5); +% C5 = tsum(B,[1 3]); +% +% % sum backwards +% C6 = tsum([1:100000].^-4,-2); +% +% See also TPROD, SUM, SQUEEZE. + +% S. Engblom 2005-08-26 + +error('.MEX-file not found on path.'); diff --git a/stenglib/Tensor/tsum.mexw64 b/stenglib/Tensor/tsum.mexw64 new file mode 100644 index 00000000..4abd0d0a Binary files /dev/null and b/stenglib/Tensor/tsum.mexw64 differ diff --git a/stenglib/Utils/arr2latex.m b/stenglib/Utils/arr2latex.m new file mode 100644 index 00000000..7965447e --- /dev/null +++ b/stenglib/Utils/arr2latex.m @@ -0,0 +1,232 @@ +function S = arr2latex(T,fmt,varargin) +%ARR2LATEX LaTeX-table from matrix. +% S = ARR2LATEX(T,FMT) creates a LaTeX-table S from a matrix T and a +% format string FMT. +% +% T must be a matrix containing scalars that SPRINTF +% accepts. Currently, complex matrices are not supported. +% +% FMT must be a cell-matrix of the same size as T. Singleton +% dimensions are, however, automatically expanded. ARR2LATEX +% supports all format-flags as supported by SPRINTF but the +% percent-character should not be included as it is added +% automatically. +% +% Additionally, flags ending with a dollar-sign (see the second +% example below) expands the 'scientific' notation so that +% '6.022e23' becomes '$6.022 \cdot 10^{23}$' instead. Also, 'n_' +% reflects n whitespaces in a row and is useful for typesetting +% empty entries. +% +% S = ARR2LATEX(T,FMT,...) allows for options to be passed; the +% table below explains the available options. +% +% Property Value/{Default} Description +% ----------------------------------------------------------------------- +% collabel, Cell-vector with Label of columns and rows. +% rowlabel strings {''} +% +% breakrow Integer {74} Where to insert a newline. +% +% hline {'on'} | 'off' Insert a \hline between +% each row. +% +% posinf String {'$\infty$'} Symbols of plus/minus infinity +% neginf String {'$-\infty$'} and NaN's. Use empty to type +% nan String {'-'} them using SPRINTFs defaults. +% +% caption String {''} The caption of the table. +% +% centering {'on'} | 'off' Use \centering. +% +% colspec String {'|c| ... |c|'} The format of each column. +% +% label String {''} The label of the table. +% +% pos String {'htp'} The position of the table. +% +% Examples: +% % a small table of the BesselJ-function +% A = [0 1:5; ... +% [1:4]' besselj([1:5],[1:4]')]; +% fmt = [{'1_'} frepmat({'7d'},[1 5]); ... +% frepmat([{'d'} frepmat({'7.4f'},[1 5])],[4])]; +% S = arr2latex(A,fmt,'colspec','|c||r|r|r|r|r|','hline','off') +% +% % the Gamma-function +% A = [10:15; gamma(10:15)]'; +% fmt = {'3d' '6.4e$'}; +% T = arr2latex(A,fmt,'colspec','|c|l|', ... +% 'collabel',{'$x$' '$\Gamma(x)$'}, ... +% 'hline','off') +% +% See also SPRINTF. + +% S. Engblom 2005-09-22 + +% checks... +if ~isreal(T) || ndims(T) > 2 + error('Real, two-dimensional matrix expected.'); +end +if ndims(fmt) > 2 || ~isa(fmt,'cell') || ... + ~all(all(cellfun('isclass',fmt,'char') & ... + cellfun('ndims',fmt) == 2 & ... + cellfun('size',fmt,1) == 1)) + error('Cell-matrix containing row strings expected.'); +end + +% expand fmt to match T +rep = size(T)./size(fmt); +if any(rep ~= ceil(rep)) + error('The sizes of the table and the format strings must match.'); +end +fmt = frepmat(fmt,rep); + +% parse options +opts = struct('collabel','', ... + 'rowlabel','', ... + 'breakrow',74, ... + 'hline','on', ... + 'posinf','$\infty$', ... + 'neginf','$-\infty', ... + 'nan',' -', ... + 'caption','', ... + 'centering','on', ... + 'colspec',['|' frepmat('c|',[1 size(T,2)])], ... + 'label','', ... + 'pos','htp'); +[opts,got] = parseopts(opts,varargin); +if strcmpi(opts.hline,'on') + opts.hline = '\t\\hline\n'; +else + opts.hline = ''; +end +if ~isempty(opts.label) + opts.label = ['\\label{' opts.label '}\n']; +end +if strcmpi(opts.centering,'on') + opts.centering = '\\centering\n'; +else + opts.centering = ''; +end +% rowlabels? add one column! +if ~isempty(opts.rowlabel) && ~got.colspec + opts.colspec = ['|c|' opts.colspec]; +end +if ~isempty(opts.caption) + opts.caption = ['\\caption{' l_string(opts.caption) '}\n']; +end + +% the beginning... +S = ['\\begin{table}[' opts.pos ']\n' ... + opts.centering ... + '\\begin{tabular}{' opts.colspec '}\n' ... + '\t\\hline\n']; + +% column labels +if ~isempty(opts.collabel) + s = '\t'; + for j = 1:size(T,2) + s = [s l_string(opts.collabel{j}) ' & ']; + end + s(end-1:end) = ''; + s = l_break(s,opts.breakrow); + s = [s '\t\\\\\n\t\\hline\n']; + S = [S s]; +end + +% ...the body... +for i = 1:size(T,1) + s = '\t'; + if ~isempty(opts.rowlabel) + s = [s l_string(opts.rowlabel{i}) ' & ']; + end + for j = 1:size(T,2) + ss = l_format(fmt{i,j},T(i,j)); + + % signs for infinity and NaN's, if any -- try to keep the tabs + % of the table whenever possible + if ~isempty(opts.posinf) && T(i,j) == Inf + ss = [frepmat(' ',[1 max(size(ss,2)-size(opts.posinf,2),0)]) ... + l_string(opts.posinf)]; + elseif ~isempty(opts.neginf) && T(i,j) == -Inf + ss = [frepmat(' ',[1 max(size(ss,2)-size(opts.neginf,2),0)]) ... + l_string(opts.neginf)]; + elseif ~isempty(opts.nan) && isnan(T(i,j)) + ss = [frepmat(' ',[1 max(size(ss,2)-size(opts.nan,2),0)]) ... + l_string(opts.nan)]; + end + s = [s ss ' & ']; + end + s(end-1:end) = ''; + + % possibly insert breaks + s = l_break(s,opts.breakrow); + + % new row + s = [s '\t\\\\\n' opts.hline]; + S = [S s]; +end + +% always produce a 'bounding box' +if isempty(opts.hline), S = [S '\t\\hline\n']; end + +% ...and the end! +S = [S ... + '\\end{tabular}\n' ... + opts.caption ... + opts.label ... + '\\end{table}']; + +% let sprintf do the rest +S = sprintf(S); + +%-------------------------------------------------------------------------- +function s = l_string(t) +%L_STRING Formatting one string. +% S = L_FORMAT(T) formats the string T. Care is taken to insert a +% double '\\' whenever a single slash is encountered. + +s = t(sort([find(t == '\') 1:size(t,2)])); + +%-------------------------------------------------------------------------- +function s = l_break(t,n) +%L_BREAK Break rows. +% S = L_BREAK(T,N) tries to insert new lines in the string T at +% every Nth character position. The new lines are always inserted +% after a '&'-character and a double tab is used so as to keep the +% text readable. + +ii = find(t == '&'); +jj = find(diff(mod(ii,n)) < 0); +if ~isempty(jj) + jj = ii(jj); + s = t(1:jj(1)); + for k = [jj+1; jj(2:end) size(t,2)] + s = [s '\n\t\t' t(k(1):k(2))]; + end +else + s = t; +end + +%-------------------------------------------------------------------------- +function s = l_format(f,t) +%L_FORMAT Formatting one scalar. +% S = L_FORMAT(F,T) formats the scalar T using the flag F and +% returns the result in the string S. + +if f(end) == '_' + s = frepmat(' ',[1 str2num(f(1:end-1))]); +elseif f(end) == '$' + s = sprintf(['%' f(1:end-1)],t); + i = find(s == 'e',1,'first'); + if ~isempty(i) + s1 = s(1:i-1); s2 = s(i+1:end); + % masks for 'e-08' ('-8'), 'e+01' ('1'), 'e+00' ('0') + s2 = num2str(str2num(s2)); + s = ['$' s1 ' \\cdot 10^{' s2 '}$']; + end +else + s = sprintf(['%' f],t); +end +%-------------------------------------------------------------------------- diff --git a/stenglib/Utils/assignopts.m b/stenglib/Utils/assignopts.m new file mode 100644 index 00000000..9d1c780c --- /dev/null +++ b/stenglib/Utils/assignopts.m @@ -0,0 +1,46 @@ +function assignopts(opts) +%ASSIGNOPTS Assign options. +% ASSIGNOPTS(OPTS) assigns the options in OPTS. This is useful +% whenever you prefer to use 'name' rather than 'OPTS.name' +% throughout the code. OPTS can either be a structure or a +% cell-vector with property/value pairs. +% +% Example: +% % caller's site: +% opts.order = 2; +% +% % inside fun(opts, ...) +% def.rho = 0.01; +% def.method = 'lagrange'; +% def.order = 4; +% opts = parseopts(def,opts); +% assignopts(opts); +% +% % use the variables directly +% order % note: 2 +% method +% +% See also PARSEOPTS, ASSIGNIN. + +% S. Engblom 2010-08-25 + +if isstruct(opts) + if any(size(opts) ~= 1) + error('Expecting a scalar structure.'); + end + field = fieldnames(opts); + val = struct2cell(opts); +elseif iscell(opts) + field = reshape(opts(1:2:end),[],1); + val = reshape(opts(2:2:end),[],1); + if size(field,1) ~= size(val,1) + error('Cell-vector must contain property/value pairs.'); + end +else + error('Options must either be a struct or a cell-vector.'); +end + +% immediate +for i = 1:size(field,1) + assignin('caller',field{i},val{i}); +end diff --git a/stenglib/Utils/connect.m b/stenglib/Utils/connect.m new file mode 100644 index 00000000..fd141fc3 --- /dev/null +++ b/stenglib/Utils/connect.m @@ -0,0 +1,113 @@ +function S = connect(z,tol,exact) +%CONNECT Connectivity information for points. +% S = CONNECT(Z,tol) determines closest neighbours for all the (complex) +% coordinates Z so that find(S(:,i)) contains a complete list of indices +% to elements Z(j) that satisfy abs(Z(j)-Z(i)) <= tol. +% +% S = CONNECT(Z,tol,0) returns a slightly less sharp result. The list is +% still complete but generally contains more elements. All elements Z(j) +% are guaranteed to satisfy abs(Z(j)-Z(i)) <= 2*sqrt(2)*tol ~ 2.82*tol. +% +% The output S is an N-by-N sparse matrix, where N is the number of +% coordinates. +% +% Cautionary: if tol is small compared to the largest bounding box +% containing all points in Z, a lot of memory is allocated. +% +% Example: +% % 10000 random points +% np = 10000; +% z = 2*complex(rand(np,1),rand(np,1))-(1+1i); +% +% S = connect(z,0.5e-1); +% S0 = connect(z,0.5e-1,0); % faster but less sharp +% +% figure, plot(z,'b.'); hold on +% i = find(S0(:,1)); plot(z(i),'r.'); +% i = find(S(:,1)); plot(z(i),'c.'); plot(z(1),'k*'); +% j = find(S0(:,2)); plot(z(j),'r.'); +% j = find(S(:,2)); plot(z(j),'c.'); plot(z(2),'k*'); +% axis equal, axis([-1 1 -1 1]); + +% S. Engblom 2009-09-24 (Minor revision) +% S. Engblom 2008-04-29 + +% check input +if ~isscalar(tol) || tol <= 0 + error('Invalid input.'); +end + +% column vector +z = z(:); N = size(z,1); + +% extract coordinates +x = real(z); y = imag(z); + +% bounding box +xmin = min(x); hx = max(x)-xmin; +ymin = min(y); hy = max(y)-ymin; + +% size of each box +heff = 2*tol; + +% number of boxes +nx = max(1,ceil(hx/heff)); ny = max(1,ceil(hy/heff)); +NB = nx*ny; + +% a possibly slightly larger bounding box +hx = nx*heff; hy = ny*heff; + +% change to local coordinates in [0,1] +x = (x-xmin)/hx; y = (y-ymin)/hy; + +% index into primary mesh +i1 = 1+min(floor(x*nx),nx-1); +j1 = min(floor(y*ny),ny-1); + +% index into shifted mesh +i2 = 1+min(floor(x*nx+0.5),nx); +j2 = min(floor(y*ny+0.5),ny); + +% assemble the result +ix = repmat(1:N,1,4); +jx = [i1 i1+NB i2+2*NB+nx i2+3*NB+nx+ny]+ ... + [j1*nx j2*nx j1*(nx+1) j2*(nx+1)]; +S = sparse(ix,jx,1); +S = S*S'; + +% optionally improve the result +if nargin < 3 || exact + [ix,jx] = find(S); + mask = find(abs(z(ix)-z(jx)) <= tol); + S = sparse(ix(mask),jx(mask),1,N,N); +end + +% this code resembles the multipole mesh more closely but is +% slightly less effective: + +% % size of each box +% heff = tol/2; +% +% % number of boxes +% nx = max(1,ceil(hx/heff)); ny = max(1,ceil(hy/heff)); +% NB = nx*ny; +% +% % a possibly slightly larger bounding box +% hx = nx*heff; hy = ny*heff; +% +% % change to local coordinates in [0,1] +% x = (x-xmin)/hx; y = (y-ymin)/hy; +% +% % index into an imagined nx-by-ny matrix +% ii = 1+min(floor(x*nx),nx-1); +% jj = min(floor(y*ny),ny-1); +% ii = [max(ii-1,1); ii; min(ii+1,nx)]; +% jj = [max(jj-1,0) jj min(jj+1,ny-1)]; +% ii = repmat(ii,1,3); +% jj = repmat(jj,3,1); +% +% % assemble the result +% ix = repmat(1:N,1,9); +% jx = ii(:)+jj(:)*nx; +% S = sparse(ix,jx,1,N,NB); % N-by-NB (coordinate-to-box) +% S = S*S'; % N-by-N (coordinate-to-coordinate in the same box) diff --git a/stenglib/Utils/consistency.m b/stenglib/Utils/consistency.m new file mode 100644 index 00000000..b89393a6 --- /dev/null +++ b/stenglib/Utils/consistency.m @@ -0,0 +1,97 @@ +function [p,c] = consistency(rho,sigma,N) +%CONSISTENCY Local truncation error of linear multistep method. +% [P,C] = CONSISTENCY(RHO,SIGMA) determines the local truncation +% error for the linear multistep method defined by the polynomials +% RHO and SIGMA. For the ODE Y'= F(t,Y) the method is given by +% RHO(E)*Y(n) = h*SIGMA(E)*F(t(n),Y(n)), where E is the forward +% shift operator. +% +% The local error is returned as an integer P (the order) and a +% length-2 vector C (error constants). The local error is then to +% leading order given by C(1)*h^(P+1)*D^(P+1)*Y(n), where D = +% d/dt. The error constant is given by C(2) = C(1)/sum(sigma). +% +% P = CONSISTENCY(RHO,SIGMA) returns a residual polynomial P such +% that the local error is given by POLYVAL(P,DELTA)*Y(n), where the +% discrete difference DELTA = E-1 = exp(hD)-1 = hD+(hD)^2/2+... +% +% P = CONSISTENCY(RHO,SIGMA,N) returns the first N terms, where N = +% max(numel(RHO),numel(SIGMA))+2 is the default. +% +% Examples: +% % explicit Euler +% [p,c] = consistency([1 -1],[0 1]) +% +% % BDF2 +% p1 = consistency([3/2 -2 1/2],[1 0 0]) +% +% % "explicit" BDF2 +% p2 = consistency([1 -4/3 1/3],2/3*[0 2 -1]) +% +% % 2nd order Adams-Bashforth +% p3 = consistency([1 -1 0],[0 3/2 -1/2]) +% +% % (note that the actual error constants differ more than +% % perhaps expected judging from p2 and p3:) +% [p,c2] = consistency([1 -4/3 1/3],2/3*[0 2 -1]) +% [p,c3] = consistency([1 -1 0],[0 3/2 -1/2]) +% +% % trapezoidal rule +% p4 = consistency([1 -1],1/2*[1 1],10)' +% +% See also STABILITY, POLYVAL. + +% S. Engblom 2010-01-31 + +% input +rlen = numel(rho); +slen = numel(sigma); +len = max(rlen,slen); +if nargin < 3, N = len+2; end + +% code becomes much slicker if the constant term is first: +rho = [reshape(rho(end:-1:1),1,[]) zeros(1,len-rlen) ]; +sigma = [reshape(sigma(end:-1:1),1,[]) zeros(1,len-slen)]; + +% series for log(1+x) +logpoly = [0 1./(1:N-1)]; % enough for terms 1..N +logpoly(1:2:end) = -logpoly(1:2:end); +% (understood with x = DELTA = exp(hD)-1, logpoly = hD) + +% re-expand rho and sigma around x = 1 +sigma1 = zeros(1,len); +rho1 = zeros(1,len); +fac = 1; +dfac = 1:len; +for d = 1:len + % derivatives at x = 1 + sigma1(d) = sum(sigma)/fac; + sigma = sigma(2:end).*dfac(1:end-d); + rho1(d) = sum(rho)/fac; + rho = rho(2:end).*dfac(1:end-d); + fac = d*fac; +end + +% residual polynomial +p = [rho1 zeros(1,N-1)]-conv(logpoly,sigma1); +if nargout > 1 + ip = find(abs(p) > eps(1000*(norm(rho1,1)+norm(sigma1,1))),1,'first'); + c = [p(ip) p(ip)/sigma1(1)]; + p = ip-2; +else + p = p(1:N); + + % Stirling transformation (in case one prefers a residual polynomial + % in hD rather than in DELTA = exph(hD)-1) + %v = [1 zeros(1,N-2)]; + %q = [p(1) zeros(1,N-1)]; + %p = p(2:N); + %for i = 2:N + % q(i) = v*p'; + % v = (1:N-1)/i.*(v+[0 v(1:end-1)]); + %end + %p = q; + + % Matlab convention: + p = p(end:-1:1); +end diff --git a/stenglib/Utils/matmerge.m b/stenglib/Utils/matmerge.m new file mode 100644 index 00000000..aac2057b --- /dev/null +++ b/stenglib/Utils/matmerge.m @@ -0,0 +1,57 @@ +function matmerge(dest,file1,file2) +%MATMERGE Merge .mat-files. +% MATMERGE(DEST,FILE1,FILE2) Loads the two files FILE1 and FILE2 and +% examines the variables. The intersection of variable names is +% determined and the values of all those variables are concatenated +% along the first non-matching dimension (or last dimension). The +% resulting variables are then stored in the created file DEST. It +% is considered an error if DEST exists. +% +% MATMERGE(DEST,FILE1) allows for an existing file DEST. Hence this +% syntax can be thought of as DEST += FILE1. +% +% MATMERGE is a simple design to aid with distributed computations. No +% attempt in merging non-array data or making the data unique has been +% made. + +% S. Engblom 2012-04-01 + +% new file or appending to dest +if ~strcmp(dest,'.mat') + dest = [dest '.mat']; +end +if nargin > 2 + if exist(dest,'file') + error('matmerge:e1','Will not overwrite existing file.'); + end +else + if ~exist(dest,'file') + error('matmerge:e2','Cannot merge with non-existing file.'); + end + file2 = file1; + file1 = dest; +end + +% load files +s1 = load(file1); +n1 = fieldnames(s1); +s2 = load(file2); +n2 = fieldnames(s2); + +% intersection of variable names +n3 = fsetop('intersect',n1,n2); +s3 = struct; + +% merge data +for i = 1:size(n3,2) + temp1 = s1.(n3{i}); + temp2 = s2.(n3{i}); + effdim = max(tndims(temp1),tndims(temp2)); + sz1 = tsize(temp1,1:effdim); + sz2 = tsize(temp2,1:effdim); + catdim = find([sz1 0] ~= [sz2 1],1,'first'); + % this wont work if temp1 and temp2 doesn't match + s3.(n3{i}) = cat(catdim,temp1,temp2); +end + +save(dest,'-struct','s3'); diff --git a/stenglib/Utils/movie2gif.m b/stenglib/Utils/movie2gif.m new file mode 100644 index 00000000..43ef08a4 --- /dev/null +++ b/stenglib/Utils/movie2gif.m @@ -0,0 +1,77 @@ +function movie2gif(M,cdata,file,varargin) +%MOVIE2GIF Create GIF animation from MATLAB movie. +% MOVIE2GIF works like the MATLAB function MOVIE2AVI except that a +% GIF animation is created instead of an AVI movie. This usually +% saves a lot of space for simpler movies. +% +% MOVIE2GIF(M,CDATA,FILE,...) creates a GIF animation from the movie +% M using colors in the cell-vector CDATA and writes the result to +% FILE. The extension '.gif' will be added to FILE if it doesn't +% already have an extension. +% +% Usually, CDATA kan be obtained from M itself. For instance, CDATA +% = {M(1).cdata} might work in many situations, meaning that only +% the colors of the first frame are used. +% +% Property Value Description +% ----------------------------------------------------------------------- +% comment String or cell- Comment(s) to add to the image. +% array of strings +% +% delaytime Scalar in [0,655] Specifies the delay in seconds +% before displaying the next image. +% +% loopcount Integer in Specifies the number of times to repeat +% [0..65535] | Inf the animation. If loopcount = 0, +% the animation will be played +% once, if loopcount = 1, the +% animation will be played twice, +% and so on. +% +% For more available properties, see the GIF-section in the help-text for +% IMWRITE. +% +% Example: +% tspan = linspace(0,2*pi,30); tspan(end) = []; +% x = linspace(-pi,pi); +% figure, j = 0; +% M = struct('cdata',{},'colormap',{}); +% for t = tspan +% j = j+1; +% y1 = sin(x+t); y2 = cos(x-t); +% plot(x,y1,'b',x,y2,'r'); +% M(j) = getframe; +% end +% +% % use cdata from first two frames only +% movie2gif(M,{M(1:2).cdata},'test.gif','delaytime',0.05,'loopcount',inf); +% +% See also MOVIE2AVI, AVIREAD, IMWRITE, PRIVATE/WRITEGIF. + +% S. Engblom 2010-09-01 (Revision, explicitly adding extension .gif) +% S. Engblom 2010-01-14 (Revision, argument cdata) +% S. Engblom 2007-05-08 + +% straightforward once you know how to do it... +sz = size(M(1).cdata); +gif = zeros([sz(1:2) 1 size(M,2)],'uint8'); + +if ~all(cellfun('isempty',{M.colormap})) + warning('Colormap of movie ignored.'); +end + +% find single color map from cdata-cells provided +[foo,map] = rgb2ind(cat(1,cdata{:}),256); +if size(map,1) < 2 + warning(['Obtained a colormap with less than two colors. ' ... + 'Try to provide more colors.']); +end + +% compute GIF-frames +for j = 1:size(M,2) + gif(:,:,:,j) = rgb2ind(M(j).cdata,map); +end + +% final write +if all(file ~= '.'), file = [file '.gif']; end +imwrite(gif,map,file,'gif',varargin{:}); diff --git a/stenglib/Utils/ndop.m b/stenglib/Utils/ndop.m new file mode 100644 index 00000000..d7a3a9e7 --- /dev/null +++ b/stenglib/Utils/ndop.m @@ -0,0 +1,198 @@ +function varargout = ndop(M,ix,sz,varargin) +%NDOP General N-dimensional operator. +% S = NDOP(M,IX,SZ) constructs a sparse matrix S representing the +% N-dimensional computational molecule M in a way such that the result of +% M acting on a vector X is given by the sparse matrix-vector product S*X. +% +% M is an N-dimensional full matrix with M(IX(1),...,IX(N)) being regarded +% as the target point of the molecule. +% +% SZ is a vector of length N specifying the size of the N-dimensional +% rectangular computational domain. +% +% S = NDOP(M,IX,SZ,L,R) is a syntax for variable coefficients and +% additionally performs left and right scaling by vectors L and R. The +% result is the same as if S had been multiplied from the left and right +% by diagonal matrices with L and R as diagonals. Empty arguments should +% be used when one-sided scaling is requested. +% +% S = NDOP(M,IX,SZ,IP) is a syntax for boundary conditions and uses the +% cell-vector IP of length N to specify the points where the molecule +% should be applied. If IP{j} is empty, all points in that dimension are +% mapped -- similar to MATLABs colon-notation. +% +% S = NDOP(M,IX,SZ,[]) is a useful shorthand for mapping all points using +% periodicity at all boundaries, see the remark on boundaries below. +% +% S = NDOP(M,IX,SZ,IP,L,R) additionally performs scaling as before. Again, +% empty arguments should be used for one-sided scaling. +% +% [II,JJ,SS] = NDOP(M,IX,SZ,IP,L,R,I,J,S) is a special fast syntax for +% boundary conditions. It does not assemble the resulting sparse matrix +% but rather keeps it in the triplet-format for later assembly via a call +% to FSPARSE. The result is concatenated with the obligatory inputs I, J +% and S. +% +% Boundaries: in the first case, boundary points in X are mapped to zero; +% boundary points in X are all those points such that applying M would +% cause an 'index out of bounds'-error. In the 'boundary syntax' however, +% periodicity is used in order to be able to handle periodic boundary +% conditions. +% +% On return, S is a sparse square matrix of size PROD(SZ). The enumeration +% of the solution vector X follows that of M, that is, the weights for +% X(j-1) and X(j+1) when computing X(j) is given by +% M(IX(1)-1,IX(2),...,IX(N)) and M(IX(1)+1,IX(2),...,IX(N)) +% respectively. Hence, "matrix-coordinates" rather than Cartesian +% coordinates are used. +% +% Example: +% % classical 2nd order accurate Laplace-operator +% % on the (N-by-N) unit square +% N = 50; h = 1/(N-1); +% M = [0 1 0; 1 -4 1; 0 1 0]/h^2; +% S = ndop(M,[2 2],[N N]); +% +% % Dirichlet BCs at all boundaries +% BC = ndop(1,[1 1],[N N],{[1 N] []})+... % lower/upper +% ndop(1,[1 1],[N N],{[2:N-1] [1 N]}); % left/right +% +% % solve -Laplace(u) = 1 with u = 0 on the boundary +% f = ones(N); f([1 N],:) = 0; f(:,[1 N]) = 0; +% u = (-S+BC)\f(:); +% figure, surf(reshape(u,N,N)); +% +% See also FSPARSE, SPBLOCK. + +% S. Engblom 2007-01-29 (Major revision) +% S. Engblom 2004-10-29 (Revision, changed to frepmat) +% S. Engblom 2003-03-26 + +% determine dimensionality +ix = ix(:); sz = sz(:); +ndim = size(ix,1); +if size(ix,1) ~= size(sz,1) + error('ndop:e1','Ambiguous dimension of input.'); +elseif tndims(M) > ndim + error('ndop:e2','Molecule has incorrect dimension.'); +end + +% scaling arguments +if nargin == 5 + L = varargin{1}(:); + R = varargin{2}(:); +elseif nargin == 6 || nargin == 9 + L = varargin{2}(:); + R = varargin{3}(:); +else + L = []; + R = []; +end + +% find on M +[imm,foo,ss] = find(M(:)); +imm = imm(:); % fix for bug in find when M is zero scalar +ss = ss(:).'; + +% compute indices +imr = zeros(size(imm,1),ndim); +str = [1 cumprod(tsize(M,1:ndim))]; +imm = imm-1; +for j = ndim:-1:1, + imr(:,j) = floor(imm/str(j))+1-ix(j); % use ix as origin + imm = rem(imm,str(j)); +end + +% stride for the (imagined) N-dimensional solution block +str = [1; cumprod(sz)]; + +% check of scaling +if ~isempty(L) && numel(L) ~= str(end) + error('ndop:e3','Wrong size of scaling argument.'); +end +if ~isempty(R) && numel(R) ~= str(end) + error('ndop:e3','Wrong size of scaling argument.'); +end + +% normal syntax: apply molecule only where possible +if nargout <= 1 && (nargin == 3 || nargin == 5) + % compute relative indices to DOFs coupled to each DOF (there will typically + % be a zero somewhere in relind representing the target point of the + % molecule) + relind = imr*str(1:end-1); + + % build rowindices by finding all interior points: the location of the + % target point together with the index of the extreme points of the + % molecule determine which DOFs are inner DOFs + first = max([-min(imr,[],1); zeros(1,ndim)],[],1)'; + last = sz-1+min([-max(imr,[],1); zeros(1,ndim)],[],1)'; + first = first.*str(1:end-1); + last = last.*str(1:end-1); + + ii = 1; + for j = 1:ndim + % the rank of ii increases by 1 + ii = tsum(ii(:),[first(j):str(j):last(j)]',1,2); + end + + % final assembly + ii = ii(:); + jj = tsum(ii,relind,1,2); +% 'boundary' or 'periodic' syntax +elseif nargout <= 1 && (nargin == 4 || nargin == 6) || ... + (nargout == 3 && nargin == 9) + ip = varargin{1}(:); + if isempty(ip) + % support of all-periodic case + ip = cell(ndim,1); + ipsz = sz; + elseif size(ip,1) ~= ndim + error('ndop:e1','Ambiguous dimension of input.'); + else + ipsz = cellfun('prodofsize',ip)'; + ipsz(ipsz == 0) = sz(ipsz == 0); % 'colon'-notation + end + + % build jj by indexing according to input ip, assembling all indices per row + % at the same time (this is the last dimension in jj) + jj = ones(size(imm,1),1); + ii = 1; + for j = 1:ndim + if isempty(ip{j}), ip{j} = 1:sz(j); end + jj = tsum(jj,mod(tsum(ip{j}(:)-1,imr(:,j),1,2),sz(j))*str(j),... + [1:j-1 j+1],[j j+1]); + ii = tsum(ii(:),(ip{j}(:)-1)*str(j),1,2); + end + + % final assembly + ii = ii(:); + jj = reshape(jj,[],size(imr,1)); +else + error('ndop:e4','Unknown syntax.'); +end + +% scaling (if any) +if ~isempty(L) + ss = L(ii)*ss; + if ~isempty(R) + ss = ss.*reshape(R(jj),size(ss)); + end +elseif ~isempty(R) + ss = tprod(ss,reshape(R(jj),size(jj)),[3 2],[1 2]); +end + +% output +if nargout <= 1 + varargout{1} = fsparse(ii,jj,ss,[str(end) str(end)]); +else + % cannot use the 'row' format of fsparse for this output + if size(ii,2) ~= size(jj,2) + ii = frepmat(ii,[1 size(jj,2)]); + end + if size(ss,1) ~= size(ii,1) + ss = frepmat(ss,size(ii,1)); + end + varargout(1:3) = {[varargin{4}(:); ii(:)] ... + [varargin{5}(:); jj(:)] ... + [varargin{6}(:); ss(:)]}; +end diff --git a/stenglib/Utils/parseopts.m b/stenglib/Utils/parseopts.m new file mode 100644 index 00000000..1dc3aca1 --- /dev/null +++ b/stenglib/Utils/parseopts.m @@ -0,0 +1,84 @@ +function [opts,got] = parseopts(optdef,optin) +%PARSEOPTS Parse options. +% [OPTS,GOT] = PARSEOPTS(OPTDEF,OPTIN) merges the options OPTIN with +% the default options OPTDEF and produces the options structure +% OPTS. OPTDEF and OPTIN can either be structures or cell-vectors +% with property/value pairs. +% +% Fields in OPTDEF that are structures are recursively parsed, see +% the example below. +% +% The optional output GOT is a structure containing the same fields +% as OPTS. The values of each field are boolean variables indicating +% whether the corresponging field was received in OPTIN or not. +% +% It is considered an error for OPTIN to contain options not in +% OPTDEF. +% +% Example: +% f1 = struct('delta',0.5,'rho',0.8); +% f2 = struct('delta',0.2,'rho',0.1); +% od = struct('nlin','yes','theta',0.1,'relax','on', ... +% 'filter1',f1,'filter2',f2); +% oi = {'theta' 0.25 'relax' 'off' 'filter1' {'rho' 0.2}}; +% [opts,got] = parseopts(od,oi) +% +% See also ASSIGNOPTS. + +% S. Engblom 2005-07-21 + +% extract fields and values +if isstruct(optdef) + if any(size(optdef) ~= 1) + error('Expecting a scalar structure.'); + end + dfield = fieldnames(optdef); + dval = struct2cell(optdef); +elseif iscell(optdef) + dfield = reshape(optdef(1:2:end),[],1); + dval = reshape(optdef(2:2:end),[],1); + if size(dfield,1) ~= size(dval,1) + error('Cell-vector must contain property/value pairs.'); + end +else + error('Default options must either be a struct or a cell-vector.'); +end + +if isstruct(optin) + if any(size(optin) ~= 1) + error('Expecting a scalar structure.'); + end + ifield = fieldnames(optin); + ival = struct2cell(optin); +elseif iscell(optin) + ifield = reshape(optin(1:2:end),[],1); + ival = reshape(optin(2:2:end),[],1); + if size(ifield,1) ~= size(ival,1) + error('Cell-vector must contain property/value pairs.'); + end +else + error('Input options must either be a struct or a cell-vector.'); +end + +% check for illegal fields +d = fsetop('setdiff',ifield,dfield); +if ~isempty(d) + error(sprintf('Unsupported option "%s".',d{1})); +end + +% merge and create structure of options +[sfield,ii,id] = fsetop('union',ifield,dfield); +opts = cell2struct([ival(ii); dval(id)],sfield(:)); +got = cell2struct([frepmat({true},size(ii,2)); ... + frepmat({false},size(id,2))],sfield(:)); + +% recursive call for fields containing structures +is = find(cellfun('isclass',dval,'struct'))'; +for i = is + f = dfield{i}; + if got.(f) + [opts.(f),got.(f)] = parseopts(dval{i},opts.(f)); + else + [opts.(f),got.(f)] = parseopts(dval{i},{}); + end +end diff --git a/stenglib/Utils/perfecthash.m b/stenglib/Utils/perfecthash.m new file mode 100644 index 00000000..c1fcda97 --- /dev/null +++ b/stenglib/Utils/perfecthash.m @@ -0,0 +1,228 @@ +function hash = perfecthash(s) +%PERFECTHASH Perfect hash-function from strings. +% HASH = PERFECTHASH(S) with S a cell-vector of strings tries to +% determine a perfect hash-function. A few simple models for such +% functions are tried and any function determined in this way is +% returned as a string HASH in C pseudo-code. +% +% Note: PERFECTHASH only indexes characters with index <= maximum length +% of any of the strings. You must explicitly increase the length of the +% strings to index other characters. See the second example below. +% +% Example: +% s = {'check','intersect','setdiff','setxor', ... +% 'union','unique','ismember'}; +% perfecthash(s) +% +% % case when increasing the length of the strings might help +% s = {'if' 'else' 'elseif' 'while' 'for' 'switch' 'case' ... +% 'end' 'return' 'break' 'continue' 'function'}; +% perfecthash(s) % void +% +% % add strlen(s{i}) and ending '\0' +% sz = cellfun('prodofsize',s); +% for i = 1:numel(s), s{i} = [char(sz(i)) s{i} char(0)]; end +% perfecthash(s) +% % (here str[0] is strlen(str)) + +% S. Engblom 2016-10-07 (Improved handling of non-printable characters) +% S. Engblom 2011-06-27 (enum+char*[] output, bug for 3-ary hashes fixed) +% S. Engblom 2008-04-25 (Minor revision) +% S. Engblom 2007-05-25 + +% to be built +hash = ''; + +% construct character array c with strings as rows +c = char(s); +slen = min(cellfun('prodofsize',s)); +nums = size(c,1); +% consistently using doubles (not optimal -- but convenient) +c = double(c(:,1:slen)); + +% enums are formatted in printable uppercases, strings are case-sensitives +S = s; +for i = 1:nums + prints1 = find(33 <= s{i} & s{i} <= 126); + prints2 = ('a' <= s{i} & s{i} <= 'z') | ... + ('A' <= s{i} & s{i} <= 'Z') | ... + ('0' <= s{i} & s{i} <= '9'); + s{i} = s{i}(prints1); % be more generous here + S{i} = upper(S{i}); + S{i}(~prints2) = '_'; +end + +% try these sizes for the hash-tables +siz = nums:2*nums; + +% factors to try +muls = 1:20; + +% 1st order hash-functions +for sz = siz + cc = mod(c,sz); + ii = all(fsparse(cc+1,1:slen,1) <= 1,1); + if any(ii) + for i = find(ii) + % output function + str = ['typedef enum {' frepmat('%s = %d,',[1 size(c,1)])]; + str = [str(1:end-1) '} PROP;\n']; + [foo,is] = sort(cc(:,i)); + t = [S(is); mat2cell(cc(is,i)',1,ones(1,size(c,1)))]; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,i)+1) = {'&get%s,'}; + t = cell2mat(t); + str = ['static const getfun_t getval[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,i)+1) = S; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,i)+1) = {'&%s,'}; + t = cell2mat(t); + str = ['void *val[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,i)+1) = S; + hash = [hash sprintf(str,t{:})]; + + str = ['static const char *tab[] = {' frepmat('"%s",',[1 sz])]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,i)+1) = s; + hash = [hash sprintf(str,t{:})]; + hash = [hash sprintf('hash(str) = str[%d] %% %d;\n\n',i-1,sz)]; + end + % no point in continuing here + siz = nums:sz-1; + break; + end +end + +% 2nd order functions + +% *** use of symmetricity could reduce time by a factor of 2 +found = false; +for sz = siz + for mul1 = muls + for mul2 = muls + cc = reshape(tsum(mul1*c,mul2*c,[1 2],[1 3]),nums,[]); + cc = mod(cc,sz); + ii = all(fsparse(cc+1,1:slen^2,1) <= 1,1); + if any(ii) + [i1,i2] = ind2sub([slen slen],find(ii)); + ix = i1 <= i2; + if any(ix) + i1 = i1(ix); i2 = i2(ix); + for i = 1:numel(i1) + j = (i2(i)-1)*slen+i1(i); + str = ['typedef enum {' frepmat('%s = %d,',[1 size(c,1)])]; + str = [str(1:end-1) '} PROP;\n']; + [foo,is] = sort(cc(:,j)); + t = [S(is); ... + mat2cell(cc(is,j)',1,ones(1,size(c,1)))]; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,j)+1) = {'&get%s,'}; + t = cell2mat(t); + str = ['static const getfun_t getval[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = S; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,j)+1) = {'&%s,'}; + t = cell2mat(t); + str = ['void *val[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = S; + hash = [hash sprintf(str,t{:})]; + + str = ['static const char *tab[] = {' frepmat('"%s",',[1 sz])]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = s; + hash = [hash sprintf(str,t{:})]; + hash = [hash ... + sprintf(['hash(str) = ' ... + '((str[%d] * %d)+(str[%d] * %d)) %% %d;\n\n'], ... + i1(i)-1,mul1,i2(i)-1,mul2,sz)]; + end + siz = nums:sz-1; + found = true; + break; + end + end + end + if found, break; end + end + if found, break; end +end + +% 3rd order functions + +% *** use of symmetricity could reduce time by a factor of 6 +for sz = siz + for mul1 = muls + for mul2 = muls + for mul3 = muls + cc = reshape(tsum(mul1*c,mul2*c,[1 2],[1 3]),nums,[]); + cc = reshape(tsum(cc,mul3*c,[1 2],[1 3]),nums,[]); + cc = mod(cc,sz); + ii = all(fsparse(cc+1,1:slen^3,1) <= 1,1); + if any(ii) + [i1,i2,i3] = ind2sub([slen slen slen],find(ii)); + ix = i1 <= i2 & i2 <= i3; + if any(ix) + i1 = i1(ix); i2 = i2(ix); i3 = i3(ix); + for i = 1:numel(i1) + j = ((i3(i)-1)*slen+(i2(i)-1))*slen+i1(i); + str = ['typedef enum {' frepmat('%s = %d,',[1 size(c,1)])]; + str = [str(1:end-1) '} PROP;\n']; + [foo,is] = sort(cc(:,j)); + t = [S(is); mat2cell(cc(is,j)',1,ones(1,size(c,1)))]; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,j)+1) = {'&get%s,'}; + t = cell2mat(t); + str = ['static const getfun_t getval[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = S; + hash = [hash sprintf(str,t{:})]; + + t = frepmat({'NULL%s,'},[1 sz]); + t(cc(:,j)+1) = {'&%s,'}; + t = cell2mat(t); + str = ['void *val[] = {' t]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = S; + hash = [hash sprintf(str,t{:})]; + + str = ['static const char *tab[] = {' frepmat('"%s",',[1 sz])]; + str = [str(1:end-1) '};\n']; + t = frepmat({''},[1 sz]); + t(cc(:,j)+1) = s; + hash = [hash sprintf(str,t{:})]; + + hash = [hash ... + sprintf(['hash(str) = ' ... + '((str[%d] * %d)+(str[%d] * %d)+(str[%d] * %d)) ' ... + '%% %d;\n'], ... + i1(i)-1,mul1,i2(i)-1,mul2,i3(i)-1,mul3,sz)]; + end + return; + end + end + end + end + end +end diff --git a/stenglib/Utils/report.m b/stenglib/Utils/report.m new file mode 100644 index 00000000..4c1c8ace --- /dev/null +++ b/stenglib/Utils/report.m @@ -0,0 +1,98 @@ +function status = report(t,title,s,varargin) +%REPORT Report progress of solver. +% The function REPORT is designed to report progress of any type of solver +% and for Matlab's ODE-solvers in particular. +% +% Only one reporter may be active at any given time. Use WAITBAR +% directly to achieve multiple reporters. +% +% STATUS = REPORT([T0 Tend],TITLE,'init',...) sets up the progress +% reporter. T0 is the initial time/initial accuracy and Tend the final +% time/requested accuracy. The character array TITLE is the title of the +% reporter; if TITLE is a non-character array or empty, the default used +% is 'Solution progress'. This somewhat singular behavior is due to +% considerations of compatibility with Matlab's ODE-solvers. +% +% STATUS = REPORT([T0 Tend],'timeleft','init',...) continuously +% measures time and reports an estimate of the time until +% completion. +% +% STATUS = REPORT([],[],'none') causes the reporter to be inactive until +% reinitialized again. This is useful for avoiding if-clauses before +% calling REPORT. +% +% STATUS = REPORT(T,[],'',...) reports the progress T, where T0 <= T <= +% Tend. +% +% STATUS = REPORT([],[],'',...) relies on the 'timeleft'-syntax and +% updates the estimated time without computing a new estimate. This +% is useful when the number of tasks are limited, but each task +% takes a considerable amount of time. +% +% STATUS = REPORT([],[],'done',...) finishes the reporter. +% +% STATUS = 0 is returned in all syntaxes. +% +% See also WAITBAR. + +% S. Engblom 2009-03-06 (Minor revision) +% S. Engblom 2007-04-05 + +persistent T0 Tend percent hwait t0 est lastupdate; + +if isempty(s) && ~isempty(hwait) + if isempty(t) + if ~isempty(est) && ~isempty(lastupdate) + % blind update (relying on est and lastupdate being properly initiated) + est = est-toc(lastupdate); + hrs = floor(est/3600); + mins = floor(est/60)-60*hrs; + waitbar(percent/100,hwait, ... + sprintf('Estimated time left: %d hour(s), %d minute(s).', ... + hrs,mins)); + lastupdate = tic; + end + % call ignored otherwise + else + % main use + now = round((t(end)-T0)/(Tend-T0)*100); + if now > percent + percent = now; + if isempty(t0) || percent < eps + waitbar(percent/100,hwait); + else + t1 = toc(t0); + est = (100-percent)/percent*t1; + hrs = floor(est/3600); + mins = floor(est/60)-60*hrs; + waitbar(percent/100,hwait, ... + sprintf('Estimated time left: %d hour(s), %d minute(s).', ... + hrs,mins)); + end + end + lastupdate = tic; + end +elseif strcmp(s,'init') + T0 = t(1); + Tend = t(end); + t0 = []; + est = []; + lastupdate = []; + percent = 0; + try, close(hwait); catch, end + if isempty(title) || ~isa(title,'char') + title = 'Solution progress'; + elseif strcmp(title,'timeleft') + title = 'Estimated time left: '; + t0 = tic; + end + hwait = waitbar(0.0,title); +else % 'none' and 'done' empties hwait + try, close(hwait); catch, end + hwait = []; + t0 = []; + est = []; + lastupdate = []; +end + +status = 0; diff --git a/stenglib/Utils/rmtilde.m b/stenglib/Utils/rmtilde.m new file mode 100644 index 00000000..8e64d09b --- /dev/null +++ b/stenglib/Utils/rmtilde.m @@ -0,0 +1,51 @@ +function nrem = rmtilde(wd) +%RMTILDE Remove files ending with a tilde ('~'). +% NREM = RMTILDE Recursively removes all files with names ending +% with a tilde, starting from the current directory. The total +% number of removed files is returned. +% +% NREM = RMTILDE(WD) does the same thing, but starts from the +% directory WD instead. +% +% In any of the syntaxes, filenames or directories starting with a +% period are left unaffected. +% +% Cautionary: the function is recursive and may run slowly in +% certain situations. +% +% See also DELETE, RECYCLE, RMDIR. + +% S. Engblom 2005-06-16 + +% remember callers working directory +cwd = pwd; +nrem = 0; + +% input +if nargin == 0, wd = cwd; end + +% change working directory +cd(wd); + +% fetch all files/directories +d = dir; + +% loop over all of them +for i = 1:size(d,1) + % avoid all names starting with a period + if d(i).name(1) ~= '.' + if d(i).isdir + % directory: recursive call + fprintf(1,'Searching directory %s...\n',d(i).name); + nrem = nrem+rmtilde(d(i).name); + elseif d(i).name(end) == '~' + % file ending with '~': remove it + delete(d(i).name); + nrem = nrem+1; + fprintf(1,'Removed file: %s.\n',d(i).name); + end + end +end + +% switch back to callers directory +cd(cwd); diff --git a/stenglib/Utils/runtest.m b/stenglib/Utils/runtest.m new file mode 100644 index 00000000..839035e9 --- /dev/null +++ b/stenglib/Utils/runtest.m @@ -0,0 +1,60 @@ +function allok = runtest(idstr,funs,funnames,varargin) +%RUNTEST General test facility. +% OK = RUNTEST(IDSTR,FUNS,FUNNAMES,...) calls the functions in the +% cell-vector FUNS, one by one, and displays appropriate +% messages. FUNNAMES is a cell-vector with corresponding names of +% the functions and IDSTR is an identifying string which appears +% first in all messages produces by RUNTEST (use empty to supress +% all such messages). Additional arguments, if any, are passed to +% the functions. +% +% Each function in FUNS should be written so that it returns 1 for +% 'passed' and zero for 'failed'. +% +% Example: +% runtest('Medium tests (myfun)', ... +% {@test1 @test2},{'Test #1' 'Test #2'}); + +% S. Engblom 2013-12-03 (empty IDSTR, output OK) +% S. Engblom 2012-05-25 (Additional arguments) +% S. Engblom 2004-10-29 + +% input +ntest = size(funs,2); +if size(funnames,2) ~= ntest, error('Input does not match.'); end + +% run tests +nf = 0; +allok = 1; +for i = 1:size(funs,2) + try + ok = feval(funs{i},varargin{:}); + allok = allok && ok; + if ~ok + if ~isempty(idstr) + fprintf(1,'%s: %s failed.\n',idstr,funnames{i}); + end + nf = nf+1; + else + if ~isempty(idstr) + fprintf(1,'%s: %s passed.\n',idstr,funnames{i}); + end + end + catch + allok = 0; + if ~isempty(idstr) + fprintf(1,'%s: Error caught in %s.\n%s\n', ... + idstr,funnames{i},lasterr); + end + nf = nf+1; + end +end + +% concluding diagnostics +if ~isempty(idstr) + if nf == 0 + fprintf(1,'%s passed: %d test(s).\n',idstr,ntest); + else + fprintf(1,'%s failed: %d test(s) passed, %d failed.\n',idstr,ntest-nf,nf); + end +end diff --git a/stenglib/Utils/spblock.m b/stenglib/Utils/spblock.m new file mode 100644 index 00000000..d8299f2e --- /dev/null +++ b/stenglib/Utils/spblock.m @@ -0,0 +1,88 @@ +function S = spblock(i,j,v,mn) +%SPBLOCK Sparse matrix from blocks. +% S = SPBLOCK(I,J,V,[M N]) assembles an M-by-N sparse matrix S from +% the index vectors I and J and the cell vector V containing sparse +% matrices. The assembly is performed in blocks so that for each +% index k, the block V{k} is placed with the upper left corner in +% (I(k),J(k)) in S. Multiple indices are as usual summed together, +% see FSPARSE. Also, single index or blocks are automatically +% expanded so as to match the other inputs. +% +% S = SPBLOCK(I,J,V) does the same thing except that the size of the +% result is determined automatically as the extreme bottom right corner of +% any of the translated matrices in V. +% +% Example: +% A = sprand(3,3,0.5); +% S1 = spblock([1 4 7],[1 4 7],{A,-2*A,A}); +% S2 = kron([1 0 0; 0 -2 0; 0 0 1],A); % does the same thing +% +% % a different example +% B = sprand(3,2,0.8); +% S3 = spblock([1 1 4],[1 4 4],{A,B,A}); +% S4 = [A B sparse(3,1); sparse(3,3) A]; % ditto +% +% See also FSPARSE, FREPMAT, KRON, NDOP, CAT. + +% S. Engblom 2012-01-24 (Revision) +% S. Engblom 2007-01-29 (Minor revision) +% S. Engblom 2004-05-24 + +if nargin ~= 3 & nargin ~= 4 + error('spblock:e2','Unknown syntax.'); +end + +% normalize input +i = i(:)-1; isz = size(i,1); +j = j(:)-1; jsz = size(j,1); +v = v(:); vsz = size(v,1); +len = max([isz jsz vsz]); + +if isz ~= len + if isz ~= 1 + error('spblock:e1', ... + 'Index vectors must match the number of sparse blocks.'); + end + i = frepmat(i,len); +end +if jsz ~= len + if jsz ~= 1 + error('spblock:e1', ... + 'Index vectors must match the number of sparse blocks.'); + end + j = frepmat(j,len); +end +if vsz ~= len + if vsz ~= 1 + error('spblock:e3', ... + 'The sparse blocks must match the index vectors.'); + end + v = frepmat(v,len); +end + +% determine (an upper bound to) the number of nonzeros +nz = [0; cumsum(cellfun(@nnz,v))]; +sz = [cellfun('size',v,1) cellfun('size',v,2)]; +tnz = nz(end); + +% allocate indices and values +ii = zeros(tnz,1); +jj = zeros(tnz,1); +ss = zeros(tnz,1); + +% find on all blocks and build indices and values +for k = 1:len + inds = nz(k)+1:nz(k+1); + [ii(inds),jj(inds),ss(inds)] = find(v{k}); + ii(inds) = ii(inds)+i(k); + jj(inds) = jj(inds)+j(k); +end + +% the size of the resulting sparse matrix +if nargin < 4 + sz = sz+[i j]; + mn = max(sz); +end + +% assemble the result +S = fsparse(ii,jj,ss,mn); diff --git a/stenglib/Utils/stability.m b/stenglib/Utils/stability.m new file mode 100644 index 00000000..22ccc46d --- /dev/null +++ b/stenglib/Utils/stability.m @@ -0,0 +1,43 @@ +function h = stability(rho,sigma,varargin) +%STABILITY Plot of stability region for linear multistep method. +% STABILITY(RHO,SIGMA) plots the unit root boundary locus for the +% linear multistep method defined by the polynomials RHO and +% SIGMA. For the ODE Y'= F(t,Y) the method is given by RHO(E)*Y(n) = +% h*SIGMA(E)*F(t(n),Y(n)), where E is the forward shift operator. +% +% STABILITY(RHO,SIGMA,N) uses N points on the unit circle, with N = +% 200 the default. +% +% H = STABILITY(RHO,SIGMA,...) returns a handle H to the plot and +% sends any additional arguments to PLOT. +% +% Examples: +% % explicit/implicit Euler +% figure, hold on, +% stability([1 -1],[0 1],'r'); +% stability([1 -1],[1 0],'r--'); +% +% % BDF2 +% stability([3/2 -2 1/2],[1 0 0],'b--'); +% axis equal, hold off, +% legend('Euler Fwd','Euler Bwd','BDF2'); +% +% See also CONSISTENCY, PLOT, POLYVAL. + +% S. Engblom 2010-01-30 (Minor modifications) +% G. Söderlind 2010-01-29 + +% straightforward +if nargin < 3 || ~isnumeric(varargin{1}) + N = 200; +else + N = varargin{1}; + varargin = varargin(2:end); +end +z = exp(1i*linspace(0,2*pi,N+1)); +y = polyval(rho,z)./polyval(sigma,z); +if nargout > 0 + h = plot(real(y),imag(y),varargin{:}); +else + plot(real(y),imag(y),varargin{:}); +end diff --git a/stenglib/Utils/startup.m b/stenglib/Utils/startup.m new file mode 100644 index 00000000..60322f52 --- /dev/null +++ b/stenglib/Utils/startup.m @@ -0,0 +1,10 @@ +%STARTUP Add paths to UTILS stuff. + +% S. Engblom 2010-02-10 (Revision) +% S. Engblom 2005-03-22 + +s = pwd; +if exist('test','dir') + addpath([s '/test']); +end + diff --git a/stenglib/make_all.m b/stenglib/make_all.m new file mode 100644 index 00000000..9dcd4ca9 --- /dev/null +++ b/stenglib/make_all.m @@ -0,0 +1,13 @@ +%MAKE_ALL Make for all of stenglib/. + +% S. Engblom 2010-09-23 + +cd Tensor +startup +make + +cd ../Fast +startup +make + +cd .. \ No newline at end of file diff --git a/stenglib/startup.m b/stenglib/startup.m new file mode 100644 index 00000000..e5932207 --- /dev/null +++ b/stenglib/startup.m @@ -0,0 +1,10 @@ +%STARTUP Add paths to stenglib/. + +% S. Engblom 2005-03-22 + +s = pwd; +addpath([s '/Tensor']); +addpath([s '/Fast']); +addpath([s '/Utils']); +addpath([s '/Scicomp']); +addpath([s '/Misc']); diff --git a/urdme/src/nsm/mexmake_nsm.m b/urdme/src/nsm/mexmake_nsm.m index dd05abcb..120c6ba0 100755 --- a/urdme/src/nsm/mexmake_nsm.m +++ b/urdme/src/nsm/mexmake_nsm.m @@ -10,6 +10,7 @@ function mexmake_nsm(propensity_file,~) % global defines, if any define = []; +definew = []; % path = location of this make path = mfilename('fullpath'); @@ -35,6 +36,7 @@ function mexmake_nsm(propensity_file,~) [path '../inline.c'] ... [path '../report.c']}; define = [define '-DMALLOC\(n\)=mxMalloc\(n\) -DFREE\(p\)=mxFree\(p\)']; +definew = [definew '-D"MALLOC(n)=mxMalloc(n)" -D"FREE(p)=mxFree(p)" -D"srand48(k)=srand(k)" -D"drand48()=rand()"']; % mex extension mx = mexext; @@ -51,6 +53,11 @@ function mexmake_nsm(propensity_file,~) cflags = 'CFLAGS= -std=c99 '; mex('-silent','-largeArrayDims',[cflags define], ... include{:},link{:},source{:}); +elseif strcmp(mx,'mexw64') + cflags = ['CFLAGS=-fPIC -fno-omit-frame-pointer -std=c99 -O3 ' ... + '-D_GNU_SOURCE -pthread -fexceptions ']; + mex('-v','-largeArrayDims',[cflags definew], ... % [mexflags define], + include{:},link{:},source{:}); else error(['Platform not yet supported. Your MEX file extension is ' ... mx '. Please edit mexmake_nsm.m to allow for this extension.']); diff --git a/urdme/src/nsm/nsm.c b/urdme/src/nsm/nsm.c index c3c7e5c9..7b0fffdd 100644 --- a/urdme/src/nsm/nsm.c +++ b/urdme/src/nsm/nsm.c @@ -139,7 +139,7 @@ The output is a matrix U (Ndofs X length(tspan)). { double tt; double rdelta,rrdelta; - double rand,cum,old; + double randd,cum,old; double *srrate,*rrate; double *sdrate,*Ddiag; double *rtimes; @@ -259,16 +259,16 @@ The output is a matrix U (Ndofs X length(tspan)). /* First check if it is a reaction or a diffusion event. */ totrate = srrate[subvol]+sdrate[subvol]; - rand = drand48(); + randd = drand48(); - if (rand*totrate <= srrate[subvol]) { + if (randd*totrate <= srrate[subvol]) { /* Reaction event. */ event = -1; /* a) Determine the reaction re that did occur (direct SSA). */ - rand *= totrate; + randd *= totrate; for (re = 0, cum = rrate[subvol*Mreactions]; - re < Mreactions && rand > cum; + re < Mreactions && randd > cum; re++, cum += rrate[subvol*Mreactions+re]); /* elaborate floating point fix: */ @@ -319,10 +319,10 @@ The output is a matrix U (Ndofs X length(tspan)). event = 1; /* a) Determine which species... */ - rand *= totrate; - rand -= srrate[subvol]; + randd *= totrate; + randd -= srrate[subvol]; for (spec = 0, dof = subvol*Mspecies, cum = Ddiag[dof]*xx[dof]; - spec < Mspecies && rand > cum; + spec < Mspecies && randd > cum; spec++, cum += Ddiag[dof+spec]*xx[dof+spec]); /* elaborate floating point fix: */ @@ -344,11 +344,11 @@ The output is a matrix U (Ndofs X length(tspan)). /* b) and then the direction of diffusion. */ col = dof+spec; - rand = drand48()*Ddiag[col]; + randd = drand48()*Ddiag[col]; /* Search for diffusion direction. */ for (i = jcD[col], cum = 0.0; i < jcD[col+1]; i++) - if (irD[i] != col && (cum += prD[i]) > rand) break; + if (irD[i] != col && (cum += prD[i]) > randd) break; /* simple floating point fix: */ if (i >= jcD[col+1]) i = jcD[col+1]-1; diff --git a/workflows/DLCM/experiments/1_basic_test/basic_test.m b/workflows/DLCM/experiments/1_basic_test/basic_test.m index bef77ede..c713a58e 100644 --- a/workflows/DLCM/experiments/1_basic_test/basic_test.m +++ b/workflows/DLCM/experiments/1_basic_test/basic_test.m @@ -47,6 +47,8 @@ % diffusive pressure rate Drate = 1; +mesh_type = 1 + % fetch discretization (mesh_type = 1 or 2) if ~exist('mesh_type','var'), error('Must define mesh_type.'); end [P,E,T,gradquotient] = basic_mesh(mesh_type,Nvoxels); diff --git a/workflows/DLCM/experiments/1_basic_test/growth.m b/workflows/DLCM/experiments/1_basic_test/growth.m index 0d8b8034..94c0c9ed 100644 --- a/workflows/DLCM/experiments/1_basic_test/growth.m +++ b/workflows/DLCM/experiments/1_basic_test/growth.m @@ -72,6 +72,17 @@ % $$$ patch('Faces',R(jj,:),'Vertices',V,'FaceColor',[1 0 0]); % $$$ drawnow; + % visualization (somewhat slow) + figure(1), clf, + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9]); + hold on, + axis([-1 1 -1 1]); axis square, axis off + ii = find(U == 1); + patch('Faces',R(ii,:),'Vertices',V,'FaceColor',[0 1 0]); + jj = find(U > 1); + patch('Faces',R(jj,:),'Vertices',V,'FaceColor',[1 0 0]); + drawnow; + % classify the DOFs adof = find(U); diff --git a/workflows/DLCM/experiments/1_basic_test/mechanics.m b/workflows/DLCM/experiments/1_basic_test/mechanics.m index ca0690ae..76778746 100644 --- a/workflows/DLCM/experiments/1_basic_test/mechanics.m +++ b/workflows/DLCM/experiments/1_basic_test/mechanics.m @@ -33,6 +33,8 @@ % BC1 >= BC2 is the only thing that makes sense. And normalizing one % of them to 0 sets the pressure unit. +mesh_type = 1 + % fetch discretization (mesh_type = 1 or 2) if ~exist('mesh_type','var'), error('Must define mesh_type.'); end [P,E,T,gradquotient] = basic_mesh(mesh_type,Nvoxels); @@ -59,7 +61,6 @@ U(ii) = 0; % an empty bottom strip % classify the DOFs - neigh = full(sum(N,2)); adof = find(U); bdof_m = find(N*(U > 0) < neigh & U == 1); @@ -114,10 +115,10 @@ numel(sdof_m)); moves = full(gradquotient*grad); -% $$$ % pressure plot -% $$$ figure(3), clf, -% $$$ Pr_ = full(U); Pr_(Adof) = Pr; -% $$$ trisurf(T(1:3,:)',P(1,:),P(2,:),Pr_); +% pressure plot +figure(3), clf, +Pr_ = full(U); Pr_(Adof) = Pr; +trisurf(T(1:3,:)',P(1,:),P(2,:),Pr_); % visualize rates figure(mesh_type), clf, diff --git a/workflows/DLCM/experiments/1_basic_test/schematics.m b/workflows/DLCM/experiments/1_basic_test/schematics.m index 86a8b284..cfc7233b 100644 --- a/workflows/DLCM/experiments/1_basic_test/schematics.m +++ b/workflows/DLCM/experiments/1_basic_test/schematics.m @@ -12,6 +12,8 @@ % diffusive pressure rate Drate = 1; +mesh_type = 1; + % fetch discretization (mesh_type = 1 or 2) if ~exist('mesh_type','var'), error('Must define mesh_type.'); end [P,E,T,gradquotient] = basic_mesh(mesh_type,Nvoxels); @@ -42,6 +44,7 @@ 'FaceColor',graphics_color('vermillion')); % "The Method" +neigh = full(sum(N,2)); adof = find(U); bdof_m = find(N*(U > 0) < neigh & U == 1); sdof = find(U > 1); diff --git a/workflows/DLCM/experiments/2_avascular_tumour/AvascularTumour.m b/workflows/DLCM/experiments/2_avascular_tumour/AvascularTumour.m index 8dbf8652..0d746152 100644 --- a/workflows/DLCM/experiments/2_avascular_tumour/AvascularTumour.m +++ b/workflows/DLCM/experiments/2_avascular_tumour/AvascularTumour.m @@ -20,9 +20,9 @@ % S. Engblom 2017-02-11 % simulation interval -Tend = 1000; +Tend = 11; tspan = linspace(0,Tend,101); -report(tspan,'timeleft','init'); % (this estimator gets seriously confused!) +%report(tspan,'timeleft','init'); % (this estimator gets seriously confused!) % The user specified cutoff and rate parameters for the proliferation, % death, degradation and consumption rules. @@ -264,10 +264,9 @@ % update the visited sites VU = VU | U; end - report(tt,U,'done'); -return; +% return; % create a GIF animation @@ -280,14 +279,19 @@ hold on, axis([-1 1 -1 1]); axis square, axis off ii = find(Usave{i} == 1); - patch('Faces',R(ii,:),'Vertices',V, ... + single = patch('Faces',R(ii,:),'Vertices',V, ... 'FaceColor',graphics_color('bluish green')); + ii = find(Usave{i} == 2); - patch('Faces',R(ii,:),'Vertices',V, ... + double = patch('Faces',R(ii,:),'Vertices',V, ... 'FaceColor',graphics_color('vermillion')); + ii = find(Usave{i} == -1); - patch('Faces',R(ii,:),'Vertices',V, ... + dead = patch('Faces',R(ii,:),'Vertices',V, ... 'FaceColor',[0 0 0]); + + legend([single, double, dead],'single','double', 'dead'); + title(sprintf('Time = %d, Ncells = %d',tspan(i),full(sum(abs(Usave{i}))))); drawnow; M(i) = getframe(gcf); @@ -313,6 +317,7 @@ ylim([0 max(y)]); xlabel('time') ylabel('N cells') +legend('total', 'dead','double','single'); return; diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/AvascularTumour.m b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/AvascularTumour.m new file mode 100644 index 00000000..84b55858 --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/AvascularTumour.m @@ -0,0 +1,334 @@ +% Simulation of an avascular tumour model. +% +% Avascular tumour growth: An initial circular population of cells (one +% per voxel) lie in a domain rich in oxygen. Cells consume oxygen at +% a constant rate, cons. Cells occupying a voxel with oxygen above +% cutoff_prol proliferate at a rate r_prol. Cells occupying +% voxels with an oxygen concentration below cutoff_die die at a +% rate r_die. Dead cells degrade and stop occupying space at a +% rate r_degrade. Voxels with cell concentrations above 1 are +% over-occupied and cause pressure, which will cause movement of cells. + +% M.C. Jayaweera & A. Graf Brolund 2021-01(revision) +% S. Engblom 2017-12-27 (revision) +% D. B. Wilson 2017-09-05 +% S. Engblom 2017-02-11 + +clear; +clc; +close all; + +%% Initial experiment setup +% cells live in a square of Nvoxels-by-Nvoxels +Nvoxels = 121; % odd so the BC for oxygen can by centered + +% fetch Cartesian discretization +[P,E,T,gradquotient] = basic_mesh(1,Nvoxels); +[V,R] = mesh2dual(P,E,T,'voronoi'); + +D = 1; % D_rate, the rate with which cells move in the domain. + % currently the rate is the same for visited voxels and non-visited + +% simulation interval +Tend = 101; % final time step +tspan = linspace(0,Tend,101); +timescaling=0.005; % time scaling + +% report(tspan,'timeleft','init'); %(this estimator gets seriously confused!) + +% the user specified cutoff and rate parameters for the proliferation, +% death, degradation and consumption rules. +cons = 0.0015; % consumption of oxygen by cells +cutoff_prol = 0.65; % the minimum amount of oxygen for proliferation +r_prol = 0.125; % rate of proliferation of singly occupied voxels +cutoff_die = 0.55; % the maximum amount of oxygen where cells can die +r_die = 0.125; % rate of death +r_degrade = 0.01; % rate of degradation for already dead cells +cutoff_bdof = 0.1; % lower bound for bdof +cutoff_remain = 0.01; % the minimum amount of living cells in a voxel +cutoff_deg = 0.0001; % the minimum amount of dead cells in a voxel + +% initial population: circular blob of living cells +start_value = 1; % cell concentrations in the initial blob +radius = 0.05; +r = sqrt(P(1,:).^2+P(2,:).^2); +ii = find(r < radius); % radius of the initial blob +U = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); % intialize +U_new = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); +U_dead = fsparse(ii(:),1,0,[Nvoxels^2 1]); +U_deadnew = fsparse(ii(:),1,0,[Nvoxels^2 1]); + +% boundary conditions +OBC1 = 0; % BC for the oxygen equation for unvisited boundary +OBC2 = 0; % BC for the visited boundary + +% assemble minus the Laplacian on this grid (ignoring BCs), the voxel +% volume vector, and the sparse neighbor matrix +[L,dM,N] = dt_operators(P,T); %N gives the neighbours +neigh = full(sum(N,2)); + +N_vec = zeros(size(N,1),4); % neighbour matrix used to find sdof_m +for k = 1:size(N,1) + temp = find(N(k,:)); + if length(temp) == 2 + temp = [temp, temp(1),temp(2)]; + elseif length(temp) == 3 + temp = [temp, temp(1)]; + end + N_vec(k,:) = temp; +end + +% dofs for the sources at the extreme outer circular boundary +[xc,yc] = getmidpointcircle(1/2*(Nvoxels+1),1/2*(Nvoxels+1),1/2*(Nvoxels-1)); +irem = find(xc < 1 | yc < 1 | xc > Nvoxels | yc > Nvoxels); +xc(irem) = []; +yc(irem) = []; +extdof = find(sparse(xc,yc,1,Nvoxels,Nvoxels)); + +% visit marker matrix: 1 for voxels who have been occupied +VU = (U ~= 0); + +% representation of solution: cell-vector of sparse matrices +Usave = cell(1,numel(tspan)); +Usave{1} = U; +Udsave = cell(1,numel(tspan)); +Udsave{1} = U_dead; + +% keeps track of oxygen and dofs for figures and understanding +Oxysave = cell(1,numel(tspan)); +bdofsave = cell(1,numel(tspan)); +sdofsave = cell(1,numel(tspan)); +sdofbsave = cell(1,numel(tspan)); + +tt = tspan(1); +i = 1; + +La = struct('X',0,'L',0,'U',0,'p',0,'q',0,'R',0); +OLa = struct('X',0,'L',0,'U',0,'p',0,'q',0,'R',0); + +% oxygen Laplacian +OLa.X = L; +OLai = fsparse(extdof,extdof,1,size(OLa.X)); +OLa.X = OLa.X-OLai*OLa.X+OLai; + +[OLa.L,OLa.U,OLa.p,OLa.q,OLa.R] = lu(OLa.X,'vector'); + + +%% Time loop +while tt <= tspan(end) + %% Initialise U and U_dead and classify the DOFs + U = U_new; + U_dead = U_deadnew; + U_and_U_dead = U | U_dead; + + adof = find(U_and_U_dead); % all filled voxels + % singularly occupied voxels on the boundary: + bdof_m = find(N*(U_and_U_dead ~= 0) < neigh & (U > cutoff_bdof & ... + U <= 1)); + sdof = find(U > 1); % sdof on the boundary + sdof_b = find(N*(U_and_U_dead ~=0) < neigh & (U > 1)); + % voxels with more than concentration 1 in them which may move, + % with a voxel containing less number of cells next to it: + sdof_m = find(U - min(U(N_vec),[],2) > 0 & U>1); + % empty voxels touching occupied ones + Idof = (N*(U_and_U_dead ~= 0) > 0 & U_and_U_dead == 0); + idof1 = find(Idof & ~VU); % "external" OBC1 + idof2 = find(Idof & VU); % "internal" OBC2 + idof = find(Idof); + ddof = find(U_dead > 0); % degrading voxels + + % "All DOFs" = adof + idof, like the "hull of adof" + Adof = [adof; idof]; + % The above will be enumerated within U, a Nvoxels^2-by-1 sparse + % matrix. Determine also a local enumeration, eg. [1 2 3 + % ... numel(Adof)]. + + Adof_ = (1:numel(Adof))'; + [bdof_m_,sdof_,sdof_m_,idof1_,idof2_,idof_,adof_, sdof_b_,ddof_] = ... + map(Adof_,Adof,bdof_m,sdof,sdof_m,idof1,idof2,idof,adof,sdof_b,ddof); + + %% Calculate Pressure and Oxygen systems + + % pressure Laplacian + La.X = L(Adof,Adof); + % remove emtpy voxels touching occupied ones + Lai = fsparse(idof_,idof_,1,size(La.X)); + La.X = La.X-Lai*La.X+Lai; + [La.L,La.U,La.p,La.q,La.R] = lu(La.X,'vector'); + + % RHS source term proportional to the over-occupancy and BCs + Pr = full(fsparse(sdof_,1,(U(sdof)-1)./dM(sdof),...% equilibrium at U=1 + [size(La.X,1) 1])); % RHS first... + Pr(La.q) = La.U\(La.L\(La.R(:,La.p)\Pr)); % ..then the solution + + % RHS source term proportional to the over-occupancy and BCs + Oxy = full(fsparse([extdof; adof],1, ... + [ones(size(extdof)); ... + -cons*full(U(adof)./dM(adof))], ... + [size(OLa.X,1) 1])); + Oxy(OLa.q) = OLa.U\(OLa.L\(OLa.R(:,OLa.p)\Oxy)); + + %% Movement calculations + + % movement of cells in sources, sdof_m + rates_sdof = zeros(length(Adof),1); + [ii,jj_] = find(N(sdof_m,Adof)); % neighbours + % pressure difference between cells and its neighbours + Pr_diff__ = max(Pr(sdof_m_(ii))-Pr(jj_),0); + grad_sdof = fsparse(ii,1,Pr_diff__*D, numel(sdof_m)); + rates_sdof(sdof_m_) = -gradquotient*grad_sdof; % sources lose cells + grad_N = fsparse(jj_,1, Pr_diff__*D, numel(Adof)); + rates_sdof = rates_sdof + gradquotient*grad_N; % neighbours gain cells + + % movement of cells on the boundary, not over-occupied, bdof_m + % this loop is similar to what is done above for sdof and can be + % rewritten in a similar fashion. Since this calculation is less + % common, it is not a big concern for efficiency + rates_bdof = zeros(length(Adof),1); + for ind=1:length(bdof_m_) + ix = bdof_m(ind); + ix_ = bdof_m_(ind); + + jx_ = find(N(ix,Adof)); % neighbours to the bdof + jx_ = jx_(U_and_U_dead(Adof(jx_)) == 0); % empty neighbours + Pr_diff = max(Pr(ix_)-Pr(jx_),0); % pressure difference + + rates_bdof(ix_) = -sum(D*Pr_diff); % loses cells + rates_bdof(jx_) = rates_bdof(jx_) + D*Pr_diff; % gain cells + end + + + %% Change calculation + %Change calculation of proliferation, death and degradation + + % proliferation + ind_prol = find((Oxy > cutoff_prol)); % index of proliferating cells + prol_conc = r_prol*U(ind_prol); % concentration born in each time step + + % death + ind_die = find(Oxy < cutoff_die); % index of dying cells + dead_conc = r_die*U(ind_die); % concentration dying in each time step + + % degradation + degrade_conc = U_deadnew(ddof)*r_degrade; % degraded in each time step + +%% Calculate timestep dt + + ind_rates_sdof_n = find(rates_sdof(sdof_m_)<0); % affected voxels + ind_rates_bdof_n = find(rates_bdof(bdof_m_)<0); + + % find the largest possible time step while avoiding U<0 + dt_death = U_new(ind_die)./(dead_conc); + dt_sdof = U_new(sdof_m(ind_rates_sdof_n))./ ... + (-rates_sdof(sdof_m_(ind_rates_sdof_n))); + dt_bdof = U_new(bdof_m(ind_rates_bdof_n))./ ... + (-rates_bdof(bdof_m_(ind_rates_bdof_n))); + + dt_unscaled = (min([dt_death; dt_sdof; dt_bdof;(0.1*Tend)])); + dt = dt_unscaled*timescaling; % scale dt smaller + + %% Report back and save time series of current states + if tspan(i+1) < tt+dt + iend = i+find(tspan(i+1:end) < tt+dt,1,'last'); + + % save relevant values + Usave(i+1:iend) = {U}; + Udsave(i+1:iend) = {U_dead}; + + Oxysave(i+1:iend) = {Oxy}; + bdofsave(i+1:iend) = {bdof_m}; + sdofsave(i+1:iend) = {sdof_m}; + sdofbsave{i+1:iend} = {sdof_b}; + + i = iend; + end + + %% Euler forward step + + %Proliferation + U_new(ind_prol)=U_new(ind_prol)+prol_conc*dt; + + %Death + U_new(ind_die) = U_new(ind_die) - dead_conc*dt; + U_deadnew(ind_die) = U_deadnew(ind_die) + dead_conc*dt; + ind_cutoff = find(U_new < cutoff_remain & (Oxy < cutoff_die)); + U_new(ind_cutoff) = 0; % remove cells below cutoff_remain + + % Degradation + U_deadnew(ddof) = U_deadnew(ddof) - degrade_conc*dt; + U_deadnew(U_deadnew < cutoff_deg) = 0; % remove cells below cutoff_deg + + % movement of cells in sources, sdof_m + U_new(Adof) = U_new(Adof) + rates_sdof.*dt; + + % movement of cells in boundary voxels, bdof_m + U_new(Adof) = U_new(Adof) + rates_bdof*dt; + + + %% Step in time + tt = tt+dt; +% report(tt,U,''); + + % update the visited sites + VU = VU | U_new; +end + +%report(tt,U,'done'); + +% return; + +%% Create a GIF-animation + +% create a GIF animation +Tumour = struct('cdata',{},'colormap',{}); +fig=figure(1); +clf, + +Umat=full(cell2mat(Usave)); +colorbar; +caxis([0 max(max(Umat))]) +colorlabel('Concentration of cells, U') + +for i = 1:numel(Usave) + % background + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % colour living voxels after concentration level + ii = find(Usave{i}>0); + c = Umat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c, ... + 'FaceColor','flat'); + + % colour (fully) dead voxels black + ii = find(Usave{i} == 0 & Udsave{i} > 0); + p_dead = patch('Faces',R(ii,:),'Vertices',V, ... + 'FaceColor',[0 0 0]); + %legend(p_dead,'dead') + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Tumour(i) = getframe(gcf); + + % saves the GIF + movie2gif(Tumour,{Tumour.cdata},'Tumour.gif', ... + 'delaytime',0.1,'loopcount',0); + +end + +%% SAVE DATA +saveData = struct('U', {U}, 'VU', {VU}, 'Usave', {Udsave}, 'tspan', {tspan}, ... + 'R', {R}, 'V', {V}, 'N', {N}, 'Udsave', {Udsave}, ... + 'Pr', {Pr}, 'Adof', {Adof},'adof', {adof}, 'adof_', {adof_}, ... + 'idof', {idof},'idof_', {idof_}, 'Nvoxels',{Nvoxels}, ... + 'P', {P}, 'bdof_m', {bdof_m}, 'bdof_m_', {bdof_m_}, ... + 'sdof_m', {sdof_m},'sdof_m_', {sdof_m_}, 'gradquotient', {gradquotient}, ... + 'Tend', {Tend}); +filename = "Data"; +filename_saveData = filename + ".mat"; +save(filename_saveData,'-struct','saveData'); + + diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_Relaxation.m b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_Relaxation.m new file mode 100644 index 00000000..5cfa961e --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_Relaxation.m @@ -0,0 +1,244 @@ +% Simulation of an avascular tumour model. +% +% Avascular tumour growth, relaxation experiment: An initial circular +% population of cells lie crowded together (concentration>1). When +% released, the pressure exerted between the cells will have them move +% out in the domain. +% +% No proliferation, death, or degradation is allowed. + +% C. Jayaweera & A. Graf Brolund 2021-01 (revision) +% S. Engblom 2017-12-27 (revision) +% D. B. Wilson 2017-09-05 +% S. Engblom 2017-02-11 + +clear; +clc; +close all; + +% cells live in a square of Nvoxels-by-Nvoxels +Nvoxels = 121; % odd so the BC for oxygen can by centered + +% fetch Cartesian discretization +[P,E,T,gradquotient] = basic_mesh(1,Nvoxels); %gradquotient=1 for... +[V,R] = mesh2dual(P,E,T,'voronoi'); %cartesian mesh + +D = 1; % D_rate, the rate with which cells move in the domain. + % currently the rate is the same for visited voxels and non-visited + +% simulation interval +Tend = 100; +tspan = linspace(0,Tend,101); +timescaling = 0.005; + +% initial population: circular blob of living cells +start_value = 10; % cell concentrations in the initial blob +radius = 0.07; +r = sqrt(P(1,:).^2+P(2,:).^2); +ii = find(r < radius); % radius of the initial blob +U = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); % initialize +U_new = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); +U_dead = fsparse(ii(:),1,0,[Nvoxels^2 1]); +U_deadnew = fsparse(ii(:),1,0,[Nvoxels^2 1]); + +% parameters +cutoff_bdof = 0.1; % lower bound for bdof + +% boundary conditions +OBC1 = 0; % BC for the oxygen equation for unvisited boundary +OBC2 = 0; % BC for the visited boundary + +% assemble minus the Laplacian on this grid (ignoring BCs), the voxel +% volume vector, and the sparse neighbor matrix +[L,dM,N] = dt_operators(P,T); %N gives the neighbours +neigh = full(sum(N,2)); + +N_vec = zeros(size(N,1),4); % neighbour matrix used to find sdof_m +for k = 1:size(N,1) + temp = find(N(k,:)); + if length(temp) == 2 + temp = [temp, temp(1),temp(2)]; + elseif length(temp) == 3 + temp = [temp, temp(1)]; + end + N_vec(k,:) = temp; +end + +% visit marker matrix: 1 for voxels who have been occupied +VU = (U ~= 0); + +% representation of solution: cell-vector of sparse matrices +Usave = cell(1,numel(tspan)); +Usave{1} = U; +Udsave = cell(1,numel(tspan)); +Udsave{1} = U_dead; + +% keeps track of dofs for figures and understanding +bdofsave = cell(1,numel(tspan)); +sdofsave = cell(1,numel(tspan)); +sdofbsave = cell(1,numel(tspan)); + +tt = tspan(1); +i = 1; +La = struct('X',0,'L',0,'U',0,'p',0,'q',0,'R',0); + +while tt <= tspan(end) + U = U_new; + U_dead = U_deadnew; + + %% Init U and U_dead and classify the DOFs + U = U_new; + U_dead = U_deadnew; + U_and_U_dead = U | U_dead; + + %Classification of the DOFs + + adof = find(U_and_U_dead); % all filled voxels + % singularly occupied voxels on the boundary: + bdof_m = find(N*(U_and_U_dead ~= 0) < neigh & (U > cutoff_bdof & ... + U <= 1)); + sdof = find(U > 1); % source voxels,concentration more than 1 + % sdof on the boundary + sdof_b = find(N*(U_and_U_dead ~=0) < neigh & (U > 1)); + % voxels with more than concentration 1 in them which may move, + % with a voxel containing a lower concentrations next to it: + sdof_m = find(U - min(U(N_vec),[],2) > 0 & U>1); + % empty voxels touching occupied ones + Idof = (N*(U_and_U_dead ~= 0) > 0 & U_and_U_dead == 0); + idof1 = find(Idof & ~VU); % "external" OBC1 + idof2 = find(Idof & VU); % "internal" OBC2 + idof = find(Idof); + + % "All DOFs" = adof + idof, like the "hull of adof" + Adof = [adof; idof]; + % The above will be enumerated within U, a Nvoxels^2-by-1 sparse + % matrix. Determine also a local enumeration, eg. [1 2 3 + % ... numel(Adof)]. + + Adof_ = (1:numel(Adof))'; + [bdof_m_,sdof_,sdof_m_,idof1_,idof2_,idof_,adof_, sdof_b_] = ... + map(Adof_,Adof,bdof_m,sdof,sdof_m,idof1,idof2,idof,adof,sdof_b); + + %% Calculate Pressure + + % pressure Laplacian + La.X = L(Adof,Adof); + %remove emtpy voxels touching occupied ones + Lai = fsparse(idof_,idof_,1,size(La.X)); + La.X = La.X-Lai*La.X+Lai; + [La.L,La.U,La.p,La.q,La.R] = lu(La.X,'vector'); + + % RHS source term proportional to the over-occupancy and BCs + Pr = full(fsparse(sdof_,1,(U(sdof)-1)./dM(sdof),... %equilibrium at U=1 + [size(La.X,1) 1])); % RHS first... + Pr(La.q) = La.U\(La.L\(La.R(:,La.p)\Pr)); % ..then the solution + + %% Movement calculations + + % movement of cells in sources, sdof_m + rates_sdof = zeros(length(Adof),1); + [ii,jj_] = find(N(sdof_m,Adof)); % neighbours + % pressure difference between cells and its neighbours + Pr_diff__ = max(Pr(sdof_m_(ii))-Pr(jj_),0); + grad_sdof = fsparse(ii,1,Pr_diff__*D, numel(sdof_m)); + rates_sdof(sdof_m_) = -gradquotient*grad_sdof; % sources lose cells + grad_N = fsparse(jj_,1, Pr_diff__*D, numel(Adof)); + rates_sdof = rates_sdof + gradquotient*grad_N; % neighbours gain cells + + % movement of cells on the boundary, not over-occupied, bdof_m + % this loop is similar to what is done above for sdof and can be + % rewritten in a similar fashion. Since this calculation is less + % common, it is not a big concern for efficiency + rates_bdof = zeros(length(Adof),1); + for ind=1:length(bdof_m_) + ix = bdof_m(ind); + ix_ = bdof_m_(ind); + + jx_ = find(N(ix,Adof)); % neighbour to the voxel + jx_ = jx_(U_and_U_dead(Adof(jx_)) == 0); % empty neighbours + Pr_diff = max(Pr(ix_)-Pr(jx_),0); % pressure difference + + rates_bdof(ix_) = -sum(D*Pr_diff); % loses cells + rates_bdof(jx_) = rates_bdof(jx_) + D*Pr_diff; % gain cells + end + + %% Calculate timestep dt + + ind_rates_sdof_n = find(rates_sdof(sdof_m_)<0); % affected voxels + ind_rates_bdof_n = find(rates_bdof(bdof_m_)<0); + + % find the largest possible time step while avoiding U<0 + dt_sdof = U_new(sdof_m(ind_rates_sdof_n))./ ... + (-rates_sdof(sdof_m_(ind_rates_sdof_n))); + dt_bdof = U_new(bdof_m(ind_rates_bdof_n))./ ... + (-rates_bdof(bdof_m_(ind_rates_bdof_n))); + + dt = min([dt_sdof; dt_bdof;(0.1*Tend)])*timescaling; % scale dt smaller + + %% Report back and save time series of current states + + if tspan(i+1) < tt+dt + iend = i+find(tspan(i+1:end) < tt+dt,1,'last'); + + % save relevant values + Usave(i+1:iend) = {U}; + Udsave(i+1:iend) = {U_dead}; + + bdofsave(i+1:iend) = {bdof_m}; + sdofsave(i+1:iend) = {sdof_m}; + sdofbsave{i+1:iend} = {sdof_b}; + + i = iend; + end + + %% Euler forward step + + % movement of cells in sources, sdof_m + U_new(Adof) = U_new(Adof) + rates_sdof.*dt; + + % movement of cells in boundary voxels, bdof_m + U_new(Adof) = U_new(Adof) + rates_bdof*dt; + + tt = tt+dt; +% report(tt,U,''); + + % update the visited sites + VU = VU | U; +end +% report(tt,U,'done'); + +%% Create a GIF animation +Mnormal = struct('cdata',{},'colormap',{}); +figure(1), clf, + +Umat=full(cell2mat(Usave)); +colorbar +caxis([0 max(max(Umat))]) +colorlabel('Concentration of cells, U') +for i = 1:numel(Usave) + % background + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % colour living voxels after concentration level + ii = find(Usave{i}>0); + c = Umat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c, ... + 'FaceColor','flat'); + + % color (fully) dead voxels black + ii = find(Usave{i} == 0 & Udsave{i} > 0); + p_dead = patch('Faces',R(ii,:),'Vertices',V, ... + 'FaceColor',[0 0 0]); + legend(p_dead,'dead') + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Mnormal(i) = getframe(gcf); +end + +% save the GIF +movie2gif(Mnormal,{Mnormal([1:2 end]).cdata},'Tumour.gif', ... + 'delaytime',0.1,'loopcount',0); diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_StillGrowth.m b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_StillGrowth.m new file mode 100644 index 00000000..cf31993c --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Experiments/AvascularTumour_StillGrowth.m @@ -0,0 +1,242 @@ +% Simulation of an avascular tumour model. +% +% Avascular tumour growth, still growth experiment: An initial circular +% population cells (one per voxel) lie in a domain rich in oxygen. Cells +% consume oxygen at a constant rate, cons. Cells occupying a voxel with +% oxygen above cutoff_prol proliferate at a rate r_prol. Cells occupying +% voxels with an oxygen concentration below cutoff_die die at a +% rate r_die. Dead cells degrade and stop occupying space at a +% rate r_degrade. +% +% No movement of the cells is allowed. Pressure between cells is +% calculated but does not cause any actions. + +% C. Jayaweera & A. Graf Brolund 2021-01 (revision) +% S. Engblom 2017-12-27 (revision) +% D. B. Wilson 2017-09-05 +% S. Engblom 2017-02-11 + +clear; +clc; +close all; + +% cells live in a square of Nvoxels-by-Nvoxels +Nvoxels = 121; % odd so the BC for oxygen can by centered + +% fetch Cartesian discretization +[P,E,T,gradquotient] = basic_mesh(1,Nvoxels); +[V,R] = mesh2dual(P,E,T,'voronoi'); + +D = 1; % D_rate, the rate with which cells move in the domain. + % currently the rate is the same for visited voxels and non-visited + +% simulation interval +Tend = 100; +tspan = linspace(0,Tend,101); +timescaling = 0.005; + +% report(tspan,'timeleft','init'); % (this estimator gets seriously confused!) + +% The user specified cutoff and rate parameters for the proliferation, +% death, degradation and consumption rules. +cons = 0.0015; % consumption of oxygen by cells +cutoff_prol = 0.65; % the minimum amount of oxygen for proliferation +r_prol = 0.125; % rate of proliferation of singly occupied voxels +cutoff_die = 0.55; % the maximum amount of oxygen where cells can die +r_die = 0.125; % rate of death +r_degrade = 0.01; % rate of degradation for already dead cells +cutoff_deg = 0.0001; % the minimum amount of dead cells in a voxel +cutoff_remain = 0.01; % the minimum amount of alive cells in a voxel + +% Initial population: circular blob of living cells +start_value = 1; % cell concentrations in the initial blob +radius = 0.25; +r = sqrt(P(1,:).^2+P(2,:).^2); +ii = find(r < radius); % radius of the initial blob +U = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); % intialize +U_new = fsparse(ii(:),1,start_value,[Nvoxels^2 1]); +U_dead = fsparse(ii(:),1,0,[Nvoxels^2 1]); +U_deadnew = fsparse(ii(:),1,0,[Nvoxels^2 1]); + +% boundary conditions +OBC1 = 0; % BC for the oxygen equation for unvisited boundary +OBC2 = 0; % BC for the visited boundary + +% assemble minus the Laplacian on this grid (ignoring BCs), the voxel +% volume vector, and the sparse neighbor matrix +[L,dM,N] = dt_operators(P,T); %N gives the neighbours +neigh = full(sum(N,2)); + +% dofs for the sources at the extreme outer circular boundary +[xc,yc] = getmidpointcircle(1/2*(Nvoxels+1),1/2*(Nvoxels+1),1/2*(Nvoxels-1)); +irem = find(xc < 1 | yc < 1 | xc > Nvoxels | yc > Nvoxels); +xc(irem) = []; +yc(irem) = []; +extdof = find(sparse(xc,yc,1,Nvoxels,Nvoxels)); + +% visit marker matrix: 1 for voxels who have been occupied +VU = (U ~= 0); + +% representation of solution: cell-vector of sparse matrices +Usave = cell(1,numel(tspan)); +Usave{1} = U; +Udsave = cell(1,numel(tspan)); +Udsave{1} = U_dead; + +% for keeping track of the oxygen +Oxysave = cell(1,numel(tspan)); + +tt = tspan(1); +i = 1; +La = struct('X',0,'L',0,'U',0,'p',0,'q',0,'R',0); +OLa = struct('X',0,'L',0,'U',0,'p',0,'q',0,'R',0); + +% oxygen Laplacian +OLa.X = L; +OLai = fsparse(extdof,extdof,1,size(OLa.X)); +OLa.X = OLa.X-OLai*OLa.X+OLai; +[OLa.L,OLa.U,OLa.p,OLa.q,OLa.R] = lu(OLa.X,'vector'); + +while tt <= tspan(end) + U = U_new; + U_dead = U_deadnew; + + %% Init U and U_dead and classify the DOFs + U = U_new; + U_dead = U_deadnew; + U_and_U_dead = U | U_dead; + + %Classification of the DOFs + adof = find(U_and_U_dead); % all filled voxels + sdof = find(U > 1); % source voxels,concentration more than 1 + % empty voxels touching occupied ones + Idof = (N*(U_and_U_dead ~= 0) > 0 & U_and_U_dead == 0); + idof1 = find(Idof & ~VU); % "external" OBC1 + idof2 = find(Idof & VU); % "internal" OBC2 + idof = find(Idof); + ddof = find(U_dead > 0); % degrading voxels + + % "All DOFs" = adof + idof, like the "hull of adof" + Adof = [adof; idof]; + % The above will be enumerated within U, a Nvoxels^2-by-1 sparse + % matrix. Determine also a local enumeration, eg. [1 2 3 + % ... numel(Adof)]. + + Adof_ = (1:numel(Adof))'; + [sdof_,idof1_,idof2_,idof_,adof_,ddof_] = ... + map(Adof_,Adof,sdof,idof1,idof2,idof,adof,ddof); + + %% Calculate Pressure and Oxygen systems + + % pressure Laplacian + La.X = L(Adof,Adof); + %remove emtpy voxels touching occupied ones + Lai = fsparse(idof_,idof_,1,size(La.X)); + La.X = La.X-Lai*La.X+Lai; + [La.L,La.U,La.p,La.q,La.R] = lu(La.X,'vector'); + + % RHS source term proportional to the over-occupancy and BCs + Pr = full(fsparse(sdof_,1,(U(sdof)-1)./dM(sdof), ...%equilibrium at U=1 + [size(La.X,1) 1])); % RHS first... + Pr(La.q) = La.U\(La.L\(La.R(:,La.p)\Pr)); % ..then the solution + + % RHS source term proportional to the over-occupancy and BCs + Oxy = full(fsparse([extdof; adof],1, ... + [ones(size(extdof)); ... + -cons*full(U(adof)./dM(adof))], ... + [size(OLa.X,1) 1])); + Oxy(OLa.q) = OLa.U\(OLa.L\(OLa.R(:,OLa.p)\Oxy)); + + %% Change calculation + % proliferation, death and degradation + + %proliferation + ind_prol = find((Oxy > cutoff_prol)); %index of proliferating cells + prol_conc = r_prol*U(ind_prol); + + %death + ind_die = find(Oxy < cutoff_die); %index for dying cells + dead_conc = r_die*U(ind_die); + + %degradation + degrade_conc = U_deadnew(ddof)*r_degrade; + + %% Calculate time step dt + % find the largest possible time step while avoiding U<0 + dt_death = U_new(ind_die)./(dead_conc); + + dt = min([dt_death;(0.1*Tend)])*timescaling; % scale dt smaller + + %% Report back and save time series of current states + + if tspan(i+1) < tt+dt + iend = i+find(tspan(i+1:end) < tt+dt,1,'last'); + + % save relevant values + Usave(i+1:iend) = {U}; + Udsave(i+1:iend) = {U_dead}; + + Oxysave(i+1:iend) = {Oxy}; + + i = iend; + end + + %% Euler steps + + %Proliferation + U_new(ind_prol) = U_new(ind_prol)+prol_conc*dt; + + %Death + U_new(ind_die) = U_new(ind_die) - dead_conc*dt; + U_deadnew(ind_die) = U_deadnew(ind_die) + dead_conc*dt; + ind_cutoff = find(U_new < cutoff_remain & (Oxy < cutoff_die)); + U_new(ind_cutoff) = 0; % remove cells below cutoff_remain + + % Degradation + U_deadnew(ddof) = U_deadnew(ddof) - degrade_conc*dt; + U_deadnew(U_deadnew < cutoff_deg) = 0; % remove cells below cutoff_deg + + %% Step in time + tt = tt+dt; +% report(tt,U,''); + + % update the visited sites + VU = VU | U; +end +% report(tt,U,'done'); + +%% Create a GIF animation +Mnormal = struct('cdata',{},'colormap',{}); +figure(1), clf, + +Umat=full(cell2mat(Usave)); +colorbar +caxis([min(min(Umat)) max(max(Umat))]) +colorlabel('Concentration of cells, U') +for i = 1:numel(Usave) + % background + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % colour living voxels after concentration level + ii = find(Usave{i}>0); + c = Umat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c, ... + 'FaceColor','flat'); + + % color (fully) dead voxels black + ii = find(Usave{i} == 0 & Udsave{i} > 0); + p_dead = patch('Faces',R(ii,:),'Vertices',V, ... + 'FaceColor',[0 0 0]); + legend(p_dead,'dead') + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Mnormal(i) = getframe(gcf); +end + +% save the GIF +movie2gif(Mnormal,{Mnormal([1:2 end]).cdata},'Tumour.gif', ... + 'delaytime',0.1,'loopcount',0); \ No newline at end of file diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/TumorGraphics.m b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/TumorGraphics.m new file mode 100644 index 00000000..b22266db --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/TumorGraphics.m @@ -0,0 +1,172 @@ +% Can be run after AvascularTumour for additional visuals + +snapshotfigure = 0; %creates a gif and 5 snapshot figures +doffigure = 1; %visualizes the sdof's and bdof's as well as the concentration in other voxels +deadfigure = 1; %visualizes the dead cells in every voxel +oxyfigure = 1; %visualizes the concentration oxygen in the grid + +%% Snapshot figure +if snapshotfigure == 1 + % create a GIF animation + Tumour = struct('cdata',{},'colormap',{}); + fig=figure(1); + clf, + + Umat=full(cell2mat(Usave)); + colorbar; + caxis([0 max(max(Umat))]) + colorlabel('Concentration of cells, U') + + snapshot = 0; %Save 5 snapshots + + for i = 1:numel(Usave) + % background + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % colour living voxels after concentration level + ii = find(Usave{i}>0); + c = Umat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c, ... + 'FaceColor','flat'); + + % colour (fully) dead voxels black + ii = find(Usave{i} == 0 & Udsave{i} > 0); + p_dead = patch('Faces',R(ii,:),'Vertices',V, ... + 'FaceColor',[0 0 0]); + %legend(p_dead,'dead') + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Tumour(i) = getframe(gcf); + + %Save 5 snapshots of the tumor progression + if i~= [1 ceil([0.24 0.49 0.74 1]*numel(Usave))] + elseif i==1 + filename = 'T=1.png'; + print(fig,filename,'-painters','-dpng'); + else + ii=ceil(i*(Tend/numel(Usave))); + filename = ['T=' num2str(ii) '.png']; + print(fig,filename,'-painters','-dpng'); + end + + + % saves the GIF + movie2gif(Tumour,{Tumour([1:2 end]).cdata},'Tumour.gif', ... + 'delaytime',0.1,'loopcount',0); + + end +end + +%% dof figure +if doffigure==1 + % create a GIF animation + Mdof = struct('cdata',{},'colormap',{}); + figure(2), clf, + + Umat=full(cell2mat(Usave)); + colorbar + caxis([0 max(max(Umat))]) + colorlabel('Concentration of cells, u') + for i = 1:numel(Usave) + + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + hold on, + axis([-1 1 -1 1]); axis square, axis off + + ii = find(Usave{i}>0); + c = Umat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c,'FaceColor','flat'); + + % colour the different dof types + p_bdof = patch('Faces',R(bdofsave{i},:),'Vertices',V, ... + 'FaceColor','cyan'); + + p_sdof = patch('Faces',R(sdofsave{i},:),'Vertices',V, ... + 'FaceColor','magenta'); + + % colour (fully) dead voxels black + ii = find(Usave{i} == 0 & Udsave{i} >0); + p_dead = patch('Faces',R(ii,:),'Vertices',V, ... + 'FaceColor',[0 0 0]); + legend([p_bdof,p_sdof,p_dead],'bdof','sdof','dead') + + title(sprintf('Time = %d',tspan(i))); + + drawnow; + Mdof(i) = getframe(gcf); + end + + % saves the GIF + movie2gif(Mdof,{Mdof([1:2 end]).cdata},'TumourMdof.gif', ... + 'delaytime',0.1,'loopcount',0); +end + + +%% oxygen figure +if oxyfigure==1 + % create a GIF animation + Mnormal = struct('cdata',{},'colormap',{}); + figure(3), clf, + + Oxymat=full(cell2mat(Oxysave)); + colorbar + caxis([min(min(Oxymat)) max(max(Oxymat))]) + colorlabel('Concentration of oxygen, c') + for i = 1:numel(Usave) + + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % Colour of oxygen concentration + ii = find(Oxysave{i}>0); + c = Oxymat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c,'FaceColor','flat'); + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Mnormal(i) = getframe(gcf); + end + % saves the GIF + movie2gif(Mnormal,{Mnormal([1:2 end]).cdata},'Oxynormal.gif', ... + 'delaytime',0.1,'loopcount',0); +end + +%% Dead figure +if deadfigure==1 + Mdead = struct('cdata',{},'colormap',{}); + figure(4), clf, + + Udmat=cell2mat(Udsave); + cmat = full(Udmat/max(max(Udmat))); + caxis([1 2]) + colorbar; + colormap 'gray' + + for i = 1:numel(Udsave) + patch('Faces',R,'Vertices',V,'FaceColor',[0.9 0.9 0.9], ... + 'EdgeColor','none'); + hold on, + axis([-1 1 -1 1]); axis square, axis off + + % colour (fully) dead voxels black + ii = find(Udsave{i}>0); + c = cmat(ii,i); + patch('Faces',R(ii,:),'Vertices',V,'FaceVertexCData',c,'FaceColor','flat'); + + title(sprintf('Time = %d',tspan(i))); + drawnow; + Mdead(i) = getframe(gcf); + end + % saves the GIF + movie2gif(Mdead,{Mdead([1:2 end]).cdata},'TumourMDead.gif', ... + 'delaytime',0.1,'loopcount',0); +end + diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/colorlabel.m b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/colorlabel.m new file mode 100644 index 00000000..5ae66268 --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/colorlabel.m @@ -0,0 +1,75 @@ +function h=colorlabel(varargin) +% Usage: h=colorlabel([hcb],labstring,[options]) +% Given a string "labstring", colorlabel finds the colorbar on the current +% figure and labels the appropriate axis with that string. The handle of +% the axis label is returned as "h". If the figure contains more than one +% colorbar, provide the graphics handle "hcb" of the one that is to be +% labeled. You many also specify properties for the label object, e.g., +% colorlabel('x (cm)','FontSize',8,'FontWeight','bold'). +% +% Example: +% load clown +% imagesc(X) +% colorbar +% colorlabel('pixel brightness') + +% Written 7 February 2011 by Douglas H. Kelley, dhk [at] dougandneely.com. + +if nargin<1 + error(['Usage: ' mfilename '([hcb],labstring,[options])']) +end +if ischar(varargin{1}) % expect colorlabel(labstring,...) + labstring=varargin{1}; + hcb=findobj(gcf,'tag','Colorbar','-or','tag','MapColorbar', ... + '-or','tag','ColorWheel'); + if numel(varargin)<2 + argin=[varargin cell(1,2-numel(varargin))]; + else + argin=varargin; + end + options=argin(2:end); +else % expect colorlabel(hcb,labstring,...) + hcb=varargin{1}; + labstring=varargin{2}; + if numel(varargin)<3 + argin=[varargin cell(1,3-numel(varargin))]; + else + argin=varargin; + end + options=argin(3:end); +end +if numel(hcb)~=1 + error('Please specify exactly one colorbar.') +end +if ~ishandle(hcb) + error(['Sorry, ' num2str(hcb) ' is not a valid graphics handle.']) +elseif ~strcmp(get(hcb,'tag'),'Colorbar') && ... + ~strcmp(get(hcb,'tag'),'MapColorbar') && ... + ~strcmp(get(hcb,'tag'),'ColorWheel') + error(['Sorry, ' num2str(hcb) ... + ' is not a Colorbar, MapColorbar, or ColorWheel.']); +end +if strcmp(get(hcb,'tag'),'Colorbar') + if strfind(get(hcb,'Orientation'),'Horizontal') % horizontal colorbar + hh=get(hcb,'xlabel'); + else % vertical colorbar + hh=get(hcb,'ylabel'); + end +elseif strcmp(get(hcb,'tag'),'MapColorbar') % guess orientation from size of colorbar image + c0=get(findobj(hcb,'type','image'),'cdata'); + if size(c0,1)1 || ~isempty(options{:}) % if there are options, ... + set(hh,'string',labstring,'visible','on',options{:}); +else + set(hh,'string',labstring,'visible','on'); +end +if nargout>0 + h=hh; % return handle if requested +end diff --git a/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/license.txt b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/license.txt new file mode 100644 index 00000000..8622b937 --- /dev/null +++ b/workflows/DLCM/experiments/2_avascular_tumour/Project in SciComp - grp20/PDE-Proj/Utils/license.txt @@ -0,0 +1,24 @@ +Copyright (c) 2011, Douglas H. Kelley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/workflows/DLCM/utils/map.m b/workflows/DLCM/utils/map.m index 7e586245..d9138873 100644 --- a/workflows/DLCM/utils/map.m +++ b/workflows/DLCM/utils/map.m @@ -20,5 +20,5 @@ str = cumsum([1 cellfun('prodofsize',varargin)]); varargout = cell(1,numel(varargin)); for i = 1:numel(varargout) - varargout{i} = L(ix(str(i):str(i+1)-1)); + varargout{i} = L(ix(str(i):str(i+1)-1)); end diff --git a/workflows/T=1.png b/workflows/T=1.png new file mode 100644 index 00000000..62ddb6e3 Binary files /dev/null and b/workflows/T=1.png differ diff --git a/workflows/T=11.png b/workflows/T=11.png new file mode 100644 index 00000000..749ae968 Binary files /dev/null and b/workflows/T=11.png differ diff --git a/workflows/T=3.png b/workflows/T=3.png new file mode 100644 index 00000000..7c946070 Binary files /dev/null and b/workflows/T=3.png differ diff --git a/workflows/T=6.png b/workflows/T=6.png new file mode 100644 index 00000000..b5fc9728 Binary files /dev/null and b/workflows/T=6.png differ diff --git a/workflows/T=9.png b/workflows/T=9.png new file mode 100644 index 00000000..1a2b85d1 Binary files /dev/null and b/workflows/T=9.png differ diff --git a/workflows/Tumour.gif b/workflows/Tumour.gif new file mode 100644 index 00000000..3489e778 Binary files /dev/null and b/workflows/Tumour.gif differ