001/*-
002 *******************************************************************************
003 * Copyright (c) 2011, 2016 Diamond Light Source Ltd.
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the Eclipse Public License v1.0
006 * which accompanies this distribution, and is available at
007 * http://www.eclipse.org/legal/epl-v10.html
008 *
009 * Contributors:
010 *    Peter Chang - initial API and implementation and/or initial documentation
011 *******************************************************************************/
012
013package org.eclipse.january.dataset;
014
015import java.lang.ref.SoftReference;
016import java.util.ArrayList;
017import java.util.Collections;
018import java.util.HashMap;
019import java.util.List;
020import java.util.Map;
021import java.util.TreeMap;
022
023import org.apache.commons.math3.complex.Complex;
024import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
025import org.apache.commons.math3.stat.descriptive.moment.Skewness;
026import org.eclipse.january.metadata.Dirtiable;
027import org.eclipse.january.metadata.MetadataType;
028
029
030/**
031 * Statistics class
032 * 
033 * TODO Where is mode? http://en.wikipedia.org/wiki/Mode_(statistics)
034 * 
035 */
036public class Stats {
037
038        private static class ReferencedDataset extends SoftReference<Dataset> {
039                public ReferencedDataset(Dataset d) {
040                        super(d);
041                }
042        }
043
044        private static class QStatisticsImpl<T> implements MetadataType {
045                private static final long serialVersionUID = -3296671666463190388L;
046                final static Double Q1 = 0.25;
047                final static Double Q2 = 0.5;
048                final static Double Q3 = 0.75;
049                Map<Double, T> qmap = new HashMap<Double, T>();
050                transient Map<Integer, Map<Double, ReferencedDataset>> aqmap = new HashMap<Integer, Map<Double, ReferencedDataset>>();
051                transient ReferencedDataset s; // store 0th element
052                transient Map<Integer, ReferencedDataset> smap = new HashMap<>();
053
054                @Dirtiable
055                private boolean isDirty = true;
056
057                @Override
058                public QStatisticsImpl<T> clone() {
059                        return new QStatisticsImpl<T>(this);
060                }
061
062                public QStatisticsImpl() {
063                }
064
065                private QStatisticsImpl(QStatisticsImpl<T> qstats) {
066                        if (qstats.s != null && qstats.s.get() != null) {
067                                s = new ReferencedDataset(qstats.s.get().getView(false));
068                        }
069                        qmap.putAll(qstats.qmap);
070                        for (Integer i : qstats.aqmap.keySet()) {
071                                aqmap.put(i, new HashMap<>(qstats.aqmap.get(i)));
072                        }
073                        smap.putAll(qstats.smap);
074                        isDirty = qstats.isDirty;
075                }
076
077                public void setQuantile(double q, T v) {
078                        qmap.put(q, v);
079                }
080
081                public T getQuantile(double q) {
082                        return qmap.get(q);
083                }
084
085                private Map<Double, ReferencedDataset> getMap(int axis) {
086                        Map<Double, ReferencedDataset> qm = aqmap.get(axis);
087                        if (qm == null) {
088                                qm = new HashMap<>();
089                                aqmap.put(axis, qm);
090                        }
091                        return qm;
092                }
093
094                public void setQuantile(int axis, double q, Dataset v) {
095                        Map<Double, ReferencedDataset> qm = getMap(axis);
096                        qm.put(q, new ReferencedDataset(v));
097                }
098
099                public Dataset getQuantile(int axis, double q) {
100                        Map<Double, ReferencedDataset> qm = getMap(axis);
101                        ReferencedDataset rd = qm.get(q);
102                        return rd == null ? null : rd.get();
103                }
104
105                Dataset getSortedDataset(int axis) {
106                        return smap.containsKey(axis) ? smap.get(axis).get() : null;
107                }
108
109                void setSortedDataset(int axis, Dataset v) {
110                        smap.put(axis, new ReferencedDataset(v));
111                }
112        }
113
114        // calculates statistics and returns sorted dataset (0th element if compound)
115        private static QStatisticsImpl<?> calcQuartileStats(final Dataset a) {
116                Dataset s = null;
117                final int is = a.getElementsPerItem();
118
119                if (is == 1) {
120                        s = DatasetUtils.sort(a);
121
122                        QStatisticsImpl<Double> qstats = new QStatisticsImpl<Double>();
123
124                        qstats.setQuantile(QStatisticsImpl.Q1, pQuantile(s, QStatisticsImpl.Q1));
125                        qstats.setQuantile(QStatisticsImpl.Q2, pQuantile(s, QStatisticsImpl.Q2));
126                        qstats.setQuantile(QStatisticsImpl.Q3, pQuantile(s, QStatisticsImpl.Q3));
127                        qstats.s = new ReferencedDataset(s);
128                        return qstats;
129                }
130
131                QStatisticsImpl<double[]> qstats = new QStatisticsImpl<double[]>();
132
133                Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
134                double[] q1 = new double[is];
135                double[] q2 = new double[is];
136                double[] q3 = new double[is];
137                qstats.setQuantile(QStatisticsImpl.Q1, q1);
138                qstats.setQuantile(QStatisticsImpl.Q2, q2);
139                qstats.setQuantile(QStatisticsImpl.Q3, q3);
140                for (int j = 0; j < is; j++) {
141                        ((CompoundDataset) a).copyElements(w, j);
142                        w.sort(null);
143
144                        q1[j] = pQuantile(w, QStatisticsImpl.Q1);
145                        q2[j] = pQuantile(w, QStatisticsImpl.Q2);
146                        q3[j] = pQuantile(w, QStatisticsImpl.Q3);
147                        if (j == 0)
148                                s = w.clone();
149                }
150                qstats.s = new ReferencedDataset(s);
151
152                return qstats;
153        }
154
155        static private QStatisticsImpl<?> getQStatistics(final Dataset a) {
156                QStatisticsImpl<?> m = a.getFirstMetadata(QStatisticsImpl.class);
157                if (m == null || m.isDirty) {
158                        m = calcQuartileStats(a);
159                        a.setMetadata(m);
160                }
161                return m;
162        }
163
164        static private QStatisticsImpl<?> getQStatistics(final Dataset a, final int axis) {
165                final int is = a.getElementsPerItem();
166                QStatisticsImpl<?> qstats = a.getFirstMetadata(QStatisticsImpl.class);
167
168                if (qstats == null || qstats.isDirty) {
169                        if (is == 1) {
170                                qstats = new QStatisticsImpl<Double>();
171                        } else {
172                                qstats = new QStatisticsImpl<double[]>();
173                        }
174                        a.setMetadata(qstats);
175                }
176
177                if (qstats.getQuantile(axis, QStatisticsImpl.Q2) == null) {
178                        if (is == 1) {
179                                Dataset s = DatasetUtils.sort(a, axis);
180
181                                qstats.setQuantile(axis, QStatisticsImpl.Q1, pQuantile(s, axis, QStatisticsImpl.Q1));
182                                qstats.setQuantile(axis, QStatisticsImpl.Q2, pQuantile(s, axis, QStatisticsImpl.Q2));
183                                qstats.setQuantile(axis, QStatisticsImpl.Q3, pQuantile(s, axis, QStatisticsImpl.Q3));
184                                qstats.setSortedDataset(axis, s);
185                        } else {
186                                Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
187                                CompoundDoubleDataset q1 = null, q2 = null, q3 = null;
188                                for (int j = 0; j < is; j++) {
189                                        ((CompoundDataset) a).copyElements(w, j);
190                                        w.sort(axis);
191        
192                                        final Dataset c = pQuantile(w, axis, QStatisticsImpl.Q1);
193                                        if (j == 0) {
194                                                q1 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
195                                                q2 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
196                                                q3 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
197                                        }
198                                        q1.setElements(c, j);
199        
200                                        q2.setElements(pQuantile(w, axis, QStatisticsImpl.Q2), j);
201        
202                                        q3.setElements(pQuantile(w, axis, QStatisticsImpl.Q3), j);
203                                }
204                                qstats.setQuantile(axis, QStatisticsImpl.Q1, q1);
205                                qstats.setQuantile(axis, QStatisticsImpl.Q2, q2);
206                                qstats.setQuantile(axis, QStatisticsImpl.Q3, q3);
207                        }
208                }
209
210                return qstats;
211        }
212
213        // process a sorted dataset
214        private static double pQuantile(final Dataset s, final double q) {
215                double f = (s.getSize() - 1) * q; // fraction of sample number
216                if (f < 0)
217                        return Double.NaN;
218                int qpt = (int) Math.floor(f); // quantile point
219                f -= qpt;
220
221                double quantile = s.getElementDoubleAbs(qpt);
222                if (f > 0) {
223                        quantile = (1-f)*quantile + f*s.getElementDoubleAbs(qpt+1);
224                }
225                return quantile;
226        }
227
228        // process a sorted dataset and returns a double or compound double dataset
229        private static Dataset pQuantile(final Dataset s, final int axis, final double q) {
230                final int rank = s.getRank();
231                final int is = s.getElementsPerItem();
232
233                int[] oshape = s.getShape();
234
235                double f = (oshape[axis] - 1) * q; // fraction of sample number
236                int qpt = (int) Math.floor(f); // quantile point
237                f -= qpt;
238
239                oshape[axis] = 1;
240                int[] qshape = ShapeUtils.squeezeShape(oshape, false);
241                Dataset qds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape);
242
243                IndexIterator qiter = qds.getIterator(true);
244                int[] qpos = qiter.getPos();
245                int[] spos = oshape;
246
247                while (qiter.hasNext()) {
248                        int i = 0;
249                        for (; i < axis; i++) {
250                                spos[i] = qpos[i];
251                        }
252                        spos[i++] = qpt;
253                        for (; i < rank; i++) {
254                                spos[i] = qpos[i-1];
255                        }
256
257                        Object obj = s.getObject(spos);
258                        qds.set(obj, qpos);
259                }
260
261                if (f > 0) {
262                        qiter = qds.getIterator(true);
263                        qpos = qiter.getPos();
264                        qpt++;
265                        Dataset rds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape);
266                        
267                        while (qiter.hasNext()) {
268                                int i = 0;
269                                for (; i < axis; i++) {
270                                        spos[i] = qpos[i];
271                                }
272                                spos[i++] = qpt;
273                                for (; i < rank; i++) {
274                                        spos[i] = qpos[i-1];
275                                }
276
277                                Object obj = s.getObject(spos);
278                                rds.set(obj, qpos);
279                        }
280                        rds.imultiply(f);
281                        qds.imultiply(1-f);
282                        qds.iadd(rds);
283                }
284
285                return qds;
286        }
287
288        /**
289         * Calculate quantile of dataset which is defined as the inverse of the cumulative distribution function (CDF)
290         * @param a
291         * @param q
292         * @return point at which CDF has value q
293         */
294        @SuppressWarnings("unchecked")
295        public static double quantile(final Dataset a, final double q) {
296                if (q < 0 || q > 1) {
297                        throw new IllegalArgumentException("Quantile requested is outside [0,1]");
298                }
299                QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
300                Double qv = qs.getQuantile(q);
301                if (qv == null) {
302                        qv = pQuantile(qs.s.get(), q);
303                        qs.setQuantile(q, qv);
304                }
305                return qv;
306        }
307
308        /**
309         * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF)
310         * @param a
311         * @param values
312         * @return points at which CDF has given values
313         */
314        @SuppressWarnings("unchecked")
315        public static double[] quantile(final Dataset a, final double... values) {
316                final double[] points  = new double[values.length];
317                QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
318                for (int i = 0; i < points.length; i++) {
319                        final double q = values[i];
320                        if (q < 0 || q > 1) {
321                                throw new IllegalArgumentException("Quantile requested is outside [0,1]");
322                        }
323                        Double qv = qs.getQuantile(q);
324                        if (qv == null) {
325                                qv = pQuantile(qs.s.get(), q);
326                                qs.setQuantile(q, qv);
327                        }
328                        points[i] = qv;
329                }
330
331                return points;
332        }
333
334        /**
335         * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF)
336         * @param a
337         * @param axis
338         * @param values
339         * @return points at which CDF has given values
340         */
341        @SuppressWarnings({ "unchecked" })
342        public static Dataset[] quantile(final Dataset a, int axis, final double... values) {
343                final Dataset[] points  = new Dataset[values.length];
344                final int is = a.getElementsPerItem();
345                axis = a.checkAxis(axis);
346
347                if (is == 1) {
348                        QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a, axis);
349                        for (int i = 0; i < points.length; i++) {
350                                final double q = values[i];
351                                if (q < 0 || q > 1) {
352                                        throw new IllegalArgumentException("Quantile requested is outside [0,1]");
353                                }
354                                Dataset qv = qs.getQuantile(axis, q);
355                                if (qv == null) {
356                                        qv = pQuantile(qs.getSortedDataset(axis), axis, q);
357                                        qs.setQuantile(axis, q, qv);
358                                }
359                                points[i] = qv;
360                        }
361                } else {
362                        QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
363                        Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
364                        for (int j = 0; j < is; j++) {
365                                boolean copied = false;
366
367                                for (int i = 0; i < points.length; i++) {
368                                        final double q = values[i];
369                                        if (q < 0 || q > 1) {
370                                                throw new IllegalArgumentException("Quantile requested is outside [0,1]");
371                                        }
372                                        Dataset qv = qs.getQuantile(axis, q);
373                                        if (qv == null) {
374                                                if (!copied) {
375                                                        copied = true;
376                                                        ((CompoundDataset) a).copyElements(w, j);
377                                                        w.sort(axis);
378                                                }
379                                                qv = pQuantile(w, axis, q);
380                                                qs.setQuantile(axis, q, qv);
381                                                if (j == 0) {
382                                                        points[i] = DatasetFactory.zeros(is, qv.getClass(), qv.getShapeRef());
383                                                }
384                                                ((CompoundDoubleDataset) points[i]).setElements(qv, j);
385                                        }
386                                }
387                        }
388                }
389
390                return points;
391        }
392
393        /**
394         * @param a dataset
395         * @param axis
396         * @return median
397         */
398        public static Dataset median(final Dataset a, int axis) {
399                axis = a.checkAxis(axis);
400                return getQStatistics(a, axis).getQuantile(axis, QStatisticsImpl.Q2);
401        }
402
403        /**
404         * @param a dataset
405         * @return median
406         */
407        public static Object median(final Dataset a) {
408                return getQStatistics(a).getQuantile(QStatisticsImpl.Q2);
409        }
410
411        /**
412         * Interquartile range: Q3 - Q1
413         * @param a
414         * @return range
415         */
416        @SuppressWarnings("unchecked")
417        public static Object iqr(final Dataset a) {
418                final int is = a.getElementsPerItem();
419                if (is == 1) {
420                        QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
421                        return qs.getQuantile(QStatisticsImpl.Q3) - qs.getQuantile(QStatisticsImpl.Q1);
422                }
423
424                QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
425                double[] q1 = (double[]) qs.getQuantile(QStatisticsImpl.Q1);
426                double[] q3 = ((double[]) qs.getQuantile(QStatisticsImpl.Q3)).clone();
427                for (int j = 0; j < is; j++) {
428                        q3[j] -= q1[j];
429                }
430                return q3;
431        }
432
433        /**
434         * Interquartile range: Q3 - Q1
435         * @param a
436         * @param axis
437         * @return range
438         */
439        public static Dataset iqr(final Dataset a, int axis) {
440                axis = a.checkAxis(axis);
441                QStatisticsImpl<?> qs = getQStatistics(a, axis);
442                Dataset q3 = qs.getQuantile(axis, QStatisticsImpl.Q3);
443
444                return Maths.subtract(q3, qs.getQuantile(axis, QStatisticsImpl.Q1));
445        }
446
447        static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, boolean[] ignoreInvalids) {
448                boolean ignoreNaNs = false;
449                boolean ignoreInfs = false;
450                if (a.hasFloatingPointElements()) {
451                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
452                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
453                }
454
455                HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
456                if (stats == null || stats.isDirty) {
457                        stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs);
458                        a.setMetadata(stats);
459                }
460        
461                return stats;
462        }
463
464        static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, final boolean[] ignoreInvalids, final int axis) {
465                boolean ignoreNaNs = false;
466                boolean ignoreInfs = false;
467                if (a.hasFloatingPointElements()) {
468                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
469                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
470                }
471        
472                HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
473                if (stats == null || stats.getSkewness(axis) == null || stats.isDirty) {
474                        stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs, axis);
475                        a.setMetadata(stats);
476                }
477        
478                return stats;
479        }
480
481        private static class HigherStatisticsImpl<T> implements MetadataType {
482                private static final long serialVersionUID = -6587974784104116792L;
483                T skewness;
484                T kurtosis;
485                transient Map<Integer, ReferencedDataset> smap = new HashMap<>();
486                transient Map<Integer, ReferencedDataset> kmap = new HashMap<>();
487
488                @Dirtiable
489                private boolean isDirty = true;
490
491                @Override
492                public HigherStatisticsImpl<T> clone() {
493                        return new HigherStatisticsImpl<>(this);
494                }
495
496                public HigherStatisticsImpl() {
497                }
498
499                private HigherStatisticsImpl(HigherStatisticsImpl<T> hstats) {
500                        skewness = hstats.skewness;
501                        kurtosis = hstats.kurtosis;
502                        smap.putAll(hstats.smap);
503                        kmap.putAll(hstats.kmap);
504                        isDirty = hstats.isDirty;
505                }
506
507//              public void setSkewness(T skewness) {
508//                      this.skewness = skewness;
509//              }
510//
511//              public void setKurtosis(T kurtosis) {
512//                      this.kurtosis = kurtosis;
513//              }
514//
515//              public T getSkewness() {
516//                      return skewness;
517//              }
518//
519//              public T getKurtosis() {
520//                      return kurtosis;
521//              }
522
523                public Dataset getSkewness(int axis) {
524                        ReferencedDataset rd = smap.get(axis);
525                        return rd == null ? null : rd.get();
526                }
527
528                public Dataset getKurtosis(int axis) {
529                        ReferencedDataset rd = kmap.get(axis);
530                        return rd == null ? null : rd.get();
531                }
532
533                public void setSkewness(int axis, Dataset s) {
534                        smap.put(axis, new ReferencedDataset(s));
535                }
536
537                public void setKurtosis(int axis, Dataset k) {
538                        kmap.put(axis, new ReferencedDataset(k));
539                }
540        }
541
542        static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs) {
543                final int is = a.getElementsPerItem();
544                final IndexIterator iter = a.getIterator();
545
546                if (is == 1) {
547                        Skewness s = new Skewness();
548                        Kurtosis k = new Kurtosis();
549                        if (ignoreNaNs) {
550                                while (iter.hasNext()) {
551                                        final double x = a.getElementDoubleAbs(iter.index);
552                                        if (Double.isNaN(x))
553                                                continue;
554                                        s.increment(x);
555                                        k.increment(x);
556                                }
557                        } else {
558                                while (iter.hasNext()) {
559                                        final double x = a.getElementDoubleAbs(iter.index);
560                                        s.increment(x);
561                                        k.increment(x);
562                                }
563                        }
564
565                        HigherStatisticsImpl<Double> stats = new HigherStatisticsImpl<Double>();
566                        stats.skewness = s.getResult();
567                        stats.kurtosis = k.getResult();
568                        return stats;
569                }
570                final Skewness[] s = new Skewness[is];
571                final Kurtosis[] k = new Kurtosis[is];
572
573                for (int j = 0; j < is; j++) {
574                        s[j] = new Skewness();
575                        k[j] = new Kurtosis();
576                }
577                if (ignoreNaNs) {
578                        while (iter.hasNext()) {
579                                boolean skip = false;
580                                for (int j = 0; j < is; j++) {
581                                        if (Double.isNaN(a.getElementDoubleAbs(iter.index + j))) {
582                                                skip = true;
583                                                break;
584                                        }
585                                }
586                                if (!skip)
587                                        for (int j = 0; j < is; j++) {
588                                                final double val = a.getElementDoubleAbs(iter.index + j);
589                                                s[j].increment(val);
590                                                k[j].increment(val);
591                                        }
592                        }
593                } else {
594                        while (iter.hasNext()) {
595                                for (int j = 0; j < is; j++) {
596                                        final double val = a.getElementDoubleAbs(iter.index + j);
597                                        s[j].increment(val);
598                                        k[j].increment(val);
599                                }
600                        }
601                }
602                final double[] ts = new double[is];
603                final double[] tk = new double[is];
604                for (int j = 0; j < is; j++) {
605                        ts[j] = s[j].getResult();
606                        tk[j] = k[j].getResult();
607                }
608
609                HigherStatisticsImpl<double[]> stats = new HigherStatisticsImpl<double[]>();
610                stats.skewness = ts;
611                stats.kurtosis = tk;
612                return stats;
613        }
614
615        static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
616                final int rank = a.getRank();
617                final int is = a.getElementsPerItem();
618                final int[] oshape = a.getShape();
619                final int alen = oshape[axis];
620                oshape[axis] = 1;
621        
622                final int[] nshape = ShapeUtils.squeezeShape(oshape, false);
623                final Dataset sk;
624                final Dataset ku;
625                HigherStatisticsImpl<?> stats = null;
626        
627                if (is == 1) {
628                        if (stats == null) {
629                                stats = new HigherStatisticsImpl<Double>();
630                                a.setMetadata(stats);
631                        }
632                        sk = DatasetFactory.zeros(DoubleDataset.class, nshape);
633                        ku = DatasetFactory.zeros(DoubleDataset.class, nshape);
634                        final IndexIterator qiter = sk.getIterator(true);
635                        final int[] qpos = qiter.getPos();
636                        final int[] spos = oshape;
637        
638                        while (qiter.hasNext()) {
639                                int i = 0;
640                                for (; i < axis; i++) {
641                                        spos[i] = qpos[i];
642                                }
643                                spos[i++] = 0;
644                                for (; i < rank; i++) {
645                                        spos[i] = qpos[i - 1];
646                                }
647        
648                                Skewness s = new Skewness();
649                                Kurtosis k = new Kurtosis();
650                                if (ignoreNaNs) {
651                                        for (int j = 0; j < alen; j++) {
652                                                spos[axis] = j;
653                                                final double val = a.getDouble(spos);
654                                                if (Double.isNaN(val))
655                                                        continue;
656        
657                                                s.increment(val);
658                                                k.increment(val);
659                                        }
660                                } else {
661                                        for (int j = 0; j < alen; j++) {
662                                                spos[axis] = j;
663                                                final double val = a.getDouble(spos);
664                                                s.increment(val);
665                                                k.increment(val);
666                                        }
667                                }
668                                sk.set(s.getResult(), qpos);
669                                ku.set(k.getResult(), qpos);
670                        }
671                } else {
672                        if (stats == null) {
673                                stats = new HigherStatisticsImpl<double[]>();
674                                a.setMetadata(stats);
675                        }
676                        sk = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape);
677                        ku = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape);
678                        final IndexIterator qiter = sk.getIterator(true);
679                        final int[] qpos = qiter.getPos();
680                        final int[] spos = oshape;
681                        final Skewness[] s = new Skewness[is];
682                        final Kurtosis[] k = new Kurtosis[is];
683                        final double[] ts = new double[is];
684                        final double[] tk = new double[is];
685        
686                        for (int j = 0; j < is; j++) {
687                                s[j] = new Skewness();
688                                k[j] = new Kurtosis();
689                        }
690                        while (qiter.hasNext()) {
691                                int i = 0;
692                                for (; i < axis; i++) {
693                                        spos[i] = qpos[i];
694                                }
695                                spos[i++] = 0;
696                                for (; i < rank; i++) {
697                                        spos[i] = qpos[i-1];
698                                }
699        
700                                for (int j = 0; j < is; j++) {
701                                        s[j].clear();
702                                        k[j].clear();
703                                }
704                                int index = a.get1DIndex(spos);
705                                if (ignoreNaNs) {
706                                        boolean skip = false;
707                                        for (int j = 0; j < is; j++) {
708                                                if (Double.isNaN(a.getElementDoubleAbs(index + j))) {
709                                                        skip = true;
710                                                        break;
711                                                }
712                                        }
713                                        if (!skip)
714                                                for (int j = 0; j < is; j++) {
715                                                        final double val = a.getElementDoubleAbs(index + j);
716                                                        s[j].increment(val);
717                                                        k[j].increment(val);
718                                                }
719                                } else {
720                                        for (int j = 0; j < is; j++) {
721                                                final double val = a.getElementDoubleAbs(index + j);
722                                                s[j].increment(val);
723                                                k[j].increment(val);
724                                        }
725                                }
726                                for (int j = 0; j < is; j++) {
727                                        ts[j] = s[j].getResult(); 
728                                        tk[j] = k[j].getResult(); 
729                                }
730                                sk.set(ts, qpos);
731                                ku.set(tk, qpos);
732                        }
733                }
734
735                stats.setSkewness(axis, sk);
736                stats.setKurtosis(axis, ku);
737                return stats;
738        }
739
740        /**
741         * @param a dataset
742         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
743         * @return skewness
744         * @since 2.0
745         */
746        public static Object skewness(final Dataset a, final boolean... ignoreInvalids) {
747                return getHigherStatistic(a, ignoreInvalids).skewness;
748        }
749
750        /**
751         * @param a dataset
752         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
753         * @return kurtosis
754         * @since 2.0
755         */
756        public static Object kurtosis(final Dataset a, final boolean... ignoreInvalids) {
757                return getHigherStatistic(a, ignoreInvalids).kurtosis;
758        }
759
760        /**
761         * @param a dataset
762         * @param axis
763         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
764         * @return skewness along axis in dataset
765         * @since 2.0
766         */
767        public static Dataset skewness(final Dataset a, int axis, final boolean... ignoreInvalids) {
768                axis = a.checkAxis(axis);
769                return getHigherStatistic(a, ignoreInvalids, axis).getSkewness(axis);
770        }
771
772        /**
773         * @param a dataset
774         * @param axis
775         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
776         * @return kurtosis along axis in dataset
777         * @since 2.0
778         */
779        public static Dataset kurtosis(final Dataset a, int axis, final boolean... ignoreInvalids) {
780                axis = a.checkAxis(axis);
781                return getHigherStatistic(a, ignoreInvalids, axis).getKurtosis(axis);
782        }
783
784        /**
785         * @param a dataset
786         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
787         * @return sum of dataset
788         * @since 2.0
789         */
790        public static Object sum(final Dataset a, final boolean... ignoreInvalids) {
791                return a.sum(ignoreInvalids);
792        }
793
794        /**
795         * @param a dataset
796         * @param dtype
797         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
798         * @return typed sum of dataset
799         * @since 2.0
800         */
801        public static Object typedSum(final Dataset a, int dtype, final boolean... ignoreInvalids) {
802                if (a.isComplex()) {
803                        Complex m = (Complex) a.sum(ignoreInvalids);
804                        return m;
805                } else if (a instanceof CompoundDataset) {
806                        return  DTypeUtils.fromDoublesToBiggestPrimitives((double[]) a.sum(ignoreInvalids), dtype);
807                }
808                return DTypeUtils.fromDoubleToBiggestNumber(DTypeUtils.toReal(a.sum(ignoreInvalids)), dtype);
809        }
810
811        /**
812         * @param a dataset
813         * @param dtype
814         * @param axis
815         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
816         * @return typed sum of items along axis in dataset
817         * @since 2.0
818         */
819        public static Dataset typedSum(final Dataset a, int dtype, int axis, final boolean... ignoreInvalids) {
820                return DatasetUtils.cast(a.sum(axis, ignoreInvalids), dtype);
821        }
822
823        /**
824         * @param a dataset
825         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
826         * @return product of all items in dataset
827         * @since 2.0
828         */
829        public static Object product(final Dataset a, final boolean... ignoreInvalids) {
830                return typedProduct(a, a.getDType(), ignoreInvalids);
831        }
832
833        /**
834         * @param a dataset
835         * @param axis
836         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
837         * @return product of items along axis in dataset
838         * @since 2.0
839         */
840        public static Dataset product(final Dataset a, final int axis, final boolean... ignoreInvalids) {
841                return typedProduct(a, a.getDType(), axis, ignoreInvalids);
842        }
843
844        /**
845         * @param a dataset
846         * @param dtype
847         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
848         * @return typed product of all items in dataset
849         * @since 2.0
850         */
851        public static Object typedProduct(final Dataset a, final int dtype, final boolean... ignoreInvalids) {
852                final boolean ignoreNaNs;
853                final boolean ignoreInfs;
854                if (a.hasFloatingPointElements()) {
855                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
856                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
857                } else {
858                        ignoreNaNs = false;
859                        ignoreInfs = false;
860                }
861
862                if (a.isComplex()) {
863                        IndexIterator it = a.getIterator();
864                        double rv = 1, iv = 0;
865
866                        while (it.hasNext()) {
867                                final double r1 = a.getElementDoubleAbs(it.index);
868                                final double i1 = a.getElementDoubleAbs(it.index + 1);
869                                if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
870                                        continue;
871                                }
872                                if (ignoreInfs  && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
873                                        continue;
874                                }
875                                final double tv = r1*rv - i1*iv;
876                                iv = r1*iv + i1*rv;
877                                rv = tv;
878                                if (Double.isNaN(rv) && Double.isNaN(iv)) {
879                                        break;
880                                }
881                        }
882
883                        return new Complex(rv, iv);
884                }
885
886                IndexIterator it = a.getIterator();
887                final int is;
888                final long[] lresults;
889                final double[] dresults;
890
891                switch (dtype) {
892                case Dataset.BOOL:
893                case Dataset.INT8: case Dataset.INT16:
894                case Dataset.INT32: case Dataset.INT64:
895                        long lresult = 1;
896                        while (it.hasNext()) {
897                                lresult *= a.getElementLongAbs(it.index);
898                        }
899                        return new Long(lresult);
900                case Dataset.ARRAYINT8: case Dataset.ARRAYINT16:
901                case Dataset.ARRAYINT32: case Dataset.ARRAYINT64:
902                        lresult = 1;
903                        is = a.getElementsPerItem();
904                        lresults = new long[is];
905                        for (int j = 0; j < is; j++) {
906                                lresults[j] = 1;
907                        }
908                        while (it.hasNext()) {
909                                for (int j = 0; j < is; j++)
910                                        lresults[j] *= a.getElementLongAbs(it.index+j);
911                        }
912                        return lresults;
913                case Dataset.FLOAT32: case Dataset.FLOAT64:
914                        double dresult = 1.;
915                        while (it.hasNext()) {
916                                final double x = a.getElementDoubleAbs(it.index);
917                                if (ignoreNaNs && Double.isNaN(x)) {
918                                        continue;
919                                }
920                                if (ignoreInfs && Double.isInfinite(x)) {
921                                        continue;
922                                }
923                                dresult *= x;
924                                if (Double.isNaN(dresult)) {
925                                        break;
926                                }
927                        }
928                        return Double.valueOf(dresult);
929                case Dataset.ARRAYFLOAT32:
930                case Dataset.ARRAYFLOAT64:
931                        is = a.getElementsPerItem();
932                        double[] vals = new double[is];
933                        dresults = new double[is];
934                        for (int j = 0; j < is; j++) {
935                                dresults[j] = 1.;
936                        }
937                        while (it.hasNext()) {
938                                boolean okay = true;
939                                for (int j = 0; j < is; j++) {
940                                        final double val = a.getElementDoubleAbs(it.index + j);
941                                        if (ignoreNaNs && Double.isNaN(val)) {
942                                                okay = false;
943                                                break;
944                                        }
945                                        if (ignoreInfs && Double.isInfinite(val)) {
946                                                okay = false;
947                                                break;
948                                        }
949                                        vals[j] = val;
950                                }
951                                if (okay) {
952                                        for (int j = 0; j < is; j++) {
953                                                double val = vals[j];
954                                                dresults[j] *= val;
955                                        }
956                                }
957                        }
958                        return dresults;
959                }
960
961                return null;
962        }
963
964        /**
965         * @param a dataset
966         * @param dtype
967         * @param axis
968         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
969         * @return typed product of items along axis in dataset
970         * @since 2.0
971         */
972        public static Dataset typedProduct(final Dataset a, final int dtype, int axis, final boolean... ignoreInvalids) {
973                axis = a.checkAxis(axis);
974                final int[] oshape = a.getShape();
975                final int is = a.getElementsPerItem();
976                final int alen = oshape[axis];
977                oshape[axis] = 1;
978
979                final boolean ignoreNaNs;
980                final boolean ignoreInfs;
981                if (a.hasFloatingPointElements()) {
982                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
983                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
984                } else {
985                        ignoreNaNs = false;
986                        ignoreInfs = false;
987                }
988                @SuppressWarnings("deprecation")
989                Dataset result = DatasetFactory.zeros(is, oshape, dtype);
990
991                IndexIterator qiter = result.getIterator(true);
992                int[] qpos = qiter.getPos();
993                int[] spos;
994
995                // TODO add getLongArray(long[], int...) to CompoundDataset
996                while (qiter.hasNext()) {
997                        spos = qpos.clone();
998
999                        if (a.isComplex()) {
1000                                double rv = 1, iv = 0;
1001                                switch (dtype) {
1002                                case Dataset.COMPLEX64:
1003                                        ComplexFloatDataset af = (ComplexFloatDataset) a;
1004                                        for (int j = 0; j < alen; j++) {
1005                                                spos[axis] = j;
1006                                                final float r1 = af.getReal(spos);
1007                                                final float i1 = af.getImag(spos);
1008                                                if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
1009                                                        continue;
1010                                                }
1011                                                if (ignoreInfs  && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
1012                                                        continue;
1013                                                }
1014                                                final double tv = r1*rv - i1*iv;
1015                                                iv = r1*iv + i1*rv;
1016                                                rv = tv;
1017                                                if (Double.isNaN(rv) && Double.isNaN(iv)) {
1018                                                        break;
1019                                                }
1020                                        }
1021                                        break;
1022                                case Dataset.COMPLEX128:
1023                                        ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
1024                                        for (int j = 0; j < alen; j++) {
1025                                                spos[axis] = j;
1026                                                final double r1 = ad.getReal(spos);
1027                                                final double i1 = ad.getImag(spos);
1028                                                if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
1029                                                        continue;
1030                                                }
1031                                                if (ignoreInfs  && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
1032                                                        continue;
1033                                                }
1034                                                final double tv = r1*rv - i1*iv;
1035                                                iv = r1*iv + i1*rv;
1036                                                rv = tv;
1037                                                if (Double.isNaN(rv) && Double.isNaN(iv)) {
1038                                                        break;
1039                                                }
1040                                        }
1041                                        break;
1042                                }
1043
1044                                result.set(new Complex(rv, iv), qpos);
1045                        } else {
1046                                final long[] lresults;
1047                                final double[] dresults;
1048
1049                                switch (dtype) {
1050                                case Dataset.BOOL:
1051                                case Dataset.INT8: case Dataset.INT16:
1052                                case Dataset.INT32: case Dataset.INT64:
1053                                        long lresult = 1;
1054                                        for (int j = 0; j < alen; j++) {
1055                                                spos[axis] = j;
1056                                                lresult *= a.getInt(spos);
1057                                        }
1058                                        result.set(lresult, qpos);
1059                                        break;
1060                                case Dataset.ARRAYINT8:
1061                                        lresults = new long[is];
1062                                        for (int k = 0; k < is; k++) {
1063                                                lresults[k] = 1;
1064                                        }
1065                                        for (int j = 0; j < alen; j++) {
1066                                                spos[axis] = j;
1067                                                final byte[] va = (byte[]) a.getObject(spos);
1068                                                for (int k = 0; k < is; k++) {
1069                                                        lresults[k] *= va[k];
1070                                                }
1071                                        }
1072                                        result.set(lresults, qpos);
1073                                        break;
1074                                case Dataset.ARRAYINT16:
1075                                        lresults = new long[is];
1076                                        for (int k = 0; k < is; k++) {
1077                                                lresults[k] = 1;
1078                                        }
1079                                        for (int j = 0; j < alen; j++) {
1080                                                spos[axis] = j;
1081                                                final short[] va = (short[]) a.getObject(spos);
1082                                                for (int k = 0; k < is; k++) {
1083                                                        lresults[k] *= va[k];
1084                                                }
1085                                        }
1086                                        result.set(lresults, qpos);
1087                                        break;
1088                                case Dataset.ARRAYINT32:
1089                                        lresults = new long[is];
1090                                        for (int k = 0; k < is; k++) {
1091                                                lresults[k] = 1;
1092                                        }
1093                                        for (int j = 0; j < alen; j++) {
1094                                                spos[axis] = j;
1095                                                final int[] va = (int[]) a.getObject(spos);
1096                                                for (int k = 0; k < is; k++) {
1097                                                        lresults[k] *= va[k];
1098                                                }
1099                                        }
1100                                        result.set(lresults, qpos);
1101                                        break;
1102                                case Dataset.ARRAYINT64:
1103                                        lresults = new long[is];
1104                                        for (int k = 0; k < is; k++) {
1105                                                lresults[k] = 1;
1106                                        }
1107                                        for (int j = 0; j < alen; j++) {
1108                                                spos[axis] = j;
1109                                                final long[] va = (long[]) a.getObject(spos);
1110                                                for (int k = 0; k < is; k++) {
1111                                                        lresults[k] *= va[k];
1112                                                }
1113                                        }
1114                                        result.set(lresults, qpos);
1115                                        break;
1116                                case Dataset.FLOAT32: case Dataset.FLOAT64:
1117                                        double dresult = 1.;
1118                                        for (int j = 0; j < alen; j++) {
1119                                                spos[axis] = j;
1120                                                final double x = a.getDouble(spos); 
1121                                                if (ignoreNaNs && Double.isNaN(x)) {
1122                                                        continue;
1123                                                }
1124                                                if (ignoreInfs && Double.isInfinite(x)) {
1125                                                        continue;
1126                                                }
1127                                                dresult *= x;
1128                                                if (Double.isNaN(dresult)) {
1129                                                        break;
1130                                                }
1131                                        }
1132                                        result.set(dresult, qpos);
1133                                        break;
1134                                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
1135                                        CompoundDataset da = (CompoundDataset) a;
1136                                        double[] dvalues = new double[is];
1137                                        dresults = new double[is];
1138                                        for (int k = 0; k < is; k++) {
1139                                                dresults[k] = 1.;
1140                                        }
1141                                        for (int j = 0; j < alen; j++) {
1142                                                spos[axis] = j;
1143                                                da.getDoubleArray(dvalues, spos);
1144                                                boolean okay = true;
1145                                                for (int k = 0; k < is; k++) {
1146                                                        final double val = dvalues[k];
1147                                                        if (ignoreNaNs && Double.isNaN(val)) {
1148                                                                okay = false;
1149                                                                break;
1150                                                        }
1151                                                        if (ignoreInfs && Double.isInfinite(val)) {
1152                                                                okay = false;
1153                                                                break;
1154                                                        }
1155                                                }
1156                                                if (okay) {
1157                                                        for (int k = 0; k < is; k++) {
1158                                                                dresults[k] *= dvalues[k];
1159                                                        }
1160                                                }
1161                                        }
1162                                        result.set(dresults, qpos);
1163                                        break;
1164                                }
1165                        }
1166                }
1167
1168                result.setShape(ShapeUtils.squeezeShape(oshape, axis));
1169                return result;
1170        }
1171
1172        /**
1173         * @param a dataset
1174         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
1175         * @return cumulative product of items in flattened dataset
1176         * @since 2.0
1177         */
1178        public static Dataset cumulativeProduct(final Dataset a, final boolean... ignoreInvalids) {
1179                return cumulativeProduct(a.flatten(), 0, ignoreInvalids);
1180        }
1181
1182        /**
1183         * @param a dataset
1184         * @param axis
1185         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
1186         * @return cumulative product of items along axis in dataset
1187         * @since 2.0
1188         */
1189        public static Dataset cumulativeProduct(final Dataset a, int axis, final boolean... ignoreInvalids) {
1190                axis = a.checkAxis(axis);
1191                int dtype = a.getDType();
1192                int[] oshape = a.getShape();
1193                int alen = oshape[axis];
1194                oshape[axis] = 1;
1195
1196                final boolean ignoreNaNs;
1197                final boolean ignoreInfs;
1198                if (a.hasFloatingPointElements()) {
1199                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
1200                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
1201                } else {
1202                        ignoreNaNs = false;
1203                        ignoreInfs = false;
1204                }
1205                Dataset result = DatasetFactory.zeros(a);
1206                PositionIterator pi = result.getPositionIterator(axis);
1207
1208                int[] pos = pi.getPos();
1209
1210                while (pi.hasNext()) {
1211
1212                        if (a.isComplex()) {
1213                                double rv = 1, iv = 0;
1214                                switch (dtype) {
1215                                case Dataset.COMPLEX64:
1216                                        ComplexFloatDataset af = (ComplexFloatDataset) a;
1217                                        ComplexFloatDataset rf = (ComplexFloatDataset) result;
1218                                        for (int j = 0; j < alen; j++) {
1219                                                if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
1220                                                        pos[axis] = j;
1221                                                        final float r1 = af.getReal(pos);
1222                                                        final float i1 = af.getImag(pos);
1223                                                        if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
1224                                                                continue;
1225                                                        }
1226                                                        if (ignoreInfs  && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
1227                                                                continue;
1228                                                        }
1229                                                        final double tv = r1*rv - i1*iv;
1230                                                        iv = r1*iv + i1*rv;
1231                                                        rv = tv;
1232                                                }
1233                                                rf.set((float) rv, (float) iv, pos);
1234                                        }
1235                                        break;
1236                                case Dataset.COMPLEX128:
1237                                        ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
1238                                        ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
1239                                        for (int j = 0; j < alen; j++) {
1240                                                if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
1241                                                        pos[axis] = j;
1242                                                        final double r1 = ad.getReal(pos);
1243                                                        final double i1 = ad.getImag(pos);
1244                                                        if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
1245                                                                continue;
1246                                                        }
1247                                                        if (ignoreInfs  && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
1248                                                                continue;
1249                                                        }
1250                                                        final double tv = r1*rv - i1*iv;
1251                                                        iv = r1*iv + i1*rv;
1252                                                        rv = tv;
1253                                                }
1254                                                rd.set(rv, iv, pos);
1255                                        }
1256                                        break;
1257                                }
1258                        } else {
1259                                final int is;
1260                                final long[] lresults;
1261                                final double[] dresults;
1262
1263                                switch (dtype) {
1264                                case Dataset.BOOL:
1265                                case Dataset.INT8: case Dataset.INT16:
1266                                case Dataset.INT32: case Dataset.INT64:
1267                                        long lresult = 1;
1268                                        for (int j = 0; j < alen; j++) {
1269                                                pos[axis] = j;
1270                                                lresult *= a.getInt(pos);
1271                                                result.set(lresult, pos);
1272                                        }
1273                                        break;
1274                                case Dataset.ARRAYINT8:
1275                                        is = a.getElementsPerItem();
1276                                        lresults = new long[is];
1277                                        for (int k = 0; k < is; k++) {
1278                                                lresults[k] = 1;
1279                                        }
1280                                        for (int j = 0; j < alen; j++) {
1281                                                pos[axis] = j;
1282                                                final byte[] va = (byte[]) a.getObject(pos);
1283                                                for (int k = 0; k < is; k++) {
1284                                                        lresults[k] *= va[k];
1285                                                }
1286                                                result.set(lresults, pos);
1287                                        }
1288                                        break;
1289                                case Dataset.ARRAYINT16:
1290                                        is = a.getElementsPerItem();
1291                                        lresults = new long[is];
1292                                        for (int k = 0; k < is; k++) {
1293                                                lresults[k] = 1;
1294                                        }
1295                                        for (int j = 0; j < alen; j++) {
1296                                                pos[axis] = j;
1297                                                final short[] va = (short[]) a.getObject(pos);
1298                                                for (int k = 0; k < is; k++) {
1299                                                        lresults[k] *= va[k];
1300                                                }
1301                                                result.set(lresults, pos);
1302                                        }
1303                                        break;
1304                                case Dataset.ARRAYINT32:
1305                                        is = a.getElementsPerItem();
1306                                        lresults = new long[is];
1307                                        for (int k = 0; k < is; k++) {
1308                                                lresults[k] = 1;
1309                                        }
1310                                        for (int j = 0; j < alen; j++) {
1311                                                pos[axis] = j;
1312                                                final int[] va = (int[]) a.getObject(pos);
1313                                                for (int k = 0; k < is; k++) {
1314                                                        lresults[k] *= va[k];
1315                                                }
1316                                                result.set(lresults, pos);
1317                                        }
1318                                        break;
1319                                case Dataset.ARRAYINT64:
1320                                        is = a.getElementsPerItem();
1321                                        lresults = new long[is];
1322                                        for (int k = 0; k < is; k++) {
1323                                                lresults[k] = 1;
1324                                        }
1325                                        for (int j = 0; j < alen; j++) {
1326                                                pos[axis] = j;
1327                                                final long[] va = (long[]) a.getObject(pos);
1328                                                for (int k = 0; k < is; k++) {
1329                                                        lresults[k] *= va[k];
1330                                                }
1331                                                result.set(lresults, pos);
1332                                        }
1333                                        break;
1334                                case Dataset.FLOAT32: case Dataset.FLOAT64:
1335                                        double dresult = 1.;
1336                                        for (int j = 0; j < alen; j++) {
1337                                                if (!Double.isNaN(dresult)) {
1338                                                        pos[axis] = j;
1339                                                        final double x = a.getDouble(pos);
1340                                                        if (ignoreNaNs && Double.isNaN(x)) {
1341                                                                continue;
1342                                                        }
1343                                                        if (ignoreInfs && Double.isInfinite(x)) {
1344                                                                continue;
1345                                                        }
1346                                                        dresult *= x;
1347                                                }
1348                                                result.set(dresult, pos);
1349                                        }
1350                                        break;
1351                                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
1352                                        is = a.getElementsPerItem();
1353                                        CompoundDataset da = (CompoundDataset) a;
1354                                        double[] dvalues = new double[is];
1355                                        dresults = new double[is];
1356                                        for (int k = 0; k < is; k++) {
1357                                                dresults[k] = 1.;
1358                                        }
1359                                        for (int j = 0; j < alen; j++) {
1360                                                pos[axis] = j;
1361                                                da.getDoubleArray(dvalues, pos);
1362                                                boolean okay = true;
1363                                                for (int k = 0; k < is; k++) {
1364                                                        final double val = dvalues[k];
1365                                                        if (ignoreNaNs && Double.isNaN(val)) {
1366                                                                okay = false;
1367                                                                break;
1368                                                        }
1369                                                        if (ignoreInfs && Double.isInfinite(val)) {
1370                                                                okay = false;
1371                                                                break;
1372                                                        }
1373                                                }
1374                                                if (okay) {
1375                                                        for (int k = 0; k < is; k++) {
1376                                                                dresults[k] *= dvalues[k];
1377                                                        }
1378                                                }
1379                                                result.set(dresults, pos);
1380                                        }
1381                                        break;
1382                                }
1383                        }
1384                }
1385
1386                return result;
1387        }
1388
1389        /**
1390         * @param a dataset
1391         * @param ignoreInvalids see {@link IDataset#max(boolean...)}
1392         * @return cumulative sum of items in flattened dataset
1393         * @since 2.0
1394         */
1395        public static Dataset cumulativeSum(final Dataset a, final boolean... ignoreInvalids) {
1396                return cumulativeSum(a.flatten(), 0, ignoreInvalids);
1397        }
1398
1399        /**
1400         * @param a dataset
1401         * @param axis
1402         * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
1403         * @return cumulative sum of items along axis in dataset
1404         * @since 2.0
1405         */
1406        public static Dataset cumulativeSum(final Dataset a, int axis, final boolean... ignoreInvalids) {
1407                axis = a.checkAxis(axis);
1408                int dtype = a.getDType();
1409                int[] oshape = a.getShape();
1410                int alen = oshape[axis];
1411                oshape[axis] = 1;
1412
1413                final boolean ignoreNaNs;
1414                final boolean ignoreInfs;
1415                if (a.hasFloatingPointElements()) {
1416                        ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
1417                        ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
1418                } else {
1419                        ignoreNaNs = false;
1420                        ignoreInfs = false;
1421                }
1422                Dataset result = DatasetFactory.zeros(a);
1423                PositionIterator pi = result.getPositionIterator(axis);
1424
1425                int[] pos = pi.getPos();
1426
1427                while (pi.hasNext()) {
1428
1429                        if (a.isComplex()) {
1430                                double rv = 0, iv = 0;
1431                                switch (dtype) {
1432                                case Dataset.COMPLEX64:
1433                                        ComplexFloatDataset af = (ComplexFloatDataset) a;
1434                                        ComplexFloatDataset rf = (ComplexFloatDataset) result;
1435                                        for (int j = 0; j < alen; j++) {
1436                                                if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
1437                                                        pos[axis] = j;
1438                                                        final float r1 = af.getReal(pos);
1439                                                        final float i1 = af.getImag(pos);
1440                                                        if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
1441                                                                continue;
1442                                                        }
1443                                                        if (ignoreInfs  && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
1444                                                                continue;
1445                                                        }
1446                                                        rv += r1;
1447                                                        iv += i1;
1448                                                }
1449                                                rf.set((float) rv, (float) iv, pos);
1450                                        }
1451                                        break;
1452                                case Dataset.COMPLEX128:
1453                                        ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
1454                                        ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
1455                                        for (int j = 0; j < alen; j++) {
1456                                                if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
1457                                                        pos[axis] = j;
1458                                                        final double r1 = ad.getReal(pos);
1459                                                        final double i1 = ad.getImag(pos);
1460                                                        if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
1461                                                                continue;
1462                                                        }
1463                                                        if (ignoreInfs  && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
1464                                                                continue;
1465                                                        }
1466                                                        rv += r1;
1467                                                        iv += i1;
1468                                                }
1469                                                rd.set(rv, iv, pos);
1470                                        }
1471                                        break;
1472                                }
1473                        } else {
1474                                final int is;
1475                                final long[] lresults;
1476                                final double[] dresults;
1477
1478                                switch (dtype) {
1479                                case Dataset.BOOL:
1480                                case Dataset.INT8: case Dataset.INT16:
1481                                case Dataset.INT32: case Dataset.INT64:
1482                                        long lresult = 0;
1483                                        for (int j = 0; j < alen; j++) {
1484                                                pos[axis] = j;
1485                                                lresult += a.getInt(pos);
1486                                                result.set(lresult, pos);
1487                                        }
1488                                        break;
1489                                case Dataset.ARRAYINT8:
1490                                        is = a.getElementsPerItem();
1491                                        lresults = new long[is];
1492                                        for (int j = 0; j < alen; j++) {
1493                                                pos[axis] = j;
1494                                                final byte[] va = (byte[]) a.getObject(pos);
1495                                                for (int k = 0; k < is; k++) {
1496                                                        lresults[k] += va[k];
1497                                                }
1498                                                result.set(lresults, pos);
1499                                        }
1500                                        break;
1501                                case Dataset.ARRAYINT16:
1502                                        is = a.getElementsPerItem();
1503                                        lresults = new long[is];
1504                                        for (int j = 0; j < alen; j++) {
1505                                                pos[axis] = j;
1506                                                final short[] va = (short[]) a.getObject(pos);
1507                                                for (int k = 0; k < is; k++) {
1508                                                        lresults[k] += va[k];
1509                                                }
1510                                                result.set(lresults, pos);
1511                                        }
1512                                        break;
1513                                case Dataset.ARRAYINT32:
1514                                        is = a.getElementsPerItem();
1515                                        lresults = new long[is];
1516                                        for (int j = 0; j < alen; j++) {
1517                                                pos[axis] = j;
1518                                                final int[] va = (int[]) a.getObject(pos);
1519                                                for (int k = 0; k < is; k++) {
1520                                                        lresults[k] += va[k];
1521                                                }
1522                                                result.set(lresults, pos);
1523                                        }
1524                                        break;
1525                                case Dataset.ARRAYINT64:
1526                                        is = a.getElementsPerItem();
1527                                        lresults = new long[is];
1528                                        for (int j = 0; j < alen; j++) {
1529                                                pos[axis] = j;
1530                                                final long[] va = (long[]) a.getObject(pos);
1531                                                for (int k = 0; k < is; k++) {
1532                                                        lresults[k] += va[k];
1533                                                }
1534                                                result.set(lresults, pos);
1535                                        }
1536                                        break;
1537                                case Dataset.FLOAT32: case Dataset.FLOAT64:
1538                                        double dresult = 0.;
1539                                        for (int j = 0; j < alen; j++) {
1540                                                if (!Double.isNaN(dresult)) {
1541                                                        pos[axis] = j;
1542                                                        final double x = a.getDouble(pos);
1543                                                        if (ignoreNaNs && Double.isNaN(x)) {
1544                                                                continue;
1545                                                        }
1546                                                        if (ignoreInfs && Double.isInfinite(x)) {
1547                                                                continue;
1548                                                        }
1549                                                        dresult += x;
1550                                                }
1551                                                result.set(dresult, pos);
1552                                        }
1553                                        break;
1554                                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
1555                                        is = a.getElementsPerItem();
1556                                        CompoundDataset da = (CompoundDataset) a;
1557                                        double[] dvalues = new double[is];
1558                                        dresults = new double[is];
1559                                        for (int j = 0; j < alen; j++) {
1560                                                pos[axis] = j;
1561                                                da.getDoubleArray(dvalues, pos);
1562                                                boolean okay = true;
1563                                                for (int k = 0; k < is; k++) {
1564                                                        final double val = dvalues[k];
1565                                                        if (ignoreNaNs && Double.isNaN(val)) {
1566                                                                okay = false;
1567                                                                break;
1568                                                        }
1569                                                        if (ignoreInfs && Double.isInfinite(val)) {
1570                                                                okay = false;
1571                                                                break;
1572                                                        }
1573                                                }
1574                                                if (okay) {
1575                                                        for (int k = 0; k < is; k++) {
1576                                                                dresults[k] += dvalues[k];
1577                                                        }
1578                                                }
1579                                                result.set(dresults, pos);
1580                                        }
1581                                        break;
1582                                }
1583                        }
1584                }
1585
1586                return result;
1587        }
1588
1589        /**
1590         * @param a
1591         * @return average deviation value of all items the dataset
1592         */
1593        public static Object averageDeviation(final Dataset a) {
1594                final IndexIterator it = a.getIterator();
1595                final int is = a.getElementsPerItem();
1596
1597                if (is == 1) {
1598                        double mean = (Double) a.mean();
1599                        double sum = 0.0;
1600
1601                        while (it.hasNext()) {
1602                                sum += Math.abs(a.getElementDoubleAbs(it.index) - mean);
1603                        }
1604
1605                        return sum / a.getSize();
1606                }
1607
1608                double[] means = (double[]) a.mean();
1609                double[] sums = new double[is];
1610
1611                while (it.hasNext()) {
1612                        for (int j = 0; j < is; j++)
1613                                sums[j] += Math.abs(a.getElementDoubleAbs(it.index + j) - means[j]);
1614                }
1615
1616                double n = a.getSize();
1617                for (int j = 0; j < is; j++)
1618                        sums[j] /= n;
1619
1620                return sums;
1621        }
1622
1623        /**
1624         * The residual is the sum of squared differences
1625         * @param a
1626         * @param b
1627         * @return residual value
1628         */
1629        public static double residual(final Dataset a, final Dataset b) {
1630                return a.residual(b);
1631        }
1632
1633        /**
1634         * The residual is the sum of squared differences
1635         * @param a
1636         * @param b
1637         * @param w
1638         * @return residual value
1639         */
1640        public static double weightedResidual(final Dataset a, final Dataset b, final Dataset w) {
1641                return a.residual(b, w, false);
1642        }
1643
1644        /**
1645         * Calculate approximate outlier values. These are defined as the values in the dataset
1646         * that are approximately below and above the given thresholds - in terms of percentages
1647         * of dataset size.
1648         * <p>
1649         * It approximates by limiting the number of items (given by length) used internally by
1650         * data structures - the larger this is, the more accurate will those outlier values become.
1651         * The actual thresholds used are returned in the array.
1652         * <p>
1653         * Also, the low and high values will be made distinct if possible by adjusting the thresholds
1654         * @param a
1655         * @param lo percentage threshold for lower limit
1656         * @param hi percentage threshold for higher limit
1657         * @param length maximum number of items used internally, if negative, then unlimited
1658         * @return double array with low and high values, and low and high percentage thresholds
1659         */
1660        public static double[] outlierValues(final Dataset a, double lo, double hi, final int length) {
1661                return outlierValues(a, null, true, lo, hi, length);
1662        }
1663
1664        /**
1665         * Calculate approximate outlier values. These are defined as the values in the dataset
1666         * that are approximately below and above the given thresholds - in terms of percentages
1667         * of dataset size.
1668         * <p>
1669         * It approximates by limiting the number of items (given by length) used internally by
1670         * data structures - the larger this is, the more accurate will those outlier values become.
1671         * The actual thresholds used are returned in the array.
1672         * <p>
1673         * Also, the low and high values will be made distinct if possible by adjusting the thresholds
1674         * @param a
1675         * @param mask can be null
1676         * @param value value of mask to match to include for calculation
1677         * @param lo percentage threshold for lower limit
1678         * @param hi percentage threshold for higher limit
1679         * @param length maximum number of items used internally, if negative, then unlimited
1680         * @return double array with low and high values, and low and high percentage thresholds
1681         * @since 2.1
1682         */
1683        public static double[] outlierValues(final Dataset a, final Dataset mask, final boolean value, double lo, double hi, final int length) {
1684                if (lo <= 0 || hi <= 0 || lo >= hi || hi >= 100  || Double.isNaN(lo)|| Double.isNaN(hi)) {
1685                        throw new IllegalArgumentException("Thresholds must be between (0,100) and in order");
1686                }
1687                final int size = a.getSize();
1688                int nl = Math.max((int) ((lo*size)/100), 1);
1689                if (length > 0 && nl > length)
1690                        nl = length;
1691                int nh = Math.max((int) (((100-hi)*size)/100), 1);
1692                if (length > 0 && nh > length)
1693                        nh = length;
1694
1695                IndexIterator it = mask == null ? a.getIterator() : a.getBooleanIterator(mask, value);
1696                double[] results = Math.max(nl, nh) > 640 ? outlierValuesMap(a, it, nl, nh) : outlierValuesList(a, it, nl, nh);
1697
1698                results[2] = results[2]*100./size;
1699                results[3] = 100. - results[3]*100./size;
1700                return results;
1701        }
1702
1703        static double[] outlierValuesMap(final Dataset a, final IndexIterator it, int nl, int nh) {
1704                final TreeMap<Double, Integer> lMap = new TreeMap<Double, Integer>();
1705                final TreeMap<Double, Integer> hMap = new TreeMap<Double, Integer>();
1706
1707                int ml = 0;
1708                int mh = 0;
1709                while (it.hasNext()) {
1710                        Double x = a.getElementDoubleAbs(it.index);
1711                        if (Double.isNaN(x)) {
1712                                continue;
1713                        }
1714                        Integer i;
1715                        if (ml == nl) {
1716                                Double k = lMap.lastKey();
1717                                if (x < k) {
1718                                        i = lMap.get(k) - 1;
1719                                        if (i == 0) {
1720                                                lMap.remove(k);
1721                                        } else {
1722                                                lMap.put(k, i);
1723                                        }
1724                                        i = lMap.get(x);
1725                                        if (i == null) {
1726                                                lMap.put(x, 1);
1727                                        } else {
1728                                                lMap.put(x, i + 1);
1729                                        }
1730                                }
1731                        } else {
1732                                i = lMap.get(x);
1733                                if (i == null) {
1734                                        lMap.put(x, 1);
1735                                } else {
1736                                        lMap.put(x, i + 1);
1737                                }
1738                                ml++;
1739                        }
1740
1741                        if (mh == nh) {
1742                                Double k = hMap.firstKey();
1743                                if (x > k) {
1744                                        i = hMap.get(k) - 1;
1745                                        if (i == 0) {
1746                                                hMap.remove(k);
1747                                        } else {
1748                                                hMap.put(k, i);
1749                                        }
1750                                        i = hMap.get(x);
1751                                        if (i == null) {
1752                                                hMap.put(x, 1);
1753                                        } else {
1754                                                hMap.put(x, i+1);
1755                                        }
1756                                }
1757                        } else {
1758                                i = hMap.get(x);
1759                                if (i == null) {
1760                                        hMap.put(x, 1);
1761                                } else {
1762                                        hMap.put(x, i+1);
1763                                }
1764                                mh++;
1765                        }
1766                }
1767
1768                // Attempt to make values distinct
1769                double lx = lMap.lastKey();
1770                double hx = hMap.firstKey();
1771                if (lx >= hx) {
1772                        Double h = hMap.higherKey(lx);
1773                        if (h != null) {
1774                                hx = h;
1775                                mh--;
1776                        } else {
1777                                Double l = lMap.lowerKey(hx);
1778                                if (l != null) {
1779                                        lx = l;
1780                                        ml--;
1781                                }
1782                        }
1783                        
1784                }
1785                return new double[] {lMap.lastKey(), hMap.firstKey(), ml, mh};
1786        }
1787
1788        static double[] outlierValuesList(final Dataset a, final IndexIterator it, int nl, int nh) {
1789                final List<Double> lList = new ArrayList<Double>(nl);
1790                final List<Double> hList = new ArrayList<Double>(nh);
1791
1792                double lx = Double.POSITIVE_INFINITY;
1793                double hx = Double.NEGATIVE_INFINITY;
1794
1795                while (it.hasNext()) {
1796                        double x = a.getElementDoubleAbs(it.index);
1797                        if (Double.isNaN(x)) {
1798                                continue;
1799                        }
1800                        if (x < lx) {
1801                                if (lList.size() == nl) {
1802                                        lList.remove(lx);
1803                                }
1804                                lList.add(x);
1805                                lx = Collections.max(lList);
1806                        } else if (x == lx) {
1807                                if (lList.size() < nl) {
1808                                        lList.add(x);
1809                                }
1810                        }
1811
1812                        if (x > hx) {
1813                                if (hList.size() == nh) {
1814                                        hList.remove(hx);
1815                                }
1816                                hList.add(x);
1817                                hx = Collections.min(hList);
1818                        } else if (x == hx) {
1819                                if (hList.size() < nh) {
1820                                        hList.add(x);
1821                                }
1822                        }
1823                }
1824
1825                nl = lList.size();
1826                nh = hList.size();
1827
1828                // Attempt to make values distinct
1829                if (lx >= hx) {
1830                        Collections.sort(hList);
1831                        for (double h : hList) {
1832                                if (h > hx) {
1833                                        hx = h;
1834                                        break;
1835                                }
1836                                nh--;
1837                        }
1838                        if (lx >= hx) {
1839                                Collections.sort(lList);
1840                                Collections.reverse(lList);
1841                                for (double l : lList) {
1842                                        if (l < lx) {
1843                                                lx = l;
1844                                                break;
1845                                        }
1846                                        nl--;
1847                                }
1848                        }
1849                }
1850                return new double[] {lx, hx, nl, nh};
1851        }
1852
1853        /**
1854         * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null.
1855         * @param a
1856         * @return covariance array of a
1857         */
1858        public static Dataset covariance(final Dataset a) {
1859                return covariance(a, true, false, null); 
1860        }
1861
1862        /**
1863         * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null.
1864         * @param a
1865         * @return covariance array of a
1866         * @since 2.0
1867         */
1868        public static Dataset covariance(final Dataset a, 
1869                        boolean rowvar, boolean bias, Integer ddof) {
1870                return covariance(a, null, rowvar, bias, ddof);
1871        }
1872
1873        /**
1874         * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null.
1875         * @param a
1876         * @return covariance array of a concatenated with b
1877         */
1878        public static Dataset covariance(final Dataset a, final Dataset b) {
1879                return covariance(a, b, true, false, null);
1880        }
1881
1882        /**
1883         * Calculate the covariance matrix (array) of a concatenated with b. This 
1884         * method is directly based on the implementation in numpy (cov). 
1885         * @param a Array containing multiple variable and observations. Each row represents a variable, each column an observation.
1886         * @param b An extra set of variables and observations. Must be of same type as a and have a compatible shape. 
1887         * @param rowvar When true (default), each row is a variable; when false each column is a variable.
1888         * @param bias Default normalisation is (N - 1) - N is number of observations. If set true, normalisation is (N). 
1889         * @param ddof Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof).
1890         * @return covariance array of a concatenated with b
1891         * @since 2.0
1892         */
1893        public static Dataset covariance (final Dataset a, final Dataset b, 
1894                        boolean rowvar, boolean bias, Integer ddof) {
1895                
1896                //Create a working copy of the dataset & check its rank.
1897                Dataset vars = a.clone();
1898                if (a.getRank() == 1) {
1899                        vars.setShape(1, a.getShape()[0]);
1900                }
1901                
1902                //1D of variables, so consider rows as variables
1903                if (vars.getShape()[0] == 1) {
1904                        rowvar = true;
1905                }
1906                
1907                //nr is the number of records; axis is index
1908                int nr, axis;
1909                if (rowvar) {
1910                        nr = vars.getShape()[1];
1911                        axis = 0;
1912                } else {
1913                        nr = vars.getShape()[0];
1914                        axis = 1;
1915                }
1916                
1917                //Set the reduced degrees of freedom & normalisation factor
1918                if (ddof == null) {
1919                        if (bias == false) {
1920                                ddof = 1;
1921                        } else {
1922                                ddof = 0;
1923                        }
1924                }
1925                double norm_fact = nr - ddof;
1926                if (norm_fact <= 0.) {
1927                        //TODO Some sort of warning here?
1928                        norm_fact = 0.;
1929                }
1930                
1931                //Concatenate additional set of variables with main set
1932                if (b != null) {
1933                        //Create a working copy of the dataset & check its rank.
1934                        Dataset extraVars = b.clone();
1935                        if (b.getRank() == 1) {
1936                                extraVars.setShape(1, a.getShape()[0]);
1937                        }
1938                        vars = DatasetUtils.concatenate(new Dataset[]{vars, extraVars}, axis);
1939                }
1940                
1941                //Calculate deviations & covariance matrix
1942                Dataset varsMean = vars.mean(1-axis, false);
1943                //-vars & varsMean need same shape (this is a hack!)
1944                int[] meanShape = vars.getShape();
1945                meanShape[1-axis] = 1;
1946                varsMean.setShape(meanShape);
1947                vars.isubtract(varsMean);
1948                Dataset cov;
1949                if (rowvar) {
1950                        cov = Maths.divide(LinearAlgebra.dotProduct(vars, Maths.conjugate(vars.transpose())), norm_fact).squeeze();
1951                } else {
1952                        cov = Maths.divide(LinearAlgebra.dotProduct(vars.transpose(), Maths.conjugate(vars)), norm_fact).squeeze();
1953                }
1954                return cov;
1955        }
1956}