|
Java example source code file (OneWayAnova.java)
The OneWayAnova.java Java example source code/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math3.stat.inference; import java.util.ArrayList; import java.util.Collection; import org.apache.commons.math3.distribution.FDistribution; import org.apache.commons.math3.exception.ConvergenceException; import org.apache.commons.math3.exception.DimensionMismatchException; import org.apache.commons.math3.exception.MaxCountExceededException; import org.apache.commons.math3.exception.NullArgumentException; import org.apache.commons.math3.exception.OutOfRangeException; import org.apache.commons.math3.exception.util.LocalizedFormats; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.commons.math3.util.MathUtils; /** * Implements one-way ANOVA (analysis of variance) statistics. * * <p> Tests for differences between two or more categories of univariate data * (for example, the body mass index of accountants, lawyers, doctors and * computer programmers). When two categories are given, this is equivalent to * the {@link org.apache.commons.math3.stat.inference.TTest}. * </p>
* <ul>
* <li>The categoryData <code>Collection</code> must contain
* <code>double[]</code> arrays.</li>
* <li> There must be at least two <code>double[]</code> arrays in the
* <code>categoryData</code> collection and each of these arrays must
* contain at least two values.</li></ul>
* <p>This implementation computes the F statistic using the definitional
* formula<pre>
*   F = msbg/mswg</pre>
* where<pre>
*  msbg = between group mean square
*  mswg = within group mean square</pre>
* are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
* here</a></p>
*
* @param categoryData <code>Collection</code> of <code>double[]</code>
* arrays each containing data for one category
* @return Fvalue
* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
* @throws DimensionMismatchException if the length of the <code>categoryData</code>
* array is less than 2 or a contained <code>double[]</code> array does not have
* at least two values
*/
public double anovaFValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException {
    // The statistics are computed by anovaStats; only the F field of the
    // returned holder is handed back to the caller.
    return anovaStats(categoryData).F;
}
/**
* Computes the ANOVA P-value for a collection of <code>double[]
* arrays.
*
* <p>Preconditions:
Collection must contain
* <code>double[] arrays.
* <li> There must be at least two double[] arrays in the
* <code>categoryData collection and each of these arrays must
* contain at least two values.</li>
* This implementation uses the
* {@link org.apache.commons.math3.distribution.FDistribution
* commons-math F Distribution implementation} to estimate the exact
* p-value, using the formula<pre>
* p = 1 - cumulativeProbability(F)</pre>
* where <code>F is the F value and
Collection must contain
* {@link SummaryStatistics}.</li>
* <li> There must be at least two {@link SummaryStatistics} in the
* <code>categoryData collection and each of these statistics must
* contain at least two values.</li>
* This implementation uses the
* {@link org.apache.commons.math3.distribution.FDistribution
* commons-math F Distribution implementation} to estimate the exact
* p-value, using the formula<pre>
* p = 1 - cumulativeProbability(F)</pre>
* where <code>F is the F value and
* <li>The categoryData <code>Collection</code> must contain
* <code>double[]</code> arrays.</li>
* <li> There must be at least two <code>double[]</code> arrays in the
* <code>categoryData</code> collection and each of these arrays must
* contain at least two values.</li>
* <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
* </li>
* This implementation uses the
* {@link org.apache.commons.math3.distribution.FDistribution
* commons-math F Distribution implementation} to estimate the exact
* p-value, using the formula<pre>
* p = 1 - cumulativeProbability(F)</pre>
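* where <code>F</code> is the F value and <code>cumulativeProbability</code>
* is the cumulative probability of that F distribution.
*
* @param categoryData <code>Collection</code> of <code>double[]</code>
* arrays each containing data for one category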
* @param alpha significance level of the test
* @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
* @throws DimensionMismatchException if the length of the <code>categoryData</code>
* array is less than 2 or a contained <code>double[]</code> array does not have
* at least two values
* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
* @throws ConvergenceException if the p-value can not be computed due to a convergence error
* @throws MaxCountExceededException if the maximum number of iterations is exceeded
*/
public boolean anovaTest(final Collection<double[]> categoryData,
                         final double alpha)
    throws NullArgumentException, DimensionMismatchException,
    OutOfRangeException, ConvergenceException, MaxCountExceededException {

    // The significance level must lie in (0, 0.5]. The check is phrased as
    // "not inside the range" rather than "below or above the range" because
    // every ordered comparison against NaN is false: the previous form let a
    // NaN alpha pass validation and then silently returned false.
    final boolean alphaInRange = (alpha > 0) && (alpha <= 0.5);
    if (!alphaInRange) {
        throw new OutOfRangeException(
            LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
            alpha, 0, 0.5);
    }

    // The null hypothesis is rejected only when the p-value is strictly
    // smaller than the requested significance level.
    return anovaPValue(categoryData) < alpha;
}
/**
* This method actually does the calculations (except P-value).
*
* @param categoryData <code>Collection</code> of {@link SummaryStatistics},
* each containing data for one category
* @param allowOneElementData if true, allow computation for one category
* only or for one data element per category
* @return computed AnovaStats
* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
* @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
* categories is less than 2 or a contained SummaryStatistics does not contain
* at least two values
*/
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    // Strict mode: at least two categories, each holding at least two values.
    if (!allowOneElementData) {
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }
        for (final SummaryStatistics category : categoryData) {
            if (category.getN() <= 1) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) category.getN(), 2);
            }
        }
    }

    // Accumulate the grand totals together with the within-group pieces.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();
        final int count = (int) category.getN();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        // Sum of squared deviations of this category about its own mean.
        withinSs += sumSq - ((sum * sum) / count);
    }

    // Total sum of squares about the grand mean; the between-group part is
    // whatever remains after removing the within-group part.
    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;

    final double meanSquareBetween = betweenSs / betweenDf;
    final double meanSquareWithin = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, meanSquareBetween / meanSquareWithin);
}
/**
Convenience class to pass dfbg,dfwg,F values around within OneWayAnova.
No get/set methods provided.
*/
private static class AnovaStats {

    /** Between-groups degrees of freedom (numerator). */
    private final int dfbg;

    /** Within-groups degrees of freedom (denominator). */
    private final int dfwg;

    /** Value of the F statistic. */
    private final double F;

    /**
     * Stores the three values; the fields are read directly, no accessors exist.
     *
     * @param betweenGroupsDf degrees of freedom in numerator (between groups)
     * @param withinGroupsDf degrees of freedom in denominator (within groups)
     * @param fValue the F statistic
     */
    private AnovaStats(final int betweenGroupsDf, final int withinGroupsDf, final double fValue) {
        dfbg = betweenGroupsDf;
        dfwg = withinGroupsDf;
        F = fValue;
    }
}
}
Other Java examples (source code examples)Here is a short list of links related to this Java OneWayAnova.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.