23 #ifndef O2SCL_VEC_STATS_H 24 #define O2SCL_VEC_STATS_H 39 #include <o2scl/err_hnd.h> 40 #include <o2scl/vector.h> 42 #ifndef DOXYGEN_NO_O2NS 55 template<
class vec_t>
double vector_mean(
size_t n,
const vec_t &data) {
57 for(
size_t i=0;i<n;i++) {
58 mean+=(data[i]-mean)/(i+1);
89 for(
size_t i=0;i<n;i++) {
90 long double delta=(data[i]-mean);
91 var+=(delta*delta-var)/(i+1);
108 template<
class vec_t>
127 template<
class vec_t>
131 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
135 double var=vector_variance_fmean<vec_t>(n,data,mean);
153 template<
class vec_t>
174 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
178 double mean=vector_mean<vec_t>(n,data);
179 double var=vector_variance_fmean<vec_t>(n,data,mean);
214 template<
class vec_t>
217 double sd=vector_variance_fmean<vec_t>(n,data,mean);
218 return std::sqrt(sd);
235 template<
class vec_t>
256 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
260 double mean=vector_mean<vec_t>(n,data);
261 double var=vector_variance_fmean<vec_t>(n,data,mean);
262 return std::sqrt(var*n/(n-1));
300 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
304 double sd=vector_variance_fmean<vec_t>(n,data,mean);
305 return std::sqrt(sd*n/(n-1));
322 template<
class vec_t>
double vector_stddev(
const vec_t &data,
double mean) {
343 if (n==0)
return 0.0;
346 for(
size_t i=0;i<n;i++) {
347 sum+=fabs(data[i]-mean);
386 template<
class vec_t>
388 double mean=vector_mean<vec_t>(n,data);
407 template<
class vec_t>
428 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data,
429 double mean,
double stddev) {
430 long double skew=0.0;
431 for(
size_t i=0;i<n;i++) {
432 long double x=(data[i]-mean)/stddev;
433 skew+=(x*x*x-skew)/(i+1);
455 double mean,
double stddev) {
475 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data) {
476 double mean=vector_mean<vec_t>(n,data);
477 double sd=vector_stddev<vec_t>(n,data,mean);
517 template<
class vec_t>
521 for(
size_t i=0;i<n;i++) {
522 long double x=(data[i]-mean)/stddev;
523 avg+=(x*x*x*x-avg)/(i+1);
544 template<
class vec_t>
567 double mean=vector_mean<vec_t>(n,data);
568 double sd=vector_stddev<vec_t>(n,data,mean);
608 template<
class vec_t>
612 O2SCL_ERR2(
"Cannot compute lag1 with less than 2 elements",
617 long double v=(data[0]-mean)*(data[0]-mean);
618 for(
size_t i=1;i<n;i++) {
619 long double delta0=data[i-1]-mean;
620 long double delta1=data[i]-mean;
621 q+=(delta0*delta1-q)/(i+1);
622 v+=(delta1*delta1-v)/(i+1);
644 template<
class vec_t>
666 (
size_t n,
const vec_t &data) {
667 double mean=vector_mean<vec_t>(n,data);
704 template<
class vec_t>
713 long double q=0.0, v=0.0;
714 for(
size_t i=0;i<k;i++) {
715 v+=(data[i]-mean)*(data[i]-mean)/(i+1);
717 for(
size_t i=k;i<n;i++) {
718 long double delta0=data[i-k]-mean;
719 long double delta1=data[i]-mean;
720 q+=(delta0*delta1-q)/(i+1);
721 v+=(delta1*delta1-v)/(i+1);
739 template<
class vec_t>
759 (
size_t n,
const vec_t &data,
size_t k) {
760 double mean=vector_mean<vec_t>(n,data);
778 (
const vec_t &data,
size_t k) {
788 (
const vec_t &data, resize_vec_t &ac_vec) {
789 size_t kmax=data.size()/2;
793 for(
size_t k=1;k<kmax;k++) {
825 (
const vec_t &ac_vec, resize_vec_t &five_tau_over_M) {
826 five_tau_over_M.resize(0);
829 for (
size_t M=1;M<ac_vec.size();M++) {
831 for(
size_t s=1;s<=M;s++) {
834 double val=(1.0+2.0*sum)/((
double)M)*5.0;
835 if (len_set==
false && val<=1.0) {
839 five_tau_over_M.push_back(val);
860 template<
class vec_t,
class vec2_t>
862 double mean1,
double mean2) {
864 for(
size_t i=0;i<n;i++) {
865 double delta1=(data1[i]-mean1);
866 double delta2=(data2[i]-mean2);
867 covar+=(delta1*delta2-covar)/(i+1);
869 return covar*n/(n-1);
888 template<
class vec_t,
class vec2_t>
890 double mean1,
double mean2) {
911 template<
class vec_t,
class vec2_t>
913 const vec2_t &data2) {
915 double mean1=vector_mean<vec_t>(n,data1);
916 double mean2=vector_mean<vec_t>(n,data2);
917 for(
size_t i=0;i<n;i++) {
918 long double delta1=(data1[i]-mean1);
919 long double delta2=(data2[i]-mean2);
920 covar+=(delta1*delta2-covar)/(i+1);
922 return covar*n/(n-1);
942 template<
class vec_t,
class vec2_t>
944 const vec2_t &data2) {
967 template<
class vec_t,
class vec2_t>
969 const vec2_t &data2) {
973 O2SCL_ERR2(
"Cannot compute correlation with no elements",
979 double sum_cross=0.0;
981 double delta_x, delta_y;
982 double mean_x, mean_y;
996 for (i=1; i < n; ++i) {
998 delta_x=data1[i] - mean_x;
999 delta_y=data2[i] - mean_y;
1000 sum_xsq += delta_x * delta_x * ratio;
1001 sum_ysq += delta_y * delta_y * ratio;
1002 sum_cross += delta_x * delta_y * ratio;
1003 mean_x += delta_x / (i + 1.0);
1004 mean_y += delta_y / (i + 1.0);
1007 r=sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
1031 template<
class vec_t,
class vec2_t>
1033 const vec2_t &data2) {
1054 template<
class vec_t,
class vec2_t>
1056 size_t n2,
const vec2_t &data2) {
1057 double var1=vector_variance<vec_t>(n1,data1);
1058 double var2=vector_variance<vec2_t>(n2,data2);
1059 return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
1079 template<
class vec_t,
class vec2_t>
1081 const vec2_t &data2) {
1104 template<
class vec_t>
1108 double index=f*(n-1);
1109 size_t lhs=((size_t)index);
1110 double delta=index-lhs;
1111 if (n==0)
return 0.0;
1112 if (lhs==n-1)
return data[lhs];
1113 return (1-delta)*data[lhs]+delta*data[lhs+1];
1135 template<
class vec_t>
1137 return vector_quantile_sorted<vec_t>(data.size(),data,f);
1156 template<
class vec_t>
1159 if (n==0)
return 0.0;
1164 if (lhs==rhs)
return data[lhs];
1166 return (data[lhs]+data[rhs])/2.0;
1185 template<
class vec_t>
1187 return vector_median_sorted<vec_t>(data.size(),data);
1201 template<
class vec_t,
class vec2_t,
class vec3_t>
1203 const vec3_t &err) {
1205 for(
size_t i=0;i<n;i++) {
1206 chi2+=pow((obs[i]-exp[i])/err[i],2.0);
1222 template<
class vec_t,
class vec2_t,
class vec3_t>
1224 const vec3_t &err) {
1225 return vector_chi_squared<vec_t,vec2_t,vec3_t>(obs.size(),obs,exp,err);
1232 (
size_t n,
const vec_t &v) {
1233 if (n<=1)
return 0.0;
1266 (
size_t n,
const vec_t &v,
double f) {
1268 if (f<0.0 || f>1.0) {
1269 O2SCL_ERR(
"Invalid fraction for vector_sorted_quantile",
1273 double index=f*(n-1);
1274 size_t lhs=(int)index;
1275 double delta=index-lhs;
1285 result=(1-delta)*v[lhs]+delta*v[(lhs+1)];
1295 (
size_t n, vec_t &v) {
1296 vector_sort<vec_t,double>(n,v);
1342 template<
class vec_t,
class vec2_t>
1345 long double wmean=0.0;
1347 for(
size_t i=0;i<n;i++) {
1348 double wi=weights[i];
1351 wmean+=(data[i]-wmean)*(wi/W);
1373 template<
class vec_t,
class vec2_t>
1375 return wvector_mean<vec_t,vec2_t>(data.size(),data,weights);
1390 for(
size_t i=0;i<n;i++) {
1391 double wi=weights[i];
1409 return wvector_factor<vec_t>(weights.size(),weights);
1425 template<
class vec_t,
class vec2_t>
1427 const vec2_t &weights,
double wmean) {
1428 long double wvariance=0.0;
1430 for(
size_t i=0;i<n;i++) {
1431 double wi=weights[i];
1433 const long double delta=data[i]-wmean;
1435 wvariance+=(delta*delta-wvariance)*(wi/W);
1455 template<
class vec_t,
class vec2_t>
1457 const vec2_t &weights,
double wmean) {
1467 template<
class vec_t,
class vec2_t>
1469 const vec2_t &weights,
double wmean) {
1472 (n,data,weights,wmean);
1474 const double wvar=scale*variance;
1484 template<
class vec_t,
class vec2_t>
1486 const vec2_t &weights,
double wmean) {
1487 return wvector_variance<vec_t,vec2_t>(data.size(),data,weights,wmean);
1496 template<
class vec_t,
class vec2_t>
1498 const vec2_t &weights) {
1501 return wvector_variance<vec_t,vec2_t>(n,data,weights,wmean);
1510 template<
class vec_t,
class vec2_t>
1519 template<
class vec_t,
class vec2_t,
class vec3_t>
1521 const vec2_t &data2,
1522 const vec3_t &weights) {
1527 for(
size_t i=0;i<n;i++) {
1528 double wi=weights[i];
1531 double delta1=(data1[i]-mean1);
1532 double delta2=(data2[i]-mean2);
1533 covar+=(wi/W)*(delta1*delta2-covar);
1544 template<
class vec_t,
class vec2_t,
class vec3_t>
1546 const vec3_t &weights) {
1547 return wvector_covariance<vec_t,vec2_t,vec3_t>
1548 (data1.size(),data1,data2,weights);
1557 template<
class vec_t,
class vec2_t>
1559 const vec2_t &weights,
double wmean) {
1569 template<
class vec_t,
class vec2_t>
1571 const vec2_t &weights,
double wmean) {
1572 return wvector_stddev_fmean<vec_t,vec2_t>
1573 (data.size(),data,weights,wmean);
1582 template<
class vec_t,
class vec2_t>
1584 const vec2_t &weights) {
1595 template<
class vec_t,
class vec2_t>
1606 template<
class vec_t,
class vec2_t>
1608 const vec2_t &weights,
double wmean) {
1610 (n,data,weights,wmean);
1612 double wvar=scale*variance;
1622 template<
class vec_t,
class vec2_t>
1624 const vec2_t &weights,
double wmean) {
1625 return wvector_stddev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1634 template<
class vec_t,
class vec2_t>
1636 const vec2_t &weights,
double wmean) {
1637 long double wtss=0.0;
1638 for(
size_t i=0;i<n;i++) {
1639 double wi=weights[i];
1641 const long double delta=data[i]-wmean;
1642 wtss+=wi*delta*delta;
1655 template<
class vec_t,
class vec2_t>
1657 const vec2_t &weights,
double wmean) {
1658 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights,wmean);
1667 template<
class vec_t,
class vec2_t>
1669 const vec2_t &weights) {
1681 template<
class vec_t,
class vec2_t>
1683 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights);
1691 template<
class vec_t,
class vec2_t>
1694 long double wabsdev=0.0;
1696 for(
size_t i=0;i<n;i++) {
1697 double wi=weights[i];
1699 const long double delta=fabs(data[i]-wmean);
1701 wabsdev+=(delta-wabsdev)*(wi/W);
1712 template<
class vec_t,
class vec2_t>
1715 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1723 template<
class vec_t,
class vec2_t>
1725 const vec2_t &weights) {
1736 template<
class vec_t,
class vec2_t>
1738 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights);
1747 template<
class vec_t,
class vec2_t>
1749 double wmean,
double wsd) {
1750 long double wskew=0.0;
1752 for(
size_t i=0;i<n;i++) {
1753 double wi=weights[i];
1755 const long double x=(data[i]-wmean)/wsd;
1757 wskew+=(x*x*x-wskew)*(wi/W);
1769 template<
class vec_t,
class vec2_t>
1771 double wmean,
double wsd) {
1772 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights,wmean,wsd);
1781 template<
class vec_t,
class vec2_t>
1794 template<
class vec_t,
class vec2_t>
1796 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights);
1805 template<
class vec_t,
class vec2_t>
1807 double wmean,
double wsd) {
1808 long double wavg=0.0;
1810 for(
size_t i=0;i<n;i++) {
1811 double wi=weights[i];
1813 const long double x=(data[i]-wmean)/wsd;
1815 wavg+=(x*x*x*x-wavg)*(wi/W);
1827 template<
class vec_t,
class vec2_t>
1829 double wmean,
double wsd) {
1830 return wvector_kurtosis<vec_t,vec2_t>
1831 (data.size(),data,weights,wmean,wsd);
1840 template<
class vec_t,
class vec2_t>
1842 const vec2_t &weights) {
1854 template<
class vec_t,
class vec2_t>
1856 return wvector_kurtosis<vec_t,vec2_t>(data,weights);
1864 template<
class vec_t,
class vec2_t>
1866 const vec2_t &mult,
size_t k,
1870 for(
size_t i=0;i<n;i++) {
1871 size_t m=((size_t)(mult[i]*(1.0+1.0e-10)));
1874 "in vector_lagk_autocorr_mult().",
exc_einval);
1881 "in vector_lagk_autocorr_mult().",
exc_einval);
1884 long double q=0.0, v=0.0;
1885 size_t im=0, ix=0, im2=0, ix2=0;
1886 for(
size_t i=0;i<k;i++) {
1887 v+=(data[ix]-mean)*(data[ix]-mean)/(i+1);
1889 if (im>=((
size_t)(mult[ix]*(1.0+1.0e-10)))) {
1894 for(
size_t i=k;i<n2;i++) {
1895 long double delta0=data[ix2]-mean;
1896 long double delta1=data[ix]-mean;
1897 q+=(delta0*delta1-q)/(i+1);
1898 v+=(delta1*delta1-v)/(i+1);
1900 if (im>=((
size_t)(mult[ix]*(1.0+1.0e-10)))) {
1905 if (im2>=((
size_t)(mult[ix2]*(1.0+1.0e-10)))) {
1916 template<
class vec_t,
class vec2_t>
1918 const vec2_t &mult,
size_t k) {
1926 template<
class vec_t,
class vec2_t>
1928 const vec2_t &mult,
size_t k,
1935 template<
class vec_t,
class vec2_t>
1937 const vec2_t &mult,
size_t k) {
1944 template<
class vec_t,
class vec2_t,
class resize_vec_t>
1946 (
size_t n2,
const vec_t &data,
const vec2_t &mult, resize_vec_t &ac_vec) {
1949 for(
size_t i=0;i<n2;i++) {
1950 n+=((size_t)(mult[i]*(1.0+1.0e-10)));
1955 ac_vec.resize(kmax);
1957 for(
size_t k=1;k<kmax;k++) {
1965 template<
class vec_t,
class vec2_t,
class resize_vec_t>
1967 (
const vec_t &data,
const vec2_t &mult, resize_vec_t &ac_vec) {
1974 #ifndef DOXYGEN_NO_O2NS double vector_lagk_autocorr(size_t n, const vec_t &data, size_t k, double mean)
Lag-k autocorrelation.
double vector_mean(size_t n, const vec_t &data)
Compute the mean of the first n elements of a vector.
double vector_sorted_quantile(size_t n, const vec_t &v, double f)
Obtain a quantile from a sorted vector.
double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights)
Compute the mean of weighted data.
The main O<span style='position: relative; top: 0.3em; font-size: 0.8em'>2</span>scl O$_2$scl names...
double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the absolute deviation of data about a specified mean.
void vector_autocorr_vector(const vec_t &data, resize_vec_t &ac_vec)
Construct an autocorrelation vector.
double wvector_variance_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with a mean known in advance.
double vector_bin_size_scott(size_t n, const vec_t &v)
Optimal bin size using Scott's method for the first n elements.
double vector_correlation(size_t n, const vec_t &data1, const vec2_t &data2)
Pearson's correlation.
double vector_variance_fmean(size_t n, const vec_t &data, double mean)
Compute variance with specified mean known in advance.
invalid argument supplied by user
double vector_skew(size_t n, const vec_t &data, double mean, double stddev)
Skewness with specified mean and standard deviation.
double vector_kurtosis(size_t n, const vec_t &data, double mean, double stddev)
Kurtosis with specified mean and standard deviation.
double vector_absdev(size_t n, const vec_t &data, double mean)
Absolute deviation from the specified mean.
double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the kurtosis of data with specified mean and standard deviation.
void vector_autocorr_vector_mult(size_t n2, const vec_t &data, const vec2_t &mult, resize_vec_t &ac_vec)
Construct an autocorrelation vector using a multiplier using the first n2 elements of vectors data an...
double vector_variance(size_t n, const vec_t &data, double mean)
Compute the variance with specified mean.
double vector_pvariance(size_t n1, const vec_t &data1, size_t n2, const vec2_t &data2)
The pooled variance of two vectors.
double vector_median_sorted(size_t n, const vec_t &data)
Return the median of sorted (ascending or descending) data.
double vector_stddev(size_t n, const vec_t &data)
Standard deviation with specified mean.
double wvector_stddev_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the standard deviation of a weighted vector with a mean known in advance. ...
double wvector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, const vec3_t &weights)
The weighted covariance of two vectors.
double wvector_stddev(size_t n, const vec_t &data, const vec2_t &weights)
Compute the standard deviation of a weighted vector where mean is computed automatically.
double wvector_variance(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with specified mean.
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
double wvector_sumsq(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the weighted sum of squares of data about the specified weighted mean.
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
double vector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, double mean1, double mean2)
Compute the covariance of two vectors.
double vector_quantile_sorted(size_t n, const vec_t &data, const double f)
Quantile from sorted data (ascending only)
double vector_lag1_autocorr(size_t n, const vec_t &data, double mean)
Lag-1 autocorrelation.
double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp, const vec3_t &err)
Compute the chi-squared statistic.
double vector_lagk_autocorr_mult(size_t n, const vec_t &data, const vec2_t &mult, size_t k, double mean)
Lag-k autocorrelation for the first n elements with a vector multiplier given the mean...
double wvector_factor(size_t n, const vec_t &weights)
Compute a normalization factor for weighted data.
double vector_bin_size_freedman(size_t n, vec_t &v)
Optimal bin size using the Freedman-Diaconis rule for the first n elements.
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the skewness of data with specified mean and standard deviation.
double vector_stddev_fmean(size_t n, const vec_t &data, double mean)
Standard deviation with specified mean known in advance.
size_t vector_autocorr_tau(const vec_t &ac_vec, resize_vec_t &five_tau_over_M)
Use the Goodman method to compute the autocorrelation length.