Correlation and covariance

#include <iostream> #include <cmath> #include <vector> #include <numeric> using namespace std; double uniform(void) { return double(rand()) / double(RAND_MAX); } vector<double> generate_items(int count) { vector<double> v; for(int i = 0; i < count; i++) { v.push_back(uniform()); } return v; } double sum(const vector<double>& v) { return accumulate(begin(v), end(v), 0.0); } double avg(const vector<double>& v) { return sum(v) / v.size(); } double stdev(const vector<double>& v) { vector<double> diff_squares; int n = v.size(); double v_avg = avg(v); for(double vi : v) { double diff_square = pow(vi - v_avg, 2); diff_squares.push_back(diff_square); } double factor = 1/ (double) (n-1); return sqrt(factor * sum(diff_squares)); } double covariance(const vector<double>& v1, const vector<double>& v2) { int n = v1.size(); if(v1.size() == v2.size()) { double v1_avg = avg(v1); double v2_avg = avg(v2); vector<double> diff_products; for(int i = 1; i < n; i++) { double diff_prod = (v1[i] - v1_avg) * (v2[i] - v2_avg); diff_products.push_back(diff_prod); } double factor = 1/ (double) (n - 1); return factor * sum(diff_products); } else { throw "Computing covariance requires that the two datasets are of equal length."; } } // Pearson correlation coefficient double correlation(const vector<double>& v1, const vector<double>& v2) { if(v1.size() == v2.size()) { return covariance(v1, v2) / (stdev(v1) * stdev(v2)); } else { throw "Computing correlation requires that the two datasets are of equal length."; } } int main() { const int count = 1000000; vector<double> v1, v2; v1 = generate_items(count); v2 = generate_items(count); try { cout << "Correlation: " << correlation(v1, v2) << endl; cout << "Covariance: " << covariance(v1, v2) << endl; // Correlation: -0.000784722 // Covariance: -6.53279e-05 } catch(char const* exception_msg) { cout << exception_msg << endl; } return 0; }