Math.Net中Correlation类的使用说明
在介绍其使用方法之前,先介绍一下Correlation类型的实现和源码。该类型是静态类,其中的每个静态方法分别对应一种相关系数的计算,因此在使用的时候,根据需要调用相应的方法即可。其包含的内容如下,为了方便大家阅读,已经对其中的注释进行了翻译,也相当于一份说明了。
/// <summary>
/// Static helpers that compute correlation measures between two (or more) data sets.
/// </summary>
public static class Correlation
{
    /// <summary>
    /// Computes the Pearson product-moment correlation coefficient of two samples
    /// in a single pass over both sequences.
    /// </summary>
    /// <param name="dataA">Sample data A.</param>
    /// <param name="dataB">Sample data B.</param>
    /// <returns>
    /// The Pearson product-moment correlation coefficient. The final division
    /// yields NaN when the sequences are empty or either sample has zero variance.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when the two sequences do not contain the same number of elements.
    /// </exception>
    public static double Pearson(IEnumerable<double> dataA, IEnumerable<double> dataB)
    {
        int n = 0;
        double r = 0.0;

        double meanA = 0;
        double meanB = 0;
        double varA = 0;
        double varB = 0;

        using (IEnumerator<double> ieA = dataA.GetEnumerator())
        using (IEnumerator<double> ieB = dataB.GetEnumerator())
        {
            while (ieA.MoveNext())
            {
                if (!ieB.MoveNext())
                {
                    // dataB ran out before dataA did.
                    throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
                }

                double currentA = ieA.Current;
                double currentB = ieB.Current;

                // Deviations are taken from the running means *before* this sample
                // is folded in; n is incremented here and reused below.
                double deltaA = currentA - meanA;
                double scaleDeltaA = deltaA/++n;

                double deltaB = currentB - meanB;
                double scaleDeltaB = deltaB/n;

                meanA += scaleDeltaA;
                meanB += scaleDeltaB;

                // Running sums of squared deviations and of the cross deviation.
                varA += scaleDeltaA*deltaA*(n - 1);
                varB += scaleDeltaB*deltaB*(n - 1);
                r += (deltaA*deltaB*(n - 1))/n;
            }

            if (ieB.MoveNext())
            {
                // dataB still has elements after dataA is exhausted.
                throw new ArgumentOutOfRangeException("dataA", Resources.ArgumentArraysSameLength);
            }
        }

        return r/Math.Sqrt(varA*varB);
    }

    /// <summary>
    /// Computes the weighted Pearson product-moment correlation coefficient
    /// in a single pass over the three sequences.
    /// </summary>
    /// <param name="dataA">Sample data A.</param>
    /// <param name="dataB">Sample data B.</param>
    /// <param name="weights">Weight of each corresponding pair of samples.</param>
    /// <returns>The weighted Pearson product-moment correlation coefficient.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when the three sequences do not contain the same number of elements.
    /// </exception>
    public static double WeightedPearson(IEnumerable<double> dataA, IEnumerable<double> dataB, IEnumerable<double> weights)
    {
        double meanA = 0;
        double meanB = 0;
        double varA = 0;
        double varB = 0;
        double sumWeight = 0;

        double covariance = 0;

        using (IEnumerator<double> ieA = dataA.GetEnumerator())
        using (IEnumerator<double> ieB = dataB.GetEnumerator())
        using (IEnumerator<double> ieW = weights.GetEnumerator())
        {
            while (ieA.MoveNext())
            {
                if (!ieB.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
                }

                if (!ieW.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("weights", Resources.ArgumentArraysSameLength);
                }

                double xi = ieA.Current;
                double yi = ieB.Current;
                double wi = ieW.Current;

                // Total weight including the current sample.
                double temp = sumWeight + wi;

                double deltaX = xi - meanA;
                double rX = deltaX*wi/temp;
                meanA += rX;
                varA += sumWeight*deltaX*rX;

                double deltaY = yi - meanB;
                double rY = deltaY*wi/temp;
                meanB += rY;
                varB += sumWeight*deltaY*rY;

                // The cross term must use the same weight-based factor
                // (previous total weight * wi / new total weight) as the two
                // variance updates above. A count-based factor (n - 1)/n is only
                // equivalent when all weights are equal and can otherwise
                // produce coefficients outside [-1, 1].
                covariance += sumWeight*deltaX*rY;

                sumWeight = temp;
            }

            if (ieB.MoveNext())
            {
                throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
            }

            if (ieW.MoveNext())
            {
                throw new ArgumentOutOfRangeException("weights", Resources.ArgumentArraysSameLength);
            }
        }

        return covariance/Math.Sqrt(varA*varB);
    }

    /// <summary>
    /// Computes the Pearson product-moment correlation matrix of the given vectors.
    /// </summary>
    /// <param name="vectors">The data vectors; entry (i, j) of the result correlates vectors i and j.</param>
    /// <returns>The Pearson product-moment correlation matrix.</returns>
    public static Matrix<double> PearsonMatrix(params double[][] vectors)
    {
        // Start from the identity so the diagonal is already 1.
        var m = Matrix<double>.Build.DenseIdentity(vectors.Length);

        for (int i = 0; i < vectors.Length; i++)
        {
            // Only the upper triangle is computed; each value is mirrored below the diagonal.
            for (int j = i + 1; j < vectors.Length; j++)
            {
                var c = Pearson(vectors[i], vectors[j]);
                m.At(i, j, c);
                m.At(j, i, c);
            }
        }

        return m;
    }

    /// <summary>
    /// Computes the Pearson product-moment correlation matrix of the given vectors.
    /// </summary>
    /// <param name="vectors">The collection of data vectors.</param>
    /// <returns>The Pearson product-moment correlation matrix.</returns>
    public static Matrix<double> PearsonMatrix(IEnumerable<double[]> vectors)
    {
        // Reuse the argument directly when it already is a jagged array.
        return PearsonMatrix(vectors as double[][] ?? vectors.ToArray());
    }

    /// <summary>
    /// Computes the Spearman ranked correlation coefficient, i.e. the Pearson
    /// coefficient of the ranks of the two samples.
    /// </summary>
    /// <param name="dataA">Sample data A.</param>
    /// <param name="dataB">Sample data B.</param>
    /// <returns>The Spearman ranked correlation coefficient.</returns>
    public static double Spearman(IEnumerable<double> dataA, IEnumerable<double> dataB)
    {
        return Pearson(Rank(dataA), Rank(dataB));
    }

    /// <summary>
    /// Computes the Spearman ranked correlation matrix of the given vectors.
    /// </summary>
    /// <param name="vectors">The data vectors.</param>
    /// <returns>The Spearman ranked correlation matrix.</returns>
    public static Matrix<double> SpearmanMatrix(params double[][] vectors)
    {
        return PearsonMatrix(vectors.Select(Rank).ToArray());
    }

    /// <summary>
    /// Computes the Spearman ranked correlation matrix of the given vectors.
    /// </summary>
    /// <param name="vectors">The collection of data vectors.</param>
    /// <returns>The Spearman ranked correlation matrix.</returns>
    public static Matrix<double> SpearmanMatrix(IEnumerable<double[]> vectors)
    {
        return PearsonMatrix(vectors.Select(Rank).ToArray());
    }

    /// <summary>
    /// Replaces every value of a series by its rank, using
    /// <c>RankDefinition.Average</c>.
    /// </summary>
    /// <param name="series">The values to rank; may be null.</param>
    /// <returns>The ranks, or an empty array when <paramref name="series"/> is null.</returns>
    static double[] Rank(IEnumerable<double> series)
    {
        if (series == null)
        {
            return new double[0];
        }

        // WARNING: do not try to cast series to an array and use it directly,
        // as we need to sort it (inplace operation)
        var data = series.ToArray();
        return ArrayStatistics.RanksInplace(data, RankDefinition.Average);
    }
}
3.使用案例
使用非常简单,请看下面的代码。这里的数据是随机生成的,没有实际意义;实际应用中,大家按需使用即可。
// Generate the first data set "data": 1000 samples taken from a ChiSquared(5) instance.
var chiSquare = new ChiSquared(5);
Console.WriteLine(@"2. Generate 1000 samples of the ChiSquare(5) distribution");
var data = new double[1000];
for (var i = 0; i < data.Length; i++)
{
    data[i] = chiSquare.Sample();
}

// Generate the second data set "dataB": 1000 samples taken from a ChiSquared(2) instance.
var chiSquareB = new ChiSquared(2);
var dataB = new double[1000];
// Bound the loop by the array actually being filled.
for (var i = 0; i < dataB.Length; i++)
{
    dataB[i] = chiSquareB.Sample();
}

// Compute the correlation coefficients between data and dataB.
var r1 = Correlation.Pearson(data, dataB);
var r2 = Correlation.Spearman(data, dataB);