在介绍其使用方法之前,先介绍一下 Correlation 类型的实现和源码。该类型是一个静态类,其中的每个静态方法对应一种相关系数的计算,使用时根据需要直接调用即可。其包含的内容如下;为了方便大家阅读,其中的注释已经过翻译整理,也相当于使用说明吧。

/// <summary>
/// Correlation measures between two or more data sets.
/// </summary>
public static class Correlation
{
    /// <summary>
    /// Computes the Pearson product-moment correlation coefficient, i.e. the
    /// usual linear correlation coefficient between two samples.
    /// </summary>
    /// <param name="dataA">Sample data A. Swapping <paramref name="dataA"/> and
    /// <paramref name="dataB"/> yields the same value.</param>
    /// <param name="dataB">Sample data B. Must contain as many items as <paramref name="dataA"/>.</param>
    /// <returns>The Pearson product-moment correlation coefficient. The result is
    /// NaN when the denominator is zero (for example, for empty input).</returns>
    /// <exception cref="ArgumentOutOfRangeException">The two sequences differ in length.</exception>
    public static double Pearson(IEnumerable<double> dataA, IEnumerable<double> dataB)
    {
        int n = 0;
        double r = 0.0;

        double meanA = 0;
        double meanB = 0;
        double varA = 0;
        double varB = 0;

        using (IEnumerator<double> ieA = dataA.GetEnumerator())
        using (IEnumerator<double> ieB = dataB.GetEnumerator())
        {
            while (ieA.MoveNext())
            {
                if (!ieB.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
                }

                double currentA = ieA.Current;
                double currentB = ieB.Current;

                // Single-pass update: deltas are taken against the means of the
                // previous n - 1 items, then the running means, the two sums of
                // squared deviations and the cross-product sum are advanced.
                double deltaA = currentA - meanA;
                double scaleDeltaA = deltaA/++n;

                double deltaB = currentB - meanB;
                double scaleDeltaB = deltaB/n;

                meanA += scaleDeltaA;
                meanB += scaleDeltaB;

                varA += scaleDeltaA*deltaA*(n - 1);
                varB += scaleDeltaB*deltaB*(n - 1);
                r += (deltaA*deltaB*(n - 1))/n;
            }

            // dataA is exhausted; any remaining item in dataB means the lengths differ.
            if (ieB.MoveNext())
            {
                throw new ArgumentOutOfRangeException("dataA", Resources.ArgumentArraysSameLength);
            }
        }

        return r/Math.Sqrt(varA*varB);
    }

    /// <summary>
    /// Computes the weighted Pearson product-moment correlation coefficient.
    /// </summary>
    /// <param name="dataA">Sample data A.</param>
    /// <param name="dataB">Sample data B. Must contain as many items as <paramref name="dataA"/>.</param>
    /// <param name="weights">One weight per observation. Must contain as many items as <paramref name="dataA"/>.</param>
    /// <returns>The weighted Pearson product-moment correlation coefficient.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The three sequences differ in length.</exception>
    public static double WeightedPearson(IEnumerable<double> dataA, IEnumerable<double> dataB, IEnumerable<double> weights)
    {
        double meanA = 0;
        double meanB = 0;
        double varA = 0;
        double varB = 0;
        double sumWeight = 0;

        double covariance = 0;

        using (IEnumerator<double> ieA = dataA.GetEnumerator())
        using (IEnumerator<double> ieB = dataB.GetEnumerator())
        using (IEnumerator<double> ieW = weights.GetEnumerator())
        {
            while (ieA.MoveNext())
            {
                if (!ieB.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
                }

                if (!ieW.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("weights", Resources.ArgumentArraysSameLength);
                }

                double xi = ieA.Current;
                double yi = ieB.Current;
                double wi = ieW.Current;

                // Total weight including the current observation.
                double temp = sumWeight + wi;

                double deltaX = xi - meanA;
                double rX = deltaX*wi/temp;
                meanA += rX;
                varA += sumWeight*deltaX*rX;

                double deltaY = yi - meanB;
                double rY = deltaY*wi/temp;
                meanB += rY;
                varB += sumWeight*deltaY*rY;

                // The cross term is scaled by the weight accumulated BEFORE this
                // observation, exactly like the two variance updates above. Scaling
                // by the item count instead is only correct for equal weights and
                // can produce coefficients outside [-1, 1].
                covariance += sumWeight*deltaX*rY;

                // Advance the running weight last; the updates above need the old value.
                sumWeight = temp;
            }

            if (ieB.MoveNext())
            {
                throw new ArgumentOutOfRangeException("dataB", Resources.ArgumentArraysSameLength);
            }

            if (ieW.MoveNext())
            {
                throw new ArgumentOutOfRangeException("weights", Resources.ArgumentArraysSameLength);
            }
        }

        return covariance/Math.Sqrt(varA*varB);
    }

    /// <summary>
    /// Computes the Pearson product-moment correlation matrix.
    /// </summary>
    /// <param name="vectors">Array of sample data vectors.</param>
    /// <returns>A square matrix with one row and column per vector: ones on the
    /// diagonal and the pairwise Pearson coefficient at (i, j) and (j, i).</returns>
    public static Matrix<double> PearsonMatrix(params double[][] vectors)
    {
        // Start from the identity so the diagonal is already 1.
        var m = Matrix<double>.Build.DenseIdentity(vectors.Length);
        for (int i = 0; i < vectors.Length; i++)
        {
            // Only the upper triangle is computed; the value is mirrored below the diagonal.
            for (int j = i + 1; j < vectors.Length; j++)
            {
                var c = Pearson(vectors[i], vectors[j]);
                m.At(i, j, c);
                m.At(j, i, c);
            }
        }

        return m;
    }

    /// <summary>
    /// Computes the Pearson product-moment correlation matrix.
    /// </summary>
    /// <param name="vectors">Enumerable of sample data vectors.</param>
    /// <returns>The Pearson product-moment correlation matrix.</returns>
    public static Matrix<double> PearsonMatrix(IEnumerable<double[]> vectors)
    {
        // Reuse the argument when it already is a jagged array; otherwise materialize it once.
        return PearsonMatrix(vectors as double[][] ?? vectors.ToArray());
    }

    /// <summary>
    /// Computes the Spearman ranked correlation coefficient: the Pearson
    /// coefficient of the ranks of the two samples.
    /// </summary>
    /// <param name="dataA">Sample data series A.</param>
    /// <param name="dataB">Sample data series B.</param>
    /// <returns>The Spearman ranked correlation coefficient.</returns>
    public static double Spearman(IEnumerable<double> dataA, IEnumerable<double> dataB)
    {
        return Pearson(Rank(dataA), Rank(dataB));
    }

    /// <summary>
    /// Computes the Spearman ranked correlation matrix.
    /// </summary>
    /// <param name="vectors">Array of sample data vectors.</param>
    /// <returns>The Spearman ranked correlation matrix.</returns>
    public static Matrix<double> SpearmanMatrix(params double[][] vectors)
    {
        return PearsonMatrix(vectors.Select(Rank).ToArray());
    }

    /// <summary>
    /// Computes the Spearman ranked correlation matrix.
    /// </summary>
    /// <param name="vectors">Enumerable of sample data vectors.</param>
    /// <returns>The Spearman ranked correlation matrix.</returns>
    public static Matrix<double> SpearmanMatrix(IEnumerable<double[]> vectors)
    {
        return PearsonMatrix(vectors.Select(Rank).ToArray());
    }

    /// <summary>
    /// Replaces every value of a series by its rank, using
    /// <c>RankDefinition.Average</c>.
    /// </summary>
    /// <param name="series">The series to rank; may be null.</param>
    /// <returns>The ranks of the series, or an empty array when <paramref name="series"/> is null.</returns>
    static double[] Rank(IEnumerable<double> series)
    {
        if (series == null)
        {
            return new double[0];
        }

        // WARNING: do not try to cast series to an array and use it directly,
        // as we need to sort it (inplace operation)
        var data = series.ToArray();
        return ArrayStatistics.RanksInplace(data, RankDefinition.Average);
    }
}

3.使用案例

使用非常简单,请看下面的代码。这里的数据是随机生成的,本身没有实际意义;实际使用时,请按需替换为自己的数据。

 1 //先生成数据集合data 2 var chiSquare = new ChiSquared(5);
 3 Console.WriteLine(@"2. Generate 1000 samples of the ChiSquare(5) distribution");
 4 var data = new double[1000];
 5 for (var i = 0; i < data.Length; i++)
 6 {
 7     data[i] = chiSquare.Sample();
 8 }
 910 //生成数据集合dataB11 var chiSquareB = new ChiSquared(2);
12 var dataB = new double[1000];
13 for (var i = 0; i < data.Length; i++)
14 {
15     dataB[i] = chiSquareB.Sample();
16 }
1718 // 5. 计算data和dataB的相关系数19 var r1 =  Correlation.Pearson(data, dataB);
20 var r2 = Correlation.Spearman(data, dataB);