使用 Armadillo 库在 C++ 中计算 Dunn 指数(Dunn's Index)时出错

Error calculating Dunn's Index in c++ using Armadillo library

提问人:Duplic8e 提问时间:10/23/2023 最后编辑:desertnautDuplic8e 更新时间:10/23/2023 访问量:40

问:

我一直在尝试使用 Armadillo 库,为我正在开发的一个更大的算法计算 Dunn 指数(Dunn's Index)。每次运行代码时,除了正常输出之外,我还会收到一条提示索引越界的错误信息,最终结果为:Dunns index:-nan(ind)。下面给出了我的代码以及用于测试的 main 函数。代码中还包含我为了排查问题而额外添加的一些检查。

#include <iostream>
#include <armadillo>

using namespace std;
using namespace arma;

double dunns(int clusters_number, const mat& distM, const uvec& ind) {
    // Determine the number of unique clusters
    int i = max(ind);
    vec denominator;

    for (int i2 = 1; i2 <= i; ++i2) {
        uvec indi = find(ind == i2);
        uvec indj = find(ind != i2);

        // Check if indi and indj are not empty
        if (!indi.is_empty() && !indj.is_empty()) {
            mat temp;

            // Check if indices are within bounds before submatrix extraction
            if (indi.max() < distM.n_rows && indj.max() < distM.n_cols) {
                temp = distM.submat(indi, indj);
                denominator = join_cols(denominator, vectorise(temp));
            }
            else {
                // Debugging: Print indices that caused the error
                cout << "Error: Indices out of bounds for Cluster " << i2 << endl;
            }
        }
    }

    double num = 0.0;  // Initialize num to 0.0

    // Check if denominator is not empty before finding the minimum
    if (!denominator.is_empty()) {
        num = min(denominator);
    }

    mat neg_obs = zeros<mat>(distM.n_rows, distM.n_cols);

    for (int ix = 1; ix <= i; ++ix) {
        uvec indxs = find(ind == ix);

        // Check if indxs is not empty
        if (!indxs.is_empty()) {
            // Check if indices are within bounds before setting elements
            if (indxs.max() < distM.n_rows) {
                neg_obs.submat(indxs, indxs).fill(1.0);
            }
        }
    }

    // Print intermediate values
    cout << "Intermediate Values:" << endl;
    cout << "Denominator: " << denominator << endl;
    cout << "num: " << num << endl;

    mat dem = neg_obs % distM;
    double max_dem = max(max(dem));

    // Print max_dem
    cout << "max_dem: " << max_dem << endl;

    double DI = num / max_dem;
    return DI;
}

int main() {
    // New inputs for testing
    int clusters_number = 2;

    // Modified dissimilarity matrix (4x4)
    mat distM(4, 4);
    distM << 0.0 << 1.0 << 2.0 << 3.0
        << 1.0 << 0.0 << 1.0 << 2.0
        << 2.0 << 1.0 << 0.0 << 1.0
        << 3.0 << 2.0 << 1.0 << 0.0;

    // Modified cluster indices (4x1)
    arma::uvec ind;
    ind << 1 << 1 << 2 << 2;

    // Print the input dissimilarity matrix
    cout << "Dissimilarity Matrix:" << endl;
    cout << distM << endl;

    // Print the cluster indices
    cout << "Cluster Indices:" << endl;
    cout << ind << endl;

    double DI = dunns(clusters_number, distM, ind);

    cout << "Dunn's Index: " << DI << endl;

    return 0;
}

数据格式:似乎正确。我使用 double 作为相异矩阵,使用 arma::uvec 作为聚类索引,这是合适的。

数据一致性:相异矩阵和聚类索引中数据点的对齐方式似乎是正确的。矩阵中的每个数据点对应于聚类索引中的一个条目。

相异矩阵中似乎没有任何空聚类或缺失的数据点。数据似乎是完整的。

鉴于数据似乎正确对齐,并且没有明显的空聚类或缺失数据问题,令人困惑的是,在子矩阵提取过程中我仍然遇到“索引越界”错误。

C++ 机器学习 层次聚类 Armadillo

评论


答: 暂无答案