开发者

I need some help on these formulas implementation

开发者 https://www.devze.com 2023-02-23 23:53 出处:网络
I need some help with implementing these formulas. I think I implemented them correctly but for some reason I don't get the expected results:

I need some help with implementing these formulas. I think I implemented them correctly but for some reason I don't get the expected results:

I need some help on these formulas implementation

This is the code for the NMI, I, and H functions, respectively. Are the formulas implemented correctly? Thanks.

// N: total number of entries, counted over all event lists.
int totalN = getTotalN(events);

// Entropy of each of the two groupings.
double h1 = H(clusters, totalN);
double h2 = H(events, totalN);

// Mutual information between the two groupings.
double valueI = I(clusters, events, totalN);

// NMI = I / ((H1 + H2) / 2), i.e. I normalized by the mean of the two entropies.
double value_NMI = valueI / ((h1 + h2) / 2.0);
System.out.println("NMI: " + value_NMI);

/**
 * Accumulates, over pairs of one event list and one cluster list, the term
 * (common / totalN) * ln((common * totalN) / (|event| * |cluster|)),
 * where "common" is the number of entries of the event list that also occur
 * in the cluster list. Pairs without any common entry contribute nothing.
 *
 * NOTE(review): it2 is created once, before the outer loop, and is never
 * re-created. The inner while loop therefore exhausts it during the first
 * outer iteration, so only the first event visited is ever compared against
 * the clusters. A full double sum needs a fresh iterator over clusters for
 * every event.
 *
 * @param clusters map whose values are the cluster lists
 * @param events   map whose values are the event lists
 * @param totalN   the normalizer N used in both factors of each term
 * @return the accumulated sum (natural logarithm)
 */
static public double I(HashMap<String, ArrayList<String>> clusters, HashMap<String, ArrayList<String>> events, int totalN) {

        // One iterator per map; note that both are created exactly once here.
        Iterator<Map.Entry<String, ArrayList<String>>> it = events.entrySet().iterator();
        Iterator<Map.Entry<String, ArrayList<String>>> it2 = clusters.entrySet().iterator();

        String key;
        ArrayList<String> event;
        ArrayList<String> cluster;

        double valueI = 0;
        while (it.hasNext()) {

            Map.Entry<String, ArrayList<String>> mapItem = it.next();
            // key is assigned but not read by any active code below.
            key = mapItem.getKey();

            //if cluster doesn't exist
            //if(!clusters.containsKey(key)) continue;
            //cluster = clusters.get(key);

            event = mapItem.getValue();

            // Consumes it2; once it is exhausted this loop body is skipped
            // for every later event (see the review note above).
            while (it2.hasNext()) {
                Map.Entry<String, ArrayList<String>> mapItem2 = it2.next();
                cluster = mapItem2.getValue();

            // Count the entries of event that have at least one equal entry in cluster.
            float common_docs = 0;
            for (int i=0; i< event.size(); i++) {   
                for (int j=0; j< cluster.size(); j++) { 
                    if (event.get(i).equals(cluster.get(j))) {
                        common_docs = common_docs + 1;
                        // Stop at the first match so each event entry counts at most once.
                        break;
                    }
                }
            }


            // Skip disjoint pairs: their term is 0 and ln(0) must not be evaluated.
            // The arithmetic inside the log is carried out in float.
            if (common_docs != 0) valueI = valueI + ( ( common_docs / (float) totalN) * Math.log((common_docs * totalN) / (float) (event.size() * cluster.size())) );       
            }
        }

        return valueI;
    }


    /**
     * Computes the entropy H = -sum_k (n_k / N) * ln(n_k / N) of a grouping,
     * where n_k is the size of the k-th list in the map and N is totalN.
     *
     * Empty lists are skipped: their probability is 0 and, by the usual
     * convention 0 * ln 0 = 0, they contribute nothing. Evaluating the term
     * literally would yield 0 * -Infinity = NaN and poison the whole sum.
     *
     * @param clusters map whose values are the lists forming the grouping
     * @param totalN   total number of entries N used to turn sizes into ratios
     * @return the entropy, using the natural logarithm; 0.0 for an empty map
     */
    static public double H(HashMap<String, ArrayList<String>> clusters, int totalN) {

        double entropy = 0;
        Iterator<Map.Entry<String, ArrayList<String>>> it = clusters.entrySet().iterator();
        while (it.hasNext()) {

            int size = it.next().getValue().size();
            if (size == 0) {
                // 0 * ln 0 is taken as 0; see the method comment.
                continue;
            }

            // Divide in double so the ratio keeps full double precision.
            double ratio = size / (double) totalN;

            // Subtract each (non-positive) term so that a zero result is +0.0, not -0.0.
            entropy -= ratio * Math.log(ratio);
        }

        return entropy;
    }

    /**
     * Returns the total number of entries in the dataset, i.e. the sum of the
     * sizes of all lists stored as values of the map.
     *
     * @param dataset map whose values are the lists to be counted
     * @return the sum of all list sizes; 0 for an empty map
     */
    static public int getTotalN(HashMap<String, ArrayList<String>> dataset) {

        int count = 0;

        // Walk the value lists directly; the keys play no role in the count.
        for (Iterator<ArrayList<String>> lists = dataset.values().iterator(); lists.hasNext(); ) {
            count += lists.next().size();
        }

        return count;
    }


I guess not. I just checked I(C,E): there you don't reset it2 in every iteration of it, which would be necessary for the nested sum.


The iterator it2 in method I should be initialized inside the loop over it. You could simplify your code and avoid this kind of error by using the "foreach" notation:

/**
 * Computes the mutual information
 * I = sum over (event, cluster) pairs of
 *     (common / N) * ln((common * N) / (|event| * |cluster|)),
 * where "common" is the number of entries of the event list that also occur
 * in the cluster list and N is totalN. Pairs that share no entry contribute 0.
 *
 * All arithmetic is done in double: float division (e.g. 2/3) is only
 * accurate to about 7 digits, and multiplying the two list sizes as ints
 * could overflow before the conversion to floating point.
 *
 * @param clusters map whose values are the cluster lists
 * @param events   map whose values are the event lists
 * @param totalN   total number of entries N
 * @return the mutual information, using the natural logarithm
 */
static public double I(HashMap<String, ArrayList<String>> clusters, HashMap<String, ArrayList<String>> events, int totalN) {

    double valueI = 0;
    for (ArrayList<String> event : events.values()) {
        for (ArrayList<String> cluster : clusters.values()) {

            // Count the entries of this event that also appear in this cluster;
            // each event entry counts at most once.
            int commonDocs = 0;
            for (String doc : event) {
                if (cluster.contains(doc)) {
                    commonDocs++;
                }
            }

            // Disjoint pair: the term is 0 and ln(0) must not be evaluated.
            if (commonDocs == 0) {
                continue;
            }

            double joint = commonDocs / (double) totalN;
            // Promote to double before multiplying so neither product can overflow int.
            double ratio = ((double) commonDocs * totalN) / ((double) event.size() * cluster.size());
            valueI += joint * Math.log(ratio);
        }
    }

    return valueI;
}

/**
 * Computes the entropy H = -sum_k (n_k / N) * ln(n_k / N) of a grouping,
 * where n_k is the size of the k-th list in the map and N is totalN.
 *
 * Empty lists are skipped: they have probability 0 and, by the convention
 * 0 * ln 0 = 0, add nothing to the sum. Computing the term directly would
 * give 0 * -Infinity = NaN and make the whole result NaN.
 *
 * @param clusters map whose values are the lists forming the grouping
 * @param totalN   total number of entries N used to turn sizes into ratios
 * @return the entropy, using the natural logarithm; 0.0 for an empty map
 */
static public double H(HashMap<String, ArrayList<String>> clusters, int totalN) {

    double entropy = 0;
    for (ArrayList<String> cluster : clusters.values()) {
        if (cluster.isEmpty()) {
            // 0 * ln 0 is taken as 0; see the method comment.
            continue;
        }

        // Double division keeps the ratio at full double precision.
        double ratio = cluster.size() / (double) totalN;

        // Each term ratio * ln(ratio) is <= 0; subtracting it keeps a zero result at +0.0.
        entropy -= ratio * Math.log(ratio);
    }

    return entropy;
}

/**
 * Counts all entries in the dataset by adding up the sizes of the lists
 * stored as values of the map.
 *
 * @param dataset map whose values are the lists to be counted
 * @return the sum of all list sizes; 0 for an empty map
 */
static public int getTotalN(HashMap<String, ArrayList<String>> dataset) {

    int sum = 0;

    // Only the value lists matter here; keys are ignored.
    for (ArrayList<String> entries : dataset.values()) {
        sum += entries.size();
    }

    return sum;
}


My guess is that you're not getting the expected results because of floating-point rounding errors (see this for further details). I haven't looked at the code in your methods implementing the three functions, but I see that you use float and double, which may cause you trouble. You may want to use BigDecimal instead.

0

精彩评论

暂无评论...
验证码 换一张
取 消