I need some help with imp开发者_开发百科lementing these formulas. I think I implemented them correctly but for some reason I don't get the expected results:
This is the code for NMI, I, and H functions respectively. Are the formulas implemented correctly ? Thanks
int totalN = getTotalN(events);
double h1 = H(clusters, totalN);
double h2 = H(events, totalN);
double valueI = I(clusters, events, totalN);
double value_NMI = valueI / (double) ((h1 + h2) / (double) 2);
System.out.println("NMI: " + value_NMI);
static public double I(HashMap<String, ArrayList<String>> clusters, HashMap<String, ArrayList<String>> events, int totalN) {
//store sorted content to contents
Iterator<Map.Entry<String, ArrayList<String>>> it = events.entrySet().iterator();
Iterator<Map.Entry<String, ArrayList<String>>> it2 = clusters.entrySet().iterator();
String key;
ArrayList<String> event;
ArrayList<String> cluster;
double valueI = 0;
while (it.hasNext()) {
Map.Entry<String, ArrayList<String>> mapItem = it.next();
key = mapItem.getKey();
//if cluster doesn't exist
//if(!clusters.containsKey(key)) continue;
//cluster = clusters.get(key);
event = mapItem.getValue();
while (it2.hasNext()) {
Map.Entry<String, ArrayList<String>> mapItem2 = it2.next();
cluster = mapItem2.getValue();
float common_docs = 0;
for (int i=0; i< event.size(); i++) {
for (int j=0; j< cluster.size(); j++) {
if (event.get(i).equals(cluster.get(j))) {
common_docs = common_docs + 1;
break;
}
}
}
if (common_docs != 0) valueI = valueI + ( ( common_docs / (float) totalN) * Math.log((common_docs * totalN) / (float) (event.size() * cluster.size())) );
}
}
return valueI;
}
static public double H(HashMap<String, ArrayList<String>> clusters, int totalN) {
//store sorted content to contents
Iterator<Map.Entry<String, ArrayList<String>>> it = clusters.entrySet().iterator();
ArrayList<String> cluster;
double entropy = 0;
while (it.hasNext()) {
Map.Entry<String, ArrayList<String>> mapItem = it.next();
cluster = mapItem.getValue();
double ratio = cluster.size() / (float) totalN;
entropy = entropy + ratio * Math.log(ratio);
}
return -entropy;
}
static public int getTotalN(HashMap<String, ArrayList<String>> dataset) {
int totalN = 0;
Iterator<Map.Entry<String, ArrayList<String>>> it = dataset.entrySet().iterator();
ArrayList<String> item;
while (it.hasNext()) {
Map.Entry<String, ArrayList<String>> mapItem = it.next();
item = mapItem.getValue();
for (int i=0; i< item.size(); i++) {
totalN = totalN + 1;
}
}
return totalN ;
}
I guess no. I just checked I(C,E), and there you don't reset it2 in every iteration of it, which would be necessary for the nested sum.
Iterator it2 in method I should be initialize inside the loop on it. You could simplify your code and avoid this kind of error by using the "foreach" notation:
static public double I(HashMap<String, ArrayList<String>> clusters, HashMap<String, ArrayList<String>> events, int totalN) {
String key;
ArrayList<String> event;
ArrayList<String> cluster;
double valueI = 0;
for (Map.Entry<String, ArrayList<String>> mapItem: events.entrySet()) {
key = mapItem.getKey();
//if cluster doesn't exist
//if(!clusters.containsKey(key)) continue;
//cluster = clusters.get(key);
event = mapItem.getValue();
for (Map.Entry<String, ArrayList<String>> mapItem2: clusters.entrySet()) {
cluster = mapItem2.getValue();
float common_docs = 0;
for (int i = 0; i < event.size(); i++) {
for (int j = 0; j < cluster.size(); j++) {
if (event.get(i).equals(cluster.get(j))) {
common_docs = common_docs + 1;
break;
}
}
}
if (common_docs != 0) {
valueI = valueI + ((common_docs / (float) totalN) * Math.log((common_docs * totalN) / (float) (event.size() * cluster.size())));
}
}
}
return valueI;
}
static public double H(HashMap<String, ArrayList<String>> clusters, int totalN) {
//store sorted content to contents
ArrayList<String> cluster;
double entropy = 0;
for (Map.Entry<String, ArrayList<String>> mapItem: clusters.entrySet()) {
cluster = mapItem.getValue();
double ratio = cluster.size() / (float) totalN;
entropy = entropy + ratio * Math.log(ratio);
}
return -entropy;
}
static public int getTotalN(HashMap<String, ArrayList<String>> dataset) {
int totalN = 0;
ArrayList<String> item;
for (Map.Entry<String, ArrayList<String>> mapItem: dataset.entrySet()) {
item = mapItem.getValue();
for (int i = 0; i < item.size(); i++) {
totalN = totalN + 1;
}
}
return totalN;
}
My guess is that you're not getting the expected results because of floating point rounding errors (see this for further details). I haven't looked at the code in your methods implementing the three functions, but I see that you use float
and double
, which may cause you troubles. You may want to use BigDecimal
instead.
精彩评论