I am trying to fix a problem in a UTF-8 encoded database. During the migration to the current database system it looks like the data was compressed to Latin-1 encoding. I wrote a quick Groovy script to find and report the encoding errors. But, it fails to match anything.
If I write the analogous code in Java the report works fine!
So, I presume as a rookie with Groovy I'm doing something stupid. Can anyone see the error in my approach? I've tried hard-coding the characters in the select statement but i cannot get any of the special characters to work.
Thanks of course in advance.
aH
Groovy Code
import groovy.sql.Sql
def dias = ["è": "é", "…": "…"]
def atk = Sql.newInstance(
'jdbc:mysql://myURL:3306/myDB?useUnicode=true&characterEncoding=utf8',
'user',
'pass',
'com.mysql.jdbc.Driver'
)
dias.each() {x ->
println x.getKey()
atk.eachRow("SELECT `noteContent` FROM `ArchDescriptionRepeatingData`"
+ "WHERE `noteContent` LIKE(\"%" + x.getKey() + "%\")"
) { y ->
println y
}
}
as requested here is the java code, which I added some extra functionality to:
import java.sql.*;
import java.util.*;
import java.util.regex.*;
public class Diacritic {
private Connection connection;
private HashMap<String, String> dia;
Diacritic() throws SQLException, ClassNotFoundException {
Class.forName("com.mysql.jdbc.Driver");
connection = DriverManager.getConnection(
"jdbc:mysql://myURL:3306/myDB"
+ "?user=user&password=pass");
dia = new HashMap<String, String>();
populateArray();
System.out.println("Component Titles");
for(Map.Entry e : dia.entrySet()) {
String st = e.getKey().toString();
Pattern p = Pattern.compile(st);
System.out.println();
System.out.println(st + "\t\t->\t" + e.getValue().toString());
System.out.println();
Statement s = connection.createStatement();
ResultSet r = s.executeQuery("SELECT `itemValue` FROM `ListOrderedItems`"
+ "WHERE `itemValue` LIKE(\"%" + st + "%\")"
//+ "LIMIT 0, 100000"
);
int x = 0;
while (r.next()) {
x++;
Matcher m = p.matcher(r.getString(1));
if(m.find()) {
int start = 0;
开发者_JS百科 int mstart = m.start();
int mend = m.end();
int end = r.getString(1).length();
if (mstart - start <= 10 || end - mend <= 10) {
System.out.println(x + ": " + r.getString(1)
+ "\t->\t"
+ "\t" + r.getString(1).substring(0,mstart)
+ e.getValue().toString()
+ r.getString(1).substring(mend)
);
}
else {
System.out.println(x + ": " + r.getString(1).substring((mstart - 10), (mend + 10))
+ "\t->\t"
+ "\t" + r.getString(1).substring((mstart - 10),mstart)
+ e.getValue().toString()
+ r.getString(1).substring(mend, (mend + 10))
);
}
}
}
s.close();
}
}
public static void main (String[] args) throws SQLException, ClassNotFoundException {
Diacritic d = new Diacritic();
}
public void populateArray() {
dia.put("è", "é");
dia.put("…", "…");
dia.put("—", "");
dia.put("‑”", "-");
dia.put("é", "é");
dia.put("æ", "æ");
dia.put("ë", "ë");
//etc...
}
}
精彩评论