开发者

Replace characters to make international letters (diacritics)

开发者 https://www.devze.com 2023-01-26 16:52 出处:网络
I'm trying to emulate the way an international keyboard works. If you use one of the dead keys followed by a letter, it combines them into the corresponding character. For example, typing in `a would…

I'm trying to emulate the way an international keyboard works. If you use one of the dead keys followed by a letter, it combines them into the corresponding character. For example, typing in `a would result in à and ^o results in ô, etc.

I can't seem to get my regex to work properly (I suck at regex!), but this is what I have so far (demo):

// Sample input: each dead key (` ' " ^ ~) is immediately followed by the
// letter it should be combined with.
var txt = "Replacing 'a ^u ~n 'e ^I 'c",

    // Accented forms of each base letter. Vowels carry four variants in the
    // fixed order [grave, acute, diaeresis, circumflex]; y, c and n have a
    // single accented form and are stored as plain strings.
    combos = {
        'a': ['à', 'á', 'ä', 'â'],
        'A': ['À', 'Á', 'Ä', 'Â'],
        'e': ['è', 'é', 'ë', 'ê'],
        'E': ['È', 'É', 'Ë', 'Ê'],
        'i': ['ì', 'í', 'ï', 'î'],
        'I': ['Ì', 'Í', 'Ï', 'Î'],
        'o': ['ò', 'ó', 'ö', 'ô'],
        'O': ['Ò', 'Ó', 'Ö', 'Ô'],
        'u': ['ù', 'ú', 'ü', 'û'],
        'U': ['Ù', 'Ú', 'Ü', 'Û'],
        'y': 'ý',
        'Y': 'Ý',
        'c': 'ç',
        'C': 'Ç',
        'n': 'ñ',
        'N': 'Ñ'
    },

    // One pattern per dead key. Inside [...] parentheses are literal
    // characters, so the classes list only the letters that may follow.
    bslash = /`[aeiou]/gi,
    fslash = /'[aeiouyc]/gi,
    ddots = /"[aeiou]/gi,
    caret = /\^[aeiou]/gi,
    tidle = /~n/gi;

/**
 * Looks up the accented form of the letter that ends a dead-key match.
 *
 * @param {string} match - matched text: the dead key followed by one letter
 * @param {number} variant - index into the vowel variant arrays of combos
 *     (0 grave, 1 acute, 2 diaeresis, 3 circumflex)
 * @returns {string} the accented character
 */
function accentLetter(match, variant) {
    // The match still contains the dead key; the letter is its last character.
    var forms = combos[match.slice(-1)];
    // y, c and n map to a single string rather than a variant array.
    return typeof forms === 'string' ? forms : forms[variant];
}

/**
 * Replaces every dead-key + letter pair in `text` with the accented letter.
 *
 * @param {string} text - input that may contain pairs such as `a, 'e, "o, ^u, ~n
 * @returns {string} text with all recognised pairs combined; everything else
 *     is left untouched
 */
function combineDeadKeys(text) {
    // replace() is a no-op when a pattern does not match, so no test() guard
    // is needed in front of each call.
    return text
        // grave: `a -> à
        .replace(bslash, function(m) { return accentLetter(m, 0); })
        // acute (plus ý and ç): 'a -> á
        .replace(fslash, function(m) { return accentLetter(m, 1); })
        // diaeresis: "a -> ä
        .replace(ddots, function(m) { return accentLetter(m, 2); })
        // circumflex: ^a -> â
        .replace(caret, function(m) { return accentLetter(m, 3); })
        // tilde: ~n -> ñ
        .replace(tidle, function(m) { return accentLetter(m, 0); });
}

// Only rewrite and output the text when it contains at least one combination.
if (/[`'"^~][aeiouycn]/i.test(txt)) {
    txt = combineDeadKeys(txt);

    // document only exists in a browser; skip the output elsewhere.
    if (typeof document !== 'undefined') {
        document.write(txt);
    }
}

Also, if you know of a more efficient method to do this same thing, I'd love to hear it!


I updated my answer above with the problem Aefxx found - thanks! But I decided to go with Kenny's method as it is cleaner, thanks all! :) (updated demo)

// Sample input: every dead key is directly followed by the letter it modifies.
var txt = "Replacing 'a ^u ~n 'e ^I 'c";

// Two-level lookup table: dead key first, then base letter -> accented letter.
var combos = {
  '`': {
    a: 'à', A: 'À',
    e: 'è', E: 'È',
    i: 'ì', I: 'Ì',
    o: 'ò', O: 'Ò',
    u: 'ù', U: 'Ù'
  },
  "'": {
    a: 'á', A: 'Á',
    e: 'é', E: 'É',
    i: 'í', I: 'Í',
    o: 'ó', O: 'Ó',
    u: 'ú', U: 'Ú',
    y: 'ý', Y: 'Ý',
    c: 'ç', C: 'Ç'
  },
  '"': {
    a: 'ä', A: 'Ä',
    e: 'ë', E: 'Ë',
    i: 'ï', I: 'Ï',
    o: 'ö', O: 'Ö',
    u: 'ü', U: 'Ü'
  },
  '^': {
    a: 'â', A: 'Â',
    e: 'ê', E: 'Ê',
    i: 'î', I: 'Î',
    o: 'ô', O: 'Ô',
    u: 'û', U: 'Û'
  },
  '~': {
    n: 'ñ', N: 'Ñ'
  }
};

// A single pass over the text: group 1 captures the dead key, group 2 the letter.
txt = txt.replace(/([`\'~\^\"])([a-z])/ig, function(whole, deadKey, base){
  if (!(deadKey in combos)) {
    return whole;
  }
  // Pairs with no table entry (e.g. ~a) are kept exactly as typed.
  return combos[deadKey][base] || whole;
});

document.write(txt);


A more complete approach is the JavaScript port of the Apache Lucene ASCII Folding algorithm, which you can find at https://github.com/mplatt/fold-to-ascii-js. It handles the diacritics you mentioned and many more characters.


// Sketch of a table-driven approach: look up the dead key first, then the letter.
// NOTE(review): the `...` entries below appear to be placeholders for mappings
// left out of the sketch, so this snippet is not runnable as written.
var txt = "Replacing 'a ^u ~n 'e ^I 'c";

// Lookup table keyed by dead key, then by base letter -> accented letter.
var combos = {
   '^': {a: 'â', A: 'Â', e: 'ê', E: 'Ê', ...},
   "'": {a: 'á', ...},
   ...
};

// Group 1 captures the dead key, group 2 the letter that follows it.
// NOTE(review): `return` sits at top level here — presumably this is meant to
// be the body of a function; confirm before reuse.
return txt.replace(/([`'~^"])([a-z])/ig, function(s, accent, letter){
  if (accent in combos) {
    // Fall back to the original matched text when the table has no entry.
    return combos[accent][letter] || s;
  }
  return s;
}
// NOTE(review): the replace( call opened above is never closed — a `);`
// appears to be missing after the brace on the previous line.


OK, problem solved. You made a mistake many of us (me included) make quite often. Calling replace on a string without assigning the result won't work: replace returns a new string, and if you don't store it, the replaced text is simply thrown away.

...
// replace() returns a new string and leaves txt itself untouched, so the
// result has to be assigned back to txt.
txt = txt.replace(bslash, function(match) {
    // match still includes the leading `, so take only its last character
    var letter = match.slice(-1);
    return combos[letter][0];
});


Must a regex be used? It seems easier to just go through the entire string character by character. Admittedly, that amounts to coding the automaton by hand, but it can still use the table already defined by combos.

0

精彩评论

暂无评论...
验证码 换一张
取 消