I run a bot on IRC. It captures a url pattern which a user sends in a message and returns the title.
Oftentimes, the title contains a numeric or named character entity such as `&quot;` or `&#39;`. These entities are being printed out literally. I'd like to convert them to their string literal versions, so `&quot;` would become `"`.
Is anyone aware of some sort of library or utility I could use in node.js/JS without having to define my own map/dictionary, such as below?
// Example of the hand-written lookup table the question hopes to avoid:
// each key is an entity exactly as it appears in the fetched title, and each
// value is the literal character it should be replaced with.
var dictionary = {
  '&quot;': '"',
};
We recently needed this for our JavaScript Room chat bot here on SO. We couldn't find anything for Node.js, so we ended up with this piece of code, which works fine (for most cases):
/**
 * Lookup table from HTML 4 named character references to their replacement
 * text.
 *
 * Keys are the complete reference as it appears in markup, including the
 * leading "&" and trailing ";", because the decoder looks up the whole matched
 * text (e.g. "&amp;") rather than the bare name.
 *
 * Notes on values:
 * - Space-like characters (&nbsp;, &ensp;, &emsp;, &thinsp;) are written as
 *   \u escapes so the exact code point is visible in the source.
 * - The invisible formatting characters (&shy;, &zwnj;, &zwj;, &lrm;, &rlm;)
 *   map to the empty string, i.e. they are dropped from decoded output.
 */
var _entities = {
  // ISO 8859-1 (Latin-1) symbols
  "&nbsp;": "\u00A0",
  "&iexcl;": "¡",
  "&cent;": "¢",
  "&pound;": "£",
  "&curren;": "¤",
  "&yen;": "¥",
  "&brvbar;": "¦",
  "&sect;": "§",
  "&uml;": "¨",
  "&copy;": "©",
  "&ordf;": "ª",
  "&laquo;": "«",
  "&not;": "¬",
  "&shy;": "",
  "&reg;": "®",
  "&macr;": "¯",
  "&deg;": "°",
  "&plusmn;": "±",
  "&sup2;": "²",
  "&sup3;": "³",
  "&acute;": "´",
  "&micro;": "µ",
  "&para;": "¶",
  "&middot;": "·",
  "&cedil;": "¸",
  "&sup1;": "¹",
  "&ordm;": "º",
  "&raquo;": "»",
  "&frac14;": "¼",
  "&frac12;": "½",
  "&frac34;": "¾",
  "&iquest;": "¿",
  // ISO 8859-1 (Latin-1) letters
  "&Agrave;": "À",
  "&Aacute;": "Á",
  "&Acirc;": "Â",
  "&Atilde;": "Ã",
  "&Auml;": "Ä",
  "&Aring;": "Å",
  "&AElig;": "Æ",
  "&Ccedil;": "Ç",
  "&Egrave;": "È",
  "&Eacute;": "É",
  "&Ecirc;": "Ê",
  "&Euml;": "Ë",
  "&Igrave;": "Ì",
  "&Iacute;": "Í",
  "&Icirc;": "Î",
  "&Iuml;": "Ï",
  "&ETH;": "Ð",
  "&Ntilde;": "Ñ",
  "&Ograve;": "Ò",
  "&Oacute;": "Ó",
  "&Ocirc;": "Ô",
  "&Otilde;": "Õ",
  "&Ouml;": "Ö",
  "&times;": "×",
  "&Oslash;": "Ø",
  "&Ugrave;": "Ù",
  "&Uacute;": "Ú",
  "&Ucirc;": "Û",
  "&Uuml;": "Ü",
  "&Yacute;": "Ý",
  "&THORN;": "Þ",
  "&szlig;": "ß",
  "&agrave;": "à",
  "&aacute;": "á",
  "&acirc;": "â",
  "&atilde;": "ã",
  "&auml;": "ä",
  "&aring;": "å",
  "&aelig;": "æ",
  "&ccedil;": "ç",
  "&egrave;": "è",
  "&eacute;": "é",
  "&ecirc;": "ê",
  "&euml;": "ë",
  "&igrave;": "ì",
  "&iacute;": "í",
  "&icirc;": "î",
  "&iuml;": "ï",
  "&eth;": "ð",
  "&ntilde;": "ñ",
  "&ograve;": "ò",
  "&oacute;": "ó",
  "&ocirc;": "ô",
  "&otilde;": "õ",
  "&ouml;": "ö",
  "&divide;": "÷",
  "&oslash;": "ø",
  "&ugrave;": "ù",
  "&uacute;": "ú",
  "&ucirc;": "û",
  "&uuml;": "ü",
  "&yacute;": "ý",
  "&thorn;": "þ",
  "&yuml;": "ÿ",
  // Latin Extended-B
  "&fnof;": "ƒ",
  // Greek
  "&Alpha;": "Α",
  "&Beta;": "Β",
  "&Gamma;": "Γ",
  "&Delta;": "Δ",
  "&Epsilon;": "Ε",
  "&Zeta;": "Ζ",
  "&Eta;": "Η",
  "&Theta;": "Θ",
  "&Iota;": "Ι",
  "&Kappa;": "Κ",
  "&Lambda;": "Λ",
  "&Mu;": "Μ",
  "&Nu;": "Ν",
  "&Xi;": "Ξ",
  "&Omicron;": "Ο",
  "&Pi;": "Π",
  "&Rho;": "Ρ",
  "&Sigma;": "Σ",
  "&Tau;": "Τ",
  "&Upsilon;": "Υ",
  "&Phi;": "Φ",
  "&Chi;": "Χ",
  "&Psi;": "Ψ",
  "&Omega;": "Ω",
  "&alpha;": "α",
  "&beta;": "β",
  "&gamma;": "γ",
  "&delta;": "δ",
  "&epsilon;": "ε",
  "&zeta;": "ζ",
  "&eta;": "η",
  "&theta;": "θ",
  "&iota;": "ι",
  "&kappa;": "κ",
  "&lambda;": "λ",
  "&mu;": "μ",
  "&nu;": "ν",
  "&xi;": "ξ",
  "&omicron;": "ο",
  "&pi;": "π",
  "&rho;": "ρ",
  "&sigmaf;": "ς",
  "&sigma;": "σ",
  "&tau;": "τ",
  "&upsilon;": "υ",
  "&phi;": "φ",
  "&chi;": "χ",
  "&psi;": "ψ",
  "&omega;": "ω",
  "&thetasym;": "ϑ",
  "&upsih;": "ϒ",
  "&piv;": "ϖ",
  // General punctuation and letterlike symbols
  "&bull;": "•",
  "&hellip;": "…",
  "&prime;": "′",
  "&Prime;": "″",
  "&oline;": "‾",
  "&frasl;": "⁄",
  "&weierp;": "℘",
  "&image;": "ℑ",
  "&real;": "ℜ",
  "&trade;": "™",
  "&alefsym;": "ℵ",
  // Arrows
  "&larr;": "←",
  "&uarr;": "↑",
  "&rarr;": "→",
  "&darr;": "↓",
  "&harr;": "↔",
  "&crarr;": "↵",
  "&lArr;": "⇐",
  "&uArr;": "⇑",
  "&rArr;": "⇒",
  "&dArr;": "⇓",
  "&hArr;": "⇔",
  // Mathematical operators
  "&forall;": "∀",
  "&part;": "∂",
  "&exist;": "∃",
  "&empty;": "∅",
  "&nabla;": "∇",
  "&isin;": "∈",
  "&notin;": "∉",
  "&ni;": "∋",
  "&prod;": "∏",
  "&sum;": "∑",
  "&minus;": "−",
  "&lowast;": "∗",
  "&radic;": "√",
  "&prop;": "∝",
  "&infin;": "∞",
  "&ang;": "∠",
  "&and;": "∧",
  "&or;": "∨",
  "&cap;": "∩",
  "&cup;": "∪",
  "&int;": "∫",
  "&there4;": "∴",
  "&sim;": "∼",
  "&cong;": "≅",
  "&asymp;": "≈",
  "&ne;": "≠",
  "&equiv;": "≡",
  "&le;": "≤",
  "&ge;": "≥",
  "&sub;": "⊂",
  "&sup;": "⊃",
  "&nsub;": "⊄",
  "&sube;": "⊆",
  "&supe;": "⊇",
  "&oplus;": "⊕",
  "&otimes;": "⊗",
  "&perp;": "⊥",
  "&sdot;": "⋅",
  // Miscellaneous technical and geometric shapes
  "&lceil;": "⌈",
  "&rceil;": "⌉",
  "&lfloor;": "⌊",
  "&rfloor;": "⌋",
  "&lang;": "〈",
  "&rang;": "〉",
  "&loz;": "◊",
  "&spades;": "♠",
  "&clubs;": "♣",
  "&hearts;": "♥",
  "&diams;": "♦",
  // Markup-significant characters
  "&quot;": "\"",
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  // Latin Extended-A and spacing modifier letters
  "&OElig;": "Œ",
  "&oelig;": "œ",
  "&Scaron;": "Š",
  "&scaron;": "š",
  "&Yuml;": "Ÿ",
  "&circ;": "ˆ",
  "&tilde;": "˜",
  // Spaces, invisible formatting characters, dashes and quotation marks
  "&ensp;": "\u2002",
  "&emsp;": "\u2003",
  "&thinsp;": "\u2009",
  "&zwnj;": "",
  "&zwj;": "",
  "&lrm;": "",
  "&rlm;": "",
  "&ndash;": "–",
  "&mdash;": "—",
  "&lsquo;": "‘",
  "&rsquo;": "’",
  "&sbquo;": "‚",
  "&ldquo;": "“",
  "&rdquo;": "”",
  "&bdquo;": "„",
  "&dagger;": "†",
  "&Dagger;": "‡",
  "&permil;": "‰",
  "&lsaquo;": "‹",
  "&rsaquo;": "›",
  "&euro;": "€",
};
/**
 * Decode one HTML character reference into the text it stands for.
 *
 * Designed to be used as the replacer callback of `String.prototype.replace`.
 * Because `replace` stringifies whatever the callback returns, anything that
 * cannot be decoded is returned unchanged instead of `null` (which would be
 * inserted into the result as the literal text "null").
 *
 * @param {string} input - The full reference text, e.g. "&amp;", "&#39;" or
 *   "&#x27;".
 * @returns {string} The decoded character(s), or `input` itself when the
 *   reference is malformed, beyond the Unicode range, or not in `_entities`.
 */
function unescape_entity(input) {
  if (input.charAt(1) === '#') {
    // Numeric reference: decimal ("&#39;") or hexadecimal ("&#x27;").
    var numeric = /^&#(?:[xX]([0-9a-fA-F]+)|(\d+));?$/.exec(input);
    if (numeric === null) {
      return input;
    }
    var codePoint = numeric[1] !== undefined
      ? parseInt(numeric[1], 16)
      : parseInt(numeric[2], 10);
    if (codePoint > 0x10FFFF) {
      // Not a Unicode code point; leave the text as written.
      return input;
    }
    if (codePoint > 0xFFFF) {
      // String.fromCharCode truncates to 16 bits, so code points outside the
      // Basic Multilingual Plane are emitted as a UTF-16 surrogate pair.
      var offset = codePoint - 0x10000;
      return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }
    return String.fromCharCode(codePoint);
  }
  if (_entities.hasOwnProperty(input)) {
    return _entities[input];
  }
  // Unknown named entity: keep the original text.
  return input;
}
/**
 * Replace every HTML character reference found in a string.
 *
 * Each substring of the form "&name;" or "&#digits;" is handed to
 * `unescape_entity`, and whatever that returns is substituted in its place.
 *
 * @param {string} input - Text that may contain entity references.
 * @returns {string} The text with each matched reference replaced.
 */
function unescape2(input) {
  return input.replace(/&(#?)(\d{1,5}|\w{1,8});/gm, function (reference) {
    return unescape_entity(reference);
  });
}
// CommonJS export: callers that require this module see the decoder as `unescape`.
exports.unescape = unescape2;
Of course, if you need a full mapping, that would take a "bit" more effort. You can find a Python version on the web (IIRC on ActiveState) which works better, but it relies on Python's stdlib for the mappings, so in the end someone has to do the job and provide those mappings for JS.
If you use the numeric encoding, you can use `String.fromCharCode(codepoint)`.
精彩评论