i was wonderi开发者_如何学Cng if there is a MYSQL function to decode text with html entities. I have seen some approaches using REPLACE but it looks kinda hard to manage all the entities.
You can create function like below
DELIMITER $$
DROP FUNCTION IF EXISTS `HTML_UnEncode`$$
CREATE FUNCTION `HTML_UnEncode`(X VARCHAR(255)) RETURNS VARCHAR(255) CHARSET latin1 DETERMINISTIC
BEGIN
DECLARE TextString VARCHAR(255) ;
SET TextString = X ;
#quotation mark
IF INSTR( X , '"' )
THEN SET TextString = REPLACE(TextString, '"','"') ;
END IF ;
#apostrophe
IF INSTR( X , ''' )
THEN SET TextString = REPLACE(TextString, ''','"') ;
END IF ;
#ampersand
IF INSTR( X , '&' )
THEN SET TextString = REPLACE(TextString, '&','&') ;
END IF ;
#less-than
IF INSTR( X , '<' )
THEN SET TextString = REPLACE(TextString, '<','<') ;
END IF ;
#greater-than
IF INSTR( X , '>' )
THEN SET TextString = REPLACE(TextString, '>','>') ;
END IF ;
#non-breaking space
IF INSTR( X , ' ' )
THEN SET TextString = REPLACE(TextString, ' ',' ') ;
END IF ;
#inverted exclamation mark
IF INSTR( X , '¡' )
THEN SET TextString = REPLACE(TextString, '¡','¡') ;
END IF ;
#cent
IF INSTR( X , '¢' )
THEN SET TextString = REPLACE(TextString, '¢','¢') ;
END IF ;
#pound
IF INSTR( X , '£' )
THEN SET TextString = REPLACE(TextString, '£','£') ;
END IF ;
#currency
IF INSTR( X , '¤' )
THEN SET TextString = REPLACE(TextString, '¤','¤') ;
END IF ;
#yen
IF INSTR( X , '¥' )
THEN SET TextString = REPLACE(TextString, '¥','¥') ;
END IF ;
#broken vertical bar
IF INSTR( X , '¦' )
THEN SET TextString = REPLACE(TextString, '¦','¦') ;
END IF ;
#section
IF INSTR( X , '§' )
THEN SET TextString = REPLACE(TextString, '§','§') ;
END IF ;
#spacing diaeresis
IF INSTR( X , '¨' )
THEN SET TextString = REPLACE(TextString, '¨','¨') ;
END IF ;
#copyright
IF INSTR( X , '©' )
THEN SET TextString = REPLACE(TextString, '©','©') ;
END IF ;
#feminine ordinal indicator
IF INSTR( X , 'ª' )
THEN SET TextString = REPLACE(TextString, 'ª','ª') ;
END IF ;
#angle quotation mark (left)
IF INSTR( X , '«' )
THEN SET TextString = REPLACE(TextString, '«','«') ;
END IF ;
#negation
IF INSTR( X , '¬' )
THEN SET TextString = REPLACE(TextString, '¬','¬') ;
END IF ;
#soft hyphen
IF INSTR( X , '­' )
THEN SET TextString = REPLACE(TextString, '­','') ;
END IF ;
#registered trademark
IF INSTR( X , '®' )
THEN SET TextString = REPLACE(TextString, '®','®') ;
END IF ;
#spacing macron
IF INSTR( X , '¯' )
THEN SET TextString = REPLACE(TextString, '¯','¯') ;
END IF ;
#degree
IF INSTR( X , '°' )
THEN SET TextString = REPLACE(TextString, '°','°') ;
END IF ;
#plus-or-minus
IF INSTR( X , '±' )
THEN SET TextString = REPLACE(TextString, '±','±') ;
END IF ;
#superscript 2
IF INSTR( X , '²' )
THEN SET TextString = REPLACE(TextString, '²','²') ;
END IF ;
#superscript 3
IF INSTR( X , '³' )
THEN SET TextString = REPLACE(TextString, '³','³') ;
END IF ;
#spacing acute
IF INSTR( X , '´' )
THEN SET TextString = REPLACE(TextString, '´','´') ;
END IF ;
#micro
IF INSTR( X , 'µ' )
THEN SET TextString = REPLACE(TextString, 'µ','µ') ;
END IF ;
#paragraph
IF INSTR( X , '¶' )
THEN SET TextString = REPLACE(TextString, '¶','¶') ;
END IF ;
#middle dot
IF INSTR( X , '·' )
THEN SET TextString = REPLACE(TextString, '·','·') ;
END IF ;
#spacing cedilla
IF INSTR( X , '¸' )
THEN SET TextString = REPLACE(TextString, '¸','¸') ;
END IF ;
#superscript 1
IF INSTR( X , '¹' )
THEN SET TextString = REPLACE(TextString, '¹','¹') ;
END IF ;
#masculine ordinal indicator
IF INSTR( X , 'º' )
THEN SET TextString = REPLACE(TextString, 'º','º') ;
END IF ;
#angle quotation mark (right)
IF INSTR( X , '»' )
THEN SET TextString = REPLACE(TextString, '»','»') ;
END IF ;
#fraction 1/4
IF INSTR( X , '¼' )
THEN SET TextString = REPLACE(TextString, '¼','¼') ;
END IF ;
#fraction 1/2
IF INSTR( X , '½' )
THEN SET TextString = REPLACE(TextString, '½','½') ;
END IF ;
#fraction 3/4
IF INSTR( X , '¾' )
THEN SET TextString = REPLACE(TextString, '¾','¾') ;
END IF ;
#inverted question mark
IF INSTR( X , '¿' )
THEN SET TextString = REPLACE(TextString, '¿','¿') ;
END IF ;
#multiplication
IF INSTR( X , '×' )
THEN SET TextString = REPLACE(TextString, '×','×') ;
END IF ;
#division
IF INSTR( X , '÷' )
THEN SET TextString = REPLACE(TextString, '÷','÷') ;
END IF ;
#capital a, grave accent
IF INSTR( X , 'À' )
THEN SET TextString = REPLACE(TextString, 'À','À') ;
END IF ;
#capital a, acute accent
IF INSTR( X , 'Á' )
THEN SET TextString = REPLACE(TextString, 'Á','Á') ;
END IF ;
#capital a, circumflex accent
IF INSTR( X , 'Â' )
THEN SET TextString = REPLACE(TextString, 'Â','Â') ;
END IF ;
#capital a, tilde
IF INSTR( X , 'Ã' )
THEN SET TextString = REPLACE(TextString, 'Ã','Ã') ;
END IF ;
#capital a, umlaut mark
IF INSTR( X , 'Ä' )
THEN SET TextString = REPLACE(TextString, 'Ä','Ä') ;
END IF ;
#capital a, ring
IF INSTR( X , 'Å' )
THEN SET TextString = REPLACE(TextString, 'Å','Å') ;
END IF ;
#capital ae
IF INSTR( X , 'Æ' )
THEN SET TextString = REPLACE(TextString, 'Æ','Æ') ;
END IF ;
#capital c, cedilla
IF INSTR( X , 'Ç' )
THEN SET TextString = REPLACE(TextString, 'Ç','Ç') ;
END IF ;
#capital e, grave accent
IF INSTR( X , 'È' )
THEN SET TextString = REPLACE(TextString, 'È','È') ;
END IF ;
#capital e, acute accent
IF INSTR( X , 'É' )
THEN SET TextString = REPLACE(TextString, 'É','É') ;
END IF ;
#capital e, circumflex accent
IF INSTR( X , 'Ê' )
THEN SET TextString = REPLACE(TextString, 'Ê','Ê') ;
END IF ;
#capital e, umlaut mark
IF INSTR( X , 'Ë' )
THEN SET TextString = REPLACE(TextString, 'Ë','Ë') ;
END IF ;
#capital i, grave accent
IF INSTR( X , 'Ì' )
THEN SET TextString = REPLACE(TextString, 'Ì','Ì') ;
END IF ;
#capital i, acute accent
IF INSTR( X , 'Í' )
THEN SET TextString = REPLACE(TextString, 'Í','Í') ;
END IF ;
#capital i, circumflex accent
IF INSTR( X , 'Î' )
THEN SET TextString = REPLACE(TextString, 'Î','Î') ;
END IF ;
#capital i, umlaut mark
IF INSTR( X , 'Ï' )
THEN SET TextString = REPLACE(TextString, 'Ï','Ï') ;
END IF ;
#capital eth, Icelandic
IF INSTR( X , 'Ð' )
THEN SET TextString = REPLACE(TextString, 'Ð','Ð') ;
END IF ;
#capital n, tilde
IF INSTR( X , 'Ñ' )
THEN SET TextString = REPLACE(TextString, 'Ñ','Ñ') ;
END IF ;
#capital o, grave accent
IF INSTR( X , 'Ò' )
THEN SET TextString = REPLACE(TextString, 'Ò','Ò') ;
END IF ;
#capital o, acute accent
IF INSTR( X , 'Ó' )
THEN SET TextString = REPLACE(TextString, 'Ó','Ó') ;
END IF ;
#capital o, circumflex accent
IF INSTR( X , 'Ô' )
THEN SET TextString = REPLACE(TextString, 'Ô','Ô') ;
END IF ;
#capital o, tilde
IF INSTR( X , 'Õ' )
THEN SET TextString = REPLACE(TextString, 'Õ','Õ') ;
END IF ;
#capital o, umlaut mark
IF INSTR( X , 'Ö' )
THEN SET TextString = REPLACE(TextString, 'Ö','Ö') ;
END IF ;
#capital o, slash
IF INSTR( X , 'Ø' )
THEN SET TextString = REPLACE(TextString, 'Ø','Ø') ;
END IF ;
#capital u, grave accent
IF INSTR( X , 'Ù' )
THEN SET TextString = REPLACE(TextString, 'Ù','Ù') ;
END IF ;
#capital u, acute accent
IF INSTR( X , 'Ú' )
THEN SET TextString = REPLACE(TextString, 'Ú','Ú') ;
END IF ;
#capital u, circumflex accent
IF INSTR( X , 'Û' )
THEN SET TextString = REPLACE(TextString, 'Û','Û') ;
END IF ;
#capital u, umlaut mark
IF INSTR( X , 'Ü' )
THEN SET TextString = REPLACE(TextString, 'Ü','Ü') ;
END IF ;
#capital y, acute accent
IF INSTR( X , 'Ý' )
THEN SET TextString = REPLACE(TextString, 'Ý','Ý') ;
END IF ;
#capital THORN, Icelandic
IF INSTR( X , 'Þ' )
THEN SET TextString = REPLACE(TextString, 'Þ','Þ') ;
END IF ;
#small sharp s, German
IF INSTR( X , 'ß' )
THEN SET TextString = REPLACE(TextString, 'ß','ß') ;
END IF ;
#small a, grave accent
IF INSTR( X , 'à' )
THEN SET TextString = REPLACE(TextString, 'à','à') ;
END IF ;
#small a, acute accent
IF INSTR( X , 'á' )
THEN SET TextString = REPLACE(TextString, 'á','á') ;
END IF ;
#small a, circumflex accent
IF INSTR( X , 'â' )
THEN SET TextString = REPLACE(TextString, 'â','â') ;
END IF ;
#small a, tilde
IF INSTR( X , 'ã' )
THEN SET TextString = REPLACE(TextString, 'ã','ã') ;
END IF ;
#small a, umlaut mark
IF INSTR( X , 'ä' )
THEN SET TextString = REPLACE(TextString, 'ä','ä') ;
END IF ;
#small a, ring
IF INSTR( X , 'å' )
THEN SET TextString = REPLACE(TextString, 'å','å') ;
END IF ;
#small ae
IF INSTR( X , 'æ' )
THEN SET TextString = REPLACE(TextString, 'æ','æ') ;
END IF ;
#small c, cedilla
IF INSTR( X , 'ç' )
THEN SET TextString = REPLACE(TextString, 'ç','ç') ;
END IF ;
#small e, grave accent
IF INSTR( X , 'è' )
THEN SET TextString = REPLACE(TextString, 'è','è') ;
END IF ;
#small e, acute accent
IF INSTR( X , 'é' )
THEN SET TextString = REPLACE(TextString, 'é','é') ;
END IF ;
#small e, circumflex accent
IF INSTR( X , 'ê' )
THEN SET TextString = REPLACE(TextString, 'ê','ê') ;
END IF ;
#small e, umlaut mark
IF INSTR( X , 'ë' )
THEN SET TextString = REPLACE(TextString, 'ë','ë') ;
END IF ;
#small i, grave accent
IF INSTR( X , 'ì' )
THEN SET TextString = REPLACE(TextString, 'ì','ì') ;
END IF ;
#small i, acute accent
IF INSTR( X , 'í' )
THEN SET TextString = REPLACE(TextString, 'í','í') ;
END IF ;
#small i, circumflex accent
IF INSTR( X , 'î' )
THEN SET TextString = REPLACE(TextString, 'î','î') ;
END IF ;
#small i, umlaut mark
IF INSTR( X , 'ï' )
THEN SET TextString = REPLACE(TextString, 'ï','ï') ;
END IF ;
#small eth, Icelandic
IF INSTR( X , 'ð' )
THEN SET TextString = REPLACE(TextString, 'ð','ð') ;
END IF ;
#small n, tilde
IF INSTR( X , 'ñ' )
THEN SET TextString = REPLACE(TextString, 'ñ','ñ') ;
END IF ;
#small o, grave accent
IF INSTR( X , 'ò' )
THEN SET TextString = REPLACE(TextString, 'ò','ò') ;
END IF ;
#small o, acute accent
IF INSTR( X , 'ó' )
THEN SET TextString = REPLACE(TextString, 'ó','ó') ;
END IF ;
#small o, circumflex accent
IF INSTR( X , 'ô' )
THEN SET TextString = REPLACE(TextString, 'ô','ô') ;
END IF ;
#small o, tilde
IF INSTR( X , 'õ' )
THEN SET TextString = REPLACE(TextString, 'õ','õ') ;
END IF ;
#small o, umlaut mark
IF INSTR( X , 'ö' )
THEN SET TextString = REPLACE(TextString, 'ö','ö') ;
END IF ;
#small o, slash
IF INSTR( X , 'ø' )
THEN SET TextString = REPLACE(TextString, 'ø','ø') ;
END IF ;
#small u, grave accent
IF INSTR( X , 'ù' )
THEN SET TextString = REPLACE(TextString, 'ù','ù') ;
END IF ;
#small u, acute accent
IF INSTR( X , 'ú' )
THEN SET TextString = REPLACE(TextString, 'ú','ú') ;
END IF ;
#small u, circumflex accent
IF INSTR( X , 'û' )
THEN SET TextString = REPLACE(TextString, 'û','û') ;
END IF ;
#small u, umlaut mark
IF INSTR( X , 'ü' )
THEN SET TextString = REPLACE(TextString, 'ü','ü') ;
END IF ;
#small y, acute accent
IF INSTR( X , 'ý' )
THEN SET TextString = REPLACE(TextString, 'ý','ý') ;
END IF ;
#small thorn, Icelandic
IF INSTR( X , 'þ' )
THEN SET TextString = REPLACE(TextString, 'þ','þ') ;
END IF ;
#small y, umlaut mark
IF INSTR( X , 'ÿ' )
THEN SET TextString = REPLACE(TextString, 'ÿ','ÿ') ;
END IF ;
RETURN TextString ;
END$$
DELIMITER ;
And execute below query :
SELECT HTML_UnEncode('this is a <a>test, nothing more');
The following function can be used to decode numeric entities like 〹
CREATE FUNCTION entity_decode(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
NO SQL
DETERMINISTIC
BEGIN
DECLARE tmp TEXT CHARSET utf8 DEFAULT txt;
DECLARE entity TEXT CHARSET utf8;
DECLARE pos1 INT DEFAULT 1;
DECLARE pos2 INT;
DECLARE codepoint INT;
IF txt IS NULL THEN
RETURN NULL;
END IF;
LOOP
SET pos1 = LOCATE('&#', tmp, pos1);
IF pos1 = 0 THEN
RETURN tmp;
END IF;
SET pos2 = LOCATE(';', tmp, pos1 + 2);
IF pos2 > pos1 THEN
SET entity = SUBSTRING(tmp, pos1, pos2 - pos1 + 1);
IF entity REGEXP '^&#[[:digit:]]+;$' THEN
SET codepoint = CAST(SUBSTRING(entity, 3, pos2 - pos1 - 2) AS UNSIGNED);
IF codepoint > 31 THEN
SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
END IF;
END IF;
END IF;
SET pos1 = pos1 + 1;
END LOOP;
END
This adds support for hex encoded numeric values (which I get from Perl using "use HTML::Entities qw(encode_entities_numeric);".
DELIMITER $$
CREATE FUNCTION entity_decode(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8
NO SQL
DETERMINISTIC
BEGIN
DECLARE tmp TEXT CHARSET utf8 DEFAULT txt;
DECLARE entity TEXT CHARSET utf8;
DECLARE pos1 INT DEFAULT 1;
DECLARE pos2 INT;
DECLARE codepoint INT;
IF txt IS NULL THEN
RETURN NULL;
END IF;
LOOP
SET pos1 = LOCATE('&#', tmp, pos1);
IF pos1 = 0 THEN
RETURN tmp;
END IF;
SET pos2 = LOCATE(';', tmp, pos1 + 2);
IF pos2 > pos1 THEN
SET entity = SUBSTRING(tmp, pos1, pos2 - pos1 + 1);
IF entity REGEXP '^&#[[:digit:]]+;$' THEN
SET codepoint = CAST(SUBSTRING(entity, 3, pos2 - pos1 - 2) AS UNSIGNED);
IF codepoint > 31 THEN
SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
END IF;
END IF;
IF entity REGEXP '^&#x[[:digit:]]+;$' THEN
SET codepoint = CAST(CONV(SUBSTRING(entity, 4, pos2 - pos1 - 3), 16, 10) AS UNSIGNED);
IF codepoint > 31 THEN
SET tmp = CONCAT(LEFT(tmp, pos1 - 1), CHAR(codepoint USING utf32), SUBSTRING(tmp, pos2 + 1));
END IF;
END IF;
END IF;
SET pos1 = pos1 + 1;
END LOOP;
END$$
DELIMITER ;
I have created this function:
DELIMITER $$
DROP FUNCTION IF EXISTS DecodeHTML$$
CREATE FUNCTION DecodeHTML(X VARCHAR(255)) RETURNS VARCHAR(255) CHARSET utf8 DETERMINISTIC
BEGIN
DECLARE TextString VARCHAR(255);
DECLARE word VARCHAR(255);
SET TextString = X;
SET word = 'á';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'á');
END IF;
SET word = 'é';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'é') ;
END IF;
SET word = 'í';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'í') ;
END IF;
SET word = 'ó';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ó') ;
END IF;
SET word = 'ú';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ú') ;
END IF;
SET word = 'Á';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Á') ;
END IF;
SET word = 'É';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'É') ;
END IF;
SET word = 'Í';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Í') ;
END IF;
SET word = 'Ó';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ó') ;
END IF;
SET word = 'Ú';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ú') ;
END IF;
SET word = 'À';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'À');
END IF;
SET word = 'Â';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Â');
END IF;
SET word = 'Ä';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ä');
END IF;
SET word = 'Æ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Æ');
END IF;
SET word = 'È';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'È');
END IF;
SET word = 'Ê';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ê');
END IF;
SET word = 'Ì';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ì');
END IF;
SET word = 'Ð';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ð');
END IF;
SET word = 'Ò';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ò');
END IF;
SET word = 'Ù';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ù');
END IF;
SET word = 'Ô';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ô');
END IF;
SET word = 'Ö';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ö');
END IF;
SET word = 'Ø';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ø');
END IF;
SET word = 'Ü';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ü');
END IF;
SET word = 'ü';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ü');
END IF;
SET word = 'ÿ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ÿ');
END IF;
SET word = 'ý';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ý');
END IF;
SET word = 'õ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'õ');
END IF;
SET word = 'ï';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ï');
END IF;
SET word = 'ç';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ç');
END IF;
SET word = 'ë';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ë');
END IF;
SET word = 'Î';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Î');
END IF;
SET word = '€';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'€') ;
END IF;
SET word = 'ñ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'ñ') ;
END IF;
SET word = 'Ñ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'Ñ') ;
END IF;
SET word = '£';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'£') ;
END IF;
SET word = '©';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'©') ;
END IF;
SET word = '®';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'®');
END IF;
SET word = ' ';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,' ');
END IF;
SET word = '¤';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'¤');
END IF;
SET word = '¦';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'¦');
END IF;
SET word = '¶';
IF INSTR( X , word ) THEN
SET TextString = REPLACE(TextString, word,'¶');
END IF;
RETURN TextString;
END$$
DELIMITER ;
Nope, there is no function in mySQL to decode html entities since it is not concerned with html :)
精彩评论