From 043e59f117d4862a0e3e25e7466ddbc6d54120e7 Mon Sep 17 00:00:00 2001
From: Fil <fil@rezo.net>
Date: Sat, 11 Feb 2006 23:24:52 +0000
Subject: [PATCH] simplification de la regexep is_ascii/is_utf8 (plantait chez James)

---
 ecrire/inc_charsets.php | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/ecrire/inc_charsets.php b/ecrire/inc_charsets.php
index 5d249824a4..e07ced7d01 100644
--- a/ecrire/inc_charsets.php
+++ b/ecrire/inc_charsets.php
@@ -507,21 +507,19 @@ function bom_utf8($texte) {
 // http://us2.php.net/manual/fr/function.mb-detect-encoding.php#50087
 // http://w3.org/International/questions/qa-forms-utf-8.html
 function is_utf8($string) {
- return preg_match('%^(?:
- [\x09\x0A\x0D\x20-\x7E] # ASCII
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
- | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
- )*$%xs', $string);
+ return preg_match(',^(?:'
+ . '[\x09\x0A\x0D\x20-\x7E]' # ASCII
+ . '|[\xC2-\xDF][\x80-\xBF]' # non-overlong 2-byte
+ . '|\xE0[\xA0-\xBF][\x80-\xBF]' # excluding overlongs
+ . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' # straight 3-byte
+ . '|\xED[\x80-\x9F][\x80-\xBF]' # excluding surrogates
+ . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' # planes 1-3
+ . '|[\xF1-\xF3][\x80-\xBF]{3}' # planes 4-15
+ . '|\xF4[\x80-\x8F][\x80-\xBF]{2}' # plane 16
+ . ')*$,s', $string);
 }
 function is_ascii($string) {
- return preg_match('%^(?:
- [\x09\x0A\x0D\x20-\x7E] # ASCII
- )*$%xs', $string);
+ return preg_match(',^[\x09\x0A\x0D\x20-\x7E]*$,s', $string);
 }
 
 // Transcode une page (attrapee sur le web, ou un squelette) en essayant
-- 
GitLab