From 0666a97a1967fc9078fd3a856efea731d609c63c Mon Sep 17 00:00:00 2001
From: Fil <fil@rezo.net>
Date: Sat, 25 Sep 2004 20:11:26 +0000
Subject: [PATCH] =?UTF-8?q?backport=20des=20r=C3=A9centes=20modifs=20spip-?=
 =?UTF-8?q?lab=20sur=20correcteur=20ortho=20+=20proxy=20orthograph?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

eCVS: ----------------------------------------------------------------------
---
 ecrire/articles_ortho.php | 115 ++++++++++++++++++++++----------------
 ecrire/inc_charsets.php3  |  65 +++++++++++++++------
 ecrire/inc_sites.php3     |  22 ++++----
 ecrire/inc_texte.php3     |  29 +++++-----
 ecrire/lab_ortho.php      | 113 +++++++++++++++++++------------------
 5 files changed, 198 insertions(+), 146 deletions(-)

diff --git a/ecrire/articles_ortho.php b/ecrire/articles_ortho.php
index 8f2d5cc7ac..98b2381bd2 100644
--- a/ecrire/articles_ortho.php
+++ b/ecrire/articles_ortho.php
@@ -6,14 +6,7 @@ include_spip("ecrire.php");
 include_spip("ortho.php");
 include_spip("layer.php"); // Pour $browser_name
 
-$articles_surtitre = lire_meta("articles_surtitre");
-$articles_soustitre = lire_meta("articles_soustitre");
-$articles_descriptif = lire_meta("articles_descriptif");
-$articles_urlref = lire_meta("articles_urlref");
-$articles_chapeau = lire_meta("articles_chapeau");
-$articles_ps = lire_meta("articles_ps");
-$articles_redac = lire_meta("articles_redac");
-$articles_mots = lire_meta("articles_mots");
+//charset_texte('utf-8');
 
 
 //
@@ -65,33 +58,33 @@ $ortho = "";
 // Gros hack IE pour le "position: fixed"
 $code_ie = "<!--[if IE]>
 <style type=\"text/css\" media=\"screen\">
-body {
-	height: 100%; margin: 0px; padding: 0px;
-	overflow: hidden;
-}
-.ortho-content {
-	position: absolute; left: 0px;
-	height: 100%; margin: 0px; padding: 0px;
-	width: 72%;
-	overflow-y: auto;
-}
-#ortho-fixed {
-	position: absolute; right: 0px; width: 25%;
-	height: 100%; margin: 0px; padding: 0px;
-	overflow: hidden;
-}
-.ortho-padding {
-	padding: 12px;
-}
+	body {
+		height: 100%; margin: 0px; padding: 0px;
+		overflow: hidden;
+	}
+	.ortho-content {
+		position: absolute; left: 0px;
+		height: 100%; margin: 0px; padding: 0px;
+		width: 72%;
+		overflow-y: auto;
+	}
+	#ortho-fixed {
+		position: absolute; right: 0px; width: 25%;
+		height: 100%; margin: 0px; padding: 0px;
+		overflow: hidden;
+	}
+	.ortho-padding {
+		padding: 12px;
+	}
 </style>
 <script type=\"text/javascript\">
-onload = function() { ortho-content.focus(); }
+	onload = function() { document.getElementById('ortho-content').focus(); }
 </script>
 <![endif]-->";
 
 debut_html(_T('ortho_orthographe'), $code_ie);
 
-changer_typo('','article'.$id_article);
+changer_typo($lang_article);
 
 // Ajouts et suppressions de mots par l'utilisateur
 gerer_dico_ortho($lang_article);
@@ -105,9 +98,9 @@ echo "<div class='ortho-padding serif'>";
 debut_cadre_enfonce();
 
 foreach ($champs as $champ) {
-	$$champ = preparer_ortho($$champ, $lang_article);
 	$ortho .= $$champ." ";
 }
+$ortho = preparer_ortho($ortho, $lang_article);
 $result_ortho = corriger_ortho($ortho, $lang_article);
 if (is_array($result_ortho)) {
 	$mots = $result_ortho['mauvais'];
@@ -120,12 +113,6 @@ if (is_array($result_ortho)) {
 	}
 
 	panneau_ortho($result_ortho);
-
-	foreach ($champs as $champ) {
-		list($$champ, $echap[$champ]) = echappe_html($$champ);
-		$$champ = souligner_ortho($$champ, $lang_article, $result_ortho);
-		$echap[$champ] = afficher_ortho($echap[$champ]);
-	}
 }
 else {
 	$erreur = $result_ortho;
@@ -133,9 +120,6 @@ else {
 	echo traduire_nom_langue($lang_article);
 	echo "). ";
 	echo _T('ortho_verif_impossible')."</b>";
-	foreach ($champs as $champ) {
-		$$champ = afficher_ortho($$champ);
-	}
 }
 
 fin_cadre_enfonce();
@@ -146,21 +130,54 @@ echo "</div>";
 //
 // Colonne de gauche : textes de l'article
 //
-echo "<div class='ortho-content'>";
+echo "<div class='ortho-content' id='ortho-content'>";
 echo "<div class='ortho-padding serif'>";
 
+// Traitement des champs : soulignement des mots mal orthographies
+foreach ($champs as $champ) {
+	switch ($champ) {
+	case 'texte':
+	case 'chapo':
+	case 'descriptif':
+	case 'ps':
+		// Mettre de cote les <code>, <cadre>, etc.
+		list($$champ, $echap) = echappe_html($$champ, "ORTHO");
+		$$champ = propre($$champ);
+		break;
+	default:
+		$echap = "";
+		$$champ = typo($$champ);
+		break;
+	}
+	// On passe en UTF-8 juste pour la correction
+	$$champ = preparer_ortho($$champ, $lang_article);
+	if (is_array($result_ortho))
+		$$champ = souligner_ortho($$champ, $lang_article, $result_ortho);
+	// Et on repasse dans le charset original pour remettre les echappements
+	$$champ = afficher_ortho($$champ);
+	if ($echap)
+		$$champ = echappe_retour($$champ, $echap, "ORTHO");
+}
+// Traitement identique pour les notes de bas de page
+if ($les_notes) {
+	$les_notes = preparer_ortho($les_notes, $lang_article);
+	if (is_array($result_ortho))
+		$les_notes = souligner_ortho($les_notes, $lang_article, $result_ortho);
+	$les_notes = afficher_ortho($les_notes);
+}
+
 debut_cadre_relief();
 
 if ($surtitre) {
 	echo "<span $dir_lang><font face='arial,helvetica' size='3'><b>";
-	echo typo($surtitre);
+	echo $surtitre;
 	echo "</b></font></span>\n";
 }
 gros_titre($titre);
 
 if ($soustitre) {
 	echo "<span $dir_lang><font face='arial,helvetica' size='3'><b>";
-	echo typo($soustitre);
+	echo $soustitre;
 	echo "</b></font></span>\n";
 }
 
@@ -169,16 +186,13 @@ if ($descriptif OR $url_site OR $nom_site) {
 	echo "<font size='2' face='Verdana,Arial,Sans,sans-serif'>";
 	$texte_case = ($descriptif) ? "{{"._T('info_descriptif')."}} $descriptif\n\n" : '';
 	$texte_case .= ($nom_site.$url_site) ? "{{"._T('info_urlref')."}} [".$nom_site."->".$url_site."]" : '';
-	echo propre($texte_case, $echap['descriptif']);
+	echo $descriptif;
 	echo "</font>";
 	echo "</div>";
 }
 
 
-
-//////////////////////////////////////////////////////
 // Corps de l'article
-//
 
 echo "\n\n<div align='justify'>";
 
@@ -189,15 +203,18 @@ if ($virtuel) {
 }
 else {
 	echo "<div $dir_lang><b>";
-	echo justifier(propre($chapo, $echap['chapo']));
+	echo $chapo;
 	echo "</b></div>\n\n";
 
-	echo "<div $dir_lang>".justifier(propre($texte, $echap['texte']))."</div>";
+	echo "<div $dir_lang>";
+	echo $texte;
+	echo "</div>";
 
 	if ($ps) {
 		echo debut_cadre_enfonce();
 		echo "<div $dir_lang><font size='2' face='Verdana,Arial,Sans,sans-serif'>";
-		echo justifier("<b>"._T('info_ps')."</b> ".propre($ps, $echap['ps']));
+		echo "<b>"._T('info_ps')."</b> ";
+		echo $ps;
 		echo "</font></div>";
 		echo fin_cadre_enfonce();
 	}
@@ -205,7 +222,7 @@ else {
 	if ($les_notes) {
 		echo debut_cadre_relief();
 		echo "<div $dir_lang><font size='2'>";
-		echo justifier("<b>"._T('info_notes')."&nbsp;:</b> ".$les_notes);
+		echo "<b>"._T('info_notes')."&nbsp;:</b> ".$les_notes;
 		echo "</font></div>";
 		echo fin_cadre_relief();
 	}
diff --git a/ecrire/inc_charsets.php3 b/ecrire/inc_charsets.php3
index 11d4951513..516438f858 100644
--- a/ecrire/inc_charsets.php3
+++ b/ecrire/inc_charsets.php3
@@ -2160,7 +2160,7 @@ function charset2unicode($texte, $charset='AUTO', $forcer = false) {
 function unicode2charset($texte, $charset='AUTO') {
 	static $CHARSET_REVERSE;
 	if ($charset == 'AUTO')
-		$charset=lire_meta('charset');
+		$charset = lire_meta('charset');
 
 	switch($charset) {
 	case 'utf-8':
@@ -2170,23 +2170,31 @@ function unicode2charset($texte, $charset='AUTO') {
 	default:
 		$charset = load_charset($charset);
 
-		// array_flip
 		if (!is_array($CHARSET_REVERSE[$charset])) {
-			$trans = $GLOBALS['CHARSET'][$charset];
-			foreach ($trans as $chr => $uni) 
-				$CHARSET_REVERSE[$charset][$uni] = $chr;
+			$CHARSET_REVERSE[$charset] = array_flip($GLOBALS['CHARSET'][$charset]);
 		}
 
-		while ($a = strpos(' '.$texte, '&')) {
-			$traduit .= substr($texte,0,$a-1);
-			$texte = substr($texte,$a-1);
-			if (eregi('^&#0*([0-9]+);',$texte,$match) AND ($s = $CHARSET_REVERSE[$charset][$match[1]]))
-				$texte = str_replace($match[0], chr($s), $texte);
-			// avancer d'un cran
-			$traduit .= $texte[0];
-			$texte = substr($texte,1);
+		$trans = array();
+		// Construire la table de remplacements
+		// 1. Entites decimales (type "&#123;")
+		if (preg_match_all(',&#([0-9]+);,', $texte, $regs, PREG_PATTERN_ORDER)) {
+			$entites = array_flip($regs[1]);
+			foreach ($entites as $e => $v) {
+				if ($s = ($e < 128) ? $e : $CHARSET_REVERSE[$charset][intval($e)])
+					$trans['&#'.$e.';'] = chr($s);
+			}
+		}
+		// 2. Entites hexadecimales (type "&#xD;")
+		if (preg_match_all(',&#x([0-9a-zA-Z]+);,', $texte, $regs, PREG_PATTERN_ORDER)) {
+			$entites = array_flip($regs[1]);
+			foreach ($entites as $e => $v) {
+				$h = hexdec($e);
+				if ($s = ($h < 128) ? $h : $CHARSET_REVERSE[$charset][$h])
+					$trans['&#x'.$e.';'] = chr($s);
+			}
 		}
-		return $traduit.$texte;
+		$texte = strtr($texte, $trans);
+		return $texte;
 	}
 }
 
@@ -2279,16 +2287,39 @@ function utf_8_to_unicode($source) {
 
 // UTF-32 : utilise en interne car plus rapide qu'UTF-8
 function utf_32_to_unicode($source) {
-	$texte = "";
-	// Plusieurs iterations pour eviter l'explosion memoire
-	while ($source) {
+	/* Former word-by-word implementation, kept for reference: while ($source) {
 		$words = unpack("V*", substr($source, 0, 1024));
 		$source = substr($source, 1024);
 		foreach ($words as $word) {
 			if ($word < 128) $texte .= chr($word);
 			else if ($word != 65279) $texte .= '&#'.$word.';';
 		}
+	}*/
+	// Convert a string of 32-bit little-endian words ("V") to ASCII plus decimal
+	// numeric entities (&#NNN;). Every aligned word of $source is a key of $trans
+	// and strtr() scans only once, left to right: it always consumes exactly one
+	// word at a time, so a match can never straddle two characters.
+	$chars = array();
+	$len = strlen($source);
+	$chunk_len = 16384;
+	// List the code points in use, chunk by chunk to bound memory usage
+	for ($i = 0; $i < $len; $i += $chunk_len) {
+		$chars = $chars + array_flip(unpack("V*", substr($source, $i, $chunk_len)));
+	}
+	$trans = array();
+	foreach ($chars as $c => $v) {
+		// ASCII is copied as-is, the byte order mark (65279) is dropped and
+		// any other code point becomes a decimal numeric entity
+		if ($c < 128)
+			$to = chr($c);
+		else if ($c != 65279)
+			$to = '&#'.$c.';';
+		else
+			$to = '';
+		$trans[pack("V", $c)] = $to;
 	}
+	$texte = strtr($source, $trans);
+
 	return $texte;
 }
 
diff --git a/ecrire/inc_sites.php3 b/ecrire/inc_sites.php3
index 978405f362..cb855bacea 100644
--- a/ecrire/inc_sites.php3
+++ b/ecrire/inc_sites.php3
@@ -559,21 +559,21 @@ function afficher_syndic_articles($titre_table, $requete, $afficher_site = false
 	if ($num_rows <= 1.5 * $nombre_aff) $nombre_aff = $num_rows;
 
 		if ($num_rows > 0) {
-			echo "<P><TABLE WIDTH=100% CELLPADDING=0 CELLSPACING=0 BORDER=0><TR><TD WIDTH=100% BACKGROUND=''>";
-			echo "<TABLE WIDTH=100% CELLPADDING=3 CELLSPACING=0 BORDER=0>";
+			echo "<p><table width='100%' cellpadding='0' cellspacing='0' border='0'><tr><td width='100%' background=''>";
+			echo "<table width='100%' cellpadding='3' cellspacing='0' border='0'>";
 
 			bandeau_titre_boite($titre_table, true);
 
 			if ($num_rows > $nombre_aff) {
-				echo "<tr><td background='' class='arial2' colspan=4>";
+				echo "<tr><td background='' class='arial2' colspan='4'>";
 				for ($i = 0; $i < $num_rows; $i = $i + $nombre_aff){
 					$deb = $i + 1;
 					$fin = $i + $nombre_aff;
 					if ($fin > $num_rows) $fin = $num_rows;
 					if ($debut_liste_sites[$n_liste_sites] == $i) {
-						echo "[<B>$deb-$fin</B>] ";
+						echo "[<b>$deb-$fin</b>] ";
 					} else {
-						echo "[<A HREF='".$adresse_page.$lien_url."debut_liste_sites[$n_liste_sites]=$i'>$deb-$fin</A>] ";
+						echo "[<a href='".$adresse_page.$lien_url."debut_liste_sites[$n_liste_sites]=$i'>$deb-$fin</a>] ";
 					}
 				}
 				echo "</td></tr>";
@@ -601,7 +601,7 @@ function afficher_syndic_articles($titre_table, $requete, $afficher_site = false
 					echo "<tr bgcolor='$couleur'>";
 					
 					echo "<td class='arial1'>";
-					echo "<A HREF='$url'>";
+					echo "<a href='$url'>";
 					if ($statut=='publie') {
 						if (acces_restreint_rubrique($id_rubrique))
 							$puce = 'puce-verte-anim.gif';
@@ -620,17 +620,17 @@ function afficher_syndic_articles($titre_table, $requete, $afficher_site = false
 							$puce = 'puce-rouge-anim.gif';
 					}
 
-					echo "<img src='img_pack/$puce' width='7' height='7' border='0'>";
+					echo "<img src='img_pack/$puce' width='7' height='7' border='0' alt='' />";
 
 					if ($statut == "refuse")
 						echo "<font color='black'>&nbsp;&nbsp;$titre</font>";
 					else
 						echo "&nbsp;&nbsp;".$titre;
 
-					echo "</A>";
+					echo "</a>";
 
-					if (strlen($lesauteurs)>0) echo "<br>"._T('info_auteurs_nombre')." <font color='#336666'>$lesauteurs</font>";
-					if (strlen($descriptif)>0) echo "<br>"._T('info_descriptif_nombre')." <font color='#336666'>$descriptif</font>";
+					if (strlen($lesauteurs)>0) echo "<br />"._T('info_auteurs_nombre')." <font color='#336666'>$lesauteurs</font>";
+					if (strlen($descriptif)>0) echo "<br />"._T('info_descriptif_nombre')." <font color='#336666'>$descriptif</font>";
 					
 					echo "</td>";
 					
@@ -672,7 +672,7 @@ function afficher_syndic_articles($titre_table, $requete, $afficher_site = false
 			$compteur_liste++;
 
 		}
-		echo "</TABLE></TD></TR></TABLE>";
+		echo "</table></td></tr></table></p>";
 	}
 }
 
diff --git a/ecrire/inc_texte.php3 b/ecrire/inc_texte.php3
index 5828db7f9d..60633c336b 100644
--- a/ecrire/inc_texte.php3
+++ b/ecrire/inc_texte.php3
@@ -355,7 +355,7 @@ function couper_intro($texte, $long) {
 
 // Securite : empecher l'execution de code PHP
 function interdire_scripts($source) {
-	$source = eregi_replace("<(\%|\?|([[:space:]]*)script)", "&lt;\\1", $source);
+	$source = preg_replace(",<(\%|\?|([[:space:]]*)script),i", "&lt;\\1", $source); // "i": stay case-insensitive like the eregi_replace() it replaces (<SCRIPT, <Script...)
 	return $source;
 }
 
@@ -376,9 +376,8 @@ function typo_fr($letexte) {
 			"&deg;" => "&#176;"
 		);
 		$chars = array(160 => '~', 187 => '&#187;', 171 => '&#171;', 148 => '&#148;', 147 => '&#147;', 176 => '&#176;');
-		$charset = lire_meta('charset');
-		include_ecrire('inc_charsets.php3');
 
+		include_ecrire('inc_charsets.php3');
 		while (list($c, $r) = each($chars)) {
 			$c = unicode2charset(charset2unicode(chr($c), 'iso-8859-1', 'forcer'));
 			$trans[$c] = $r;
@@ -589,7 +588,7 @@ function extraire_lien ($regs) {
 		include_ecrire("inc_filtres.php3");
 		$lien_texte = supprimer_numero($lien_texte);
 	}
-	else if (ereg('^\?(.*)$', $lien_url, $regs)) {
+	else if (preg_match(',^\?(.*)$,s', $lien_url, $regs)) {
 		// Liens glossaire
 		$lien_url = substr($lien_url, 1);
 		$class_lien = "glossaire";
@@ -599,14 +598,14 @@ function extraire_lien ($regs) {
 		$class_lien = "out";
 		// texte vide ?
 		if ((!$lien_texte) and (!$lien_interne)) {
-			$lien_texte = ereg_replace('"', '', $lien_url);
+			$lien_texte = str_replace('"', '', $lien_url);
 			if (strlen($lien_texte)>40)
 				$lien_texte = substr($lien_texte,0,35).'...';
 			$class_lien = "url";
 			$lien_texte = "<html>$lien_texte</html>";
 		}
 		// petites corrections d'URL
-		if (ereg("^www\.[^@]+$",$lien_url))
+		if (preg_match(",^www\.[^@]+$,",$lien_url))
 			$lien_url = "http://".$lien_url;
 		else if (strpos($lien_url, "@") && email_valide($lien_url))
 			$lien_url = "mailto:".$lien_url;
@@ -622,7 +621,7 @@ function extraire_lien ($regs) {
 // Traitement des listes (merci a Michael Parienti)
 //
 function traiter_listes ($texte) {
-	$parags = split ("\n[[:space:]]*\n", $texte);
+	$parags = preg_split(",\n[[:space:]]*\n,", $texte);
 	unset($texte);
 
 	// chaque paragraphe est traite a part
@@ -637,7 +636,7 @@ function traiter_listes ($texte) {
 		// chaque item a sa profondeur = nb d'etoiles
 		unset ($type);
 		while (list(,$item) = each($lignes)) {
-			ereg("^([*]*|[#]*)([^*#].*)", $item, $regs);
+			preg_match(",^([*]*|[#]*)([^*#].*)$,s", $item, $regs);
 			$profond = strlen($regs[1]);
 
 			if ($profond > 0) {
@@ -724,10 +723,10 @@ function traiter_raccourcis_generale($letexte) {
 		$puce = $GLOBALS['puce'];
 
 	// Harmoniser les retours chariot
-	$letexte = ereg_replace ("\r\n?", "\n",$letexte);
+	$letexte = preg_replace(",\r\n?,", "\n", $letexte);
 
 	// Corriger HTML
-	$letexte = eregi_replace("</?p>","\n\n\n",$letexte);
+	$letexte = preg_replace(",</?p>,i", "\n\n\n", $letexte);
 
 	//
 	// Notes de bas de page
@@ -740,9 +739,9 @@ function traiter_raccourcis_generale($letexte) {
 		$num_note = false;
 
 		// note auto ou pas ?
-		if (ereg("^ *<([^>]*)>", $note_texte, $regs)){
+		if (preg_match(",^ *<([^>]*)>,", $note_texte, $regs)){
 			$num_note = $regs[1];
-			$note_texte = ereg_replace ("^ *<([^>]*)>", "", $note_texte);
+			$note_texte = str_replace($regs[0], "", $note_texte);
 		} else {
 			$compt_note++;
 			$num_note = $compt_note;
@@ -792,7 +791,7 @@ function traiter_raccourcis_generale($letexte) {
 		if (preg_match_all($regexp, $letexte, $matches, PREG_SET_ORDER))
 		foreach ($matches as $regs) {
 			$terme = trim($regs[1]);
-			$terme_underscore = urlencode(ereg_replace('[[:space:]]+', '_', $terme));
+			$terme_underscore = urlencode(preg_replace(',\s+,', '_', $terme));
 			if (strstr($url_glossaire_externe,"%s"))
 				$url = str_replace("%s", $terme_underscore, $url_glossaire_externe);
 			else
@@ -834,8 +833,8 @@ function traiter_raccourcis_generale($letexte) {
 	//
 	// Tableaux
 	//
-	$letexte = ereg_replace("^\n?\|", "\n\n|", $letexte);
-	$letexte = ereg_replace("\|\n?$", "|\n\n", $letexte);
+	$letexte = preg_replace(",^\n?\|,", "\n\n|", $letexte);
+	$letexte = preg_replace(",\|\n?$,", "|\n\n", $letexte);
 
 	$tableBeginPos = strpos($letexte, "\n\n|");
 	$tableEndPos = strpos($letexte, "|\n\n");
diff --git a/ecrire/lab_ortho.php b/ecrire/lab_ortho.php
index 07c547e1c4..8c9f2ca8da 100644
--- a/ecrire/lab_ortho.php
+++ b/ecrire/lab_ortho.php
@@ -153,9 +153,21 @@ function post_ortho($url, $texte, $lang) {
 
 	$t = parse_url($url);
 	if ($t['scheme'] != 'http') return false;
-	$host = $t['host'];
-	$port = $t['port'] ? $t['port'] : 80;
-	$path = $t['path'] ? $t['path'] : "/";
+
+	$http_proxy = lire_meta("http_proxy");
+	if (eregi("^http://([^:]*)(:(.*))?", $http_proxy, $rr)) {
+		$host= $rr[1];
+		if($rr[2]) {
+			$port= $rr[3];
+		} else {
+			$port= 80;
+		}
+		$path= $url;
+	} else {
+		$host = $t['host'];
+		$port = $t['port'] ? $t['port'] : 80;
+		$path = $t['path'] ? $t['path'] : "/";
+	}
 
 	// Envoyer la requete en POST
 	$f = @fsockopen($host, $port, $errno, $errstr, 2);
@@ -410,15 +422,21 @@ function corriger_ortho($texte, $lang, $charset = 'AUTO') {
 		// Caracteres ASCII non-alphanumeriques
 		$texte = preg_replace(",[^-a-zA-Z0-9\x80-\xFF']+,", ' ', $texte);
 	}
-	$texte = preg_replace(',\s[-\']+,', ' ', $texte); // tirets de typo
-	$texte = preg_replace(',\'\s,', ' ', $texte); // apostrophes utilisees comme guillemets
-	$texte = preg_replace(',\s+,', ' ', $texte);
+
+	### ici j'ai un bug de sorcellerie si je laisse le \s du lab : un caractere
+	### accentue comme "a`" se fait decouper en deux : le second octet de sa
+	### representation utf-8 est mange par le preg_replace !! Or, je ne vois
+	### pas de difference entre spip-stable et spip-lab a ce niveau, et le bug
+	### n'est pas dans le lab.
+	$texte = preg_replace(', [-\']+,', ' ', $texte); // tirets de typo
+	$texte = preg_replace(',\' ,', ' ', $texte); // apostrophes utilisees comme guillemets
+	#$texte = preg_replace(', +,', ' ', $texte);	# inutile
 
 	// Virer les mots contenant au moins un chiffre
 	$texte = preg_replace(', ([^ ]*\d[^ ]* )+,', ' ', $texte);
 
 	// Melanger les mots
-	$mots = preg_split(',\s+,u', $texte);
+	$mots = preg_split(', +,u', $texte);
 	sort($mots);
 	$mots = array_unique($mots);
 
@@ -597,6 +615,38 @@ function panneau_ortho($ortho_result) {
 	}
 }
 
+
+function souligner_match_ortho(&$texte, $cherche, $remplace) {
+	// Replace in place (by reference) the occurrences of $cherche found in
+	// $texte with $remplace, skipping those located inside an HTML tag.
+	// Nothing is returned.
+
+	// Leave alone empty needles (explode() cannot split on them) and
+	// HTML entities such as &mdash;
+	$fin = strlen($cherche) - 1;
+	if ($fin < 0 OR ($cherche[0] == '&' AND $cherche[$fin] == ';')) return;
+
+	// A needle starting with '>' is replaced directly, without the tag check
+	if ($cherche[0] == '>') {
+		$texte = str_replace($cherche, $remplace, $texte);
+		return;
+	}
+
+	// Rebuild the text piece by piece around each occurrence
+	$table = explode($cherche, $texte);
+	$texte = $table[0];
+	for ($i = 1, $n = count($table); $i < $n; $i++) {
+		// The occurrence lies inside a tag when the last '<' of the text
+		// rebuilt so far is not followed by a '>'
+		$deb_tag = strrpos($texte, '<');
+		if (is_int($deb_tag) AND strrpos($texte, '>') <= $deb_tag)
+			$texte .= $cherche;
+		else
+			$texte .= $remplace;
+		$texte .= $table[$i];
+	}
+}
+
 function souligner_ortho($texte, $lang, $ortho_result) {
 	global $id_suggest;
 	$vu = array();
@@ -619,29 +669,7 @@ function souligner_ortho($texte, $lang, $ortho_result) {
 				$vu[$cherche] = 1;
 				$html = "<a class='ortho' onclick=\"suggest($id);return false;\" href=''>$mot_html</a>";
 				$remplace = str_replace($mot, $html, $cherche);
-				$table = explode($cherche, $texte);
-				unset($avant);
-				$texte = '';
-				foreach ($table as $s) {
-					if (!isset($avant)) {
-						$avant = $s;
-						continue;
-					}
-					$ok = true;
-					$texte .= $avant;
-					if (is_int($deb_tag = strrpos($texte, '<'))) {
-						if (strrpos($texte, '>') <= $deb_tag)
-							$ok = false;
-					}
-					if ($ok) {
-						$texte .= $remplace;
-					}
-					else {
-						$texte .= $cherche;
-					}
-					$avant = $s;
-				}
-				$texte .= $avant;
+				souligner_match_ortho($texte, $cherche, $remplace);
 			}
 		}
 	}
@@ -656,29 +684,7 @@ function souligner_ortho($texte, $lang, $ortho_result) {
 				$vu[$cherche] = 1;
 				$html = "<a class='ortho-dico' onclick=\"suggest($id);return false;\" href=''>$mot_html</a>";
 				$remplace = str_replace($mot, $html, $cherche);
-				$table = explode($cherche, $texte);
-				unset($avant);
-				$texte = '';
-				foreach ($table as $s) {
-					if (!isset($avant)) {
-						$avant = $s;
-						continue;
-					}
-					$ok = true;
-					$texte .= $avant;
-					if (is_int($deb_tag = strrpos($texte, '<'))) {
-						if (strrpos($texte, '>') <= $deb_tag)
-							$ok = false;
-					}
-					if ($ok) {
-						$texte .= $remplace;
-					}
-					else {
-						$texte .= $cherche;
-					}
-					$avant = $s;
-				}
-				$texte .= $avant;
+				souligner_match_ortho($texte, $cherche, $remplace);
 			}
 		}
 	}
@@ -687,7 +693,6 @@ function souligner_ortho($texte, $lang, $ortho_result) {
 	//echo "<div style='font-weight: bold; color: red;'>$dt s.</div>";
 	
 	$texte = preg_replace(',(^ | $),', '', $texte);
-	$texte = afficher_ortho($texte);
 	return $texte;
 }
 
-- 
GitLab