Browse Source

fix: utiliser la librairie HtmlPurifier dans la fonction inc_safehtml_dist(). On met en place une config par defaut pas trop sévère, mais on laisse la possibilité de proposer une configuration personalisée via un fichier pour les utilisateurs avancés

Refs: spip/spip#3926
remotes/checkIfPRContentChanged-1659974100248228647/refactor_texte_safety
Cerdic 2 months ago
parent
commit
2d479f291c
  1. 14
      inc/HTMLPurifier.extended.php
  2. 76
      inc/safehtml.php

14
inc/HTMLPurifier.extended.php

@ -0,0 +1,14 @@
<?php
class HTMLPurifier_URIScheme_generic extends HTMLPurifier_URIScheme {
public function doValidate(&$uri, $config, $context) {
return true;
}
}
class HTMLPurifier_URIScheme_tcp extends HTMLPurifier_URIScheme_generic {
}
class HTMLPurifier_URIScheme_udp extends HTMLPurifier_URIScheme_generic {
}
class HTMLPurifier_URIScheme_ssh extends HTMLPurifier_URIScheme_generic {
}

76
inc/safehtml.php

@ -14,46 +14,52 @@ if (!defined('_ECRIRE_INC_VERSION')) {
return;
}
// Controle la presence de la lib safehtml et cree la fonction
// de transformation du texte qui l'exploite
function inc_safehtml_dist($t) {
static $process, $test;
if (!$test) {
$process = false;
if ($f = find_in_path('lib/safehtml/classes')) {
define('XML_HTMLSAX3', $f . '/');
require_once XML_HTMLSAX3 . 'safehtml.php';
$process = new safehtml();
$process->deleteTags[] = 'param'; // sinon bug Firefox
/**
* Sanitization du HTML via la librairie Purifier
* @param string $t
* @return false|string
*/
function inc_safehtml_dist(string $t): string {
static $purifier;
if (!isset($purifier)) {
require_spip('lib/htmlpurifier/HTMLPurifier.standalone');
require_spip('inc/HTMLPurifier.extended');
$config = HTMLPurifier_Config::createDefault();
if ($iniFile = find_in_path('safehtml/htmlpurifier.ini')) {
$config->loadIni($iniFile);
}
if ($process) {
$test = 1;
} # ok
else {
$test = -1;
} # se rabattre sur une fonction de securite basique
$config->set('Attr.EnableID', true);
$config->set('HTML.TidyLevel', 'none');
$config->set('Cache.SerializerPath', rtrim(realpath(_DIR_TMP), '/'));
// trop severe pour une utilisation generique ?
#$config->set('HTML.SafeIframe', true);
#$config->set('URI.SafeIframeRegexp', '%^http[s]?://[a-z0-9\.]*' . $_SERVER['HTTP_HOST'] . '/%iS');
$config->set('Attr.AllowedFrameTargets', ['_blank']);
$config->set('Attr.AllowedRel', 'nofollow,print,external,bookmark');
$config->set('URI.AllowedSchemes', ['http' => true, 'https' => true, 'mailto' => true, 'ftp' => true, 'nntp' => true, 'news' => true, 'tel' => true, 'tcp' => true, 'udp' => true, 'ssh' => true,]);
}
$html = $config->getHTMLDefinition(true);
$html->manager->addModule('Forms');
$html->manager->registeredModules['Forms']->safe = true;
$purifier = new HTMLPurifier($config);
}
if ($test > 0) {
# reset ($process->clear() ne vide que _xhtml...),
# on doit pouvoir programmer ca plus propremement
$process->_counter = [];
$process->_stack = [];
$process->_dcCounter = [];
$process->_dcStack = [];
$process->_listScope = 0;
$process->_liStack = [];
# $process->parse(''); # cas particulier ?
$process->clear();
$t = $process->parse($t);
// HTML Purifier prefere l'utf-8
$charset = (empty($GLOBALS['meta']['charset']) ? _DEFAULT_CHARSET : $GLOBALS['meta']['charset']);
if ($charset === 'utf-8') {
$t = $purifier->purify($t);
} else {
$t = entites_html($t);
} // tres laid, en cas d'erreur
// supprimer un <li></li> provenant d'un <li> ouvrant seul+safehtml
// cf https://core.spip.net/issues/2201
$t = str_replace('<li></li>', '', $t);
$t = unicode2charset($purifier->purify(charset2unicode($t)));
}
return $t;
}

Loading…
Cancel
Save