From 8813fa863abbb77d6df98d8ecef7191fbb824cd5 Mon Sep 17 00:00:00 2001 From: Fil <fil@rezo.net> Date: Wed, 25 Jan 2006 18:02:10 +0000 Subject: [PATCH] =?UTF-8?q?Syndication=20:=20-=20un=20point=20d'entr=C3=A9?= =?UTF-8?q?e=20pre=5Fsyndication,=20avant=20l'analyse=20du=20fichier=20RSS?= =?UTF-8?q?=20-=20option=20globale=20pour=20d=C3=A9sactiver=20le=20control?= =?UTF-8?q?e=20des=20dates=20des=20items=20RSS=20-=20une=20gestion=20des?= =?UTF-8?q?=20dates=20courtes=20du=20type=202006-01=20ou=202006=20(Minh=20?= =?UTF-8?q?Ha=20Duong)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ecrire/inc_index.php3 | 13 +++++++------ ecrire/inc_syndic.php | 27 +++++++++++++++++++++++---- ecrire/inc_version.php3 | 4 ++++ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/ecrire/inc_index.php3 b/ecrire/inc_index.php3 index e522e6b490..9cdb9033e7 100644 --- a/ecrire/inc_index.php3 +++ b/ecrire/inc_index.php3 @@ -589,12 +589,15 @@ function effectuer_une_indexation($nombre_indexations = 1) { if (isset($INDEX_iteration_nb_maxi[$table])) $limit = min($limit,$INDEX_iteration_nb_maxi[$table]); + // indexer en priorite les '1' (a reindexer), ensuite les '' + // (statut d'indexation inconnu), enfin les 'idx' (ceux dont + // l'indexation a precedemment echoue, p. ex. a cause d'un timeout) foreach (array('1', '', 'idx') as $mode) { - $s = spip_query("SELECT $table_primary, idx FROM $table + $s = spip_query("SELECT $table_primary AS id FROM $table WHERE idx='$mode' AND $critere LIMIT $limit"); while ($t = spip_fetch_array($s)) { - $vu[$table] .= $t[0].", "; - indexer_objet($table, $t[0], $t[1]); + $vu[$table] .= $t['id'].", "; + indexer_objet($table, $t['id'], $mode); } if ($vu) break; } @@ -613,10 +616,8 @@ function executer_une_indexation_syndic() { } function creer_liste_indexation() { - $tables = liste_index_tables(); - while (list(,$table) = each($tables)) { + foreach (liste_index_tables() as $table) spip_query("UPDATE $table SET idx='1' WHERE idx!='non'"); - } } function purger_index() { diff --git a/ecrire/inc_syndic.php b/ecrire/inc_syndic.php index b127d69c5f..805a22a55f 100644 --- a/ecrire/inc_syndic.php +++ b/ecrire/inc_syndic.php @@ -118,6 +118,8 @@ function cdata_echappe_retour(&$table, &$echappe_cdata) { function analyser_backend($rss, $url_syndic='') { include_ecrire("inc_texte"); # pour couper() + $rss = pipeline('pre_syndication', $rss); + // Echapper les CDATA $echappe_cdata = array(); if (preg_match_all(',<!\[CDATA\[(.*)]]>,Uims', $rss, @@ -212,9 +214,16 @@ function analyser_backend($rss, $url_syndic='') { preg_match(',<date>([^<]*)<,Uims',$item,$match)) $la_date = my_strtotime($match[1]); - if ($la_date < time() - 365 * 24 * 3600 - OR $la_date > time() + 48 * 3600) - $la_date = time(); + // controle de validite de la date + // pour eviter qu'un backend errone passe toujours devant + // (note: ca pourrait etre defini site par site, mais ca risque d'etre + // plus lourd que vraiment utile) + if ($GLOBALS['controler_dates_rss']) { + if ($la_date < time() - 365 * 24 * 3600 + OR $la_date > time() + 48 * 3600) + $la_date = time(); + } + $data['date'] = $la_date; // Honorer le <lastbuilddate> en forcant la date @@ -483,14 +492,24 @@ function syndic_a_jour($now_id_syndic, $statut = 'off') { // http://www.w3.org/TR/NOTE-datetime function my_strtotime($la_date) { + // format complet if (preg_match( - ',^([0-9]+-[0-9]+-[0-9]+T[0-9]+:[0-9]+(:[0-9]+)?)(\.[0-9]+)?' + ',^([0-9]+-[0-9]+-[0-9]+[T ][0-9]+:[0-9]+(:[0-9]+)?)(\.[0-9]+)?' .'(Z|([-+][0-9][0-9]):[0-9]+)?$,', $la_date, $match)) { $la_date = str_replace("T", " ", $match[1])." GMT"; return strtotime($la_date) - intval($match[5]) * 3600; } + // YYYY + if (preg_match(',^([0-9][0-9][0-9][0-9])$,', $la_date, $match)) + return strtotime($match[1]."-01-01"); + + // YYYY-MM + if (preg_match(',^([0-9][0-9][0-9][0-9]-[0-9][0-9])$,', $la_date, $match)) + return strtotime($match[1]."-01"); + + // utiliser strtotime en dernier ressort $s = strtotime($la_date); if ($s > 0) return $s; diff --git a/ecrire/inc_version.php3 b/ecrire/inc_version.php3 index 49eb856156..2ff816d4e2 100644 --- a/ecrire/inc_version.php3 +++ b/ecrire/inc_version.php3 @@ -158,6 +158,9 @@ $ortho_servers = array ('http://ortho.spip.net/ortho_serveur.php'); // Produire du TeX ou du MathML ? $traiter_math = 'tex'; +// Controler les dates des item dans les flux RSS ? +$controler_dates_rss = true; + // // Plugins @@ -171,6 +174,7 @@ $spip_pipeline = array( 'pre_propre' => '|extraire_multi', 'post_propre' => '', 'pre_indexation' => '', + 'pre_syndication' => '', 'post_syndication' => '' ); # la matrice standard (fichiers definissant les fonctions a inclure) -- GitLab