From 8813fa863abbb77d6df98d8ecef7191fbb824cd5 Mon Sep 17 00:00:00 2001
From: Fil <fil@rezo.net>
Date: Wed, 25 Jan 2006 18:02:10 +0000
Subject: [PATCH] =?UTF-8?q?Syndication=20:=20-=20un=20point=20d'entr=C3=A9?=
 =?UTF-8?q?e=20pre=5Fsyndication,=20avant=20l'analyse=20du=20fichier=20RSS?=
 =?UTF-8?q?=20-=20option=20globale=20pour=20d=C3=A9sactiver=20le=20control?=
 =?UTF-8?q?e=20des=20dates=20des=20items=20RSS=20-=20une=20gestion=20des?=
 =?UTF-8?q?=20dates=20courtes=20du=20type=202006-01=20ou=202006=20(Minh=20?=
 =?UTF-8?q?Ha=20Duong)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ecrire/inc_index.php3   | 13 +++++++------
 ecrire/inc_syndic.php   | 27 +++++++++++++++++++++++----
 ecrire/inc_version.php3 |  4 ++++
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/ecrire/inc_index.php3 b/ecrire/inc_index.php3
index e522e6b490..9cdb9033e7 100644
--- a/ecrire/inc_index.php3
+++ b/ecrire/inc_index.php3
@@ -589,12 +589,15 @@ function effectuer_une_indexation($nombre_indexations = 1) {
 		if (isset($INDEX_iteration_nb_maxi[$table]))
 		  $limit = min($limit,$INDEX_iteration_nb_maxi[$table]);
 
+		// indexer en priorite les '1' (a reindexer), ensuite les ''
+		// (statut d'indexation inconnu), enfin les 'idx' (ceux dont
+		// l'indexation a precedemment echoue, p. ex. a cause d'un timeout)
 		foreach (array('1', '', 'idx') as $mode) {
-			$s = spip_query("SELECT $table_primary, idx FROM $table
+			$s = spip_query("SELECT $table_primary AS id FROM $table
 			WHERE idx='$mode' AND $critere LIMIT $limit");
 			while ($t = spip_fetch_array($s)) {
-				$vu[$table] .= $t[0].", ";
-				indexer_objet($table, $t[0], $t[1]);
+				$vu[$table] .= $t['id'].", ";
+				indexer_objet($table, $t['id'], $mode);
 			}
 			if ($vu) break;
 		}
@@ -613,10 +616,8 @@ function executer_une_indexation_syndic() {
 }
 
 function creer_liste_indexation() {
-	$tables = liste_index_tables();
-	while (list(,$table) = each($tables)) {
+	foreach (liste_index_tables() as $table)
 		spip_query("UPDATE $table SET idx='1' WHERE idx!='non'");
-	}
 }
 
 function purger_index() {
diff --git a/ecrire/inc_syndic.php b/ecrire/inc_syndic.php
index b127d69c5f..805a22a55f 100644
--- a/ecrire/inc_syndic.php
+++ b/ecrire/inc_syndic.php
@@ -118,6 +118,8 @@ function cdata_echappe_retour(&$table, &$echappe_cdata) {
 function analyser_backend($rss, $url_syndic='') {
 	include_ecrire("inc_texte"); # pour couper()
 
+	$rss = pipeline('pre_syndication', $rss);
+
 	// Echapper les CDATA
 	$echappe_cdata = array();
 	if (preg_match_all(',<!\[CDATA\[(.*)]]>,Uims', $rss,
@@ -212,9 +214,16 @@ function analyser_backend($rss, $url_syndic='') {
 		preg_match(',<date>([^<]*)<,Uims',$item,$match))
 			$la_date = my_strtotime($match[1]);
 
-		if ($la_date < time() - 365 * 24 * 3600
-		OR $la_date > time() + 48 * 3600)
-			$la_date = time();
+		// controle de validite de la date
+		// pour eviter qu'un backend errone passe toujours devant
+		// (note: ca pourrait etre defini site par site, mais ca risque d'etre
+		// plus lourd que vraiment utile)
+		if ($GLOBALS['controler_dates_rss']) {
+			if ($la_date < time() - 365 * 24 * 3600
+			OR $la_date > time() + 48 * 3600)
+				$la_date = time();
+		}
+
 		$data['date'] = $la_date;
 
 		// Honorer le <lastbuilddate> en forcant la date
@@ -483,14 +492,24 @@ function syndic_a_jour($now_id_syndic, $statut = 'off') {
 // http://www.w3.org/TR/NOTE-datetime
 function my_strtotime($la_date) {
 
+	// format complet
 	if (preg_match(
-	',^([0-9]+-[0-9]+-[0-9]+T[0-9]+:[0-9]+(:[0-9]+)?)(\.[0-9]+)?'
+	',^([0-9]+-[0-9]+-[0-9]+[T ][0-9]+:[0-9]+(:[0-9]+)?)(\.[0-9]+)?'
 	.'(Z|([-+][0-9][0-9]):[0-9]+)?$,',
 	$la_date, $match)) {
 		$la_date = str_replace("T", " ", $match[1])." GMT";
 		return strtotime($la_date) - intval($match[5]) * 3600;
 	}
 
+	// YYYY
+	if (preg_match(',^([0-9][0-9][0-9][0-9])$,', $la_date, $match))
+		return strtotime($match[1]."-01-01");
+
+	// YYYY-MM
+	if (preg_match(',^([0-9][0-9][0-9][0-9]-[0-9][0-9])$,', $la_date, $match))
+		return strtotime($match[1]."-01");
+
+	// utiliser strtotime en dernier ressort
 	$s = strtotime($la_date);
 	if ($s > 0)
 		return $s;
diff --git a/ecrire/inc_version.php3 b/ecrire/inc_version.php3
index 49eb856156..2ff816d4e2 100644
--- a/ecrire/inc_version.php3
+++ b/ecrire/inc_version.php3
@@ -158,6 +158,9 @@ $ortho_servers = array ('http://ortho.spip.net/ortho_serveur.php');
 // Produire du TeX ou du MathML ?
 $traiter_math = 'tex';
 
+// Controler les dates des items dans les flux RSS ?
+$controler_dates_rss = true;
+
 
 //
 // Plugins
@@ -171,6 +174,7 @@ $spip_pipeline = array(
 	'pre_propre' => '|extraire_multi',
 	'post_propre' => '',
 	'pre_indexation' => '',
+	'pre_syndication' => '',
 	'post_syndication' => ''
 );
 # la matrice standard (fichiers definissant les fonctions a inclure)
-- 
GitLab