Browse Source

Petit travail collaboratif avec James de nettoyage : on passe à la poubelle les sites dont les urls sont en doublon, en conservant de préférence les sites https.

Un petit génie y travaillera régulièrement.
svn/root
marcimat@rezo.net 2 years ago
parent
commit
25e827259d
7 changed files with 98 additions and 7 deletions
  1. +1
    -0
      .gitattributes
  2. +3
    -3
      base/univers.php
  3. +36
    -0
      genie/univers_check.php
  4. +50
    -0
      genie/univers_check_doublons.php
  5. +6
    -3
      inc/univers.php
  6. +1
    -1
      plugin.xml
  7. +1
    -0
      univers_pipelines.php

+ 1
- 0
.gitattributes View File

@ -5,6 +5,7 @@ base/univers.php -text
css/version.tree.css -text
exec/universparse.php -text
genie/univers_check.php -text
genie/univers_check_doublons.php -text
genie/univers_feed.php -text
genie/univers_referers.php -text
genie/univers_statsv.php -text


+ 3
- 3
base/univers.php View File

@ -149,16 +149,16 @@ function univers_upgrade($nom_meta_base_version, $version_cible) {
// ajout du champ url_clean
spip_timer('up');
maj_tables(array('spip_websites'));
sql_update( 'spip_websites', array(
sql_update('spip_websites', array(
'url_clean' =>
"TRIM(LEADING 'www.' FROM " .
"TRIM(LEADING 'https://' FROM " .
"TRIM(LEADING 'http://' FROM " .
"TRIM(TRAILING '/' FROM " .
"TRIM(TRAILING 'spip.php' FROM url" .
")))))"
')))))'
));
echo "Urls cleans calculées en " . spip_timer('up');
echo 'Urls cleans calculées en ' . spip_timer('up');
ecrire_meta($nom_meta_base_version, $current_version = '0.1.10', 'non');
}
}


+ 36
- 0
genie/univers_check.php View File

@ -73,5 +73,41 @@ function genie_univers_check_dist($t) {
"statut IN ('prop','publie') AND status='dead' AND retry>=10"
);
univers_check_doublons();
return 0;
}
/**
 * Move duplicated websites (rows sharing the same url_clean) to the trash.
 *
 * Looks up at most 50 url_clean values that are shared by several websites
 * with statut 'publie' and an empty status. For each of them, the copy that
 * sorts first (https URLs first, then the most recent date) is kept and up
 * to 100 other copies get statut 'poub'.
 *
 * @return void
 */
function univers_check_doublons() {
	spip_timer('up');

	// The same restriction is used to count duplicates and to pick the copy
	// that is kept: otherwise a row that is not published (already trashed,
	// proposed...) could sort first, be "kept", and every published copy
	// would end up in the trash.
	$actifs = [
		"statut='publie'",
		"status=''",
	];

	$groupes = sql_allfetsel(
		['url_clean', 'count(*) as nb_doublons'],
		'spip_websites',
		$actifs,
		'url_clean',
		'',
		'0,50',
		'nb_doublons > 1'
	);

	foreach (array_column($groupes, 'url_clean') as $url_clean) {
		// Offset 1 skips the preferred copy; everything after it is a duplicate.
		$doublons = sql_allfetsel(
			'id_website',
			'spip_websites',
			array_merge(['url_clean = ' . sql_quote($url_clean)], $actifs),
			'',
			["IF(LEFT(url, 5) = 'https', 1, 0) DESC", 'date DESC'],
			'1,100'
		);
		$doublons = array_column($doublons, 'id_website');
		if (!$doublons) {
			// Nothing left to trash for this URL: no need to run an UPDATE.
			continue;
		}

		sql_updateq(
			'spip_websites',
			['statut' => 'poub'],
			sql_in('id_website', $doublons)
		);
	}

	// Logged rather than echoed: this runs from a periodic task and must not
	// write into whatever output is being produced at that time.
	spip_log('Doublons supprimés en ' . spip_timer('up'), 'univers_check');
}

+ 50
- 0
genie/univers_check_doublons.php View File

@ -0,0 +1,50 @@
<?php
/**
* Plugin Univers SPIP
* (c) 2010 Cedric
* Distribue sous licence GPL
*/
/**
 * Periodic task that cleans up duplicated websites.
 *
 * Looks up at most 50 url_clean values that are shared by several websites
 * with statut 'publie' and an empty status. For each of them, the copy that
 * sorts first (https URLs first, then the most recent date) is kept and up
 * to 100 other copies get statut 'poub'. The number of trashed websites and
 * the elapsed time are written to the 'univers_check' log.
 *
 * @param mixed $t Unused parameter
 * @return int Always 0
 */
function genie_univers_check_doublons_dist($t) {
	spip_timer('up');

	// The same restriction is used to count duplicates and to pick the copy
	// that is kept: otherwise a row that is not published (already trashed,
	// proposed...) could sort first, be "kept", and every published copy
	// would end up in the trash.
	$actifs = [
		"statut='publie'",
		"status=''",
	];

	$groupes = sql_allfetsel(
		['url_clean', 'count(*) as nb_doublons'],
		'spip_websites',
		$actifs,
		'url_clean',
		'',
		'0,50',
		'nb_doublons > 1'
	);

	// Number of websites actually moved to the trash, which is what the log
	// message announces (not the number of duplicated URLs).
	$nb_supprimes = 0;

	foreach (array_column($groupes, 'url_clean') as $url_clean) {
		// Offset 1 skips the preferred copy; everything after it is a duplicate.
		$doublons = sql_allfetsel(
			'id_website',
			'spip_websites',
			array_merge(['url_clean = ' . sql_quote($url_clean)], $actifs),
			'',
			["IF(LEFT(url, 5) = 'https', 1, 0) DESC", 'date DESC'],
			'1,100'
		);
		$doublons = array_column($doublons, 'id_website');
		if (!$doublons) {
			// Nothing left to trash for this URL: no need to run an UPDATE.
			continue;
		}

		sql_updateq(
			'spip_websites',
			['statut' => 'poub'],
			sql_in('id_website', $doublons)
		);
		$nb_supprimes += count($doublons);
	}

	spip_log($nb_supprimes . ' sites doublons supprimés en ' . spip_timer('up'), 'univers_check');

	return 0;
}

+ 6
- 3
inc/univers.php View File

@ -35,9 +35,12 @@ function univers_proposer_site($url) {
$base = preg_replace(',(spip[.]php3?).*$,i', '\\1', $base);
$url_clean = univers_url_clean($base);
if (
$id_website = sql_getfetsel('id_website', 'spip_websites', 'url='.sql_quote($base)) or
$id_website = sql_getfetsel('id_website', 'spip_websites', 'url_clean='.sql_quote($url_clean), '', 'date DESC', '0,1')
if ($id_website = sql_getfetsel('id_website', 'spip_websites', 'url='.sql_quote($base)) or
$id_website = sql_getfetsel(
'id_website',
'spip_websites',
'url_clean='.sql_quote($url_clean), '', 'date DESC', '0,1'
)
) {
return $id_website;
}


+ 1
- 1
plugin.xml View File

@ -3,7 +3,7 @@
<auteur>&#169; 2010 C&eacute;dric MORIN</auteur>
<licence>GPL</licence>
<etat>test</etat>
<version>0.2.29</version>
<version>0.2.30</version>
<version_base>0.1.10</version_base>
<categorie>statistique</categorie>
<install>base/univers.php</install>


+ 1
- 0
univers_pipelines.php View File

@ -18,6 +18,7 @@ function univers_taches_generales_cron(array $taches_generales) {
$taches_generales['univers_referers'] = 12*3600;
$taches_generales['univers_statsv'] = 3*24*3600;
$taches_generales['univers_check'] = 97;
$taches_generales['univers_check_doublons'] = 12*3600;
return $taches_generales;
}

Loading…
Cancel
Save