@ -1,47 +1,52 @@
< ? php
/**
 * Plugin Univers SPIP
 * (c) 2010 Cedric
 * Distributed under the GPL licence
 *
 */
include_spip ( 'inc/filtres' );
include_spip ( 'inc/distant' );
include_spip ( 'inc/meta' );
/**
 * Propose a site URL and return the id of its row in spip_websites.
 *
 * The URL is reduced to scheme://host/path, passed through
 * univers_nettoyer_url() and cut after "spip.php" / "spip.php3".
 * When a row already holds that URL (or the same URL without its
 * spip.php suffix) its id is returned; otherwise a row is inserted,
 * read back, and deleted again if the stored value differs.
 *
 * @param string $url
 *     URL to propose
 * @param string $desc
 *     Written to the `descriptif` column of a newly inserted row
 * @return int|bool
 *     id_website, or false when the URL is refused
 */
function univers_proposer_site($url, $desc = '') {
	$parts = parse_url($url);

	// parse_url() gives false on a malformed URL and omits absent components:
	// without a scheme and a host there is nothing to rebuild
	if (!is_array($parts) or empty($parts['scheme']) or empty($parts['host'])) {
		return false;
	}
	$host = $parts['host'];
	$path = isset($parts['path']) ? $parts['path'] : '';

	// no bare IPv4 address as host
	if (preg_match(';^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}$;', $host)) {
		return false;
	}

	// no feedburner / google feed proxy URL;
	// following the redirection to get the real site name would be better
	if (preg_match(',(feeds\.feedburner\.com|feedproxy\.google\.com),i', $host)) {
		return false;
	}

	$base = univers_nettoyer_url($parts['scheme'] . '://' . $host . $path);
	// drop everything after spip.php / spip.php3
	$base = preg_replace(',(spip[.]php3?).*$,i', '\\1', $base);
	// same URL without the script name
	$base_short = preg_replace(',spip[.]php.*$,i', '', $base);

	// already known, under the full form...
	$id_website = sql_getfetsel('id_website', 'spip_websites', 'url=' . sql_quote($base));
	if ($id_website) {
		return $id_website;
	}
	// ...or under the short form
	if ($base_short !== $base) {
		$id_website = sql_getfetsel('id_website', 'spip_websites', 'url=' . sql_quote($base_short));
		if ($id_website) {
			return $id_website;
		}
	}

	$id_website = sql_insertq(
		'spip_websites',
		array('url' => $base, 'descriptif' => $desc, 'date' => date('Y-m-d H:i:s'))
	);

	// read back and check: a stored value that differs means the URL
	// held a character the table could not keep, so the row is removed
	$stored = sql_getfetsel('url', 'spip_websites', 'id_website=' . intval($id_website));
	if ($stored !== $base) {
		sql_delete('spip_websites', 'id_website=' . intval($id_website));

		return false;
	}

	return $id_website;
}
/**
 * Repair a malformed site URL.
 *
 * Four case-by-case rewrites are applied in order:
 * - "IMG/cache…spip.php" or "local/cache…spip.php" becomes "spip.php";
 * - anything after "index.php/" or "spip.php/" is dropped and the script
 *   name becomes "spip.php";
 * - "index|spip|forum|article.php3/…" is removed entirely;
 * - a path into plugins/, ecrire/, IMG/, local/ or squelettes/ that ends
 *   in ".php" is removed entirely.
 *
 * @param string $url
 * @return string
 */
function univers_nettoyer_url($url) {
	// preg_replace() applies the pairs one after the other, each on the
	// result of the previous one
	$patterns = array(
		',(IMG|local)/cache.+spip[.]php,',
		',(index|spip)[.]php/.+,i',
		',(index|spip|forum|article)[.]php3/.+,i',
		',/(plugins|ecrire|IMG|local|squelettes)/.+[.]php,i',
	);
	$replacements = array(
		'spip.php',
		'spip.php',
		'',
		'',
	);

	return preg_replace($patterns, $replacements, $url);
}
/**
 * Clean up the spip_websites table.
 *
 * - rewrites the stored URLs that univers_nettoyer_url() would change;
 * - deletes the rows pointing at feedburner / google feed proxy;
 * - deletes the spip_websites_plugins rows whose website no longer exists.
 *
 * @return void
 */
function univers_nettoyer_urls() {
	// malformed URLs: the SQL filter mirrors two of the rewrites done
	// by univers_nettoyer_url()
	$res = sql_select(
		'id_website,url',
		'spip_websites',
		"url REGEXP '(index|spip)[.]php/.+'"
		. " OR url REGEXP '/(plugins|ecrire|IMG|local|squelettes)/.+[.]php$'"
	);
	while ($row = sql_fetch($res)) {
		$url = univers_nettoyer_url($row['url']);
		if ($url != $row['url']) {
			spip_log('nettoyage ' . $row['id_website'] . ':' . $row['url'] . " => $url", 'universclean');
			sql_updateq('spip_websites', array('url' => $url), 'id_website=' . intval($row['id_website']));
		}
	}

	// refused URLs (google / feedburner feeds)
	$refused = array(
		'http://feeds.feedburner.com%',
		'http://feedproxy.google.com%',
	);
	foreach ($refused as $like) {
		sql_delete('spip_websites', 'url LIKE' . sql_quote($like));
	}

	// dead links left behind by the deletions above
	$res = sql_select(
		'P.id_website',
		'spip_websites_plugins AS P LEFT JOIN spip_websites AS W ON P.id_website = W.id_website',
		'W.id_website IS NULL'
	);
	while ($row = sql_fetch($res)) {
		sql_delete('spip_websites_plugins', 'id_website=' . intval($row['id_website']));
	}
}
/**
 * Propose the sites found in the referers recorded for a given date.
 *
 * Two passes are made over spip_referers, both ignoring contrib.spip.net
 * and localhost:
 * - referers containing "spip.php" are proposed as they are;
 * - referers containing "/ecrire/" are proposed with everything from
 *   "/ecrire/" onwards replaced by "/spip.php".
 *
 * The former Yahoo BOSS exploration (service discontinued, see the
 * "21 dec 2012" note it carried) is no longer kept here as commented-out
 * code.
 *
 * @param string $date
 *     Value compared to the `date` column of spip_referers
 * @return void
 */
function univers_rechercher_referers($date) {
	// LIKE pattern => must the referer be rewritten to the public spip.php ?
	$passes = array(
		'%spip.php%' => false,
		'%/ecrire/%' => true,
	);

	foreach ($passes as $like => $from_ecrire) {
		$res = sql_select(
			'referer',
			'spip_referers',
			'date=' . sql_quote($date)
			. " AND referer LIKE '$like' AND referer NOT LIKE 'https://contrib.spip.net%'"
			. " AND referer NOT LIKE '%localhost%'"
		);
		spip_log("Import depuis les referer du $date : $like, " . sql_count($res), 'universreferers');

		while ($row = sql_fetch($res)) {
			spip_log('Import referer : ' . $row['referer'], 'universreferers');
			$referer = $row['referer'];
			if ($from_ecrire) {
				// a private-area URL: point at the site's spip.php instead
				$referer = preg_replace(',/ecrire/.*$,Uims', '/spip.php', $referer);
			}
			univers_proposer_site($referer);
		}
	}
}
/**
 * Collect SPIP site URLs from paginated search result pages.
 *
 * The request URL is fetched repeatedly with its paging parameter going
 * from $start (included) to $max (excluded) by $step. In each page the
 * first link of every <h3> is kept when it is an absolute URL that
 * contains "spip.php" and neither "inurl:" nor "google".
 *
 * @param string $req
 *     Search request URL
 * @param int $start
 *     First value of the paging parameter
 * @param int $max
 *     Paging stops when the parameter reaches this value
 * @param int $step
 *     Increment of the paging parameter
 * @param string $var
 *     Name of the paging parameter
 * @return array
 *     URL cut after "spip.php" => raw text of the link
 */
function univers_rechercher_sites_spip($req, $start = 0, $max = 10, $step = 10, $var = 'start') {
	$urls = array();

	for (; $start < $max; $start += $step) {
		$page = recuperer_page(parametre_url($req, $var, $start, '&'));
		// nothing fetched for this page: try the next one
		if (!$page) {
			continue;
		}

		foreach (extraire_balises($page, 'h3') as $h) {
			$a = extraire_balise($h, 'a');
			$href = extraire_attribut($a, 'href');
			if (!is_string($href)) {
				continue;
			}
			if (preg_match(';^([a-z]{3,5})://;i', $href)
				and strpos($href, 'inurl:') === false
				and strpos($href, 'google') === false
				and strpos($href, 'spip.php') !== false) {
				// cut after the script name; the final "p" must not be optional,
				// otherwise a host such as spip.photo.example is cut in the middle
				$href = preg_replace(',spip[.]php.*$,i', 'spip.php', $href);
				$urls[$href] = textebrut($a);
			}
		}
	}

	return $urls;
}
/**
 * Extract the SPIP site URLs quoted in the <title> elements of a feed.
 *
 * Every http(s) URL found in the titles is examined. A URL that does not
 * already point at spip.php / spip.php3 is, when $enlarge is true,
 * resolved with recuperer_lapage() and replaced by the string that call
 * returns (presumably the target of a short-URL redirection; confirm
 * against inc/distant). URLs that still do not point at spip.php are
 * dropped.
 *
 * @param string $url
 *     Feed URL
 * @param bool $enlarge
 *     Try to resolve URLs that do not look like SPIP URLs
 * @return array
 *     Distinct URLs; keys are not contiguous
 */
function univers_twitter_extraire_feed_urls($url, $enlarge = true) {
	$is_spip = ',https?://[^?"\'#;:]*spip[.]php3?,Uims';
	$resolved = array();

	$page = recuperer_page($url);
	$page = str_replace(array('<b>', '</b>'), '', $page);

	// keep only the text of the titles, one per line
	$titles = extraire_balises($page, 'title');
	$page = preg_replace(',</?title>,ims', "\n", implode($titles));

	// default PREG_PATTERN_ORDER: $regs[0] is the list of full matches
	preg_match_all(',https?://[^?"\'#;:\s]*,ims', $page, $regs);
	$urls = $regs[0];

	foreach ($urls as $k => $candidate) {
		if ($enlarge and !preg_match($is_spip, $candidate)) {
			// resolve each distinct URL only once
			if (!isset($resolved[$candidate])) {
				$target = recuperer_lapage($candidate, false, 'GET', 100000);
				$resolved[$candidate] = (is_string($target) ? $target : false);
			}
			if ($resolved[$candidate]) {
				$urls[$k] = $candidate = $resolved[$candidate];
			}
		}
		if (!preg_match($is_spip, $candidate)) {
			unset($urls[$k]);
		}
	}

	return array_unique($urls);
}
/**
 * Extract the SPIP site URLs from a Google News feed.
 *
 * Delegates to univers_identica_extraire_feed_urls(), which takes the
 * feed URL as its only parameter.
 *
 * @param string $url
 *     Feed URL
 * @return array
 */
function univers_googlenews_extraire_feed_urls($url) {
	return univers_identica_extraire_feed_urls($url);
}
/**
 * Extract the http URLs pointing at spip.php / spip.php3 from a page.
 *
 * <b> and </b> tags are removed first so that a highlighted search term
 * does not split a URL.
 *
 * @param string $url
 *     Feed URL
 * @return array
 *     Distinct URLs, each cut right after spip.php / spip.php3
 */
function univers_identica_extraire_feed_urls($url) {
	$page = recuperer_page($url);
	$page = str_replace(array('<b>', '</b>'), '', $page);

	// default PREG_PATTERN_ORDER: $regs[0] is the list of full matches
	preg_match_all(',http://[^?"\'#;:]*spip[.]php3?,Uims', $page, $regs);

	return array_unique($regs[0]);
}
/**
 * Extract the content of every <link>…</link> element of a feed.
 *
 * @param string $url
 *     Feed URL
 * @return array
 *     Distinct, trimmed link targets
 */
function univers_spipnet_extraire_feed_urls($url) {
	$urls = array();
	$page = recuperer_page($url);

	foreach (extraire_balises($page, 'link') as $link) {
		// only elements with a text content are kept
		if (preg_match(',<link>(.*)</link>,Uims', $link, $reg)) {
			$urls[] = trim($reg[1]);
		}
	}

	return array_unique($urls);
}
/**
 * Extract the site URLs from the "delisarka" feed.
 *
 * Handled exactly like the spip.net feed: the result of
 * univers_spipnet_extraire_feed_urls() is returned unchanged.
 *
 * @param string $url
 *     Feed URL
 * @return array
 */
function univers_delisarka_extraire_feed_urls($url) {
	$urls = univers_spipnet_extraire_feed_urls($url);

	return $urls;
}
/**
 * Poll one of the watched feeds and propose the sites it lists.
 *
 * Each call handles a single feed: the index of the feed to read is kept
 * in the `univers_feedwatch` meta and advanced (wrapping around) at the
 * end of the call. The extraction is done by the function
 * univers_{type}_extraire_feed_urls() when it exists. The call ends with
 * a clean-up of the stored URLs.
 *
 * @param bool $echo
 *     Also print the log lines, each followed by <br />
 * @return void
 */
function univers_feed_watch($echo = false) {
	// the twitter search, google news, delicious and identi.ca feeds are
	// dead and no longer polled
	$explore = array(
		'twitter' => 'https://blog.spip.net/?page=backend-twitter-spip',
		'spipnet' => 'https://www.spip.net/?page=backend-sites-sous-spip&id_article=884',
	);

	$feed = 0;
	if (isset($GLOBALS['meta']['univers_feedwatch'])) {
		$feed = max(0, intval($GLOBALS['meta']['univers_feedwatch']));
	}

	// pick the feed by position; each() no longer exists in PHP 8
	$types = array_keys($explore);
	if (isset($types[$feed])) {
		$type = $types[$feed];
		$url = $explore[$type];
		$f = 'univers_' . $type . '_extraire_feed_urls';

		if (function_exists($f)) {
			spip_log($s = "Analyse Feed $url", 'univers');
			if ($echo) {
				echo "$s<br />";
			}
			foreach ($f($url) as $site) {
				spip_log($s = "$site", 'univers');
				if ($echo) {
					echo "$s<br />";
				}
				univers_proposer_site($site);
			}
		}
	}

	// next call reads the next feed, back to the first one after the last
	$feed++;
	if ($feed >= count($explore)) {
		$feed = 0;
	}
	ecrire_meta('univers_feedwatch', $feed);

	// clean up the malformed URLs
	univers_nettoyer_urls();
}
?>