Skip to content
Extraits de code Groupes Projets
Valider 5cdbd7d2 rédigé par renato's avatar renato
Parcourir les fichiers

The search engine now supports required (AND) and excluded (NOT) words in a query.

The syntax is:
+word returns only results in which word is present
-word returns only results in which word is not present
parent ad6aedf0
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
...@@ -164,12 +164,9 @@ function mots_indexation($texte, $min_long = 3) { ...@@ -164,12 +164,9 @@ function mots_indexation($texte, $min_long = 3) {
include_spip('inc/charsets'); include_spip('inc/charsets');
include_spip('inc/texte'); include_spip('inc/texte');
// Point d'entree pour traiter le texte avant indexation
$texte = pipeline('pre_indexation', $texte);
// Recuperer les parametres des modeles // Recuperer les parametres des modeles
$texte = traiter_modeles($texte, true); $texte = traiter_modeles($texte, true);
// Supprimer les tags HTML // Supprimer les tags HTML
$texte = preg_replace(',<.*>,Ums',' ',$texte); $texte = preg_replace(',<.*>,Ums',' ',$texte);
...@@ -181,7 +178,8 @@ function mots_indexation($texte, $min_long = 3) { ...@@ -181,7 +178,8 @@ function mots_indexation($texte, $min_long = 3) {
$texte_c = ' '.translitteration_complexe ($texte, 'AUTO', true); $texte_c = ' '.translitteration_complexe ($texte, 'AUTO', true);
else else
$texte_c = ''; $texte_c = '';
$texte = translitteration($texte).$texte_c; $texte = translitteration($texte);
if($texte!=trim($texte_c)) $texte .= $texte_c;
# NB. tous les caracteres non translitteres sont retournes en utf-8 # NB. tous les caracteres non translitteres sont retournes en utf-8
// OPTIONNEL // Gestion du tiret '-' : // OPTIONNEL // Gestion du tiret '-' :
...@@ -189,9 +187,11 @@ function mots_indexation($texte, $min_long = 3) { ...@@ -189,9 +187,11 @@ function mots_indexation($texte, $min_long = 3) {
# $texte = preg_replace(',(\w+)-(\w+),', '\1 \2 \1\2', $texte); # $texte = preg_replace(',(\w+)-(\w+),', '\1 \2 \1\2', $texte);
// Supprimer les caracteres de ponctuation, les guillemets... // Supprimer les caracteres de ponctuation, les guillemets...
$e = "],:;*\"!\r\n\t\\/)}{[|@<>$%'`?\~.^+(-"; $e = "],:;*\"!\r\n\t\\/)}{[|@<>$%'`?\~.^(";
$texte = strtr($texte, $e, ereg_replace('.', ' ', $e)); $texte = strtr($texte, $e, ereg_replace('.', ' ', $e));
//delete +\- not at the beginning of a word
$texte = preg_replace(",(?:\S)[\-+],"," ",$texte);
// Cas particulier : sigles d'au moins deux lettres // Cas particulier : sigles d'au moins deux lettres
$texte = preg_replace("/ ([A-Z][0-9A-Z]{1,".($min_long - 1)."}) /", $texte = preg_replace("/ ([A-Z][0-9A-Z]{1,".($min_long - 1)."}) /",
' \\1___ ', $texte.' '); ' \\1___ ', $texte.' ');
...@@ -723,12 +723,26 @@ AND rec.id_table = $id_table", ...@@ -723,12 +723,26 @@ AND rec.id_table = $id_table",
// http://doc.spip.org/@requete_dico // http://doc.spip.org/@requete_dico
/**
 * Build the SQL conditions used to look up one search word in the
 * dictionary index, together with the logical mode of that word.
 *
 * A leading '+' selects mode "AND", a leading '-' selects mode "NOT",
 * and no prefix selects mode "OR". The prefix is removed from the word
 * before the conditions are built.
 *
 * @param string $val Search word, optionally prefixed with '+' or '-'.
 * @return array Three items: the loose condition, the strict condition,
 *               and the mode ("OR", "AND" or "NOT").
 */
function requete_dico($val) {
	$min_long = 3;

	// Split the optional +/- prefix from the word itself.
	preg_match(",^([+\-]?)(.*),", $val, $mod);

	// Map the captured prefix to the logical operator of this word.
	$modes = array('' => "OR", '+' => "AND", '-' => "NOT");
	$mode = $modes[$mod[1]];
	$val = $mod[2];

	// Words of at most $min_long characters are matched exactly, with
	// the "___" suffix appended, for both the loose and strict forms.
	if (strlen($val) <= $min_long) {
		return array("dico = "._q($val."___"), "dico = "._q($val."___"), $mode);
	}

	// Normal case: prefix match (loose) and exact match (strict).
	return array("dico LIKE "._q($val. "%"), "dico = " . _q($val), $mode);
}
...@@ -740,16 +754,16 @@ function requete_hash ($rech) { ...@@ -740,16 +754,16 @@ function requete_hash ($rech) {
$s = mots_indexation($rech); $s = mots_indexation($rech);
unset($dico); unset($dico);
unset($h); unset($h);
// cherche les mots dans le dico // cherche les mots dans le dico
while (list(, $val) = each($s)) { while (list(, $val) = each($s)) {
list($rq, $rq_strict) = requete_dico ($val); list($rq, $rq_strict,$mode) = requete_dico ($val);
if ($rq) if ($rq)
$dico[] = $rq; $dico[$mode][$val] = $rq;
if ($rq_strict) if ($rq_strict)
$dico_strict[] = $rq_strict; $dico_strict[$mode][$val] = $rq_strict;
} }
// Attention en MySQL 3.x il faut passer par HEX(hash) // Attention en MySQL 3.x il faut passer par HEX(hash)
// alors qu'en MySQL 4.1 c'est interdit ! // alors qu'en MySQL 4.1 c'est interdit !
$vers = spip_query("SELECT VERSION() AS v"); $vers = spip_query("SELECT VERSION() AS v");
...@@ -764,29 +778,88 @@ function requete_hash ($rech) { ...@@ -764,29 +778,88 @@ function requete_hash ($rech) {
} }
// compose la recherche dans l'index // compose la recherche dans l'index
if ($dico_strict) { $cond = "";
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE " .join(" OR ", $dico_strict)); if ($dico_strict["OR"]) $cond = join(" OR ", $dico_strict["OR"]);
if ($cond) {
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE ".$cond);
while ($row2 = spip_fetch_array($result2)) while ($row2 = spip_fetch_array($result2))
$h_strict[] = $hex_fmt.$row2['h']; $h_strict[] = $hex_fmt.$row2['h'];
} }
if ($dico) {
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE " .join(" OR ", $dico)); $cond = "";
if ($dico_strict["AND"]) $cond = join(" OR ", $dico_strict["AND"]);
if ($cond) {
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE ".$cond);
while ($row2 = spip_fetch_array($result2))
$h_strict_and[] = $hex_fmt.$row2['h'];
}
$cond = "";
if ($dico["OR"]) $cond = join(" OR ", $dico["OR"]);
if ($cond) {
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE ".$cond);
while ($row2 = spip_fetch_array($result2)) while ($row2 = spip_fetch_array($result2))
$h[] = $hex_fmt.$row2['h']; $h[] = $hex_fmt.$row2['h'];
} }
$cond = "";
if ($dico["AND"]) $cond = join(" OR ", $dico["AND"]);
if ($cond) {
$result2 = spip_query("SELECT $select_hash,dico FROM spip_index_dico WHERE ".$cond);
while ($row2 = spip_fetch_array($result2)) {
//store the condition that selected the hash (the word typed by the user)
foreach($dico["AND"] as $key=>$val) {
$mot_and = substr($key,1);
if(strpos($row2['dico'],$mot_and)===0)
$h_and[$mot_and][] = $hex_fmt.$row2['h'];
}
}
}
$cond = "";
if ($dico["NOT"]) $cond = join(" OR ", $dico["NOT"]);
if ($cond) {
$result2 = spip_query("SELECT $select_hash FROM spip_index_dico WHERE ".$cond);
while ($row2 = spip_fetch_array($result2))
$h_not[] = $hex_fmt.$row2['h'];
}
if ($h_strict) if ($h_strict)
$hash_recherche_strict = join(",", $h_strict); $hash_recherche_strict = join(",", $h_strict);
else else
$hash_recherche_strict = "0"; $hash_recherche_strict = "0";
if ($h_strict_and)
$hash_recherche_strict_and = join(",", $h_strict_and);
else
$hash_recherche_strict_and = "0";
if ($h) if ($h)
$hash_recherche = join(",", $h); $hash_recherche = join(",", $h);
else else
$hash_recherche = "0"; $hash_recherche = "0";
return array($hash_recherche, $hash_recherche_strict); if ($h_and) {
foreach($h_and as $key=>$val)
$hash_recherche_and[$key] = join(",", $h_and[$key]);
} else
$hash_recherche_and = "0";
if ($h_not)
$hash_recherche_not = join(",", $h_not);
else
$hash_recherche_not = "0";
return array($hash_recherche, $hash_recherche_strict, $hash_recherche_not, $hash_recherche_and, $hash_recherche_strict_and);
} }
// //
...@@ -815,23 +888,63 @@ function prepare_recherche($recherche, $primary = 'id_article', $id_table='artic ...@@ -815,23 +888,63 @@ function prepare_recherche($recherche, $primary = 'id_article', $id_table='artic
if (!$cache[$recherche][$primary]) { if (!$cache[$recherche][$primary]) {
if (!$cache[$recherche]['hash']) if (!$cache[$recherche]['hash'])
$cache[$recherche]['hash'] = requete_hash($recherche); $cache[$recherche]['hash'] = requete_hash($recherche);
list($hash_recherche, $hash_recherche_strict) list($hash_recherche, $hash_recherche_strict, $hash_recherche_not, $hash_recherche_and, $hash_recherche_strict_and)
= $cache[$recherche]['hash']; = $cache[$recherche]['hash'];
$strict = array(); $strict = array();
if ($hash_recherche_strict) if ($hash_recherche_strict)
foreach (split(',',$hash_recherche_strict) as $h) foreach (split(',',$hash_recherche_strict) as $h)
$strict[$h] = 99; $strict[$h] = 99;
if ($hash_recherche_strict_and)
foreach (split(',',$hash_recherche_strict_and) as $h)
$strict[$h] = 99;
$index_id_table = id_index_table($nom_table); $index_id_table = id_index_table($nom_table);
$points = array(); $points = array();
$s = spip_query("SELECT hash,points,id_objet as id FROM spip_index WHERE hash IN ($hash_recherche) AND id_table='$index_id_table'");
$objet_and = array();
$object_not = array();
if($hash_recherche_and) {
//$hash_recherche_and is an array of mots=>comma separated hashes
$list_hashes = join(",",$hash_recherche_and);
$pow = 1;
foreach($hash_recherche_and as $key=>$val) {
$hash_groupes[] = "$pow*".calcul_mysql_in("hash",$val);
$pow *= 2;
}
$count_groupes = join(" + ",$hash_groupes);
while ($r = spip_fetch_array($s)) $s = spip_query("SELECT id_objet as id,COUNT(DISTINCT $count_groupes) as count_groupes FROM spip_index WHERE id_table='$index_id_table' AND hash IN ($list_hashes) GROUP BY id HAVING count_groupes=".count($hash_recherche_and));
$points[$r['id']] //if no ids are found, pass at least id = 0 in order to exclude any result
+= (1 + $strict[$r['hash']]) * $r['points']; $objet_and[] = 0;
spip_free_result($s); while ($r = spip_fetch_array($s))
arsort($points, SORT_NUMERIC); $objet_and[]=$r['id'];
}
if($hash_recherche_not) {
$s = spip_query("SELECT DISTINCT id_objet as id FROM spip_index WHERE hash IN ($hash_recherche_not) AND id_table='$index_id_table'");
while ($r = spip_fetch_array($s))
$objet_not[]=$r['id'];
}
if(count($objet_and))
$list_and = " AND id_objet IN (".join(",",$objet_and).")";
if(count($objet_not))
$list_not = " AND id_objet NOT IN (".join(",",$objet_not).")";
if($hash_recherche) {
$list_hash = " AND hash IN (".$hash_recherche.")";
}
if($list_hash || $list_and || $list_not) {
$query = "SELECT hash,points,id_objet as id FROM spip_index WHERE id_table='$index_id_table'".$list_and.$list_not.$list_hash;
$s = spip_query($query);
while ($r = spip_fetch_array($s))
$points[$r['id']]
+= (1 + $strict[$r['hash']]) * $r['points'];
spip_free_result($s);
arsort($points, SORT_NUMERIC);
}
# calculer le {id_article IN()} et le {... as points} # calculer le {id_article IN()} et le {... as points}
if (!count($points)) { if (!count($points)) {
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter