PHP Classes
Icontem

File: gnix.php


  Search   All class groups All class groups   Latest entries Latest entries   Top 10 charts Top 10 charts   Newsletter Newsletter   Blog Blog   Forums Forums   Help FAQ Help FAQ  
  Login   Register  
Recommend this page to a friend! ReTweet ReTweet Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Cesar D. Rodas  >  Guaranix Full Text  >  gnix.php  
File: gnix.php
Role: Class source
Content type: text/plain
Description: The main class of this project
Class: Guaranix Full Text
Index text documents for full text searching
 

Contents

Class file image Download
<?php

/*
 * Backend selectors, SQLite timeout and the character set used to split
 * text into words.
 *
 * The third argument (case-insensitive name) is kept for backward
 * compatibility with code that may spell these constants in lowercase.
 * PHP 8 ignores it (with a warning) and defines the names case-sensitively.
 */
define('MYSQL', 'mysql' , true);
define('SQLITE', 'sqlite' , true);
define('SQLITE_TIMEOUT',1000,true);
define('SEPARADORES'," ,|-:;\n\r\t!?<>(){}[]#\\/&.$%='@\"",true);

/* Directory of this file; every dependency is loaded relative to it. */
$gnixpath = dirname(__FILE__);

include $gnixpath."/libtextcat/saddorlibtextcat.php";
include $gnixpath."/stemmer/english.php";
include $gnixpath."/stemmer/spanish.php";
include $gnixpath."/mysql.php";
include $gnixpath."/sqlite.php";
include $gnixpath."/tokenizer.php";

/* Process-wide caches shared by every Gnix instance (accessed via `global`). */
$WordCache = array();
$StemmingCache = array();
$LangCache = array();
$TipoCache = array();


class Gnix
{
    var $db; /* database handler: a gnix_sqlite or gnix_mysql object created by the constructor */
    var $textcat; /* libtextcat handler: SaddorLibTextCat object used by GetLang() */
    var $stemmer; /* stemmer handler: EnglishStemmer, SpanishStemmer, or NULL when the language has no stemmer */
    var $lang; /* language name chosen by the last GetLang() call ("unknown" if nothing was ranked) */
	var $langId; /* id of $lang in the ft_lang table, set by RegisterLang() */
    var $id; /* value of db->lastinsert() after RegisterText() inserted the ft_docs row */
	var $docid; /* docid given to the current document by RegisterText() (max(docid) + 1) */
    var $wordlist = array(); /* not referenced by any method of this class */

    var $main_writed; /* auxiliary flag: false until IndexFullText() has stored the first content of the current document */
    /**
     * Opens the storage backend and creates the language detector.
     *
     * @param array  $param connection settings; 'db' is required, while
     *                      'host', 'user' and 'pass' are optional and are
     *                      passed as NULL when absent
     * @param string $db    backend selector: SQLITE (default) or MYSQL
     */
    function __construct($param, $db = SQLITE)
    {
        if (!isset($param['db']))
        	die("There is missing the database name in the param "); 

        if ($db == SQLITE)
            $this->db = new gnix_sqlite;
        else if ($db == MYSQL)
            $this->db = new gnix_mysql;
        else
            die("Unsupported database backend: ".$db); /* otherwise open() would be called on NULL */

        /* Optional keys: read them defensively instead of raising an undefined-index notice. */
        $host = isset($param['host']) ? $param['host'] : null;
        $user = isset($param['user']) ? $param['user'] : null;
        $pass = isset($param['pass']) ? $param['pass'] : null;

        $this->db->open($param['db'],$host,$user,$pass);
        $this->textcat = new SaddorLibTextCat("libtextcat");
    }

    /**
     * PHP 4 style constructor name. PHP 8 no longer treats it as a
     * constructor, so the work lives in __construct(); this method is kept
     * so explicit calls such as parent::Gnix(...) keep working.
     *
     * @param array  $param see __construct()
     * @param string $db    see __construct()
     */
    function Gnix($param, $db = SQLITE)
    {
        $this->__construct($param, $db);
    }


    /*
        Install
    */
    /**
     * Executes the schema script "db.<backend version>" against the database.
     *
     * The file name is relative, so it is resolved against the current
     * working directory.
     *
     * @return bool true when the script was read and handed to the backend,
     *              false (with an E_USER_WARNING) when it is missing,
     *              unreadable or empty
     */
    function Install()
    {
        $schema = "db.".$this->db->version;

        $content = is_readable($schema) ? file_get_contents($schema) : false;
        if ($content === false || $content === "")
        {
            trigger_error("Gnix: cannot read the schema file ".$schema, E_USER_WARNING);
            return false;
        }

        $this->db->exec($content);
        return true;
    }


    /*
        Index Function 
    */
    /**
     * Indexes one document.
     *
     * An array is forwarded to IndexArray(). A string is registered as a new
     * document, its language is detected and registered, and it is indexed
     * under the default field type 'text'.
     *
     * @param string|array $texto   document text, or field => text pairs
     * @param int|float    $ranking base weight given to the words
     */
    function Index($texto,$ranking = 1)
    {
        if (is_array($texto))
        {
            /* forward the ranking too, it used to be dropped here */
            $this->IndexArray($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        $this->GetLang($texto);
        $this->RegisterLang($this->lang);
        /* IndexFullText() takes (text, field type, ranking): name the field
           explicitly so the ranking is not consumed as the field type */
        $this->IndexFullText($texto,'text',$ranking);
    }

    /**
     * Indexes a document made of several fields.
     *
     * Every array key is used as the field type and every value as the text
     * of that field. The language is detected and registered per field. A
     * non-array argument is forwarded to Index().
     *
     * @param array|string $texto   field => text pairs (or a plain string)
     * @param int|float    $ranking base weight given to the words
     */
    function IndexArray($texto,$ranking = 1)
    {
        if (!is_array($texto))
        {
            /* forward the ranking too, it used to be dropped here */
            $this->Index($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        foreach ($texto as $clave => $valor)
        {
            $this->GetLang($valor);
            $this->RegisterLang($this->lang);
            $this->IndexFullText($valor,$clave,$ranking);
        }
    }

    /**
     * Reserves a new document: inserts an ft_docs row whose docid is the
     * current maximum plus one, then remembers that docid and the value
     * reported by the backend's lastinsert().
     */
    function RegisterText()
    {
        $this->db->query("select max(docid) as total from ft_docs");
        $row = $this->db->getvalue();

        /* an empty table yields NULL, which the increment turns into 1 */
        $nextDocId = $row['total'];
        $nextDocId++;

        $this->db->query("insert into ft_docs(docid) values('{$nextDocId}')");
        $this->id = $this->db->lastinsert();
        $this->docid = $nextDocId;
    }

    /**
     * Returns the id of a word, inserting it into ft_word when it is new.
     *
     * Results are memoised in the global $WordCache as
     * $WordCache[word][language id].
     *
     * @param string $word word as produced by the tokenizer
     * @return mixed id of the word (value read from ft_word, or the value
     *               reported by lastinsert() for a new word)
     */
    function RegisterWord($word)
    {
        global $WordCache;

        /* read the cache with the same key order that is used to write it;
           the original lookup had the two keys swapped and never matched */
        if (isset($WordCache[$word][$this->langId]))
            return $WordCache[$word][$this->langId];

        $this->db->query("select id from ft_word where word = '".$word."'");
        if ($this->db->count() == 0)
        {
            $stemmed = $this->Stemmer($word);
            /* words sharing a stem share the id chosen by RegisterStemmed() */
            $stemId = $this->RegisterStemmed($stemmed);
            $this->db->query("insert into ft_word(id,word,stemmed,lang) values('".$stemId."','".$word."','".$stemmed."','".$this->langId."')");
            $wordId = $this->db->lastinsert();
        }
        else
        {
            $result = $this->db->getvalue();
            $wordId = $result[0];
        }

        $WordCache[$word][$this->langId] = $wordId;
        return $wordId;
    }
	
	/**
	 * Picks the id to store for a stemmed word in the current language:
	 * the id of an existing ft_word row with the same stem, otherwise the
	 * current maximum id plus one.
	 *
	 * @param string $word stemmed form of the word
	 * @return mixed id to use in ft_word
	 */
	function RegisterStemmed($word)
	{
		$lookup = "select id from ft_word where stemmed = '".$word."' and lang = '".$this->langId."' limit 1";
		$this->db->query($lookup);

		$known = $this->db->count() != 0;
		if (!$known)
			$this->db->query("select max(id) from ft_word");

		$row = $this->db->getvalue();
		return $known ? $row[0] : $row[0]+1;
	}

	/**
	 * Makes $lang the current language: resolves its id in ft_lang
	 * (inserting the language when it is new), stores the id in
	 * $this->langId and memoises it in the global $LangCache.
	 *
	 * @param string $lang language name as returned by GetLang()
	 * @return mixed id of the language
	 */
	function RegisterLang($lang)
	{
		global $LangCache;

		if (isset($LangCache[$lang]))
		{
			/* a cache hit must still switch the current language id,
			   otherwise the id of the previous document stays active */
			$this->langId = $LangCache[$lang];
			return $this->langId;
		}

		$this->db->query("select id from ft_lang where lang = '".$lang."'");
		if ($this->db->count() == 0)
		{
			$this->db->query("insert into ft_lang(lang) values('".$lang."')");
			$this->langId = $this->db->lastinsert();
		}
		else
		{
			$result = $this->db->getvalue();
			$this->langId = $result[0];
		}

		$LangCache[$lang] = $this->langId;
		return $this->langId;
	}
	
    /**
     * Returns the id of a field type (ft_tipos.titulo), inserting the type
     * when it does not exist yet. Ids are memoised in the global $TipoCache.
     *
     * @param string $word field type name
     * @return mixed id of the field type
     */
    function RegisterTipo($word)
    {
        global $TipoCache;

        if (!isset($TipoCache[$word]))
        {
            $this->db->query("select id from ft_tipos where titulo = '".$word."'");
            if ($this->db->count() == 0)
            {
                $this->db->query("insert into ft_tipos(titulo) values('{$word}')");
                $TipoCache[$word] = $this->db->lastinsert();
            }
            else
            {
                $row = $this->db->getvalue();
                $TipoCache[$word] = $row[0];
                /* the freshly fetched value is returned as-is */
                return $row[0];
            }
        }

        return $TipoCache[$word];
    }

    /**
     * Stores one field of the current document and indexes its words.
     *
     * The first field of a document updates the ft_docs row created by
     * RegisterText(); further fields insert additional rows with the same
     * docid. Every word gets an ft_index row whose ranking is
     * $ranking / (position + 1). The elapsed time is printed as HTML.
     *
     * @param string    $text    content of the field
     * @param string    $titulo  field type name
     * @param int|float $ranking base weight of the words
     */
    function IndexFullText($text, $titulo = 'text', $ranking = 1)
    {
        $tipo = $this->RegisterTipo($titulo);
        $contenido = addslashes($text);

        if ($this->main_writed == false)
        {
            $this->db->query("update ft_docs set contenido = '".$contenido."',tipo = '".$tipo."' where docid = ".$this->docid);
            $this->main_writed = true;
        }
        else
            $this->db->query("insert into ft_docs(docid,tipo,contenido) values('".$this->docid."','".$tipo."','".$contenido."')");

        $text = strtolower($text);
        $t = microtime_float();
        $index = $this->InvertedIndex($text);

        /* With no indexable word there is nothing to insert. The original
           code sent an empty or truncated statement to the backend here. */
        if (count($index) > 0)
        {
            $head = "insert into ft_index(docid,wordid,posicion,ranking,lang) values";
            $rows = array();
            foreach ($index as $pos => $wordid)
            {
                $rank = $ranking / ($pos+1);
                $rows[] = "('".$this->docid."','".$wordid."','".$pos."','".$rank."','".$this->langId."')";
            }

            if ($this->db->version == "MySQL")
                $sql = $head.implode(",",$rows);            /* one multi-row insert */
            else
                $sql = $head.implode(";".$head,$rows).";";  /* one statement per row */

            $this->db->insert($sql);
            unset($sql);
        }

        $t = microtime_float() - $t;
        print "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Done in <strong>".$t."</strong> seconds<br>";
    }

	/**
	 * Splits a text on the SEPARADORES characters and registers every word
	 * longer than one character.
	 *
	 * @param string $text text to tokenize
	 * @return array word ids in order of appearance; the array index is the
	 *               position among the words that were kept
	 */
	function InvertedIndex($text)
	{
		$ids = array();

		/* Use the constant with its declared spelling: lowercase access only
		   works with case-insensitive constants, which PHP 8 removed.
		   strtok() returns false once the text is exhausted. */
		for ($word = strtok($text,SEPARADORES); $word !== false; $word = strtok(SEPARADORES))
		{
			if (strlen($word) > 1)
				$ids[] = $this->RegisterWord($word);
		}

		return $ids;
	}
	
    /*
        Lang Tools
    */
    /**
     * Detects the language of a text and selects the matching stemmer.
     *
     * The first key of the detector's ranking is taken as the language
     * ("unknown" when the ranking has no entries). Only "english" and
     * "spanish" have a stemmer; for anything else a notice is printed and
     * no stemmer is used.
     *
     * @param string $text text to analyse
     */
    function GetLang($text)
    {
        $this->textcat->WhatLang($text);

        $lang = "unknown";
        foreach ($this->textcat->ranking as $candidate => $points)
        {
            /* only the first entry of the ranking matters */
            $lang = $candidate;
            break;
        }

        $this->stemmer = NULL;
        switch ($lang)
        {
            case "english":
                $this->stemmer = new EnglishStemmer;
                break;
            case "spanish":
                $this->stemmer = new SpanishStemmer;
                break;
            default:
                print "The stemmer for lang ".$lang." is not supported<br>";
        }

        $this->lang = $lang;
    }
    /**
     * Returns the stem of a word using the stemmer of the current language,
     * or the word itself when no stemmer is selected.
     *
     * Results are memoised in the global $StemmingCache per language
     * ($StemmingCache[language][word]). The cache was keyed by word only,
     * so a stem computed for one language was reused for another.
     *
     * @param string $word word to stem
     * @return string stemmed word
     */
    function Stemmer($word) 
    {
        global $StemmingCache;

        $lang = (string)$this->lang;

        if (!isset($StemmingCache[$lang][$word]))
        {
            $StemmingCache[$lang][$word] = $this->stemmer == NULL ? $word : $this->stemmer->Stem($word);
        }

        return $StemmingCache[$lang][$word];
    }


    /*
        Search Function
    */
    /**
     * Runs a query against the index.
     *
     * @param string $arg       query text; slashes are stripped before parsing
     * @param int    $page_star offset of the first result (20 results per page)
     * @return array 'Total' holds the value of the count(*) query; every
     *               matching document adds an entry keyed by its id that maps
     *               field type name => content
     */
    function Search($arg,$page_star = 0)
    {
        $return = array();

        $token = new Tokenizer;
        $token->parser(stripslashes($arg));
        $sql = $this->BuildSQL();

        /* a leftover debugging die($sql) used to stop the script here */

        $this->db->query(str_replace("[options]","count(*)",$sql));
        $result = $this->db->getvalue();
        $return['Total'] = $result[0];
        if ($return['Total'] == 0)
            return $return;

        /* the offset ends up inside the SQL text: force it to a non-negative integer */
        $offset = max(0,(int)$page_star);

        /* NOTE(review): rows are ordered by ascending ranking although the
           ranking grows with relevance; confirm the intended direction. */
        $this->db->query(str_replace("[options]","t0.*",$sql)." group by docid order by ranking  limit ".$offset.",20");

        /* NOTE(review): assumes the first column of ft_index is the docid; verify against the schema. */
        $id = array();
        while ($row = $this->db->getvalue())
            $id[] = $row[0]; /* ids ordered by rank */

        if (count($id) == 0)
            return $return;

        $this->db->query("select ft_docs.docid,ft_tipos.titulo, ft_docs.contenido from ft_docs inner join ft_tipos on (ft_docs.tipo = ft_tipos.id) where docid IN (".implode(",",$id).")");

        $tmpresults = array();
        while ($row = $this->db->getvalue())
            $tmpresults[$row[0]][$row[1]] = $row[2];

        /* emit the documents in rank order */
        foreach ($id as $docid)
        {
            if (!isset($tmpresults[$docid]))
                continue;
            foreach ($tmpresults[$docid] as $tipo => $contenido)
                $return[$docid][$tipo] = $contenido;
        }

        return $return;
    }
	
	/**
	 * Builds the SQL template for the query held in the global $TreeArray.
	 *
	 * Each term gets its own ft_index alias (t0, t1, ...), joined on docid.
	 * A term containing spaces is a phrase: its words must sit at
	 * consecutive positions. The element following a term is appended
	 * verbatim as the connecting operator ("NOT" is emitted as "and NOT").
	 *
	 * @return string SQL whose "[options]" placeholder is still to be
	 *                replaced by the caller
	 */
	function BuildSQL()
	{
		global $TreeArray;
		global $boolean;

		$tree = is_array($TreeArray) ? $TreeArray : array();
		$operators = is_array($boolean) ? $boolean : array();
		$total = count($tree);

		$i = 0; /* next free table alias */
		$sql = "select [options] from [table] where";

		for ($e = 0; $e < $total; $e++)
		{
			$term = isset($tree[$e]) ? $tree[$e] : "";
			if ($term == "")
				continue;
			/* in_array() also catches the operator stored at index 0,
			   which the truthiness test on array_search() missed */
			if (in_array($term,$operators))
				continue;

			if (strchr($term," ") === false)
			{
				$sql .= " (t".$i.".wordid = '".$this->Word2Id($term)."') ";
				$i++;
			}
			else
			{
				$words = array();
				foreach (explode(" ",$term) as $w)
				{
					if ($w !== "")
						$words[] = $w;
				}
				if (count($words) == 0)
					continue;

				$clauses = array();
				$last = count($words) - 1;
				foreach ($words as $n => $w)
				{
					$clauses[] = "t".$i.".wordid = '".$this->Word2Id($w)."'";
					/* chain positions only between consecutive words: the
					   last word must not require another token after it */
					if ($n < $last)
						$clauses[] = "t".$i.".posicion + 1 = t".($i+1).".posicion";
					$i++;
				}
				$sql .= "(".implode(" and ",$clauses).")";
			}

			/* the element right after a term is its connecting operator */
			$e++;
			$connector = isset($tree[$e]) ? $tree[$e] : "";
			if ($connector == "NOT")
				$sql .= " and ".$connector;
			else
				$sql .= " ".$connector;
		}

		$table = "ft_index as t0 ";
		for ($t = 1; $t < $i; $t++)
			$table .= " inner join ft_index as t".$t." on (t0.docid = t".$t.".docid) ";

		return str_replace("[table]",$table,$sql);
	}

	/**
	 * Looks up the id of a word in ft_word.
	 *
	 * NOTE(review): the word is placed in the SQL text unescaped; confirm
	 * that the tokenizer never lets quote characters through.
	 *
	 * @param string $word word to look up
	 * @return mixed id of the word, or -1 when it is not indexed
	 */
	function Word2Id($word)
	{
		$this->db->query("select id from ft_word where word = '".$word."'");
		$row = $this->db->getvalue();

		if (isset($row[0]))
			return $row[0];

		return -1;
	}
	
}



/**
 * Current Unix time as a float, built from the "usec sec" string that
 * microtime() returns.
 *
 * @return float seconds, including the fractional part
 */
function microtime_float()
{
   $parts = explode(" ", microtime());
   $fraction = (float)$parts[0];
   $seconds = (float)$parts[1];
   return ($fraction + $seconds);
}

?>

 
  Advertise on this site Advertise on this site   Site map Site map   Statistics Statistics   Site tips Site tips   Privacy policy Privacy policy   Contact Contact  

For more information send a message to :
info at phpclasses dot org.
Copyright (c) Icontem 1999-2009 PHP Classes - PHP Class Scripts
  PHP Book Reviews - Reviews of books and other products