| 
<?
/*
 Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
 Date:   Jun 7 2006
 Title:  Get wikipedia page content
 */
 include('SAXParser.php');
 
 /*
  Character-data callback registered with the parser.
  Watches comment text for the two marker strings:
    "start content" - switches the global $startContent flag on;
    "end content"   - once the start marker was seen, stores the current
                      global $commentPos into $endContent.
  Text received while $isComment is false is ignored.
  $str - chunk of text handed over by the parser.
  */
 function character($str){
 global $isComment,$startContent,$endContent,$commentPos;
 // only text that belongs to a comment can be a marker
 if(!$isComment) return;
 $marker=trim($str);
 if($startContent){
  // already inside the content area: look for the closing marker
  if($marker=='end content') $endContent=$commentPos;
 }elseif($marker=='start content'){
  $startContent=true;
 }
 }
 
 /*
  Comment callback registered with the parser.
  $start - stored as the global $isComment flag
           (presumably true when a comment opens, false when it closes - confirm against SAXParser.php);
  $pos   - position reported by the parser, stored as the global $commentPos.
  The first time it is called with a false $start after the "start content"
  marker was seen, $beginContent is set to $pos+3
  (presumably skipping the 3-character comment terminator - confirm against SAXParser.php).
  */
 function comment($start,$pos){
 global $isComment,$startContent,$commentPos,$beginContent;
 //----
 $beginNotSetYet=($beginContent==0);
 if(!$start && $startContent && $beginNotSetYet){
  $beginContent=$pos+3;
 }
 //----
 $commentPos=$pos;
 //----
 $isComment=$start;
 }
 
 // Page whose content is fetched and scanned for the marker comments.
 $URL = 'http://en.wikipedia.org/wiki/Kalimpong';
 #---
 // State shared with the character()/comment() callbacks through globals.
 $isComment    = false; // mirrors the $start flag of the last comment() call
 $commentPos   = 0;     // position passed to the last comment() call
 $startContent = false; // becomes true once the "start content" marker is seen
 #---
 // Offsets into $content that delimit the fragment to print.
 $beginContent = 0;
 $endContent   = 0;
 #---
 $parser = new HTML_SAXParser();
 // character() and comment() are passed by name; the first two handler slots are left empty
 $parser->initFunc('','','character','comment');
 #---
 // ATTENTION!!! replace for correct loading content
 $content=file_get_contents($URL);
 // file_get_contents() returns false when the page cannot be loaded;
 // fall back to an empty document so parsing and substr() still get a string
 if($content===false) $content='';
 ?>
 <html>
 <body>
 <center>Source page:<br><iframe src="<?=htmlspecialchars($URL,ENT_QUOTES)?>" width="600" height="400" ></iframe><br><br></center>
 Content:<br>
 <?php
 // Run the parser; the character()/comment() callbacks fill the globals
 // $beginContent and $endContent while it walks through $content.
 $parser->parseString($content);
 //----
 // Print the fragment only when the end offset lies behind the begin offset.
 // If the "end content" marker was never seen, $endContent is still 0 and the
 // length would be negative, which makes substr() cut from the end of the
 // string and print an unrelated part of the page.
 if($endContent>$beginContent){
  echo substr($content,$beginContent,$endContent-$beginContent);
 }
 ?>
 </body></html>
 |