Aziz Saleh – Software development

  • Home
  • Downloads
    • AzizMVC Downloads
      • Database Plugin
        • Template Plugin
    • Flash Builder 4
      • Bypass, Hijack, Beat, and Defeat Same Origin Policy
      • Flash Click Tracker
      • Flash Link Hider
      • Pacman source V 1.0
    • Java Downloads
      • Currency Converter
    • Javascript Downloads
    • PHP/MySQL Downloads
      • Aziz MVC
      • AzizMVC Documentation
      • Contact Form Maker
      • NetFlix API 1.0 Wrapper
      • Simple Blog
    • Visual Basic Downloads
      • Currency Converter
        • Key Logger w/Store, email, registry options
  • Resume & Portfolio
  • Ask me a question
  • Contact Me

Monthly Archives: May 2010

Content Extraction using Preg match

Posted on May 8, 2010 by Aziz Posted in PHP Leave a comment

This tiny class allows you to easily extract data from various websites. It uses the fileOpen class I created, but it can easily be changed to use something else if you need it to.

The implementation file shows how easily it can be used: you basically need 3 lines of code to do the extraction and a foreach loop to show the data. It can also remove duplicate results.

Sample Implementation:

<?php

require_once('extractThis.class.php');

// Table of contents: capture each entry's link target and its label
$test = new extractThis('http://www.fortran.com/F77_std/rjcnf0001.html');
$test->setReg('/<li><a href="(.*)" name=".*">(.*)<\/a>/Uism');
$test->doExtract(true);

// array0 holds the targets, array1 the labels; both share the same keys
foreach ($test->array0 as $row => $href) {
    echo 'Link: ' . $href . ' &raquo; ' . $test->array1[$row] . '<br />';
}

// Google search results: capture each hit's URL and its title
$test = new extractThis('http://www.google.com/search?q=aziz+saleh');
$test->setReg('/<h3><a href="(.*)".*>(.*)<\/a><\/h3>/Uism');
$test->doExtract(true);

// Rebuild every hit as a link
foreach ($test->array0 as $row => $hitUrl) {
    echo '<a href="' . $hitUrl . '">' . $test->array1[$row] . '</a><br />';
}

// vBulletin forum listing: capture thread id, thread title, member id and member name
$test = new extractThis('http://forums.digitalpoint.com/forumdisplay.php?f=37');
$test->setReg('/<a href="showthread\.php\?t=(.*)" .*>(.*)<\/a>.*<a href="member\.php\?u=(.*)" .*">(.*)<\/a><\/span>/Uism');
$test->doExtract(true);

// One line per thread: link to the thread followed by a link to its author
foreach ($test->array0 as $row => $threadId) {
    echo '<a href="http://forums.digitalpoint.com/showthread.php?t=' . $threadId . '">' . $test->array1[$row] . '</a>
    Posted By <a href="http://forums.digitalpoint.com/member.php?u=' . $test->array2[$row] . '">' . $test->array3[$row] . '</a><br />';
}

Main Class:

<?php

/*
 * Extracts regular-expression capture groups from a page fetched through openURL.
 *
 * After doExtract() the captures are exposed as properties created on demand:
 * $array0 holds the first capture group of every match, $array1 the second
 * capture group, and so on. All of them share the same keys, so
 * $array0[$k] and $array1[$k] belong to the same match.
 *
 * The attribute below keeps those on-demand properties free of deprecation
 * notices on PHP 8.2+; older PHP versions read the line as a "#" comment.
 */
#[\AllowDynamicProperties]
class extractThis
{
    /* URL to extract from */
    public $extractURL;

    /* Regular expression to find */
    public $extractRegs;

    /* Where the returns of findings are stored (one entry per capture group) */
    public $extractReturns = array();

    /* First capture group of every match; array1..arrayN are created by doOrganize()/removeDubs() */
    public $array0 = array();

    /* Constructor, just add the URL */
    public function __construct($url) { $this->extractURL = $url;}

    /* Set the regular expression (replaces any previous one) */
    public function setReg($expression){ $this->extractRegs = $expression;}

    /*
     * Actually do extraction.
     *
     * Fetches the page with openURL (file_get_contents mode), runs the regular
     * expression over it and fills $extractReturns with one array per capture
     * group. Terminates the script with die() when the page cannot be opened.
     *
     * $doOption: true  => results go through removeDubs()
     *            false => results go through doOrganize()
     */
    public function doExtract($doOption)
    {
        /* include_once: this method runs once per extraction, and a plain
           include would try to declare the openURL class a second time */
        include_once('fileOpen.class.php');
        $page = new openURL($this->extractURL);
        $page->doOpen(2);
        if($page->errorNumber)
        {
            die("$page->errorNumber ($page->errorMessage)<br />\n");
        }

        $returns = array();
        preg_match_all($this->extractRegs, (string) $page->returnPage(), $returns);

        /* Forget the groups of an earlier run, then copy groups 1..N
           (index 0 of $returns is the full match and is skipped) */
        $this->extractReturns = array();
        for($x = 1; isset($returns[$x]); $x++)
        {
            $this->extractReturns[$x-1] = $returns[$x];
        }
        if($doOption){ $this->removeDubs();}else{ $this->doOrganize();}
    }

    /* Function to organize array: copies capture group N into $arrayN, duplicates included */
    public function doOrganize()
    {
        $this->array0 = array();

        foreach($this->extractReturns as $sub => $items)
        {
            $this->{"array$sub"} = $items;
        }
    }

    /*
     * Function to organize array & remove duplicates.
     *
     * A match is kept only when its first capture group has not been seen
     * before; the other capture groups of a dropped match are dropped too.
     * Kept entries retain their original match index as key.
     */
    public function removeDubs()
    {
        /* Start every result array empty so nothing from a previous run survives */
        $this->array0 = array();
        foreach(array_keys($this->extractReturns) as $mySub)
        {
            $this->{"array$mySub"} = array();
        }

        /* A pattern without capture groups leaves nothing to organize */
        if(!isset($this->extractReturns[0]) || !is_array($this->extractReturns[0]))
        {
            return;
        }

        foreach($this->extractReturns[0] as $subSub => $theItem)
        {
            /* strict comparison: loosely, numeric strings such as '10' and '1e1' count as equal */
            if(in_array($theItem, $this->array0, true))
            {
                continue;
            }

            $this->array0[$subSub] = $theItem;
            for($mySub = 1; isset($this->extractReturns[$mySub]) && is_array($this->extractReturns[$mySub]); $mySub++)
            {
                $this->{"array$mySub"}[$subSub] = $this->extractReturns[$mySub][$subSub];
            }
        }
    }
}

Helper Class (fileOpen.class.php):

<?php

/*
 * Class for the Open.
 *
 * Wraps four ways of reading a page (fopen, file_get_contents, fsockopen and
 * curl) behind one interface: construct with the URL, optionally add options
 * for the chosen method, call doOpen() with the method number, then read the
 * result with returnPage(). A failure leaves a truthy value in $errorNumber
 * and a description in $errorMessage.
 */
class openURL
{
    /* Page content */
    private $pageContent;

    /* Page URL (for fsockopen: the host name only) */
    private $pageURL;

    /* Error number, stays falsy when the open succeeded */
    public $errorNumber = NULL;

    /* Error message */
    public $errorMessage = NULL;

    /* Fsockopen Headers array (raw request lines, sent in the order added) */
    private $fsockopenHeaders = array();

    /* Curl Options (CURLOPT_* constant => value) */
    private $curlOptions = array();

    /* File Get Contents Array (http stream-context option => value) */
    private $fgcOptions = array();

    /* Page Handler (stream resource or curl handle of the current open) */
    private $fileHandler;

    /* constructor, set page url */
    public function __construct($pageURL)
    {
        $this->pageURL = $pageURL;
    }

    /*
     * Do the actual opening.
     *
     * $openOption: 1 = fopen, 2 = file_get_contents, 3 = fsockopen, 4 = curl.
     * Any other value is reported through $errorNumber / $errorMessage
     * instead of being ignored.
     */
    public function doOpen($openOption)
    {
        switch ($openOption)
        {
            /* fopen */
            case 1:
            $this->myfopen();
            break;

            /* file_get_contents */
            case 2:
            $this->myfile_get_contents();
            break;

            /* fsockopen */
            case 3:
            $this->myfoscokopen();
            break;

            /* curl */
            case 4:
            $this->mycurl();
            break;

            /* anything else */
            default:
            $this->errorNumber = '000-FPE';
            $this->errorMessage = 'Unknown open option (expected 1, 2, 3 or 4)';
            break;
        }
    }

    /* Add to curl options */
    public function addToCurl($curlOption,$curlValue){$this->curlOptions[$curlOption] = $curlValue;}

    /* Add to headers for fsockopen */
    public function addToFsockopen($headerText){$this->fsockopenHeaders[] = $headerText;}

    /* Add to stream context (file_get_contents) */
    public function addToFileGetContents($fgcOption,$fgcValue){$this->fgcOptions[$fgcOption] = $fgcValue;}

    /* Return page content */
    public function returnPage(){ return $this->pageContent;}

    /* Function to compose headers for fsockopen: every added line plus the blank line ending the header section */
    private function composefsoHeaders()
    {
        $tempUse = '';
        foreach($this->fsockopenHeaders as $curHeader)
        {
            $tempUse .= $curHeader."\r\n";
        }
        return $tempUse."\r\n";
    }

    /*
     * Function to compose the stream-context array for file_get_contents.
     *
     * Only a string 'header' option gets a terminating CRLF (and only when it
     * does not end with one already). Every other option, such as 'method' or
     * 'timeout', is passed through untouched, because an appended CRLF would
     * become part of its value.
     */
    private function composefgcHeaders()
    {
        $tempUse = array(
            'http' => array()
        );

        foreach($this->fgcOptions as $fgcOption => $fgcValue)
        {
            if($fgcOption === 'header' && is_string($fgcValue) && substr($fgcValue, -2) !== "\r\n")
            {
                $fgcValue .= "\r\n";
            }
            $tempUse['http'][$fgcOption] = $fgcValue;
        }
        return $tempUse;
    }

    /* Pick the best available description of a failed open */
    private function describeFailure($captured)
    {
        if($captured != NULL)
        {
            return $captured;
        }

        /* Nothing was printed (display_errors may be off); fall back to the
           last recorded PHP error, which is presumably the failed call's warning */
        $lastError = error_get_last();
        if(is_array($lastError) && isset($lastError['message']))
        {
            return $lastError['message'];
        }
        return 'Unable to open '.$this->pageURL;
    }

    /* Actual read contents: reads the open handle to its end in 1024-byte chunks */
    private function readContents()
    {
        /* start clean so a second open does not append to the previous page */
        $this->pageContent = '';
        while (!feof($this->fileHandler))
        {
            $chunk = fread($this->fileHandler,1024);
            if($chunk === false)
            {
                /* read error: stop instead of looping forever */
                break;
            }
            $this->pageContent .= $chunk;
        }
    }

    /* fopen function */
    public function myfopen()
    {
        /* use ob_start to record errors */
        ob_start();
        $this->fileHandler = fopen($this->pageURL,'r');
        $captured = ob_get_clean();

        if($this->fileHandler)
        {
            $this->readContents();
            fclose($this->fileHandler);
        } else {
            $this->errorMessage = $this->describeFailure($captured);
            $this->errorNumber = '001-FPE';
        }
    }

    /* file_get_contents function */
    public function myfile_get_contents()
    {
        $context = stream_context_create($this->composefgcHeaders());
        ob_start();
        $this->pageContent = file_get_contents($this->pageURL,false, $context);
        $captured = ob_get_clean();
        $this->errorMessage = $captured;

        /* check for errors: a false result is a failure even when nothing was printed */
        if($this->pageContent === false || $captured != NULL)
        {
            $this->errorNumber = '002-FPE';
            $this->errorMessage = $this->describeFailure($captured);
        }
    }

    /* fsockopen function (connects to port 80 with a 30 second timeout) */
    public function myfoscokopen()
    {
        /* Suppress errors using ob_start */
        ob_start();

        $this->fileHandler = fsockopen($this->pageURL, 80,$this->errorNumber, $this->errorMessage, 30);

        if($this->fileHandler)
        {
            fwrite($this->fileHandler,$this->composefsoHeaders());
            ob_end_clean();
            $this->readContents();
            /* release the socket once the response has been read */
            fclose($this->fileHandler);
        } else {
            $this->errorMessage .= ob_get_contents();
            $this->errorNumber = '003-FPE';
            ob_end_clean();
        }
    }

    /* curl function */
    public function mycurl()
    {
        if(!function_exists('curl_init'))
        {
            $this->errorNumber = '004-FPE';
            $this->errorMessage = 'The curl extension is not available';
            return;
        }

        $this->fileHandler = curl_init();
        curl_setopt($this->fileHandler, CURLOPT_URL, $this->pageURL);
        curl_setopt($this->fileHandler, CURLOPT_RETURNTRANSFER, true);
        foreach($this->curlOptions as $option => $value)
        {
            curl_setopt($this->fileHandler, $option, $value);
        }
        $this->pageContent = curl_exec($this->fileHandler);

        /* curl_errno() is the numeric code (0 when there was no error), curl_error() the text */
        $this->errorNumber = curl_errno($this->fileHandler);
        $this->errorMessage = curl_error($this->fileHandler);

        curl_close($this->fileHandler);
    }
}

File open class

Posted on May 8, 2010 by Aziz Posted in PHP Leave a comment

This class basically encapsulates the following four built-in functions: fopen, file_get_contents, fsockopen, and curl.

The only reason I created this class is that some of my work runs fine on some hosts and fails on others, depending on which file-open method I used. With this class I can simply ask people to switch to another method without them having to change much code on their side.

The class allows users to add headers and options to file_get_contents, fsockopen, and curl (including referrers, cookies, field inputs, etc.).

Sample Implementation:

<?php

require_once('fileOpen.class.php');

// fopen: read a full URL as-is
$page = new openURL('http://www.google.com');
$page->doOpen(1);
echo $page->returnPage();

// file_get_contents: stream-context options are added before opening
$page = new openURL('http://www.google.com');
$page->addToFileGetContents('method','GET');
$page->doOpen(2);
echo $page->returnPage();

// fsockopen: pass the host only and supply the raw request lines yourself
$page = new openURL('www.google.com');
$requestLines = array(
    "GET / HTTP/1.1",
    "Host: www.google.com",
    "Connection: Close\r\n",
);
foreach ($requestLines as $requestLine) {
    $page->addToFsockopen($requestLine);
}
$page->doOpen(3);
echo $page->returnPage();

// curl: any CURLOPT_* setting can be queued before opening
$page = new openURL('us3.php.net/search.php');
$curlSettings = array(
    CURLOPT_REFERER        => 'us3.php.net',
    CURLOPT_POST           => 1,
    CURLOPT_POSTFIELDS     => 'pattern=curl&show=quickref&lang=en',
    CURLOPT_FOLLOWLOCATION => true,
);
foreach ($curlSettings as $curlOption => $curlValue) {
    $page->addToCurl($curlOption, $curlValue);
}
$page->doOpen(4);
echo $page->returnPage();

/* Check for any errors */
/*
if($page->errorNumber)
{
    echo "$page->errorNumber ($page->errorMessage)<br />\n";
}
*/

Main Class:

<?php

/*
 * Class for the Open.
 *
 * Wraps four ways of reading a page (fopen, file_get_contents, fsockopen and
 * curl) behind one interface: construct with the URL, optionally add options
 * for the chosen method, call doOpen() with the method number, then read the
 * result with returnPage(). A failure leaves a truthy value in $errorNumber
 * and a description in $errorMessage.
 */
class openURL
{
    /* Page content */
    private $pageContent;

    /* Page URL (for fsockopen: the host name only) */
    private $pageURL;

    /* Error number, stays falsy when the open succeeded */
    public $errorNumber = NULL;

    /* Error message */
    public $errorMessage = NULL;

    /* Fsockopen Headers array (raw request lines, sent in the order added) */
    private $fsockopenHeaders = array();

    /* Curl Options (CURLOPT_* constant => value) */
    private $curlOptions = array();

    /* File Get Contents Array (http stream-context option => value) */
    private $fgcOptions = array();

    /* Page Handler (stream resource or curl handle of the current open) */
    private $fileHandler;

    /* constructor, set page url */
    public function __construct($pageURL)
    {
        $this->pageURL = $pageURL;
    }

    /*
     * Do the actual opening.
     *
     * $openOption: 1 = fopen, 2 = file_get_contents, 3 = fsockopen, 4 = curl.
     * Any other value is reported through $errorNumber / $errorMessage
     * instead of being ignored.
     */
    public function doOpen($openOption)
    {
        switch ($openOption)
        {
            /* fopen */
            case 1:
            $this->myfopen();
            break;

            /* file_get_contents */
            case 2:
            $this->myfile_get_contents();
            break;

            /* fsockopen */
            case 3:
            $this->myfoscokopen();
            break;

            /* curl */
            case 4:
            $this->mycurl();
            break;

            /* anything else */
            default:
            $this->errorNumber = '000-FPE';
            $this->errorMessage = 'Unknown open option (expected 1, 2, 3 or 4)';
            break;
        }
    }

    /* Add to curl options */
    public function addToCurl($curlOption,$curlValue){$this->curlOptions[$curlOption] = $curlValue;}

    /* Add to headers for fsockopen */
    public function addToFsockopen($headerText){$this->fsockopenHeaders[] = $headerText;}

    /* Add to stream context (file_get_contents) */
    public function addToFileGetContents($fgcOption,$fgcValue){$this->fgcOptions[$fgcOption] = $fgcValue;}

    /* Return page content */
    public function returnPage(){ return $this->pageContent;}

    /* Function to compose headers for fsockopen: every added line plus the blank line ending the header section */
    private function composefsoHeaders()
    {
        $tempUse = '';
        foreach($this->fsockopenHeaders as $curHeader)
        {
            $tempUse .= $curHeader."\r\n";
        }
        return $tempUse."\r\n";
    }

    /*
     * Function to compose the stream-context array for file_get_contents.
     *
     * Only a string 'header' option gets a terminating CRLF (and only when it
     * does not end with one already). Every other option, such as 'method' or
     * 'timeout', is passed through untouched, because an appended CRLF would
     * become part of its value.
     */
    private function composefgcHeaders()
    {
        $tempUse = array(
            'http' => array()
        );

        foreach($this->fgcOptions as $fgcOption => $fgcValue)
        {
            if($fgcOption === 'header' && is_string($fgcValue) && substr($fgcValue, -2) !== "\r\n")
            {
                $fgcValue .= "\r\n";
            }
            $tempUse['http'][$fgcOption] = $fgcValue;
        }
        return $tempUse;
    }

    /* Pick the best available description of a failed open */
    private function describeFailure($captured)
    {
        if($captured != NULL)
        {
            return $captured;
        }

        /* Nothing was printed (display_errors may be off); fall back to the
           last recorded PHP error, which is presumably the failed call's warning */
        $lastError = error_get_last();
        if(is_array($lastError) && isset($lastError['message']))
        {
            return $lastError['message'];
        }
        return 'Unable to open '.$this->pageURL;
    }

    /* Actual read contents: reads the open handle to its end in 1024-byte chunks */
    private function readContents()
    {
        /* start clean so a second open does not append to the previous page */
        $this->pageContent = '';
        while (!feof($this->fileHandler))
        {
            $chunk = fread($this->fileHandler,1024);
            if($chunk === false)
            {
                /* read error: stop instead of looping forever */
                break;
            }
            $this->pageContent .= $chunk;
        }
    }

    /* fopen function */
    public function myfopen()
    {
        /* use ob_start to record errors */
        ob_start();
        $this->fileHandler = fopen($this->pageURL,'r');
        $captured = ob_get_clean();

        if($this->fileHandler)
        {
            $this->readContents();
            fclose($this->fileHandler);
        } else {
            $this->errorMessage = $this->describeFailure($captured);
            $this->errorNumber = '001-FPE';
        }
    }

    /* file_get_contents function */
    public function myfile_get_contents()
    {
        $context = stream_context_create($this->composefgcHeaders());
        ob_start();
        $this->pageContent = file_get_contents($this->pageURL,false, $context);
        $captured = ob_get_clean();
        $this->errorMessage = $captured;

        /* check for errors: a false result is a failure even when nothing was printed */
        if($this->pageContent === false || $captured != NULL)
        {
            $this->errorNumber = '002-FPE';
            $this->errorMessage = $this->describeFailure($captured);
        }
    }

    /* fsockopen function (connects to port 80 with a 30 second timeout) */
    public function myfoscokopen()
    {
        /* Suppress errors using ob_start */
        ob_start();

        $this->fileHandler = fsockopen($this->pageURL, 80,$this->errorNumber, $this->errorMessage, 30);

        if($this->fileHandler)
        {
            fwrite($this->fileHandler,$this->composefsoHeaders());
            ob_end_clean();
            $this->readContents();
            /* release the socket once the response has been read */
            fclose($this->fileHandler);
        } else {
            $this->errorMessage .= ob_get_contents();
            $this->errorNumber = '003-FPE';
            ob_end_clean();
        }
    }

    /* curl function */
    public function mycurl()
    {
        if(!function_exists('curl_init'))
        {
            $this->errorNumber = '004-FPE';
            $this->errorMessage = 'The curl extension is not available';
            return;
        }

        $this->fileHandler = curl_init();
        curl_setopt($this->fileHandler, CURLOPT_URL, $this->pageURL);
        curl_setopt($this->fileHandler, CURLOPT_RETURNTRANSFER, true);
        foreach($this->curlOptions as $option => $value)
        {
            curl_setopt($this->fileHandler, $option, $value);
        }
        $this->pageContent = curl_exec($this->fileHandler);

        /* curl_errno() is the numeric code (0 when there was no error), curl_error() the text */
        $this->errorNumber = curl_errno($this->fileHandler);
        $this->errorMessage = curl_error($this->fileHandler);

        curl_close($this->fileHandler);
    }
}

Recent Posts

  • PHP does not have MySQL support enabled.
  • Your PHP installation appears to be missing the MySQL extension which is required by WordPress
  • Undefined mysql functions (mysql_connect, mysql_select_db, mysql_query) – PDO alternative
  • Checking if you have Javascript installed by PHP
  • Process Maxmind countries, regions and cities – Parser to MySQL or SqLite

Recent Comments

  • fathima on Currency Converter Using Java Netbeans
  • Chris on Currency Converter
  • Khalid Ahmad Qweder on Resume & Portfolio
  • Michel on Currency Converter
  • Michel on Currency Converter

Archives

  • March 2014
  • September 2013
  • November 2012
  • October 2012
  • November 2011
  • February 2011
  • December 2010
  • November 2010
  • August 2010
  • May 2010
  • September 2009

Categories

  • Flash Builder 4
  • General
  • Java
  • Javascript
  • MySQL
  • PHP
  • Visual Basic

Meta

  • Register
  • Log in
  • Entries RSS
  • Comments RSS
  • WordPress.org
CyberChimps ©2025