开发者

how do I optimize this code to extract title

开发者 https://www.devze.com 2023-03-06 09:54 出处:网络
below is a sample code which I used to extract title of any website: function fread_url($url,$ref=\"\")

below is a sample code which I used to extract title of any website:

function fread_url($url,$ref="")
    {
        if(function_exists("curl_init")){
            $ch = curl_init();
            $user_agent = "googlebot";
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
            curl_setopt( $ch, CURLOPT_HTTPGET, 1 );
            curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
            curl_setopt( $ch, CURLOPT_FOLLOWLOCATION , 1 );
            curl_setopt( $ch, CURLOPT_URL, $url );
            curl_setopt ($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
            $html = curl_exec($ch);
            curl_close($ch);
        }
        else{
            $html.= file_get_contents($urweb);
               }
        return $html;
    }
////////////////////////////////////
$doc = new DOMDocument(); @$doc->loadHTML(@fread_url($urweb));  
$titlelist = $doc->getElementsByTagName("title"); 
if($titlelist->length > 0){   $wbtitle = $titlelist->item(0)->nodeValue; } 
echo $wbtitle;

My question is how can I modify this script to access a website for 5 seconds time and if no title available then return ampty?? righ开发者_如何学JAVAt now for some websites it take morethan 5 seconds to extract title or even take more.


Set a timeout for cURL.

curl_setopt($ch, CURLOPT_TIMEOUT, 5);

It looks like you're trying to do that already with CURLOPT_CONNECTTIMEOUT, but that's

The number of seconds to wait while trying to connect

whereas CURLOPT_TIMEOUT timeout is

The maximum number of seconds to allow cURL functions to execute.

http://php.net/manual/en/function.curl-setopt.php


You can rewrite the function entirely as follows. You could also make another function if you need to keep the fread_url() function.

function get_page_title($url, $ref = "") {
    if (function_exists("curl_init")) {
        $ch = curl_init();
        $user_agent = "googlebot";
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt( $ch, CURLOPT_HTTPGET, 1 );
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $ch, CURLOPT_FOLLOWLOCATION , 1 );
        curl_setopt( $ch, CURLOPT_URL, $url );
        curl_setopt ($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $html = curl_exec($ch);
        curl_close($ch);
    } else {
        $html = file_get_contents($urweb);
    }

    if ($html === false || empty($html))
        return false;

    $doc = new DOMDocument();
    @$doc->loadHTML($html);  
    $titlelist = $doc->getElementsByTagName("title"); 

    return $titlelist->length > 0 ? $titlelist->item(0)->nodeValue : '';
}

$wbtitle = get_page_title($urlweb);
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号