Find links with a .pdf extension

2019-07-28 05:25发布

问题:

I need to get all links that have a .pdf extension. My code is:

<?php

// Download a page with cURL and print every matched link that ends in ".pdf".

set_time_limit(0); // remove PHP's execution time limit for this script

// A cURL handle must exist before options can be set on it; without
// curl_init() every curl_* call below fails and $result is never a string.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://example.com");
curl_setopt($ch, CURLOPT_TIMEOUT, 0);        // 0 = no transfer timeout
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of echoing it
$result = curl_exec($ch);

// curl_exec() returns false on failure; stop with the reason instead of
// silently running the regex against a non-string.
if ($result === false) {
    $error = curl_error($ch);
    curl_close($ch);
    die('request failed: ' . $error);
}
curl_close($ch);

// Collect the href of each <a href="http://www..."> tag (capture group 1).
preg_match_all('/<a href="(http:\/\/www.[^0-9].+?)"/', $result, $output, PREG_SET_ORDER); // read all links

foreach ($output as $item) {
    // With PREG_SET_ORDER every $item is an array:
    // [0] is the whole matched tag fragment, [1] is the captured URL.
    $link = $item[1];

    // Keep only URLs whose last four characters are ".pdf" (case-insensitive).
    if (preg_match('/\.pdf$/i', $link)) {
        // The URL comes from a remote page, so escape it before putting it
        // into HTML output; double_encode = false leaves entities that are
        // already present in the attribute value (e.g. &amp;) untouched.
        echo htmlspecialchars($link, ENT_QUOTES, 'UTF-8', false);
        echo '<br>';
    }
}

?>

The code gives me a blank result. What's wrong with my code? Thank you very much :)

回答1:

Solved using Simple HTML DOM; the code is:

<?php

// Load a page with the Simple HTML DOM library and print the href of every
// <a> element whose link ends in ".pdf".

set_time_limit(0); // remove PHP's execution time limit for this script
include 'simple_html_dom.php'; // provides file_get_html()
$url = 'example'; // placeholder: put the address of the page to scan here
$html = file_get_html($url) or die ('invalid url');

// Start with an empty list so print_r() below always receives a defined array,
// even when the page contains no PDF links.
$result = [];

foreach ($html->find('a') as $e) {
    $link = $e->href;
    // Case-insensitive check that the link ends with the ".pdf" extension.
    if (preg_match('/\.pdf$/i', $link)) {
        $result[] = $link;
    }
}

print_r($result);

?>