Warning: preg_match_all() [function.preg-match-all]: Unknown modifier 'g'

2019-08-29 04:56发布

Errors:

Warning: preg_match_all() [function.preg-match-all]: Unknown modifier 'g' in /Users/julian/Sites/abc.php on line 23
Warning: preg_match_all() [function.preg-match-all]: Unknown modifier 'g' in /Users/julian/Sites/abc.php on line 23

Here is my code:

<?php

// Small page scraper: downloads a URI once in the constructor and exposes
// regex-based extractors for <img> and <a> tags.
class Crawler {
// Contents of the fetched page, as returned by file_get_contents().
// NOTE(review): the initializer is a typographic quote (”), not an empty
// string literal ('') -- looks like copy/paste damage.
protected $markup = ”;
    // Downloads $uri and stores the result in $this->markup.
    public function __construct($uri) {
        $this->markup = $this->getMarkup($uri);
    }
    // Returns whatever file_get_contents() yields for $uri.
    public function getMarkup($uri) {
        return file_get_contents($uri);
    }
    // Dispatches to an extractor method and returns its result.
    // NOTE(review): $type is never read; the method name is hard-coded to
    // "_get_links", so every call ends up in _get_links().
    public function get($type) {
        $method = "_get_links";
        // NOTE(review): call_user_method() was removed in PHP 7.
        if (method_exists($this, $method))
                return call_user_method($method, $this);
             // NOTE(review): the `if` above has no opening brace, so this brace
             // closes get() and the next one closes the class, leaving the two
             // methods below outside of it.
             }
    }
    // Collects the attribute text of self-closing <img .../> tags (capture group 1).
    protected function _get_images() {
        if (!empty($this->markup)){
            // The pattern has no delimiters and is passed through
            // htmlspecialchars(), which turns "<" into "&lt;" and ">" into "&gt;".
            // PCRE then takes "&" as the delimiter, the pattern ends at the next
            // "&", and the "g" of "&gt;" is read as a modifier -- hence the
            // "Unknown modifier 'g'" warning.
            preg_match_all(htmlspecialchars("<img([^>]+)/>i"), $this->markup, $images);
            return $images[1];
    }
    }
    // Collects <a ...>...</a> matches; returns the full preg_match_all() result
    // (whole matches, attribute text, inner text).
    protected function _get_links() {
        if (!empty($this->markup)){
            // Same problem as in _get_images(): no delimiters, and
            // htmlspecialchars() rewrites the pattern before PCRE sees it.
            preg_match_all(htmlspecialchars("<a([^>]+)>(.*?)</a>/i"), $this->markup, $links);
            return $links;
        }
    }
}
// Fetch the page once, then ask the crawler for its images and links.
$crawl = new Crawler("http://google.com/");
// NOTE(review): the arguments below are wrapped in typographic quotes, not
// ASCII quotes, so they are not string literals.
$images = $crawl->get(‘images’);
$links = $crawl->get(‘links’);
// NOTE(review): _get_links() returns the match array from preg_match_all();
// echo cannot print an array usefully (print_r() or var_dump() can).
echo $links;
?>

2条回答
放荡不羁爱自由
2楼-- · 2019-08-29 05:44

Try this:

/**
 * Small page scraper: downloads a page once and extracts <img> and <a> tags
 * from it with regular expressions.
 *
 * Regex-based HTML parsing is only good enough for quick scripts; prefer the
 * DOM extension for anything that has to be robust.
 */
class Crawler {

    /** @var string|false Raw page contents; false when the download failed. */
    protected $markup = '';

    /**
     * Downloads the page and keeps its contents for later extraction.
     *
     * @param string $uri Address of the page to fetch.
     */
    public function __construct($uri) {
        $this->markup = $this->getMarkup($uri);
    }

    /**
     * Fetches the contents of the given address.
     *
     * @param string $uri Address of the page to fetch.
     * @return string|false Page contents, or false if file_get_contents() fails.
     */
    public function getMarkup($uri) {
        return file_get_contents($uri);
    }

    /**
     * Runs the extractor that matches the requested type.
     *
     * The type is mapped onto a "_get_<type>" method, e.g. 'images' calls
     * _get_images() and 'links' calls _get_links().
     *
     * @param string $type Kind of element to extract ('images' or 'links').
     * @return array|null Extractor result, or null when no extractor exists
     *                    for $type or no markup is available.
     */
    public function get($type) {
        $method = "_get_{$type}";
        if (method_exists($this, $method)) {
            // call_user_method() no longer exists in PHP 7+; a variable
            // method call does the same job on every PHP version.
            return $this->$method();
        }
        return null;
    }

    /**
     * Extracts the attribute text of every self-closing <img .../> tag.
     *
     * @return array|null List of attribute strings (capture group 1), or null
     *                    when there is no markup to search.
     */
    protected function _get_images() {
        if (empty($this->markup)) {
            return null;
        }
        // "~" delimiters avoid having to escape the "/" of "/>".
        preg_match_all('~<img([^>]+)/>~i', $this->markup, $images);
        return $images[1];
    }

    /**
     * Extracts every <a ...>...</a> element.
     *
     * @return array|null Full preg_match_all() result: [0] whole matches,
     *                    [1] attribute text, [2] inner text; null when there
     *                    is no markup to search.
     */
    protected function _get_links() {
        if (empty($this->markup)) {
            return null;
        }
        preg_match_all('~<a([^>]+)>(.*?)</a>~i', $this->markup, $links);
        return $links;
    }
}
// Fetch the page once, then ask the crawler for its images and links.
$crawl = new Crawler("http://google.com/");
// Plain ASCII quotes: typographic quotes (‘…’) are not string delimiters in PHP.
$images = $crawl->get('images');
$links = $crawl->get('links');
// The result is an array, so dump it with print_r() rather than echo.
print_r($links);
查看更多
放我归山
3楼-- · 2019-08-29 05:48

You are lacking delimiters. Correct regexes are:

↓             ↓
~<img([^>]+)/>~i
~<a([^>]+)>(.*?)</a>~i
↑                   ↑

But please be aware that it is generally not recommended to parse HTML using regular expressions. Instead you might consider using DOM.

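Note on "Unknown modifier 'g'": PHP has no g modifier; if you want all matches, use preg_match_all() instead of preg_match(). In this case the 'g' is not even something you wrote: htmlspecialchars() turns ">" into "&gt;", PCRE treats "&" as the delimiter, and the "g" that follows the second "&" is read as a modifier.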

查看更多
登录 后发表回答