How to parse the data from Google Alerts?

2019-01-07 06:41发布

First, how would you get Google Alerts information into a database, other than by parsing the text of the email message that Google sends you?

It seems that there is no Google Alerts API.

If you must parse text, how would you go about parsing out the relevant pieces of the email message?

2条回答
够拽才男人
2楼-- · 2019-01-07 07:23

When you create the alert, set the "Deliver To" to "Feed" and then you can consume the feed XML as you would any other feed. This is much easier to parse and digest into a database.

查看更多
Luminary・发光体
3楼-- · 2019-01-07 07:46
/**
 * Creates a Google Alert delivered as a feed by driving Google's HTML forms
 * with cURL, and returns the URL of the resulting feed.
 *
 * The flow is: load the login page, submit the login form, load the alert
 * creation form, submit it, then look the new alert up on the manage page.
 *
 * NOTE(review): this depends entirely on the markup and endpoints of Google's
 * web pages (form ids, field names, feed link format); confirm they still
 * match before relying on it.
 */
class googleAlerts{
    /** @var string Google account e-mail used to sign in. */
    private $username;

    /** @var string Password for the account. */
    private $password;

    /** @var string Path of the file cURL uses to persist cookies between requests. */
    private $cookieFile;

    /**
     * All arguments are optional so that `new googleAlerts` keeps working; the
     * defaults are the placeholder values and must be replaced with real ones.
     *
     * @param string $username   Google account e-mail.
     * @param string $password   Account password.
     * @param string $cookieFile Cookie jar path (read and written by cURL).
     */
    public function __construct($username = 'XXXXXX@gmail.com', $password = 'YYYYYY', $cookieFile = 'cookies.txt')
    {
        $this->username   = $username;
        $this->password   = $password;
        $this->cookieFile = $cookieFile;
    }

    /**
     * Logs in, creates an alert for the given query and returns its feed URL.
     *
     * @param string $alert Search query of the alert to create.
     *
     * @return string|false Feed URL on success; false when a request fails, an
     *                      expected form is missing, or the new alert cannot be
     *                      found on the manage page.
     */
    public function createAlert($alert){
        $alert = (string) $alert;
        if (trim($alert) === '') {
            // An empty query would make the lookup pattern below match any alert.
            return false;
        }

        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        // Peer verification is left at cURL's default (enabled): credentials
        // are posted over this connection.
        curl_setopt_array($ch, array(
            CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_COOKIEJAR      => $this->cookieFile,
            CURLOPT_COOKIEFILE     => $this->cookieFile,
            CURLOPT_HEADER         => false,
            CURLOPT_CONNECTTIMEOUT => 120,
            CURLOPT_TIMEOUT        => 120,
        ));

        // Step 1: fetch the login page and collect its form fields.
        $loginPage = $this->request(
            $ch,
            'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage'
        );
        if ($loginPage === false) {
            return false;
        }

        $formFields = $this->getFormFields($loginPage);
        if ($formFields === null) {
            return false;
        }

        $formFields['Email']  = $this->username;
        $formFields['Passwd'] = $this->password;
        unset($formFields['PersistentCookie']);

        // Step 2: submit the login form; a response without a <title> is
        // treated as a failed login.
        $result = $this->request($ch, 'https://accounts.google.com/ServiceLoginAuth', $formFields);
        if ($result === false || strpos($result, '<title>') === false) {
            return false;
        }

        // Step 3: visit the alerts home page before the create form. The body
        // is not used; presumably this only picks up session cookies.
        $this->request($ch, 'http://www.google.com/alerts');

        $createPage = $this->request($ch, 'http://www.google.com/alerts/create');
        if ($createPage === false) {
            return false;
        }

        $createFields = $this->getFormFieldsCreate($createPage);
        if ($createFields === null) {
            return false;
        }

        // Field values are sent as found in the original script; 'e' => 'feed'
        // selects feed delivery. NOTE(review): meaning of t/f/l not visible here.
        $createFields['q'] = $alert;
        $createFields['t'] = '7';
        $createFields['f'] = '1';
        $createFields['l'] = '0';
        $createFields['e'] = 'feed';
        unset($createFields['PersistentCookie']);

        // Step 4: submit the create form.
        if ($this->request($ch, 'http://www.google.com/alerts/create', $createFields) === false) {
            return false;
        }

        // Step 5: read the manage page with a plain GET and find the feed link
        // that follows the alert's own link.
        $managePage = $this->request($ch, 'http://www.google.com/alerts/manage');
        if ($managePage === false) {
            return false;
        }

        // preg_quote keeps characters such as "%", "(" or "." in the alert
        // text from being interpreted as part of the pattern.
        $pattern = '%' . preg_quote($alert, '%')
            . '(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i';

        if (preg_match($pattern, $managePage, $matches)) {
            return 'http://www.google.com/alerts/feeds/' . $matches[1];
        }

        return false;
    }

    /**
     * Performs one HTTP request on the shared cURL handle.
     *
     * @param resource|object $ch         cURL handle configured by createAlert().
     * @param string          $url        URL to request.
     * @param array|null      $postFields Form fields to POST, or null for a GET.
     *
     * @return string|false Response body, or false when the transfer failed.
     */
    private function request($ch, $url, $postFields = null)
    {
        curl_setopt($ch, CURLOPT_URL, $url);

        if ($postFields === null) {
            // Explicitly switch back to GET; the handle is reused after POSTs.
            curl_setopt($ch, CURLOPT_HTTPGET, true);
        } else {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields, '', '&'));
        }

        $body = curl_exec($ch);

        return is_string($body) ? $body : false;
    }

    /**
     * Extracts the inputs of the login form (the form whose id is gaia_loginform).
     *
     * @param string $data HTML of the login page.
     *
     * @return array|null Map of input name => value, or null if the form is absent.
     */
    private function getFormFields($data)
    {
        return $this->getInputsOfForm('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data);
    }

    /**
     * Extracts the inputs of the first form on the alert creation page that
     * contains a name attribute.
     *
     * @param string $data HTML of the create page.
     *
     * @return array|null Map of input name => value, or null if no form is found.
     */
    private function getFormFieldsCreate($data)
    {
        return $this->getInputsOfForm('/(<form.*?name=.?.*?<\/form>)/is', $data);
    }

    /**
     * Finds a form with the given pattern and returns its inputs.
     *
     * @param string $pattern Regex whose first capture group is the whole form.
     * @param mixed  $data    HTML to search.
     *
     * @return array|null Map of input name => value, or null when nothing matches.
     */
    private function getInputsOfForm($pattern, $data)
    {
        if (!is_string($data) || !preg_match($pattern, $data, $matches)) {
            return null;
        }

        return $this->getInputs($matches[1]);
    }

    /**
     * Collects name/value pairs from every <input> tag in a form.
     *
     * Inputs without a name are skipped; inputs without a value get ''.
     * Later inputs overwrite earlier ones with the same name. A ">" inside an
     * attribute value still ends the tag early (regex-based parsing).
     *
     * @param string $form HTML of a single form.
     *
     * @return array Map of input name => value.
     */
    private function getInputs($form)
    {
        $inputs = array();

        if (!preg_match_all('/<input\b[^>]*>/is', $form, $matches)) {
            return $inputs;
        }

        foreach ($matches[0] as $tag) {
            $name = $this->getAttribute($tag, 'name');
            if ($name === null || $name === '') {
                continue;
            }

            $value = $this->getAttribute($tag, 'value');
            $inputs[$name] = ($value === null) ? '' : $value;
        }

        return $inputs;
    }

    /**
     * Reads one attribute from an HTML tag.
     *
     * Handles double-quoted, single-quoted and unquoted values, keeps
     * whitespace inside quoted values, and decodes HTML entities.
     *
     * @param string $tag       A single HTML tag, e.g. '<input name="q">'.
     * @param string $attribute Attribute name to look for (case-insensitive).
     *
     * @return string|null Decoded attribute value, or null if it is not present.
     */
    private function getAttribute($tag, $attribute)
    {
        // The lookbehind stops e.g. "data-name" from being read as "name".
        $pattern = '/(?<![\w-])' . preg_quote($attribute, '/')
            . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';

        if (!preg_match($pattern, $tag, $match)) {
            return null;
        }

        // Unmatched trailing groups are dropped by preg_match, so the last
        // element is always the alternative that actually matched.
        $raw = end($match);

        return html_entity_decode($raw, ENT_QUOTES, 'UTF-8');
    }
}
// Example usage: create an alert for the given query and output whatever
// createAlert() returns (the feed URL, or nothing visible when it returns false).
$alert = new googleAlerts();
print $alert->createAlert('YOUR ALERT');

It will return a link to the RSS feed of your newly created alert.

查看更多
登录 后发表回答