Login using UrlFetchApp function and scrape privat

2019-07-22 13:06发布

问题:

I need to scrape private data from portal which doesn't support API. IMPORTXML can't do this because of login.

I have a link with information of from & to date and the content is table with cost data. I need to login and scrape simple table into my Google Sheet.

I need to log in into this website:

https://www.glami.cz/registrace/prihlasit

and than scrape this url:

https://partner.glami.cz/s/e-commerce/days/866/?from=2016-12-01&to=2016-12-09

The form on this site is:

<form action="/registrace/prihlasit/" method="post" id="frm-signIn">
<dl class="form">
    <dt><label for="frm-signIn-username" class="required">Emailová adresa</label></dt>
    <dd><input type="text" name="username" id="frm-signIn-username" required data-nette-rules='[{"op":":filled","msg":"Prosím, vyplňte emailovou adresu."}]' class="text"></dd>
    <dt><label for="frm-signIn-password" class="required">Heslo</label></dt>
    <dd><input type="password" name="password" id="frm-signIn-password" required data-nette-rules='[{"op":":filled","msg":"Prosím, vyplňte heslo."}]' class="text"></dd>
    <dt></dt>
    <dd><input type="submit" name="send" value="Přihlásit se" class="button"></dd>
</dl>
<div><input type="hidden" name="_do" value="signIn-submit"></div>
</form>

I have this code which works for other websites. In this case the response from logger is still "didnt log in".

function fetchAdminPage() {
   var url = "https://www.glami.cz/registrace/prihlasit";
   var options = {
      "method": "post",
      "payload": {
        'username': 'LOGIN',
        'password': 'PASSWORD',
        'send': 'Přihlásit se',
        '_do': 'signIn-submit',
        "testcookie": 1
      },
      "followRedirects": false
   };
   var response = UrlFetchApp.fetch(url, options);
   if ( response.getResponseCode() == 200 ) {
     // Incorrect user/pass combo
     Logger.log("didnt log in");
   } else if ( response.getResponseCode() == 302 ) {
     // Logged-in
     var headers = response.getAllHeaders();
     if ( typeof headers['Set-Cookie'] !== 'undefined' ) {
        // Make sure that we are working with an array of cookies
        var cookies = typeof headers['Set-Cookie'] == 'string' ? [ headers['Set-Cookie'] ] : headers['Set-Cookie'];
        for (var i = 0; i < cookies.length; i++) {
           // We only need the cookie's value - it might have path, expiry time, etc here
           cookies[i] = cookies[i].split( ';' )[0];
        };
        url = "https://partner.glami.cz/s/e-commerce/days/866/?from=2016-12-01&to=2016-12-09";
        options = {
            "method": "get",
            // Set the cookies so that we appear logged-in
            "headers": {
               "Cookie": cookies.join(';')
            }
        };
        response = UrlFetchApp.fetch(url, options);
     };
     Logger.log(response.getContentText()); 
   };     
}

The problem has to be I guess somewhere in payload/options or in url adress. Can't figure out what's wrong and how to log in succesfully.