Nightmare conditional wait()

2019-01-27 04:32发布

问题:

I'm trying to crawl a webpage using Nightmare, but want to wait for #someelem to be present, only if it actually exists. Otherwise, I want Nightmare to move on. How can this be done using .wait()?

I can't use .wait(ms). Using .wait(selector) means Nightmare will keep waiting until the element is present, but if the page will never have this element, Nightmare will keep waiting forever.

The last option is to use .wait(fn). And I've tried something like this

// Attempted conditional wait: the callback tries to report whether #elem
// exists, retrying up to 10 times, 2000 ms apart, before giving up.
.wait(function(cheerio) {
            // Parse a one-time snapshot of the body's markup; every later
            // doEval() call queries this same snapshot, not the live document.
            var $ = cheerio.load(document.body.outerHTML);
            var attempt = 0;

            function doEval() {
                if ( $('#elem').length > 0 ) {
                    return true;
                }
                else {
                    attempt++;

                    if ( attempt < 10 ) {
                        // Schedules a retry, but nothing is returned on this
                        // path, so this doEval() call itself yields undefined.
                        setTimeout(doEval,2000); //This seems iffy.
                    }
                    else {
                        // Attempts exhausted: returns true even though the
                        // element was not found.
                        return true;
                    }
                }
            }

            return doEval();
        },cheerio)

So the idea is to wait and try again (up to a threshold), and if the element is still not found, just move on. The code seems wrong around setTimeout, because the function passed to .wait() is executed in the browser scope.

Thanks in advance!

回答1:

I don't think passing the cheerio library as you have it is going to work very well. The arguments get serialized (more or less) to be passed to the child Electron process, so passing an entire library probably won't work.

On the up side, the fn part of .wait(fn) is executed in the page context - meaning you have full access to document and the methods it has (eg, querySelector). You could also have access to the page's jQuery context if it exists, or you could even use .inject() to inject it if not.

Setting that aside, you're right insofar as .wait() (and .evaluate(), for that matter) expect a synchronous method, at least until something like promises could be used directly in .evaluate().

Until that is available, you could use .action() to mimic the behavior you want:

var Nightmare = require('nightmare');

/**
 * Registers a custom `deferredWait` action that polls the page for a selector
 * and gives up after a bounded number of attempts instead of waiting forever.
 *
 * Usage: .deferredWait([selector[, maxAttempts[, interval]]])
 *   selector    - CSS selector to look for (default '#elem').
 *   maxAttempts - number of checks before giving up (default 10).
 *   interval    - delay between checks in milliseconds (default 2000).
 *
 * The action completes with `true` if the selector matched, `false` if every
 * attempt failed, or with an error if the in-page evaluation failed.
 */
Nightmare.action('deferredWait', function() {
  // Nightmare appends the completion callback after the caller's arguments,
  // so it is always the last one regardless of how many options were given.
  var args = Array.prototype.slice.call(arguments);
  var done = args.pop();
  var selector = args[0] === undefined ? '#elem' : args[0];
  var maxAttempts = args[1] === undefined ? 10 : args[1];
  var interval = args[2] === undefined ? 2000 : args[2];
  var attempt = 0;
  var self = this;

  function doEval() {
    self.evaluate_now(function(selector) {
      // Runs in the page context; only the serialized `selector` is available.
      return (document.querySelector(selector) !== null);
    }, function(err, result) {
      // evaluate_now reports back node-style: the error comes first.
      if (err) {
        done(err);
        return;
      }
      if (result) {
        done(null, true);
        return;
      }
      attempt++;
      if (attempt < maxAttempts) {
        // The timer runs in the Node process, so the retry is a fresh
        // evaluation against the live page.
        setTimeout(doEval, interval);
      } else {
        done(null, false);
      }
    }, selector);
  }
  doEval();
  return this;
});

// Example run: log whether the element showed up, then shut Electron down.
var nightmare = Nightmare();
nightmare.goto('http://example.com')
  .deferredWait()
  // Without .end() the Electron child process keeps running after the chain settles.
  .end()
  .then(function(result) {
    console.log(result);
  })
  .catch(function(error) {
    // Surface navigation/evaluation failures instead of leaving the rejection unhandled.
    console.error(error);
  });


回答2:

  1. As mentioned in documentation of nightmarejs,

.wait(selector) Wait until the element selector is present e.g. .wait('#pay-button')

In this case, wait only blocks until the element first appears in the DOM; if it never appears, wait keeps going until the default timeout of 30s is reached.

  2. wait with function

    .wait(function () { return (document.querySelector(selector) === null); })

where selector is the element based on whose existence in the DOM we wait.



回答3:

Here I created a function that gets the HTML source under different conditions. I'm crawling the TimeWarnerCable page to get info about TV, Internet and Bundle plans, so my function takes some parameters and reacts to each of them differently on each call. You can use .exists() to check selectors and then continue with Nightmare.

/**
 * Opens `url` in Nightmare, submits `zip` through whichever ZIP form the page
 * shows, and resolves with the resulting page markup.
 *
 * @param {string} url - Page to load.
 * @param {number} serviceQuantity - 1, 2 or 3. Selects which filters are
 *   clicked before the markup is captured: 1 clicks none, 2 clicks
 *   '#tv-filter' and '#internet-filter', 3 additionally clicks '#phone-filter'.
 * @param {string} zip - ZIP code text inserted into the form.
 * @returns {Promise} Q promise resolved with the innerHTML of the <html>
 *   element, or with '' when the "not available" message is detected.
 *   Rejected when a Nightmare step fails or `serviceQuantity` is unsupported.
 */
function getSource(url,serviceQuantity,zip){
  var defer=Q.defer();
  var Nightmare = require('nightmare');
  var nightmare = Nightmare({openDevTools:browserDev ,show: browserVisible,'webPreferences':{partition: 'nopersist'}});

  // Logs the failure and settles the promise so callers are never left hanging.
  // Used by chains that already include .end().
  function rejectWith(error){
    debug('ERROR >> Search failed:', error);
    defer.reject(error);
  }

  // Same as rejectWith, but for chains without .end(): shut Electron down
  // first, then reject with the original error whether or not shutdown worked.
  function abort(error){
    debug('ERROR >> Search failed:', error);
    nightmare
      .end()
      .then(function(){
        defer.reject(error);
      })
      .catch(function(){
        defer.reject(error);
      });
  }

  // Clicks each filter (pausing 500 ms after every click), then captures the
  // page markup, ends the session and resolves the promise with it.
  function captureHtml(filterSelectors){
    filterSelectors.forEach(function(selector){
      nightmare
        .click(selector)
        .wait(500);
    });
    nightmare
      .evaluate(function(){
        return document.querySelector('html').innerHTML;
      })
      .end()
      .then(function (result) {
        defer.resolve(result);
      })
      .catch(rejectWith);
  }

  nightmare
  .goto(url)
  .cookies.clear()
  .wait(2000)
  .exists('div.messagebox-wrapper.twc-container[style="display: block;"]')
  .then(function(noZipSet){
    // The steps below are only queued here; they run when the next .then() is called.
    if (noZipSet){
      debug('No zipcode settled down');
      nightmare
      .insert('fieldset > div > input[placeholder="Enter Your ZIP Code"]',zip)
      .type('fieldset > div > input[placeholder="Enter Your ZIP Code"]', '\u000d');//I do "Enter" because nightmare can't find the submit button
    }else{
      debug('Zipcode settled down');
      nightmare
      .click('div.section.newHeaderIcons > div > ul > li:nth-child(4) > div > a')
      .wait(2000)
      .insert('form.geoLoc > fieldset > div > input[placeholder="Update Your ZIP Code"]',zip)
      .type('form.geoLoc > fieldset > div > input[placeholder="Update Your ZIP Code"]', '\u000d');//I do "Enter" because nightmare can't find the submit button
    }
    // Returned so that a failure in this second stage reaches the outer .catch().
    return nightmare
    .wait(8500)
    .exists('div[style="display: block;"] > div > div > div > div > div > div > div.parsys.oof-error-content > div > div > div > div > div > div > p[style="color: #333333;"]')
    .then(function(zipNotAvailable){
      if (zipNotAvailable){
        debug('Service not available in '+zip+' for '+serviceQuantity+' services');
        nightmare
          .end()
          .then(function(){
            defer.resolve('');
          })
          .catch(rejectWith);
        return;
      }
      debug('Service available on the zipcode');
      switch (serviceQuantity) {
        case 1:
          captureHtml([]);
          break;
        case 2:
          captureHtml(['#tv-filter', '#internet-filter']);
          break;
        case 3:
          captureHtml(['#tv-filter', '#internet-filter', '#phone-filter']);
          break;
        default:
          // Previously this fell through silently: the promise never settled
          // and the Electron process was never ended.
          abort(new Error('Unsupported serviceQuantity: '+serviceQuantity));
      }
    });
  })
  .catch(abort);
  return defer.promise;
}