How to hold the cache in selenium in a loop after

2019-05-21 23:55发布

I am using this spider to click on the color and then page gets refreshed and then subsequent clicking on the links but it breaks in between and throw Element not found in the cache - perhaps the page has changed since it was looked up Error How to get hold of original page after completion of loop?

Couldn't find any suitable solution for this.

import scrapy
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from scrapy.selector import Selector
import time

class CompItem(scrapy.Item):
    model_name = scrapy.Field()
    model_link = scrapy.Field()
    url = scrapy.Field()
    what = scrapy.Field()
    seller = scrapy.Field()

class criticspider(CrawlSpider):
    name = "extract"
    allowed_domains = ["mysmartprice.com"]
    start_urls = ["http://www.mysmartprice.com/mobile/huawei-honor-holly-msp4857"]

    def __init__(self, *args, **kwargs):
        super(criticspider, self).__init__(*args, **kwargs)
        self.download_delay = 0.25
        self.browser = webdriver.Firefox()
        self.browser.maximize_window()

        self.browser.implicitly_wait(20)

    def parse_start_url(self, response):
        self.browser.get(response.url)

        # waiting for "Go to store" to become visible
        wait = WebDriverWait(self.browser, 10)
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.store_pricetable")))

        main_window = self.browser.window_handles[0]

        # iterate over featured stores and visit them
        for i,store in enumerate(self.browser.find_elements_by_css_selector("div.store_pricetable")):
            link = store.find_element_by_css_selector("div.store_gostore > div.storebutton")
            ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()

            # there is a popup preventing us to navigate to the store URL - close it 
            try:
                popup_close = self.browser.find_element_by_css_selector(".popup-closebutton")
                popup_close.click()

                # repeat the click
                ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()
            except NoSuchElementException:
                pass

            button = self.browser.find_element_by_xpath('/html/body/div[3]/div/div[2]/div/div[2]/div/div[1]/div[4]/div/div[3]').click()

            #time.sleep(5)
            item = CompItem()
            sel = Selector(text=self.browser.page_source)
            item["what"] = "url"
            item["seller"] = response.xpath('//div[@class="store_rating_bar_out"]/@data-storename').extract()[i]
            item["model_name"] = sel.xpath('//span[contains(@itemprop,"brand")]/text()').extract()[0] +" "+sel.xpath('//span[contains(@itemprop,"name")]/text()').extract()[0] + sel.xpath('//span[contains(@class,"variant")]/text()').extract()[0]
            # shift+click on the "Go to Store" link


            # switch to the newly opened window, read the current url and close the window
            self.browser.switch_to.window(self.browser.window_handles[-1])

            # wait until "On your way to the store" would not be in title
            wait.until(lambda browser: "On your way to the Store" not in browser.title)

            item['url'] = self.browser.current_url
            yield item
            self.browser.close()

            # switch back to the main window
            self.browser.switch_to.window(main_window)
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + "r")
            wait = WebDriverWait(self.browser, 10)

0条回答
登录 后发表回答