I am using this spider to click on the color and then page gets refreshed and then subsequent clicking on the links but it breaks in between and throw Element not found in the cache - perhaps the page has changed since it was looked up Error How to get hold of original page after completion of loop?
Couldn't find any suitable solution for this.
import scrapy
from scrapy.contrib.spiders import CrawlSpider
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from scrapy.selector import Selector
import time
class CompItem(scrapy.Item):
model_name = scrapy.Field()
model_link = scrapy.Field()
url = scrapy.Field()
what = scrapy.Field()
seller = scrapy.Field()
class criticspider(CrawlSpider):
name = "extract"
allowed_domains = ["mysmartprice.com"]
start_urls = ["http://www.mysmartprice.com/mobile/huawei-honor-holly-msp4857"]
def __init__(self, *args, **kwargs):
super(criticspider, self).__init__(*args, **kwargs)
self.download_delay = 0.25
self.browser = webdriver.Firefox()
self.browser.maximize_window()
self.browser.implicitly_wait(20)
def parse_start_url(self, response):
self.browser.get(response.url)
# waiting for "Go to store" to become visible
wait = WebDriverWait(self.browser, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.store_pricetable")))
main_window = self.browser.window_handles[0]
# iterate over featured stores and visit them
for i,store in enumerate(self.browser.find_elements_by_css_selector("div.store_pricetable")):
link = store.find_element_by_css_selector("div.store_gostore > div.storebutton")
ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()
# there is a popup preventing us to navigate to the store URL - close it
try:
popup_close = self.browser.find_element_by_css_selector(".popup-closebutton")
popup_close.click()
# repeat the click
ActionChains(self.browser).key_down(Keys.SHIFT).move_to_element(link).click(link).key_up(Keys.SHIFT).perform()
except NoSuchElementException:
pass
button = self.browser.find_element_by_xpath('/html/body/div[3]/div/div[2]/div/div[2]/div/div[1]/div[4]/div/div[3]').click()
#time.sleep(5)
item = CompItem()
sel = Selector(text=self.browser.page_source)
item["what"] = "url"
item["seller"] = response.xpath('//div[@class="store_rating_bar_out"]/@data-storename').extract()[i]
item["model_name"] = sel.xpath('//span[contains(@itemprop,"brand")]/text()').extract()[0] +" "+sel.xpath('//span[contains(@itemprop,"name")]/text()').extract()[0] + sel.xpath('//span[contains(@class,"variant")]/text()').extract()[0]
# shift+click on the "Go to Store" link
# switch to the newly opened window, read the current url and close the window
self.browser.switch_to.window(self.browser.window_handles[-1])
# wait until "On your way to the store" would not be in title
wait.until(lambda browser: "On your way to the Store" not in browser.title)
item['url'] = self.browser.current_url
yield item
self.browser.close()
# switch back to the main window
self.browser.switch_to.window(main_window)
self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + "r")
wait = WebDriverWait(self.browser, 10)