Displaying src of image inside an iframe with Selenium

2019-08-25 08:24发布

问题:

I am trying to download an image automatically from ShapeNet using Python and Selenium. I am almost there, but the last step eludes me.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


# Optional proxy settings for Firefox. "yourproxy" and number_of_port are
# placeholders that must be replaced with real values before running; the
# profile only takes effect if the commented-out constructor below is used.
profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.socks", "yourproxy")
profile.set_preference("network.proxy.socks_port", number_of_port)
#browser = webdriver.Firefox(firefox_profile=profile)
browser = webdriver.Firefox()

browser.get('https://www.shapenet.org/taxonomy-viewer')
# The page is slow to load: allow each explicit wait up to 30 seconds.
wait = WebDriverWait(browser, 30)
# wait.until() returns the element once it is clickable, so it is used
# directly instead of looking the element up a second time with the
# find_element_by_* helpers (removed in Selenium 4.3).
linkElem = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='02958343_anchor']")))
linkElem.click()
# The model thumbnail that opens the iframe is also slow to appear.
linkElem = wait.until(EC.element_to_be_clickable((By.ID, "model_3dw_bcf0b18a19bce6d91ad107790a9e2d51")))
linkElem.click()
# The iframe itself is slow to be displayed; once it is available, switch
# the driver's context into it so later lookups run inside the iframe.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, 'viewerIframe')))

Up to this point everything runs smoothly, and we are inside the iframe. The next step works, but only if I use time.sleep(). It is slightly ugly, but I do not know of any alternative, and it is not the core of my question:

import time
# An explicit wait did not work here, so a fixed pause is used instead:
#element = wait.until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/div[3]/h4")))
time.sleep(20)
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.
linkElem = browser.find_element(By.XPATH, "/html/body/div[3]/div[3]/h4")
# Clicking the heading opens the collapse menu.
linkElem.click()

Now I want to download one of the images just displayed in the collapse menu opened by my click, so I found its XPath using the developer tools:

# Locate the image by its absolute XPath and read its src attribute.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.
img = browser.find_element(By.XPATH, "/html/body/div[3]/div[3]/div/div/div/span/img")
src = img.get_attribute('src')

Now it can access img, but src is None until I manually click on the web page. Why is that? What am I doing wrong?

PS: the last step would be:

import subprocess

# Download the image with wget. The URL comes from a web page, so it is
# passed as its own argument rather than interpolated into a shell command
# line; shell metacharacters in it cannot be interpreted by a shell.
subprocess.run(["wget", "--no-check-certificate", src])

回答1:

Instead of the XPath "/html/body/div[3]/div[3]/div/div/div/span/img", you can use the following XPath:

# Select the image through the class and id of its enclosing search-result
# div rather than a purely positional path, then read its src attribute.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.
img = browser.find_element(By.XPATH, "/html/body/div[3]/div[3]//div[@class='searchResult' and @id='image.3dw.bcf0b18a19bce6d91ad107790a9e2d51.0']/img[@class='enlarge']")
src = img.get_attribute('src')