I Wrote this Piece of code which scrapes Amazon for some elements using page URL, Now i want to add a csv function which enables me to append horizontally CSV columns With Following varibles :- ( Date_time, price, Merchant, Sellers_count ) Each time i run the code this columns should be added on right without removing any existing columns ..Here is code & table format to whom i want to add
# -*- coding: cp1252 -*-
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import requests, csv, time, urllib2, gspread, os, ast, datetime
from scrapy import Selector as s
from lxml import html
from random import randint
from oauth2client.client import SignedJwtAssertionCredentials
x = lambda x: source.xpath(x).extract()
links = ['http://www.amazon.com/dp/B00064NZCK',
'http://www.amazon.com/dp/B000CIU7F8',
'http://www.amazon.com/dp/B000H5839I',
'http://www.amazon.com/dp/B000LTLBHG',
'http://www.amazon.com/dp/B000SDLXKU',
'http://www.amazon.com/dp/B000SDLXNC',
'http://www.amazon.com/dp/B000SPHPWI',
'http://www.amazon.com/dp/B000UUMHRE']
driver = webdriver.Firefox()
#driver.set_page_load_timeout(30)
for Url in links:
try:
driver.get(Url)
except:
pass
time.sleep(randint(1,3))
try:
html = driver.page_source
source = s(text=html,type="html")
except:
pass
try:
Page_link = x('//link[@rel="canonical"]//@href')
except:
pass
try:
Product_Name = x('//span[@id="productTitle"]/text()')
except:
pass
Product_Name = str(Product_Name).encode('utf-8'); Product_Name = Product_Name.replace("[u'","").replace("']","")
try:
price = x('//span[@id="priceblock_ourprice"]//text()')
except:
pass
try:
Merchant = x('//div[@id="merchant-info"]//a//text()')
except:
pass
try:
Sellers_count = x('//span[@class="olp-padding-right"]//a/text()')
except:
pass
if Merchant == []:
Merchant = 'Amazon'
else:
Merchant = Merchant[0]
price = str(price).replace("[u'","").replace("']","")
if len(Sellers_count)>0:
Sellers_count = Sellers_count[0].encode('utf-8')
else:
Sellers_count = str(Sellers_count).encode('utf-8')
try:
Sellers_count = Sellers_count.replace("Â new",""); Sellers_count = int(Sellers_count)-1
except:
pass
if Sellers_count == []:
Sellers_count = str(Sellers_count).replace("[]","")
else:
Sellers_count = Sellers_count
Date_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
print Date_time, Product_Name, Url, price, Merchant, Sellers_count
The Existing table format I want to append to :-
ASIN ID PRODUCT URL
B00064NZCK MG-5690 BigMouth Inc Over The Hill Parking Privelege Permit http://www.amazon.com/dp/B00064NZCK
B000CIU7F8 BM1102 BigMouth Inc Pocket Disgusting Sounds Machine http://www.amazon.com/dp/B000CIU7F8
B000H5839I MG-4774 BigMouth Inc All Occasion Over The Hill Cane http://www.amazon.com/dp/B000H5839I
B000LTLBHG BM1234 BigMouth Inc Beer Belt / 6 Pack Holster(Black) http://www.amazon.com/dp/B000LTLBHG
B000SDLXKU BM1103 BigMouth Inc Covert Clicker http://www.amazon.com/dp/B000SDLXKU
B000SDLXNC BM1254 BigMouth Inc Inflatable John http://www.amazon.com/dp/B000SDLXNC
B000SPHPWI SO:AP Design Sense Generic Weener Kleener Soap http://www.amazon.com/dp/B000SPHPWI
B000UUMHRE MG-5305 BigMouth Inc Over the Hill Rectal Thermometer http://www.amazon.com/dp/B000UUMHRE