I need the API to fetch the top-selling books on Amazon. By default it only returns the top 10 items, but I need more than 10 — around 1,000 items — from a single request.
OR
I need a way to scrape the 1,000 top-selling books using the Amazon ECS API.
OR
Is there any way except the Amazon API to scrape all of the top selling books on Amazon?
Here's how I do it — but it won't work for more than 100 items after the end of this month, as Amazon is limiting ItemPage to 10.
# The first request is only used to learn how many result pages exist.
rescheck = Amazon::Ecs.item_search("search term here", :response_group => 'Large', :country => 'uk')

# ItemPage is 1-based, so walk 1..total_pages directly instead of
# incrementing a 0-based block index by hand.
(1..rescheck.total_pages).each do |page|
  res = Amazon::Ecs.item_search("search term here", :response_group => 'Large', :item_page => page, :country => 'uk')
  res.items.each do |item|
    asin = item.get('ASIN')
    title = item.get('ItemAttributes/Title')
    brand = item.get('ItemAttributes/Brand')
    #etc
  end
end
To get the 100 best-selling books you have to request each page in turn
with the ItemPage parameter (10 items per page, pages 1 to 10):
http://ecs.amazonaws.com/onca/xml?
Service=AWSECommerceService&
AWSAccessKeyId=[AWS Access Key ID]&
Operation=ItemSearch&
BrowseNode=17&
SearchIndex=Books&
ItemPage=2
&Sort=salesrank
&Timestamp=[YYYY-MM-DDThh:mm:ssZ]
&Signature=[Request Signature]
Problem: The maximum ItemPage value that can be requested is 10, so you can't get past 100 books.
Reference: http://docs.amazonwebservices.com/AWSECommerceService/latest/DG/index.html?ItemSearch.html
Building on the previous answer, I scraped the browse-node IDs for the following categories from the DOM of the documentation page for the IN (India) locale:
[
{
"index":"All",
"node":""
},
{
"index":"Baby",
"node":"1571275031"
},
{
"index":"Beauty",
"node":"1355017031"
},
{
"index":"Books",
"node":"976390031"
},
{
"index":"Automotive",
"node":"4772061031"
},
{
"index":"Apparel",
"node":"1571272031"
},
{
"index":"PCHardware",
"node":"976393031"
},
{
"index":"Electronics",
"node":"976420031"
},
{
"index":"GiftCards",
"node":"3704983031"
},
{
"index":"Grocery",
"node":"2454179031"
},
{
"index":"HealthPersonalCare",
"node":"1350385031"
},
{
"index":"HomeGarden",
"node":"2454176031"
},
{
"index":"Industrial",
"node":"5866079031"
},
{
"index":"Jewelry",
"node":"1951049031"
},
{
"index":"KindleStore",
"node":"1571278031"
},
{
"index":"Luggage",
"node":"2454170031"
},
{
"index":"DVD",
"node":"976417031"
},
{
"index":"Music",
"node":"976446031"
},
{
"index":"MusicalInstruments",
"node":"3677698031"
},
{
"index":"OfficeProducts",
"node":"2454173031"
},
{
"index":"PetSupplies",
"node":"4740420031"
},
{
"index":"Shoes",
"node":"1571284031"
},
{
"index":"Software",
"node":"976452031"
},
{
"index":"SportingGoods",
"node":"1984444031"
},
{
"index":"Toys",
"node":"1350381031"
},
{
"index":"VideoGames",
"node":"976461031"
},
{
"index":"Watches",
"node":"1350388031"
}
]
<?php
namespace MarcL;
use MarcL\CurlHttpRequest;
use MarcL\AmazonUrlBuilder;
use MarcL\Transformers\DataTransformerFactory;
/**
 * Client for the Amazon Product Advertising API.
 *
 * Requests are signed by the injected URL builder and sent with
 * CurlHttpRequest. ItemSearch() renders its results directly as an HTML
 * table; ItemLookup() returns the response after passing it through the
 * data transformer selected in the constructor.
 */
class AmazonAPI
{
    /** Highest ItemPage value that is requested or linked to by ItemSearch(). */
    const MAX_ITEM_PAGE = 10;

    /** Width of each price band swept by ItemSearch() (same units as MinimumPrice/MaximumPrice). */
    const PRICE_BAND_STEP = 5000;

    /** ItemSearch() stops once the upper bound of a band exceeds this value. */
    const PRICE_BAND_CEILING = 100000;

    private $urlBuilder = NULL;
    private $dataTransformer = NULL;

    /** Number of table rows rendered since the counter was last reset. */
    public $item = 0;

    /** Number of table rows rendered by the most recent search request. */
    public $perRequest = 0;

    // Valid names that can be used for search
    private $mValidSearchNames = array(
        'All',
        'Apparel',
        'Appliances',
        'Automotive',
        'Baby',
        'Beauty',
        'Blended',
        'Books',
        'Classical',
        'DVD',
        'Electronics',
        'Grocery',
        'HealthPersonalCare',
        'HomeGarden',
        'HomeImprovement',
        'Jewelry',
        'KindleStore',
        'Kitchen',
        'Lighting',
        'Marketplace',
        'MP3Downloads',
        'Music',
        'MusicTracks',
        'MusicalInstruments',
        'OfficeProducts',
        'OutdoorLiving',
        'Outlet',
        'PetSupplies',
        'PCHardware',
        'Shoes',
        'Software',
        'SoftwareVideoGames',
        'SportingGoods',
        'Tools',
        'Toys',
        'VHS',
        'Video',
        'VideoGames',
        'Watches'
    );

    private $mErrors = array();

    /**
     * @param object $urlBuilder Builder whose generate($params) returns the signed request URL
     * @param mixed  $outputType Output type handed to DataTransformerFactory::create()
     */
    public function __construct($urlBuilder, $outputType) {
        $this->urlBuilder = $urlBuilder;
        $this->dataTransformer = DataTransformerFactory::create($outputType);
    }

    /**
     * Group the digits of an amount the Indian way: the last three digits
     * form one group, every group to the left of it has two digits
     * (e.g. 1234567 -> 12,34,567).
     *
     * A leading minus sign and anything from the first '.' onwards are kept
     * as they are and never take part in the grouping.
     *
     * @param int|float|string $money Amount to format
     *
     * @return string Formatted amount
     */
    public function IND_money_format($money)
    {
        $digits = (string) $money;

        $sign = '';
        if ($digits !== '' && $digits[0] === '-') {
            $sign = '-';
            $digits = (string) substr($digits, 1);
        }

        $fraction = '';
        $dot = strpos($digits, '.');
        if ($dot !== false) {
            $fraction = (string) substr($digits, $dot);
            $digits = (string) substr($digits, 0, $dot);
        }

        $length = strlen($digits);
        if ($length <= 3) {
            return $sign . $digits . $fraction;
        }

        $lastThree = substr($digits, -3);
        $leading = substr($digits, 0, $length - 3);

        // Pairs are counted from the right, so split the reversed string
        // and reverse the joined result back.
        $pairs = str_split(strrev($leading), 2);

        return $sign . strrev(implode(',', $pairs)) . ',' . $lastThree . $fraction;
    }

    /**
     * @return array Names accepted as search index
     */
    public function GetValidSearchNames() {
        return $this->mValidSearchNames;
    }

    /**
     * Search for items and print them as an HTML table.
     *
     * The search is repeated for consecutive price bands, starting with
     * [$minPrice, $maxPrice] and moving up by PRICE_BAND_STEP until the upper
     * bound exceeds PRICE_BAND_CEILING. For every band the result page
     * $itemPage is requested, with a one second pause before each request.
     * Pagination links pointing at examples.php?itemPage=N are printed below
     * the table.
     *
     * NOTE(review): the script is terminated after rendering, exactly as
     * before; callers therefore never get a return value and cannot read
     * GetErrors() afterwards. Consider returning instead once all callers
     * have been checked.
     *
     * @param keywords      Keywords which we're requesting
     * @param itemPage      Result page to request (clamped to 1..MAX_ITEM_PAGE)
     * @param searchIndex   Name of search index (category) requested. NULL if searching all.
     * @param sortBy        Category to sort by, only used if searchIndex is not 'All'
     * @param condition     Condition of item. Valid conditions : Used, Collectible, Refurbished, All
     * @param minPrice      Lower bound of the first price band
     * @param maxPrice      Upper bound of the first price band
     *
     * @return void Does not return; output is echoed and the script ends.
     */
    public function ItemSearch($keywords,$itemPage, $searchIndex = NULL, $sortBy = NULL, $condition = 'All',$minPrice=50000,$maxPrice=55000) {
        $page = max(1, min(self::MAX_ITEM_PAGE, (int) $itemPage));
        $index = empty($searchIndex) ? 'All' : $searchIndex;
        // Compare against the resolved index so that an empty $searchIndex
        // (which searches 'All') does not send a Sort value.
        $sort = ($sortBy && $index != 'All') ? $sortBy : NULL;

        echo $this->renderTableOpening();

        $totalPages = 0;
        while ($maxPrice <= self::PRICE_BAND_CEILING) {
            sleep(1);

            $params = array(
                'Operation' => 'ItemSearch',
                'ResponseGroup' => 'Small,ItemAttributes,Offers,OfferSummary',
                'Keywords' => $keywords,
                'Condition' => $condition,
                'ItemPage' => $page,
                'MinimumPrice' => $minPrice,
                'MaximumPrice' => $maxPrice,
                'SearchIndex' => $index,
                'Sort' => $sort
            );

            // Each band reports its own page count; the links below have to
            // cover the longest one.
            $bandPages = $this->FetchItems($params, $page, $maxPrice, false);
            if (is_int($bandPages) && $bandPages > $totalPages) {
                $totalPages = $bandPages;
            }

            $minPrice = $maxPrice;
            $maxPrice = $maxPrice + self::PRICE_BAND_STEP;
        }

        echo "</tbody>\n</table>\n<br/><br/>\n";
        echo $this->renderPagination($page, min($totalPages, self::MAX_ITEM_PAGE));

        die();
    }

    /**
     * Lookup items from ASINs
     *
     * @param asinList          Either a single ASIN or an array of ASINs
     * @param itemPage          Passed through to the request helper; not part of the lookup request itself
     * @param onlyFromAmazon    True if only requesting items from Amazon and not 3rd party vendors
     *
     * @return mixed Output of the data transformer, or false if failure.
     */
    public function ItemLookup($asinList,$itemPage, $onlyFromAmazon = false) {
        if (is_array($asinList)) {
            $asinList = implode(',', $asinList);
        }

        $params = array(
            'Operation' => 'ItemLookup',
            'ResponseGroup' => 'ItemAttributes,Offers,Images',
            'ReviewSort' => '-OverallRating',
            'ItemId' => $asinList,
            'MerchantId' => ($onlyFromAmazon == true) ? 'Amazon' : 'All'
        );

        return $this->MakeAndParseRequest($params,$itemPage,true);
    }

    /**
     * @return array Error messages collected so far
     */
    public function GetErrors() {
        return $this->mErrors;
    }

    private function AddError($error) {
        array_push($this->mErrors, $error);
    }

    /**
     * Sign and send one request and process the response.
     *
     * With $lookup set, the response is parsed and handed to the data
     * transformer. Otherwise every returned item that has a list price is
     * echoed as one table row, and $item / $perRequest are updated.
     *
     * @param params    Request parameters handed to the URL builder
     * @param itemPage  Unused; kept so existing calls stay valid
     * @param maxPrice  Unused; kept so existing calls stay valid
     * @param lookup    True to return transformed data instead of rendering rows
     *
     * @return mixed Lookup: transformer output. Search: number of result pages
     *               reported in the response (0 if it has no Items element).
     *               false if the request or the XML parsing failed.
     */
    public function FetchItems($params,$itemPage,$maxPrice,$lookup=false)
    {
        $signedUrl = $this->urlBuilder->generate($params);

        try {
            $request = new CurlHttpRequest();
            $response = $request->execute($signedUrl);

            if ($lookup) {
                $parsedXml = simplexml_load_string($response);
                if ($parsedXml === false) {
                    return false;
                }

                return $this->dataTransformer->execute($parsedXml);
            }

            // Only line breaks and tabs are removed. Quotes are left alone:
            // rewriting them changes text values and can break attributes
            // whose value contains an apostrophe.
            $compactXml = trim(str_replace(array("\n", "\r", "\t"), '', $response));
            $simpleXml = simplexml_load_string($compactXml);
            if ($simpleXml === false) {
                $this->AddError("Error parsing response : $signedUrl");
                return false;
            }

            // Round trip through JSON to get plain nested arrays.
            $decoded = json_decode(json_encode($simpleXml), true);
            if (!is_array($decoded) || !isset($decoded['Items'])) {
                return 0;
            }

            $this->perRequest = 0;
            $entries = $this->asList($this->valueAt($decoded, array('Items', 'Item')));
            foreach ($entries as $entry) {
                // Items without a list price are not rendered.
                if ($this->valueAt($entry, array('ItemAttributes', 'ListPrice', 'FormattedPrice')) === null) {
                    continue;
                }

                $this->item = $this->item + 1;
                $this->perRequest = $this->perRequest + 1;
                echo $this->renderItemRow($entry);
            }

            return (int) $this->valueAt($decoded, array('Items', 'TotalPages'));
        } catch(\Exception $error) {
            $this->AddError("Error downloading data : $signedUrl : " . $error->getMessage());
            return false;
        }
    }

    /**
     * Reset the row counter and run the request through FetchItems().
     *
     * @return mixed Whatever FetchItems() returns for the given mode
     */
    private function MakeAndParseRequest($params,$itemPage,$lookup=false)
    {
        $this->item = 0;

        return $this->FetchItems($params, $itemPage, NULL, $lookup);
    }

    /** Opening markup of the result table, including the header row. */
    private function renderTableOpening()
    {
        $headings = array(
            'Title',
            'List Price',
            'Offer Price',
            'Offer Selling Price',
            'Amount Saved',
            'Brand Name',
            'Size',
            'Color',
            'Manufacturer'
        );

        $html = "<table cellpadding=\"5\">\n<thead>\n<tr>\n";
        foreach ($headings as $heading) {
            $html .= '<td>' . $heading . "</td>\n";
        }

        return $html . "</tr>\n</thead>\n<tbody>\n";
    }

    /** Build the table row (nine cells) for one decoded item. */
    private function renderItemRow($entry)
    {
        $listAmount = $this->valueAt($entry, array('ItemAttributes', 'ListPrice', 'Amount'));

        // With several offers the first one is shown.
        $offers = $this->asList($this->valueAt($entry, array('Offers', 'Offer')));
        $listing = isset($offers[0]) ? $this->valueAt($offers[0], array('OfferListing')) : null;
        $offerPrice = $this->valueAt($listing, array('Price', 'FormattedPrice'));
        $lowestNew = $this->valueAt($entry, array('OfferSummary', 'LowestNewPrice', 'FormattedPrice'));

        $cells = array(
            $this->cell($this->valueAt($entry, array('ItemAttributes', 'Title'))),
            $this->cell($this->valueAt($entry, array('ItemAttributes', 'ListPrice', 'FormattedPrice')))
        );

        if ($offerPrice !== null) {
            // The selling price is the sale price when there is one,
            // otherwise the regular offer price.
            $hasSalePrice = $this->valueAt($listing, array('SalePrice', 'FormattedPrice')) !== null;
            $selling = $hasSalePrice ? 'SalePrice' : 'Price';

            $cells[] = $this->cell($offerPrice);
            $cells[] = $this->cell($this->valueAt($listing, array($selling, 'FormattedPrice')));
            $cells[] = $this->cell($this->formatSavedAmount(
                $listAmount,
                $this->valueAt($listing, array($selling, 'Amount'))
            ));
        } elseif ($lowestNew !== null) {
            $cells[] = $this->cell($lowestNew);
            $cells[] = $this->cell($lowestNew);
            $cells[] = $this->cell($this->formatSavedAmount(
                $listAmount,
                $this->valueAt($entry, array('OfferSummary', 'LowestNewPrice', 'Amount'))
            ));
        } else {
            $cells[] = $this->cell(null);
            $cells[] = $this->cell(null);
            $cells[] = $this->cell(null);
        }

        foreach (array('Brand', 'Size', 'Color', 'Manufacturer') as $attribute) {
            $cells[] = $this->cell($this->valueAt($entry, array('ItemAttributes', $attribute)));
        }

        return "<tr>\n" . implode("\n", $cells) . "\n</tr>\n";
    }

    /** Build the page links: those near the current page, near the middle and at the end. */
    private function renderPagination($currentPage, $totalPages)
    {
        $middle = $totalPages / 2;
        $html = '';

        for ($looper = 1; $looper <= $totalPages; $looper++) {
            $nearCurrent = $looper > ($currentPage - 3) && $looper < ($currentPage + 3);
            $nearEnd = $looper > ($totalPages - 3);
            $nearMiddle = $looper > ($middle - 3) && $looper < ($middle + 3);
            if (!$nearCurrent && !$nearEnd && !$nearMiddle) {
                continue;
            }

            $style = ($looper == $currentPage) ? " style='color:red;'" : '';
            $html .= "<a href='examples.php?itemPage=" . $looper . "'" . $style . ">" . $looper . "</a> ";
        }

        return $html;
    }

    /**
     * Difference between list and offer amount as display text.
     * Both amounts are treated as hundredths of a rupee, so the result
     * always carries two decimals.
     */
    private function formatSavedAmount($listAmount, $offerAmount)
    {
        $difference = (int) $listAmount - (int) $offerAmount;
        $sign = $difference < 0 ? '-' : '';
        $difference = abs($difference);

        $whole = (int) floor($difference / 100);
        $hundredths = $difference % 100;

        return 'INR ' . $sign . $this->IND_money_format((string) $whole) . '.' . sprintf('%02d', $hundredths);
    }

    /** One HTML-escaped table cell; missing or empty values are shown as N/A. */
    private function cell($value)
    {
        if (is_array($value)) {
            // Repeated XML elements decode to a list of values.
            $value = implode(', ', array_filter($value, 'is_scalar'));
        }
        if ($value === null || $value === '') {
            $value = 'N/A';
        }

        return '<td>' . htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8') . '</td>';
    }

    /** Follow $path through nested arrays; null as soon as a key is missing. */
    private function valueAt($data, $path)
    {
        foreach ($path as $key) {
            if (!is_array($data) || !isset($data[$key])) {
                return null;
            }
            $data = $data[$key];
        }

        return $data;
    }

    /**
     * Normalise a decoded XML child to a list. A single element decodes to
     * the element itself, several elements decode to a 0-indexed list.
     */
    private function asList($node)
    {
        if ($node === null || $node === array()) {
            return array();
        }
        if (is_array($node) && isset($node[0])) {
            return $node;
        }

        return array($node);
    }
}
?>