I am trying to figure out a way to loop over all countries.
I have the script below; it has 3 loops. Loops #2 and #3 are working fine, but Loop #1 is not.
Here is the logic: the main Alexa page, www.alexa.com/topsites/countries, lists multiple countries, and each country has a two-character suffix. For each country, a page number is prefixed to the suffix to get that country's listing.
Process: 1. Loop #1 goes to www.alexa.com/topsites/countries and loops over all countries (this part does not work). 2. Loop #2 loops over all pages for each country (this part is working). 3. Loop #3 collects the data for each page (this part is working).
// Script from the question: three nested iMacros loops
// (LOOP #1 countries -> LOOP #2 result pages -> LOOP #3 links on a page).

// Line separator appended after each command in the macro strings below.
var jsLF="\n";
// Macro played by LOOP #3: tags the {{i}}-th link (any href), extracts its
// text and saves the extract to hiprsites.txt in C:\.
var macro;
macro = "CODE:";
macro += "VERSION BUILD=9002379" + jsLF;
macro += "TAB T=1" + jsLF;
macro += "TAB CLOSEALLOTHERS" + jsLF;
macro += "TAG POS={{i}} TYPE=A ATTR=HREF:* EXTRACT=TXT" + jsLF;
macro += "SAVEAS TYPE=EXTRACT FOLDER=C:\\ FILE=hiprsites.txt" + jsLF;
// Macro played by LOOP #2: opens the listing URL built from the page number
// {{j}}. The country suffix in the URL is hard-coded to "ID".
var macro1;
macro1 = "CODE:";
macro1 += "VERSION BUILD=9002379" + jsLF;
macro1 += "TAB T=1" + jsLF;
macro1 += "TAB CLOSEALLOTHERS" + jsLF;
macro1 += "URL GOTO=http://www.alexa.com/topsites/countries;{{j}}/ID" + jsLF;
// Macro played by LOOP #1: opens "countries;{{z}}" with no country suffix.
// NOTE(review): z is a plain counter here, so no country code ever reaches the
// URL -- presumably this is why LOOP #1 does not behave as intended; confirm.
var macroAllC;
macroAllC = "CODE:";
macroAllC += "VERSION BUILD=9002379" + jsLF;
macroAllC += "TAB T=1" + jsLF;
macroAllC += "TAB CLOSEALLOTHERS" + jsLF;
macroAllC += "URL GOTO=http://www.alexa.com/topsites/countries;{{z}}" + jsLF;
//LOOP #1
// Intended to walk all countries: plays macroAllC with z = 0..199, then runs
// LOOP #2 for each value of z.
for (var z=0;z<200;z++)
{
iimDisplay(z);
iimSet("z", z);
iimPlay(macroAllC);
//LOOP #2
// Walks the result pages: plays macro1 with j = 0..19.
for (var j=0;j<20;j++)
{
iimDisplay(j);
iimSet("j", j);
iimPlay(macro1);
//LOOP #3
// Walks the current page: plays macro for link positions 1..25.
for(var i=1;i<=25;i++)
{
iimDisplay(i);
iimSet("i", i);
iimPlay(macro);
// NOTE(review): this second iimSet looks redundant -- "i" is set again at the
// top of the next iteration before macro is played; confirm before removing.
iimSet("i",i);
}//loop individual pages
}//loop macro1
// NOTE(review): the closing brace of LOOP #1 is missing from this snippet.
First of all, you need to extract all countries into a temporary array:
// Collect the href of every link whose URL contains "countries", starting at
// match position 4, until the extract comes back as "#EANF#".
var countries = [];
var i = 4;
for (;;)
{
    iimDisplay("Extracting " + i);
    iimSet("i", i);
    iimPlay("CODE:TAG POS={{i}} TYPE=A ATTR=HREF:*countries* EXTRACT=HREF");
    // "#EANF#" marks a failed extraction: no link at this position, so stop.
    if (iimGetLastExtract() === '#EANF#')
        break;
    countries.push(iimGetLastExtract());
    i++;
}
I'm setting i=4 so that the extraction starts at the first country, AF.
Then you will have to loop through each country page and extract all the data into another temporary array:
// Visit every country URL in `countries` and collect the href of each
// "siteinfo" link, following the "Next" link until it can no longer be
// clicked. Results accumulate in temp_pages.
// temp_pages must be declared before the first push; the original snippet
// pushed to it without ever declaring it, which throws a ReferenceError.
var temp_pages = [];
for (var i = 0; i < countries.length; i++)
{
    iimSet("url", countries[i]);
    iimPlay("CODE:URL GOTO={{url}}");
    // Extraction starts at match position 2 on every page.
    var j = 2;
    while (true)
    {
        iimDisplay("Loop " + (i+1) + " of " + countries.length + "\nExtracting " + j);
        iimSet("j", j);
        iimPlay("CODE:SET !TIMEOUT_STEP 1\nTAG POS={{j}} TYPE=A ATTR=HREF:*siteinfo* EXTRACT=HREF");
        var siteLink = iimGetLastExtract();
        if (siteLink !== '#EANF#')
        {
            temp_pages.push(siteLink);
            j++;
            continue;
        }
        // No more matches on this page: try to move to the next result page.
        iimPlay("CODE:TAG POS=1 TYPE=A ATTR=TXT:Next");
        if (iimGetLastError() !== 'OK')
            break; // no "Next" link could be clicked: this country is done
        // Restart at position 2 on the freshly loaded page.
        j = 2;
    }
}
Now we have all pages from all countries stored in the temp_pages
array, which you will have to loop over to extract the data you need:
// Open every URL collected in temp_pages and save the text of each link
// found on that page to hiprsites.txt.
for (i = 0; i < temp_pages.length; i += 1)
{
    iimDisplay("Loop " + (i+1) + " of " + temp_pages.length);
    iimSet("url", temp_pages[i]);
    iimPlay("CODE:URL GOTO={{url}}");
    // Walk the links by position until the extract comes back as "#EANF#".
    for (j = 1; ; j += 1)
    {
        iimSet("j", j);
        iimPlay("CODE:TAG POS={{j}} TYPE=A ATTR=HREF:* EXTRACT=TXT");
        if (iimGetLastExtract() === '#EANF#')
            break;
        // Re-inject the extracted text so SAVEAS writes it out.
        iimSet("ext", iimGetLastExtract());
        iimPlay("CODE:SET !EXTRACT {{ext}}\nSAVEAS TYPE=EXTRACT FOLDER=C:\\ FILE=hiprsites.txt");
    }
}
As a result, hiprsites.txt will contain a lot of anchor texts (I don't know why you need this information). If you want to extract the hrefs instead, change TAG POS={{j}} TYPE=A ATTR=HREF:* EXTRACT=TXT
to TAG POS={{j}} TYPE=A ATTR=HREF:* EXTRACT=HREF.
Good luck :)
Try to review your code before posting the question, to prevent such silly mistakes:
// Macro that opens the top-sites URL ending in the value bound to {{z}}.
var macroAllC = "CODE:"
    + "VERSION BUILD=9002379" + jsLF
    + "TAB T=1" + jsLF
    + "TAB CLOSEALLOTHERS" + jsLF
    + "URL GOTO=http://www.alexa.com/topsites/countries/{{z}}" + jsLF;
// Two-letter country suffixes to visit.
var countries = ["RU", "PK", "GR"];
for (var z=0;z<200;z++)
{
iimDisplay(z);
iimSet("z", countries[z]);
iimPlay(macroAllC);