Does anyone know how I would be able to take the the URL as an argument in Python as page? Just to readline in the script, user inputs into the shell and pass it through as an argument just to make the script more portable?
import sys, re
import webpage_get
def print_links(page):
''' find all hyperlinks on a webpage passed in as input and
print '''
print '\n[*] print_links()'
links = re.findall(r'(\http://\w+\.\w+[-_]*\.*\w+\.*?\w+\.*?\w+\.*[//]*\.*?\w+ [//]*?\w+[//]*?\w+)', page)
# sort and print the links
links.sort()
print '[+]', str(len(links)), 'HyperLinks Found:'
for link in links:
print link
def main():
# temp testing url argument
sys.argv.append('http://www.4chan.org')
# Check args
if len(sys.argv) != 2:
print '[-] Usage: webpage_getlinks URL'
return
# Get the web page
page = webpage_get.wget(sys.argv[1])
# Get the links
print_links(page)
if __name__ == '__main__':
main()