有谁知道我是如何将能够把URL作为Python中的参数作为页? 只是在脚本的ReadLine,用户输入到外壳,并通过把它作为一个参数只是为了让剧本更加便携?
import sys, re
import webpage_get
def print_links(page):
''' find all hyperlinks on a webpage passed in as input and
print '''
print '\n[*] print_links()'
links = re.findall(r'(\http://\w+\.\w+[-_]*\.*\w+\.*?\w+\.*?\w+\.*[//]*\.*?\w+ [//]*?\w+[//]*?\w+)', page)
# sort and print the links
links.sort()
print '[+]', str(len(links)), 'HyperLinks Found:'
for link in links:
print link
def main():
# temp testing url argument
sys.argv.append('http://www.4chan.org')
# Check args
if len(sys.argv) != 2:
print '[-] Usage: webpage_getlinks URL'
return
# Get the web page
page = webpage_get.wget(sys.argv[1])
# Get the links
print_links(page)
if __name__ == '__main__':
main()