Blacklisting URLs in PhantomJS and GhostDriver is pretty straightforward. First initialize the driver with a handler:
// Create the PhantomJS-backed driver.
PhantomJSDriver driver = new PhantomJSDriver();
// Run the handler script inside PhantomJS. loadFile(...) is a helper not shown here;
// presumably it returns the script source as a String -- confirm against its definition.
driver.executePhantomJS(loadFile("/phantomjs/handlers.js"));
And configure the handler:
/**
 * Resource-request filter: aborts every request whose URL is not matched by
 * an allowed pattern, or is matched by a disallowed pattern.
 *
 * @param {Object} requestData    request metadata; `url` and `id` are read here
 * @param {Object} networkRequest handle of the pending request; `abort()` is
 *                                called on it when the URL is rejected
 */
this.onResourceRequested = function (requestData, networkRequest) {
    // Patterns are anchored with ^ because RegExp.test() otherwise matches
    // anywhere in the string (e.g. inside a query parameter). Dots that are
    // part of a host name are escaped so they only match a literal '.'.
    var allowedUrls = [
        /^https?:\/\/localhost.*/,
        /^https?:\/\/.*\.example\.com\/?.*/
    ];
    var disallowedUrls = [
        /^https?:\/\/nonono\.com.*/
    ];

    // Builds a predicate for Array.prototype.some that tests a regex against `url`.
    function matchesUrl(url) {
        return function (re) {
            return re.test(url);
        };
    }

    // A URL passes only if it is allowed AND not disallowed; the disallow list wins.
    function isUrlAllowed(url) {
        var matchesPattern = matchesUrl(url);
        return allowedUrls.some(matchesPattern) && !disallowedUrls.some(matchesPattern);
    }

    if (!isUrlAllowed(requestData.url)) {
        console.log("Aborting disallowed request (# " + requestData.id + ") to url: '" + requestData.url + "'");
        networkRequest.abort();
    }
};
I haven't found a good way to do this with HtmlUnitDriver. There's the ScriptPreProcessor mentioned in "How to filter javascript from specific urls in HtmlUnit", but it uses WebClient, not HtmlUnitDriver. Any ideas?
Extend HtmlUnitDriver, override modifyWebClient, and install a ScriptPreProcessor (for editing content) and an HttpWebConnection (for allowing/blocking URLs):
/**
 * An {@code HtmlUnitDriver} that blocks requests by URL and rewrites script source.
 *
 * <p>A request is forwarded to the network only when its URL fully matches at least one
 * {@code ALLOWED_URLS} pattern and no {@code DISALLOWED_URLS} pattern; every other request
 * is answered with an empty {@code 200} response. Script source is passed through
 * {@link #editContent(String)} via HtmlUnit's {@code ScriptPreProcessor} hook.
 */
public class FilteringHtmlUnitDriver extends HtmlUnitDriver {

    // Patterns are compiled once; String.matches(...) would recompile the regex on every
    // request. java.util.regex.Pattern is fully qualified so no additional import is needed.
    // Dots belonging to host names are escaped so they match only a literal '.'.
    private static final java.util.regex.Pattern[] ALLOWED_URLS = compileAll(
            "https?://localhost.*",
            "https?://.*\\.yes\\.yes/?.*");

    private static final java.util.regex.Pattern[] DISALLOWED_URLS = compileAll(
            "https?://spam\\.nono.*");

    /**
     * Creates a filtering driver.
     *
     * @param capabilities capabilities passed unchanged to {@code HtmlUnitDriver}
     */
    public FilteringHtmlUnitDriver(DesiredCapabilities capabilities) {
        super(capabilities);
    }

    /**
     * Installs the filtering web connection and the script pre-processor on the client.
     *
     * @param client the client created by the driver
     * @return the same client instance, now configured
     */
    @Override
    protected WebClient modifyWebClient(WebClient client) {
        client.setWebConnection(filteringWebConnection(client));
        client.setScriptPreProcessor(filteringPreProcessor());
        return client;
    }

    /** Returns a pre-processor that replaces each script's source with {@link #editContent}. */
    private ScriptPreProcessor filteringPreProcessor() {
        return (htmlPage, sourceCode, sourceName, lineNumber, htmlElement) ->
                editContent(sourceCode);
    }

    /**
     * Rewrites script source; the regex replacement of {@code foo} by {@code bar} is a
     * placeholder for real edits.
     */
    private String editContent(String sourceCode) {
        return sourceCode.replaceAll("foo", "bar");
    }

    /** Returns a connection that only lets allowed, non-disallowed URLs reach the network. */
    private WebConnection filteringWebConnection(WebClient client) {
        return new HttpWebConnection(client) {
            @Override
            public WebResponse getResponse(WebRequest request) throws IOException {
                String url = request.getUrl().toString();
                // The disallow list wins; anything not explicitly allowed is blocked too.
                if (matchesAnyPattern(DISALLOWED_URLS, url)
                        || !matchesAnyPattern(ALLOWED_URLS, url)) {
                    return blockedResponse(request);
                }
                return super.getResponse(request);
            }
        };
    }

    /** Compiles each regex once so it can be reused for every request. */
    private static java.util.regex.Pattern[] compileAll(String... regexes) {
        java.util.regex.Pattern[] compiled = new java.util.regex.Pattern[regexes.length];
        for (int i = 0; i < regexes.length; i++) {
            compiled[i] = java.util.regex.Pattern.compile(regexes[i]);
        }
        return compiled;
    }

    /** Returns true if {@code url} fully matches at least one pattern (as String.matches does). */
    private static boolean matchesAnyPattern(java.util.regex.Pattern[] patterns, String url) {
        for (java.util.regex.Pattern pattern : patterns) {
            if (pattern.matcher(url).matches()) {
                return true;
            }
        }
        return false;
    }

    /** Builds the empty 200 response returned in place of a blocked request. */
    private static WebResponse blockedResponse(WebRequest request) {
        WebResponseData data = new WebResponseData(new byte[0], SC_OK, "", new ArrayList<>());
        return new WebResponse(data, request, 0);
    }
}
This enables both editing of content and blocking of URLs.