I have an automation script in CasperJS controlling a PhantomJS headless browser that logs into a site and enters data across multiple pages/forms.
From the same physical server, I have PHP/MySQL serving up a CRM client website. On the CRM site, I want to have the ability to:
- Trigger the remote CasperJS script to go browse a remote site and log in and fill out forms
- Read the output stream (i.e. "Page 1 complete, page 2 complete", etc.)
- Display the status updates to the client user as the CasperJS script is executing
I am thinking that socket.io is the ticket here. But am I going about this all wrong? I am trying to avoid having a Selenium server running. I checked this answer on SO, but I am not looking for screenshots; I'm looking for the console output from CasperJS to be displayed in the client website.
I had a similar task once and concocted a solution using a local Express.js server with Socket.io.
You would launch this server with Node.js and then pass tasks to it from PHP by making POST requests to http://127.0.0.1:9000/scrape
(I used the excellent Requests library).
Here's a simplified version of my script:
// Core and third-party modules used by the server
var fs = require("fs");
var http = require("http");
var express = require("express");
var socketio = require("socket.io");
var bodyParser = require('body-parser');

// Express app wrapped in a plain HTTP server, with Socket.io attached to it
var app = express();
var server = http.Server(app);
var io = socketio(server);

// Most recently connected Socket.io client (assigned in the connection handler)
var iosocket;

// Express middleware that parses urlencoded POST bodies into req.body
app.use(bodyParser.urlencoded({ extended: true }));
// Remember the client socket every time a Socket.io connection is established,
// so that later code can push messages to it
io.on("connection", function onConnection(client) {
    iosocket = client;
    console.log('io.js connection');
});
// Receive a task from an external POST request.
//
// Expects a urlencoded body with a `url` field. Replies right away, then runs
// the CasperJS script in the background: every chunk the script prints to
// stdout is collected and forwarded to the connected Socket.io client as a
// "message" event, and the collected output is written to a JSON file once the
// script exits.
app.post("/scrape", function (req, res) {
    // Url to parse
    var url = req.body && req.body.url;

    // Without a non-empty string URL there is nothing to hand to the scraper
    if (typeof url !== "string" || url === "") {
        res.status(400).send("Missing url parameter");
        return;
    }

    res.send("Request accepted");

    // Collects everything the scraper prints. Deliberately NOT named like the
    // callback parameters below, otherwise the array gets shadowed.
    var results = [];

    // Launch scraping script (arguments are passed as an array, not via a shell)
    var spawn = require('child_process').spawn;
    var child = spawn('/path/to/casperjs', ['/path/to/scrape/script.js', url]);
    console.log("Spawned parser");

    // A process that cannot be started reports it through 'error'; without a
    // listener that event would be thrown and take the whole server down
    child.on('error', function (err) {
        console.log('Failed to run scraper: ' + err.message);
    });

    // Receive data from script
    child.stdout.on('data', function (chunk) {
        var message = chunk.toString();
        results.push(message);

        // Send data to the web client, if one has connected yet
        if (iosocket) {
            iosocket.emit("message", message);
        }
    });

    // On error output
    child.stderr.on('data', function (chunk) {
        console.log('stderr: ' + chunk.toString());
    });

    // On scraper exit
    child.on('close', function (code) {
        console.log("Scraper exited with code: " + code);

        //
        // Put data into a file or a database, for example
        //
        try {
            fs.writeFileSync("path/to/file/results_" + (new Date()).getTime() + ".json", JSON.stringify(results));
        } catch (err) {
            // A failed write must not crash the server from inside this callback
            console.log('Failed to save results: ' + err.message);
        }
    });
});
// Start listening on port 9000 of the loopback address (127.0.0.1)
server.listen({ port: 9000, host: "127.0.0.1" });
A solution using a CasperJS/PhantomJS server is interesting; however, people have pointed out that it leaks memory, which probably won't be a problem if you run short-lived CasperJS scripts.