puppeteer stream playing <video> to node.js

2020-05-03 12:25发布

问题:

using Puppeteer I'm able to navigate to a certain video src URL, and the MP4 (using a custom build of chronium) plays fine.

NOW: I want to be able to get the video data that's playing and send it to some kind of buffer in node.js that can be saved as a file or sent to a client via a websocket or sent as a response etc.... but I'm not sure how to do it, all I have is the video playing.

I'm not able to just send the URL over to node.js, because in order to view the video file you have to go through the whole puppeteer crawling process (it's not just a static URL, it's dependent on that browser session only, so only puppeteer can view it).

SO: what can I do to get a src URL to a file (or buffer) in nodeJS? this is my current code, if it helps:

var puppeteer = require("puppeteer-core");
var http=require("https");
var fs=require("fs");
var fetch=require("fetch-node");
(async() => {
    var browser = await puppeteer.launch({
        executablePath:"./cobchrome/chrome.exe"
    });
    console.log("Got browser", browser);
    var page = await browser.newPage();

    console.log(page,"got page");
    var agentStr = `Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0`;
    var agent = await page.setUserAgent(agentStr);
    console.log(agent, "Set the user agent");
 //   await page.goto("https://drive.google.com/file/d/17tkL8jPlBIh5XtcX_tNhyDV5nSX8v7f8/preview");
    await page.goto("https://docs.google.com/file/d/1Cyuh41yNfYZU_zL-MHLf_EPJCYnlT7oJ/preview?enablejsapi=1&playerapiid=player4");
    console.log("went to page..");
    await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './downloadscob/'})
    await page.screenshot({path:"shots/onopen.png"});
  //  var btn = await page.$(".ndfHFb-c4YZDc ndfHFb-c4YZDc-AHmuwe-Hr88gd-OWB6Me ndfHFb-c4YZDc-vyDMJf-aZ2wEe ndfHFb-c4YZDc-i5oIFb ndfHFb-c4YZDc-e1YmVc ndfHFb-c4YZDc-TSZdd");
  //  var tst = await page.$("#start-of-content");
    var clickEl = ".ndfHFb-c4YZDc-aTv5jf-bVEB4e-RJLb9c";
    var newClickID = ".ndfHFb-c4YZDc-aTv5jf-NziyQe-LgbsSe";
    var clicker = await page.waitForSelector(newClickID);
    console.log(clicker,"got clicker");
    await page.screenshot({path:"shots/ongotclicker.png"});
    await page.click(clickEl);
    console.log("clicked")
    await page.screenshot({path:"shots/onclicked.png"});
    var frame = await page.waitForSelector("iframe[id=drive-viewer-video-player-object-0]");
    console.log(frame, "got video frame");
    await page.screenshot({path:"shots/ongotframe.png"});



    var cf = await frame.contentFrame();
    await page.screenshot({path:"shots/oncf.png"});
    console.log(cf, "got content frame");
    await cf.waitFor(() => !!document.querySelector("video"))
    await page.screenshot({path:"shots/videoappeared.png"});
    //await cf.waitFor(30000);
 //   var videos = await cf.$("video");
//    console.log(videos, videos.length, "all videos");
    var video = await cf.$("video");
    await page.screenshot({path:"shots/selectedvideo.png"});
    var videoEl = await cf.evaluate(

        v =>{
            var result = {};
            for(var k in v) {
                result[k] = v[k];
            }
            return result;
        },
        video
    );
    var src = videoEl.src;

    var file = fs.createWriteStream("down.mp4");
    console.log("starting to stream");
    var req = http.get(src, r => {
        console.log("finished pipin");
        r.pipe(file); //I REALLY thought this would work but it doesn't do anything
    });
    var start = Date.now();
    await page.screenshot({path:"shots/evalled_vido.png"});
    console.log("$$###VIDEO SOURCE::", "time it took", src);
    await page.goto(src);
    await page.screenshot({path:"shots/wentToNewPage.png"});
  //  await page.waitFor(5000);
    await page.screenshot({path:"shots/maybeItsPlayingNow.png"});
    console.log("ABOUT t oFETHC wit H SOURCE", src)
    var content = await page.content();
    fs.writeFile("outputagain.txt", content, (re) => {
        console.log("saved it?");
    })
    console.log(content);

 // await browser.close();
})();

currently the page.content() at the end just gets the HTML content of the page, not any binary data................