以HTML形式获取谷歌文档(Get Google Document as HTML)

2019-07-19 03:48发布

我有一个大胆的想法:用谷歌云端硬盘中的文档作为后台,为一位不太懂技术的朋友搭建一个博客网站。 我已经创建了一个能够汇总文档列表的 contentService。 不过,我找不到把文档转换为 HTML 的方法。 我知道谷歌可以在网页中渲染文档,所以想知道是否可以获取渲染后的版本,用在我的内容服务中。

这可能吗?

Answer 1:

GAS(Google Apps Script)中没有直接获取文档 HTML 版本的方法,这是一个相当老的功能增强请求;不过最初由恩里克·阿布雷乌描述的变通方法效果很好,我一直在用它……

唯一恼人的是授权过程需要从脚本编辑器中调用,这使它不便在共享应用中使用(面对不会使用脚本的用户),不过授权只需进行一次。

此外还有一个由罗曼 Vialard 创建的库,它让事情(稍微)容易一些……并增加了一些其他有趣的功能。



Answer 2:

你可以试试下面的代码:

  /**
   * Exports the currently active Google Doc as HTML through the
   * docs.google.com export endpoint and writes the returned text to the
   * Apps Script log. Nothing is returned.
   */
  function getGoogleDocumentAsHTML(){
    var docId = DocumentApp.getActiveDocument().getId();

    // Kept from the original snippet: a DriveApp call is made so that the
    // Drive scope gets requested for the OAuth token used below.
    DriveApp.getStorageUsed();

    var exportUrl = "https://docs.google.com/feeds/download/documents/export/Export?id=" + docId + "&exportFormat=html";

    var authHeaders = {};
    authHeaders["Authorization"] = "Bearer " + ScriptApp.getOAuthToken();

    var response = UrlFetchApp.fetch(exportUrl, {
      method: "get",
      headers: authHeaders,
      muteHttpExceptions: true
    });

    Logger.log(response.getContentText());
  }


Answer 3:

Node.js的解决方案

下面演示如何使用谷歌云端硬盘的 Node.js 客户端库,以 HTML 形式获取谷歌文档。

// Import the googleapis npm package.
var google = require('googleapis');

// Placeholders to fill in: the ID of the Google Doc to export and an OAuth
// access token used to authorize the request.
var fileId = '<google drive doc file id>',
    accessToken = '<oauth access token>';

// OAuth setup: create a client and attach the access token to it.
var OAuth2 = google.auth.OAuth2,
    OAuth2Client = new OAuth2();

OAuth2Client.setCredentials({access_token: accessToken});

// Google Drive v3 client authenticated with the OAuth client above.
var drive = google.drive({version: 'v3', auth: OAuth2Client});

// Export the file as text/html. The response arrives in chunks that are
// collected here and joined once the stream ends.
var buffers = [];
drive.files.export(
    {
        fileId: fileId,
        mimeType: 'text/html'
    }
)
    .on('error', function(err) {
        // Report the failure instead of silently dropping it, so a failed
        // export is distinguishable from one that simply produced no output.
        console.error('Failed to export Google Doc as HTML:', err);
    })
    .on('data', function(data) {
        buffers.push(data); // data is a buffer
    })
    .on('end', function() {
        // Join all received chunks and decode them into the final HTML string.
        var buffer = Buffer.concat(buffers),
            googleDocAsHtml = buffer.toString();
        console.log(googleDocAsHtml);
    });

更多语言和选项请参阅谷歌云端硬盘 V3 的下载文档。

需要注意的是,谷歌 API 的 Node.js 客户端仍处于 Alpha 阶段(2017 年 1 月)。



Answer 4:

下面是一小段代码片段,沿用恩里克发布的思路,适用于新版的谷歌 OAuth:

/**
 * Downloads the active Google Doc as HTML via the docs.google.com export
 * endpoint, authorizing the request with the script's OAuth token.
 *
 * @return {string} the text of the export response
 */
function exportAsHTML(){
  // Kept from the original snippet: a DriveApp call is made so that the
  // Drive scope gets requested for the OAuth token used below.
  DriveApp.getStorageUsed();

  var activeDocId = DocumentApp.getActiveDocument().getId();
  var exportUrl = "https://docs.google.com/feeds/download/documents/export/Export?id=" + activeDocId + "&exportFormat=html";

  var bearer = "Bearer " + ScriptApp.getOAuthToken();
  var requestOptions = {
    method: "get",
    headers: {"Authorization": bearer},
    muteHttpExceptions: true
  };

  var response = UrlFetchApp.fetch(exportUrl, requestOptions);
  return response.getContentText();
}

然后使用常规的 MailApp:

/**
 * Sends an email whose HTML body is whatever exportAsHTML() returns.
 * The recipient and subject are the hard-coded sample values below.
 */
function mailer(){
  var exportedHtml = exportAsHTML();

  var message = {
    to: "email@mail.com",
    subject: "document emailer"
  };
  message.htmlBody = exportedHtml;

  MailApp.sendEmail(message);
}

希望这个新的变通方法有所帮助。

JD



Answer 5:

您可以使用该解决方案在这里

/**
 * Converts a file to HTML. The Advanced Drive service must be enabled to use
 * this function.
 *
 * Looks up the file's 'text/html' export link through Drive.Files.get and
 * downloads it with the script's OAuth token.
 *
 * @param {string} fileId the ID of the file to convert
 * @return {string} the HTML text returned by the export link
 * @throws {string} 'File cannot be converted to HTML.' when the file has no
 *     'text/html' export link, or 'Error converting to HTML: ...' when the
 *     export request does not answer with HTTP 200
 */
function convertToHtml(fileId) {
  var file = Drive.Files.get(fileId);

  // Guard against a missing exportLinks map so such files produce the
  // intended message instead of a TypeError.
  var exportLinks = file.exportLinks;
  var htmlExportLink = exportLinks && exportLinks['text/html'];
  if (!htmlExportLink) {
    throw 'File cannot be converted to HTML.';
  }

  var oAuthToken = ScriptApp.getOAuthToken();
  var response = UrlFetchApp.fetch(htmlExportLink, {
    headers:{
      'Authorization': 'Bearer ' + oAuthToken
    },
    // HTTP errors are returned instead of thrown; the status is checked below.
    muteHttpExceptions: true
  });

  // The original wrote `!response.getResponseCode() == 200`, which negates the
  // code first and therefore never detected a failed request.
  if (response.getResponseCode() !== 200) {
    throw 'Error converting to HTML: ' + response.getContentText();
  }
  return response.getContentText();
}

将谷歌文档的 ID 作为 fileId 传入,并按照此处的说明启用高级云端硬盘服务(Advanced Drive Service)。



Answer 6:

我也遇到过这个问题。 文档的 HTML 导出功能输出的 HTML 实在太丑了,所以下面是我的解决方案:

/**
 * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the markup, and returns the resulting HTML string.
 *
 * The cleaned HTML is stored in the script cache under the doc ID for 15
 * minutes. When caching is enabled and an entry exists, it is returned without
 * contacting the export endpoint.
 *
 * @param {string} id the id of the google doc
 * @param {boolean} [useCaching] enable or disable reading from the cache. default true.
 * @return {string} the doc's body in html format
 * @throws {string} when id is missing, useCaching is not a boolean, the file
 *     cannot be opened, or the export request does not answer with HTTP 200
 */
function getContent(id, useCaching) {

  if (!id) {
    throw "Please call this API with a valid Google Doc ID";
  }

  // null/undefined both mean "not specified" and fall back to the default.
  var cachingEnabled = (useCaching == null) ? true : useCaching;

  if (typeof cachingEnabled !== "boolean") {
    throw "If you're going to specify useCaching, it must be boolean.";
  }

  var cache = CacheService.getScriptCache();

  // Only consult the cache when the caller allows it.
  if (cachingEnabled) {
    var cached = cache.get(id); // see if we have a cached version of our parsed html
    if (cached) {
      Logger.log("Pulling doc html from cache...");
      Logger.log(cached);
      return cached;
    }
  }

  Logger.log("Grabbing and parsing fresh html from the doc...");

  // Opening the file first turns an unknown or inaccessible ID into a readable message.
  try {
    DriveApp.getFileById(id);
  } catch (err) {
    throw "Please call this API with a valid Google Doc ID. " + err.message;
  }

  DriveApp.getStorageUsed(); // needed to get Drive Scope requested in ScriptApp.getOAuthToken();
  var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
  var param = {
    method: "get",
    headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
    // HTTP errors are returned instead of thrown; the status is checked below.
    muteHttpExceptions: true
  };

  var response = UrlFetchApp.fetch(url, param);

  // Without this check an error page would be cleaned up, cached for 15
  // minutes and handed back as if it were the document.
  if (response.getResponseCode() !== 200) {
    throw "Error exporting Google Doc as HTML (HTTP " + response.getResponseCode() + ")";
  }

  var html = response.getContentText();

  // nuke the whole head section, including the stylesheet and meta tag
  // ([\s\S] instead of . so a head that spans several lines is removed too)
  html = html.replace(/<head>[\s\S]*?<\/head>/, '');
  // remove almost all html attributes
  html = html.replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '');
  // remove all of the spans, as well as the outer html and body
  html = html.replace(/<(span|\/span|body|\/body|html|\/html)>/g, '');
  // clearly the superior way of denoting line breaks
  html = html.replace(/<br>/g, '<br />');

  // Caching is best-effort: if the cache refuses the value (presumably when it
  // exceeds the per-key size limit; confirm against CacheService docs), still
  // return the freshly parsed HTML.
  try {
    cache.put(id, html, 900); // cache doc contents for 15 minutes, in case we get a lot of requests
  } catch (err) {
    Logger.log("Could not cache doc html: " + err.message);
  }

  Logger.log(html);

  return html;

}

https://gist.github.com/xd1936/cc229d14a89e6327336177bb07ac2980



Answer 7:

也许这对你有用?

/**
 * Web-app entry point: serves the Google Doc with the placeholder ID
 * 'myFileId' as an HTML page.
 *
 * The original snippet called File.getAsHTML(), which is not part of the
 * documented DriveApp File API, so the HTML is fetched from the
 * docs.google.com export endpoint instead.
 *
 * @return {HtmlOutput} the output created by HtmlService from the exported HTML
 * @throws {Error} when the export request does not answer with HTTP 200
 */
function doGet() {
  // Opening the file through DriveApp fails early for an unknown or
  // inaccessible ID before any export request is made.
  var file = DriveApp.getFileById('myFileId');

  var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + file.getId() + "&exportFormat=html";
  var response = UrlFetchApp.fetch(url, {
    method: "get",
    headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
    // HTTP errors are returned instead of thrown; the status is checked below.
    muteHttpExceptions: true
  });

  if (response.getResponseCode() !== 200) {
    throw new Error("Error exporting Google Doc as HTML (HTTP " + response.getResponseCode() + ")");
  }

  return HtmlService.createHtmlOutput(response.getContentText());
}


文章来源: Get Google Document as HTML