如何使用Apache POI将altChunk元素添加到XWPFDocument(How to add an altChunk element to a XWPFDocument using Apache POI)

2019-10-29 15:09发布

我想使用Apache POI把HTML作为altChunk添加到DOCX文件中。 我知道docx4j可以用更简单的API做到这一点,但由于技术上的原因,我必须使用Apache POI。

使用CT类对XML进行底层操作有点棘手。 我可以用下面的代码创建altChunk:

import java.io.File;
import java.io.FileOutputStream;

import javax.xml.namespace.QName;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.impl.values.XmlComplexContentImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTBodyImpl;

/**
 * Minimal reproduction from the question: creates a document with one paragraph,
 * appends an empty {@code w:altChunk} element to the body through the low-level
 * XmlBeans store, and writes the result to {@code test.docx}.
 *
 * <p>The HTML payload is intentionally not attached here; how to do that is the
 * open question this listing illustrates.
 */
public class AltChunkTest {
    /**
     * Builds the document and saves it to {@code test.docx} in the working directory.
     *
     * @param args unused
     * @throws Exception if the document cannot be built or written
     */
    public static void main(String[] args) throws Exception  {
        // try-with-resources: the document and the stream are released even when
        // building or writing fails.
        try (XWPFDocument doc = new XWPFDocument()) {
            doc.createParagraph().createRun().setText("AltChunk below:");

            QName altChunkName = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");
            CTDocument1 ctDoc = doc.getDocument();
            CTBodyImpl ctBody = (CTBodyImpl) ctDoc.getBody();
            // Adds the raw <w:altChunk> element without going through CTBody.addNewAltChunk().
            XmlComplexContentImpl xcci = (XmlComplexContentImpl) ctBody.get_store().add_element_user(altChunkName);
            // Open question: what is needed here to attach "<b>Hello World!</b>" to xcci?

            try (FileOutputStream out = new FileOutputStream(new File("test.docx"))) {
                doc.write(out);
            }
        }
    }
}

但是现在我该怎样把HTML内容添加到“xcci”中呢?

Answer 1:

在Word的Office Open XML格式(*.docx)中,altChunk提供了一种使用纯HTML来描述文档部分的方法。

关于altChunk有两点重要说明:

第一:它仅用于导入内容。 如果用Word打开文档并保存,新保存的文件将既不包含替代格式的内容部分,也不包含引用它的altChunk标记。 Word会把所有导入的内容保存为默认的Office Open XML元素。

第二:除Word之外,大多数能够读取*.docx的应用程序根本不会读取altChunk内容。 例如,Libreoffice或OpenOffice Writer不会读取altChunk内容,apache poi在打开*.docx文件时同样不会读取altChunk内容。

altChunk在*.docx ZIP文件结构中是如何实现的?

*.docx ZIP文件中包含/word/*.html文件。 这些文件在/word/document.xml中通过ID被引用,例如<w:altChunk r:id="htmlDoc1"/>。 ID与/word/*.html文件之间的关系在/word/_rels/document.xml.rels中给出,例如<Relationship Id="htmlDoc1" Target="htmlDoc1.html" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk"/>。

因此,我们首先需要为/word/*.html文件创建POIXMLDocumentPart,并为ID与/word/*.html文件之间的关系创建POIXMLRelation。 下面的代码通过一个继承POIXMLDocumentPart的包装类来表示*.docx ZIP压缩文件中的/word/htmlDoc#.html文件,并提供了操作HTML的方法。 此外,它还提供了一个方法,用于在*.docx ZIP压缩文件中创建/word/htmlDoc#.html文件并建立到该文件的关系。

代码:

import java.io.*;

import org.apache.poi.*;
import org.apache.poi.ooxml.*;
import org.apache.poi.openxml4j.opc.*;

import org.apache.poi.xwpf.usermodel.*;

/**
 * Demonstrates embedding HTML into a {@code *.docx} file through {@code w:altChunk}.
 *
 * <p>For every HTML chunk a package part {@code /word/<id>.html} is created, a
 * relationship of type {@code aFChunk} links it to the main document part, and a
 * {@code w:altChunk} element whose id is that relationship id is appended to the body.
 */
public class CreateWordWithHTMLaltChunk {

 /**
  * Creates the package part {@code /word/<id>.html} in the document's package and
  * registers a relationship from the document to it under the same id.
  *
  * @param document the document whose package receives the HTML part
  * @param id       relationship id and file base name, e.g. {@code htmlDoc1}
  * @return the wrapper holding the HTML that is written out when the document is saved
  * @throws Exception if the part name is invalid or the part cannot be created
  */
 private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
  OPCPackage oPCPackage = document.getPackage();
  PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
  PackagePart part = oPCPackage.createPart(partName, "text/html");
  MyXWPFHtmlDocument htmlDocument = new MyXWPFHtmlDocument(part, id);
  document.addRelation(htmlDocument.getId(), new XWPFHtmlRelation(), htmlDocument);
  return htmlDocument;
 }

 /**
  * Builds a document with two plain paragraphs, each followed by an HTML chunk, and
  * writes it to {@code CreateWordWithHTMLaltChunk.docx} in the working directory.
  *
  * @param args unused
  * @throws Exception if building or writing the document fails
  */
 public static void main(String[] args) throws Exception {

  // try-with-resources: the document and the stream are closed even when a step
  // in between throws, in the same order as the explicit close() calls they replace.
  try (XWPFDocument document = new XWPFDocument()) {

   XWPFParagraph paragraph;
   XWPFRun run;
   MyXWPFHtmlDocument htmlDocument;

   paragraph = document.createParagraph();
   run = paragraph.createRun();
   run.setText("Default paragraph followed by first HTML chunk.");

   htmlDocument = createHtmlDoc(document, "htmlDoc1");
   htmlDocument.setHtml(htmlDocument.getHtml().replace("<body></body>",
    "<body><p>Simple <b>HTML</b> <i>formatted</i> <u>text</u></p></body>"));
   // The altChunk id must equal the relationship id registered in createHtmlDoc.
   document.getDocument().getBody().addNewAltChunk().setId(htmlDocument.getId());

   paragraph = document.createParagraph();
   run = paragraph.createRun();
   run.setText("Default paragraph followed by second HTML chunk.");

   htmlDocument = createHtmlDoc(document, "htmlDoc2");
   htmlDocument.setHtml(htmlDocument.getHtml().replace("<body></body>",
    "<body>" +
    "<table>"+
    "<caption>A table></caption>" +
    "<tr><th>Name</th><th>Date</th><th>Amount</th></tr>" +
    "<tr><td>John Doe</td><td>2018-12-01</td><td>1,234.56</td></tr>" +
    "</table>" +
    "</body>"
    ));
   document.getDocument().getBody().addNewAltChunk().setId(htmlDocument.getId());

   try (FileOutputStream out = new FileOutputStream("CreateWordWithHTMLaltChunk.docx")) {
    document.write(out);
   }
  }

 }

 /**
  * Wrapper for one {@code /word/htmlDoc#.html} part in the {@code *.docx} ZIP archive.
  * Holds the HTML as a string and writes it into the part on commit.
  *
  * <p>TODO: HTML should <em>not</em> be manipulated with String methods.
  */
 private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {

  private String html;
  private String id;

  /**
   * @param part the package part backing this HTML document
   * @param id   the relationship id under which the part is referenced
   */
  private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
   super(part);
   // Skeleton with an empty <body></body> that callers replace with real content.
   // NOTE(review): the template carries no closing </html> tag; left as-is — confirm
   // the consuming application tolerates this before changing it.
   this.html = "<!DOCTYPE html><html><head><style></style><title>HTML import</title></head><body></body>";
   this.id = id;
  }

  /** @return the relationship id of this HTML part */
  private String getId() {
   return id;
  }

  /** @return the current HTML content */
  private String getHtml() {
   return html;
  }

  /** @param html the HTML content to write when the part is committed */
  private void setHtml(String html) {
   this.html = html;
  }

  /**
   * Writes the current HTML, encoded as UTF-8, into the backing package part.
   *
   * @throws IOException if the part's output stream cannot be opened or written
   */
  @Override
  protected void commit() throws IOException {
   PackagePart part = getPackagePart();
   // Both resources are closed (writer first, then stream) even if write() fails.
   try (OutputStream out = part.getOutputStream();
        Writer writer = new OutputStreamWriter(out, "UTF-8")) {
    writer.write(html);
   }
  }

 }

 /**
  * Relation descriptor for {@code /word/htmlDoc#.html}: content type {@code text/html}
  * and relationship type {@code aFChunk}.
  */
 private final static class XWPFHtmlRelation extends POIXMLRelation {
  private XWPFHtmlRelation() {
   super(
    "text/html", 
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk", 
    "/word/htmlDoc#.html");
  }
 }
}

注:由于使用了altChunk,此代码需要包含全部模式的完整jar包ooxml-schemas-*.jar,参见Apache POI FAQ-N10025。

结果:



Answer 2:

基于阿克塞尔·里希特(Axel Richter)的回答,我把对CTBody.addNewAltChunk()的调用替换为CTBodyImpl.get_store().add_element_user(QName),从而省去了额外15MB的ooxml-schemas依赖。 由于这段代码用于桌面应用程序,我们希望应用程序的体积尽可能小。 希望对其他人有所帮助:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;

import javax.xml.namespace.QName;

import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.ooxml.POIXMLRelation;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.xmlbeans.SimpleValue;
import org.apache.xmlbeans.impl.values.XmlComplexContentImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTBodyImpl;

/**
 * Embeds an HTML fragment into a {@code *.docx} file through {@code w:altChunk}
 * without calling {@code CTBody.addNewAltChunk()}: the element and its {@code r:id}
 * attribute are added directly through the XmlBeans store.
 */
public class AltChunkTest {
    /**
     * Creates a document with one paragraph followed by an HTML chunk and writes it
     * to {@code test.docx} in the working directory.
     *
     * @param args unused
     * @throws Exception if building or writing the document fails
     */
    public static void main(String[] args) throws Exception  {
        // try-with-resources: the document and the stream are released even when
        // adding the chunk or writing fails.
        try (XWPFDocument doc = new XWPFDocument()) {
            doc.createParagraph().createRun().setText("AltChunk below:");
            addHtml(doc,"chunk1","<!DOCTYPE html><html><head><style></style><title></title></head><body><b>Hello World!</b></body></html>");
            try (FileOutputStream out = new FileOutputStream(new File("test.docx"))) {
                doc.write(out);
            }
        }
    }

    /**
     * Adds {@code html} to {@code doc} as an alternative-format chunk.
     *
     * <p>Creates the package part {@code /word/<id>.html}, registers a relationship
     * with id {@code id} from the document to that part, and appends a
     * {@code w:altChunk} element whose {@code r:id} attribute is {@code id}.
     *
     * @param doc  the document to extend
     * @param id   relationship id, also used as the HTML file's base name
     * @param html the HTML written into the part when the document is saved
     * @throws Exception if the part name is invalid or the part cannot be created
     */
    static void addHtml(XWPFDocument doc, String id,String html) throws Exception {
        OPCPackage oPCPackage = doc.getPackage();
        PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
        PackagePart part = oPCPackage.createPart(partName, "text/html");

        // Relation descriptor: content type text/html, relationship type aFChunk.
        class HtmlRelation extends POIXMLRelation {
            private HtmlRelation() {
                super(  "text/html",
                        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk",
                        "/word/htmlDoc#.html");
            }
        }

        // Document part that writes the captured html into the captured package part.
        class HtmlDocumentPart extends POIXMLDocumentPart {
            private HtmlDocumentPart(PackagePart part) throws Exception {
                super(part);
            }

            @Override
            protected void commit() throws IOException {
                try (OutputStream out = part.getOutputStream();
                     Writer writer = new OutputStreamWriter(out, "UTF-8")) {
                    writer.write(html);
                }
            }
        }

        HtmlDocumentPart documentPart = new HtmlDocumentPart(part);
        doc.addRelation(id, new HtmlRelation(), documentPart);

        // Append <w:altChunk r:id="..."/> to the body through the XmlBeans store.
        CTBodyImpl body = (CTBodyImpl) doc.getDocument().getBody();
        QName altChunkName = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");
        XmlComplexContentImpl altChunk = (XmlComplexContentImpl) body.get_store().add_element_user(altChunkName);
        QName idName = new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");
        SimpleValue target = (SimpleValue) altChunk.get_store().add_attribute_user(idName);
        target.setStringValue(id);
    }
}


文章来源: How to add an altChunk element to a XWPFDocument using Apache POI