Embedding HSSF(excel) into HSLF(ppt) using apache

2019-01-15 15:04发布

问题:

I want to embed the excel sheet into presentation(PPT) using apache poi. how can we do this? If anyones knows, please help me.

回答1:

This took me a while to figure out how the parts belong together ...

The embedding can be done in two ways:

  • by updating an already embedded worksheet
    • Pro: just call ObjectData.get/setData() and your are done
    • Con: what if you want to have more than one OLE object embedded?
  • or you can embed the elements from scratch (see below)

As usual when I try to figure out, how to implement certain POI features, I'm comparing the results with Libre Office files, in this case a few parts had to be created/modified:

  • in the Powerpoint object ...
    • the binary data of the emebedded object is stored as a root level record. Most of the root records are position dependent, so you need to recalc all their offsets when a new record, e.g. a slide, is created
    • the binary data record is referenced by embedding records inside the Document record
    • ... and to obfuscate it a bit more, this document reference is referenced once more by the actual shape object
  • in the POIFS of the embedded worksheet ...
    • an Ole Stream entry needs to be created
    • and the root node has to have the class-id of the embedded document type
    • apart from that, there are no changes on the embedded workbook object neccessary and the data itself, is a self-contained excel file

Furthermore I've used the two practical info classes: BiffViewer and POIFSLister.

As this is just a proof of concept, it is by far from complete. For further modifications on the representation of the embedded elements, you'll need to consult the spec.

There is still an unsolved issue of creating a preview image for the embedded object. You might want to use a neutral image, which is replaced anyway, as soon as user activates (double-clicks) the ole object ... An alternative would be to use jodconverter, but than the POI approach would be a bit senseless ...

(tested with POI3.9 / Libre Office 4.0 / MS Excel Viewer / MS Office 2003)

import java.awt.geom.Rectangle2D;
import java.io.*;
import java.lang.reflect.Field;

import org.apache.poi.POIDocument;
import org.apache.poi.ddf.*;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.model.Picture;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.usermodel.*;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.*;

public class PoiOleXlsInPpt {
    static final OleType EXCEL97      = new OleType("{00020820-0000-0000-C000-000000000046}");
    static final OleType EXCEL95      = new OleType("{00020810-0000-0000-C000-000000000046}");
    static final OleType WORD97       = new OleType("{00020906-0000-0000-C000-000000000046}");
    static final OleType WORD95       = new OleType("{00020900-0000-0000-C000-000000000046}");
    static final OleType POWERPOINT97 = new OleType("{64818D10-4F9B-11CF-86EA-00AA00B929E8}");
    static final OleType POWERPOINT95 = new OleType("{EA7BAE70-FB3B-11CD-A903-00AA00510EA3}");

    static class OleType {
        final String classId;
        OleType(String classId) {
            this.classId = classId;
        }
        ClassID getClassID() {
            ClassID cls = new ClassID();
            byte clsBytes[] = cls.getBytes();
            String clsStr = classId.replaceAll("[{}-]", "");
            for (int i=0; i<clsStr.length(); i+=2) {
                clsBytes[i/2] = (byte)Integer.parseInt(clsStr.substring(i, i+2), 16);
            }
            return cls;
        }
    }

    public static void main(String[] args) throws Exception {
        HSLFSlideShow _hslfSlideShow = HSLFSlideShow.create(); 
        SlideShow ppt = new SlideShow(_hslfSlideShow);

        OLEShape oleShape1 = createOLEShape(getSampleWorkbook1(), ppt, _hslfSlideShow, EXCEL97);
        oleShape1.setAnchor(new Rectangle2D.Double(100,100,100,100));
        OLEShape oleShape2 = createOLEShape(getSampleWorkbook2(), ppt, _hslfSlideShow, EXCEL97);
        oleShape2.setAnchor(new Rectangle2D.Double(300,300,100,100));
        OLEShape oleShape3 = createOLEShape(getSampleDocument(), ppt, _hslfSlideShow, WORD97);
        oleShape3.setAnchor(new Rectangle2D.Double(300,100,100,100));

        // create and link visuals to the ole data
        Slide slide = ppt.createSlide();
        slide.addShape(oleShape1);
        slide.addShape(oleShape2);
        slide.addShape(oleShape3);

        FileOutputStream fos = new FileOutputStream("ole_xls_in_ppt_out2.ppt");
        ppt.write(fos);
        fos.close();
    }

    static OLEShape createOLEShape(
          POIDocument sample
        , SlideShow ppt
        , HSLFSlideShow _hslfSlideShow
        , OleType oleType
    ) throws IOException {
        // generate a preview image
        int prevIdx = generatePreview(ppt, sample);

        // add the data to the SlideShow
        ExEmbed eeEmbed = addOleDataToDocumentRecord(ppt);
        ExOleObjStg exOleObjStg = addOleDataToRootRecords(_hslfSlideShow, sample, oleType);
        eeEmbed.getExOleObjAtom().setObjStgDataRef(exOleObjStg.getPersistId());

        OLEShape oleShape = new OLEShape(prevIdx);
        linkOleDataToShape(oleShape, eeEmbed);

        return oleShape;
    }

    static POIDocument getSampleWorkbook1() {
        HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        sheet.createRow(1).createCell(1).setCellValue("First Workbook");
        return wb;
    }

    static POIDocument getSampleWorkbook2() {
        HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        sheet.createRow(1).createCell(1).setCellValue("Second Workbook");
        return wb;
    }

    // the sample document has apparently a problem,
    // i.e. word inside ms powerpoint crashed, and libre office doesn't display the text
    // it was just a test, if embedding elements != Excel works
    // in case HWPF is interesting to you, you probably know anyway, where the error below is ...
    static POIDocument getSampleDocument() throws IOException {
        FileInputStream fis = new FileInputStream("src/test/resources/empty.doc");
        HWPFDocument doc = new HWPFDocument(fis);
        fis.close();
        Range range = doc.getRange();
        CharacterRun run1 = range.insertAfter("Sample text");    
        run1.setFontSize(11);       
        return doc;
    }

    /**
     * Generates a modified version of the sample element, which
     * contains embedding informations
     */
    static byte[] wrapOleData(POIDocument oleData, OleType oleType) {
        try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            oleData.write(bos);

            ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
            bos.reset();
            POIFSFileSystem poifs = new POIFSFileSystem(bis);

            final String OLESTREAM_NAME = "\u0001Ole";
            DirectoryNode root = poifs.getRoot();
            if (!root.hasEntry(OLESTREAM_NAME)) {
                // the following data was taken from an example libre office document
                // beside this "\u0001Ole" record there were several other records, e.g. CompObj,
                // OlePresXXX, but it seems, that they aren't neccessary
                byte oleBytes[] = { 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
                poifs.createDocument(new ByteArrayInputStream(oleBytes), OLESTREAM_NAME);
            }

            // need to set storage clsid, otherwise embedded object is not recognized
            root.setStorageClsid(oleType.getClassID());

            poifs.writeFilesystem(bos);
            return bos.toByteArray();
        } catch (IOException e) {
            throw new RuntimeException("wth?!", e);
        }
    }


    /**
     * to be defined, how to create a preview image
     * for a start, I've taken just a dummy image, which will be
     * replaced, when the user activates the ole object
     *
     * not really an alternativ:
     * http://stackoverflow.com/questions/16704624/how-to-print-a-workbook-file-made-using-apache-poi-and-java
     * 
     * @return image index of the preview image 
     */
    static int generatePreview(SlideShow ppt, POIDocument oleData) {
        try {
            FileInputStream fis = new FileInputStream("src/test/resources/dilbert-2011-09-28-powerpoint.jpg");
            byte previewImg[] = IOUtils.toByteArray(fis);
            fis.close();
            return ppt.addPicture(previewImg, Picture.JPEG);
        } catch (IOException e) {
            throw new RuntimeException("not really?", e);
        }
    }

    static ExEmbed addOleDataToDocumentRecord(SlideShow ppt) {
        // taken from SlideShow.addControl()
        Document _documentRecord = ppt.getDocumentRecord();
        ExObjList lst = _documentRecord.getExObjList();
        if (lst == null) {
            lst = new ExObjList();
            _documentRecord.addChildAfter(lst, _documentRecord.getDocumentAtom());
            try {
                Field f = Document.class.getDeclaredField("exObjList");
                f.setAccessible(true);
                f.set(_documentRecord, lst);
            } catch (Exception e) {
                throw new RuntimeException("not here", e);
            }
        }
        ExObjListAtom objAtom = lst.getExObjListAtom();
        // increment the object ID seed
        int objectId = (int) objAtom.getObjectIDSeed() + 1;
        objAtom.setObjectIDSeed(objectId);

        ExEmbed exEmbed = new ExEmbed();
        // remove unneccessary infos, so we don't need to specify the type
        // of the ole object multiple times
        Record children[] = exEmbed.getChildRecords();
        exEmbed.removeChild(children[2]);
        exEmbed.removeChild(children[3]);
        exEmbed.removeChild(children[4]);

        ExEmbedAtom eeEmbed = exEmbed.getExEmbedAtom();
        try {
            Field f = ExEmbedAtom.class.getDeclaredField("_data");
            f.setAccessible(true);
            f.set(eeEmbed, new byte[]{0,0,0,0,1/*CantLockServerB*/,0,0,0});
            // oops, there seems to be an error in the default constructor ...
            // should be 8 and not 7 bytes
            setRecordLength(eeEmbed, 8);
        } catch (Exception e) {
            throw new RuntimeException("trust me ;)", e);
        }

        ExOleObjAtom eeAtom = exEmbed.getExOleObjAtom();
        eeAtom.setObjID(objectId);
        eeAtom.setDrawAspect(ExOleObjAtom.DRAW_ASPECT_VISIBLE);
        eeAtom.setType(ExOleObjAtom.TYPE_EMBEDDED);
        // eeAtom.setSubType(ExOleObjAtom.SUBTYPE_EXCEL);
        // should be ignored?!?, see MS-PPT ExOleObjAtom, but Libre Office sets it ...
        eeAtom.setOptions(1226240);

        lst.addChildAfter(exEmbed, objAtom);

        return exEmbed;
    }

    static ExOleObjStg addOleDataToRootRecords(
          HSLFSlideShow _hslfSlideShow
        , POIDocument oleData
        , OleType oleType
    ) throws IOException {
        ExOleObjStg exOleObjStg = new ExOleObjStg();
        int slideRecordPos = _hslfSlideShow.appendRootLevelRecord(exOleObjStg);
        exOleObjStg.setPersistId(slideRecordPos);
        exOleObjStg.setData(wrapOleData(oleData, oleType));

        // taken from SlideShow.createSlide
        Record _records[] = _hslfSlideShow.getRecords();

        // Add the new OLE record into the PersistPtr stuff
        int offset = 0;
        int slideOffset = 0;
        PersistPtrHolder ptr = null;
        UserEditAtom usr = null;
        for (int i = 0; i < _records.length; i++) {
            Record record = _records[i];
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try {
                record.writeOut(out);
            } catch (IOException e) {
                throw new HSLFException(e);
            }

            // Grab interesting records as they come past
            if (_records[i].getRecordType() == RecordTypes.PersistPtrIncrementalBlock.typeID) {
                ptr = (PersistPtrHolder) _records[i];
            }
            if (_records[i].getRecordType() == RecordTypes.UserEditAtom.typeID) {
                usr = (UserEditAtom) _records[i];
            }

            if (i == slideRecordPos) {
                slideOffset = offset;
            }
            offset += out.size();
        }

        // the ole objects needs to know its position within
        // the root records, because it will be later accessed
        // via its index from the shape
        int psrId = usr.getMaxPersistWritten() + 1;
        exOleObjStg.setPersistId(psrId);

        // Last view is now of the slide
        usr.setLastViewType((short) UserEditAtom.LAST_VIEW_SLIDE_VIEW);
        usr.setMaxPersistWritten(psrId); // increment the number of persit objects

        // Add the new slide into the last PersistPtr
        // (Also need to tell it where it is)
        exOleObjStg.setLastOnDiskOffset(slideOffset);
        ptr.addSlideLookup(psrId, slideOffset);

        return exOleObjStg;
    }

    static void linkOleDataToShape(OLEShape oleShape, ExEmbed exEmbed) {
        oleShape.setEscherProperty(EscherProperties.BLIP__PICTUREID, exEmbed.getExOleObjAtom().getObjID());

        EscherSpRecord spRecord = oleShape.getSpContainer().getChildById(EscherSpRecord.RECORD_ID);
        spRecord.setFlags(spRecord.getFlags()|EscherSpRecord.FLAG_OLESHAPE);

        // ExObjRefAtom is not set in OLEShape 
        UnknownEscherRecord uer = new UnknownEscherRecord();
        byte uerData[] = new byte[12];
        LittleEndian.putShort( uerData, 0, (short)0 ); // options = 0
        LittleEndian.putShort( uerData, 2, (short)RecordTypes.ExObjRefAtom.typeID); // recordId
        LittleEndian.putInt( uerData, 4, 4 ); // remaining bytes
        LittleEndian.putInt( uerData, 8, exEmbed.getExOleObjAtom().getObjID() ); // the data
        uer.fillFields(uerData, 0, null);

        EscherContainerRecord uerCont = new EscherContainerRecord();
        uerCont.setRecordId((short)RecordTypes.EscherClientData);
        uerCont.setVersion((short)0x000F); // yes, we are still a container ...
        uerCont.addChildRecord(uer);

        oleShape.getSpContainer().addChildRecord(uerCont);
    }

    static void setRecordLength(Record record, int len) throws NoSuchFieldException, IllegalAccessException {
        Field f = record.getClass().getDeclaredField("_header");
        f.setAccessible(true);
        byte _header[] = (byte[])f.get(record);
        LittleEndian.putInt(_header, 4, len);
        f.set(record, _header);
    }
}