How to get list of Named range,sheet name and refe

2019-07-15 05:56发布

问题:

I'm tring to to read large excel file (size~10MB,.xlsx) .

I'm using below code

Workbook xmlworkbook =WorkbookFactory.create(OPCPackage.openOrCreate(root_path_name_file));

But it's showing Heap memory issue.

I have also seen other solution on StackOverflow some of them given to increase the JVM but i dont want to increase jvm.

Issue 1) We can't use SXSSF (Streaming Usermodel API) because this is only for writing or creating new workbook.

My sole objective to get the number of NamedRange of sheet, Total number of sheet and their sheet name for large excel file.

回答1:

If the requirement is only to get the named ranges and sheet names, then only the /xl/workbook.xml from the *.xlsx ZIPPackage must be parsed since those informations are all stored there.

This is possible by getting the appropriate PackagePart and parsing the XML from this. For parsing XML my favorite is using StAX.

Example code which gets all sheet names and defined named ranges:

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.XMLEvent;

import javax.xml.namespace.QName;

import java.io.File;

import java.util.regex.Pattern;

import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;

class StaxReadOPCPackageParts {

 public static void main(String[] args) {
  try {

   File file = new File("file.xlsx");
   OPCPackage opcpackage = OPCPackage.open(file);

   //get the workbook package part
   PackagePart workbookpart = opcpackage.getPartsByName(Pattern.compile("/xl/workbook.xml")).get(0);

   //create reader for package part            
   XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(workbookpart.getInputStream());

   List<String> sheetNames = new ArrayList<>();
   Map<String, String> definedNames = new HashMap<>();

   boolean isInDefinedName = false;
   String sheetName = "";
   String definedNameName = "";
   StringBuffer definedNameFormula = new StringBuffer();

   while(reader.hasNext()){ //loop over all XML in workbook.xml
    XMLEvent event = (XMLEvent)reader.next();

    if(event.isStartElement()) {
     StartElement startElement = (StartElement)event;
     QName startElementName = startElement.getName();
     if(startElementName.getLocalPart().equalsIgnoreCase("sheet")) { //start element of sheet definition
      Attribute attribute = startElement.getAttributeByName(new QName("name"));
      sheetName = attribute.getValue();
      sheetNames.add(sheetName);
     } else if (startElementName.getLocalPart().equalsIgnoreCase("definedName")) { //start element of definedName
      Attribute attribute = startElement.getAttributeByName(new QName("name"));
      definedNameName = attribute.getValue();
      isInDefinedName = true;
     }
    } else if(event.isCharacters() && isInDefinedName) { //character content of definedName == the formula
     definedNameFormula.append(((Characters)event).getData());
    } else if(event.isEndElement()) {
     EndElement endElement = (EndElement)event;
     QName endElementName = endElement.getName();
     if(endElementName.getLocalPart().equalsIgnoreCase("definedName")) { //end element of definedName
      definedNames.put(definedNameName, definedNameFormula.toString());
      definedNameFormula = new StringBuffer();
      isInDefinedName = false;
     }
    }

   } 

   opcpackage.close();

   System.out.println("Sheet names:");
   for (String shName : sheetNames) {
    System.out.println("Sheet name: " + shName);
   }

   System.out.println("Named ranges:");
   for (String defName : definedNames.keySet()) {
    System.out.println("Name: " + defName + ", Formula: " + definedNames.get(defName));
   }

  } catch (Exception ex) {
     ex.printStackTrace();
  }
 }
}