I am new to Apache POI and want to split an Excel file into multiple files based on row count.
E.g. if data.xlsx has 15k rows, the new files should be data_1.xlsx with rows 1-5k, data_2.xlsx with rows 5k-10k, and data_3.xlsx with rows 10k-15k.
I am new to Apache POI and want to split an Excel file into multiple files based on row count.
E.g. if data.xlsx has 15k rows, the new files should be data_1.xlsx with rows 1-5k, data_2.xlsx with rows 5k-10k, and data_3.xlsx with rows 10k-15k.
I've got you.
package com.industries.seanimus;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Splits the first sheet of an .xlsx workbook into several smaller workbooks
 * of at most {@code maxRows} rows each.
 *
 * <p>The output files are written next to the input, named
 * {@code <input-without-extension>_<n>.xlsx} with {@code n} starting at 1.
 * All the work happens in the constructor; nothing is written when the sheet
 * holds fewer than {@code maxRows} rows.
 */
public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    /**
     * Reads {@code fileName} and, when its first sheet holds at least
     * {@code maxRows} rows, writes the split workbooks to disk.
     *
     * <p>I/O and format errors are reported on standard error and otherwise
     * swallowed, so construction does not fail because of them.
     *
     * @param fileName path of the .xlsx file to split
     * @param maxRows  maximum number of rows per output file; must be at least 1
     * @throws IllegalArgumentException if {@code maxRows} is smaller than 1
     */
    public ReportSplitter(String fileName, final int maxRows) {
        if (maxRows < 1) {
            throw new IllegalArgumentException("maxRows must be at least 1, got " + maxRows);
        }
        // NOTE(review): a ratio of 0 switches off POI's zip-bomb detection for
        // the whole JVM. Only acceptable when the input file is trusted.
        ZipSecureFile.setMinInflateRatio(0);
        this.fileName = fileName;
        this.maxRows = maxRows;

        // Open read-only: a package opened read-write is saved back to the
        // source file when it is closed. try-with-resources also closes the
        // package when reading or splitting fails.
        try (OPCPackage pkg = OPCPackage.open(new File(fileName), PackageAccess.READ)) {
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            XSSFSheet sheet = workbook.getSheetAt(0);
            /* Only split when the sheet holds at least maxRows rows. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }
        } catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the rows of the first sheet of {@code workbook} into a sequence
     * of streaming workbooks, starting a new one every {@code maxRows} rows.
     *
     * <p>Rows are renumbered from 0 in every output workbook; cells keep their
     * original column index.
     *
     * @param workbook the source workbook
     * @return the non-empty workbooks, in source order
     */
    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {
        List<SXSSFWorkbook> workbooks = new ArrayList<>();
        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = wb.createSheet();
        // Styles already cloned into the current target workbook, keyed by the
        // index of the source style they were cloned from.
        Map<Short, CellStyle> styles = new HashMap<>();
        int rowCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);
        for (Row row : sheet) {
            /*
             * Start a new workbook BEFORE creating the row, so the row, its
             * cells and their styles all belong to the same workbook.
             */
            if (rowCount == maxRows) {
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                styles.clear();
                rowCount = 0;
            }
            SXSSFRow newRow = sh.createRow(rowCount++);
            for (Cell cell : row) {
                // Use the source column index: the iterator skips missing
                // cells, so a running counter would shift data to the left.
                SXSSFCell newCell = newRow.createCell(cell.getColumnIndex());
                setValue(newCell, cell);
                newCell.setCellStyle(styleFor(wb, styles, cell.getCellStyle()));
            }
        }

        /* Only add the last workbook if it has content. */
        if (rowCount > 0) {
            workbooks.add(wb);
        } else {
            wb.dispose();
        }
        return workbooks;
    }

    /**
     * Returns the clone of {@code source} inside {@code target}, creating it on
     * first use. Reusing one clone per source style keeps the number of styles
     * in the output bounded by the number of styles in the input instead of
     * growing with every cell.
     */
    private static CellStyle styleFor(SXSSFWorkbook target, Map<Short, CellStyle> cache,
            CellStyle source) {
        Short key = source.getIndex();
        CellStyle copy = cache.get(key);
        if (copy == null) {
            copy = target.createCellStyle();
            copy.cloneStyleFrom(source);
            cache.put(key, copy);
        }
        return copy;
    }

    /**
     * Copies the value of {@code cell} into {@code newCell} according to the
     * source cell's type. The style is not copied here.
     *
     * @param newCell the cell to write to
     * @param cell    the cell to read from
     * @return {@code newCell}
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING:
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC: {
            double serial = cell.getNumericCellValue();
            if (DateUtil.isCellDateFormatted(cell) && serial >= 1.0) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                // Plain numbers, and time-only values (serial below 1). The
                // latter map to a pre-1900 java.util.Date that cannot be
                // converted back, so the raw serial is copied instead.
                newCell.setCellValue(serial);
            }
            break;
        }
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            // NOTE(review): the formula text is copied verbatim; row references
            // are not adjusted for the renumbered rows - confirm this is acceptable.
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            /* Nothing to copy; the caller still applies the style. */
            break;
        case Cell.CELL_TYPE_ERROR:
            newCell.setCellErrorValue(cell.getErrorCellValue());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /**
     * Writes every workbook to {@code <base>_<n>.xlsx} and then releases the
     * temporary files backing the streaming workbooks, whether or not the
     * write succeeded.
     *
     * @param wbs the workbooks to write, in output order
     */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        String baseName = stripExtension(fileName);
        try {
            for (int i = 0; i < wbs.size(); i++) {
                File target = new File(baseName + "_" + (i + 1) + ".xlsx");
                try (FileOutputStream out = new FileOutputStream(target)) {
                    wbs.get(i).write(out);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            for (SXSSFWorkbook wb : wbs) {
                wb.dispose();
            }
        }
    }

    /**
     * Returns {@code name} without its trailing extension. A dot inside a
     * directory name, or a leading dot in the file name, is not treated as
     * the start of an extension.
     */
    private static String stripExtension(String name) {
        int dot = name.lastIndexOf('.');
        int separator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        return dot > separator + 1 ? name.substring(0, dot) : name;
    }

    public static void main(String[] args) {
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }
}
A few notes:
For writing the workbooks, I use SXSSFWorkbook. It's a lot faster than HSSF or XSSF, as it doesn't hold everything in memory before writing (which causes a horrible gc mess).
The Busy Developer's Guide is your friend for learning Apache POI ;)
ENJOY!
Thanks for your code. Just my two cents: the code above does not copy time-only cells correctly, so I made a small modification to setValue to handle time columns. It detects time-only values (Excel stores them on its "day zero", so POI reports their year as 1899) and copies them separately from real dates. Hope it helps :)
/**
 * Copies the value of {@code cell} into {@code newCell} according to the
 * source cell's type, including time-only cells. The style is not copied
 * here.
 *
 * @param newCell the cell to write to
 * @param cell    the cell to read from
 * @return {@code newCell}
 */
private static SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
    switch (cell.getCellType()) {
    case Cell.CELL_TYPE_STRING:
        newCell.setCellValue(cell.getRichStringCellValue().getString());
        break;
    case Cell.CELL_TYPE_NUMERIC: {
        double serial = cell.getNumericCellValue();
        if (DateUtil.isCellDateFormatted(cell) && serial >= 1.0) {
            newCell.setCellValue(cell.getDateCellValue());
        } else {
            // Plain numbers, and time-only values: a serial below 1 has no
            // calendar day and POI reports it as a date in 1899. Copying the
            // raw serial keeps the exact time of day, without a round-trip
            // through a formatted string.
            newCell.setCellValue(serial);
        }
        break;
    }
    case Cell.CELL_TYPE_BOOLEAN:
        newCell.setCellValue(cell.getBooleanCellValue());
        break;
    case Cell.CELL_TYPE_FORMULA:
        newCell.setCellFormula(cell.getCellFormula());
        break;
    case Cell.CELL_TYPE_BLANK:
        /* Nothing to copy. */
        break;
    case Cell.CELL_TYPE_ERROR:
        newCell.setCellErrorValue(cell.getErrorCellValue());
        break;
    default:
        System.out.println("Could not determine cell type");
    }
    return newCell;
}