I wonder if anyone has done this with iTextSharp, but I would like to combine multiple PDF files into one but leave the page breaks out. For example, I would like to create 4 PDF files containing 3 lines of text each, so I want the resulting file to have all 12 lines in 1 page. Is this possible?
问题:
回答1:
As the OP also tagged this question with [iText] and I am more at home with Java than .Net, here an answer for iText/Java. It should be easy to translate to iTextSharp/C#.
The original question
I would like to combine multiple PDF files into one but leave the page breaks out. For example, I would like to create 4 PDF files containing 3 lines of text each, so I want the resulting file to have all 12 lines in 1 page.
For PDF files as indicated in that example you can use this simple utility class:
public class PdfDenseMergeTool
{
public PdfDenseMergeTool(Rectangle size, float top, float bottom, float gap)
{
this.pageSize = size;
this.topMargin = top;
this.bottomMargin = bottom;
this.gap = gap;
}
public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException
{
try
{
openDocument(outputStream);
for (PdfReader reader: inputs)
{
merge(reader);
}
}
finally
{
closeDocument();
}
}
void openDocument(OutputStream outputStream) throws DocumentException
{
final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin);
final PdfWriter writer = PdfWriter.getInstance(document, outputStream);
document.open();
this.document = document;
this.writer = writer;
newPage();
}
void closeDocument()
{
try
{
document.close();
}
finally
{
this.document = null;
this.writer = null;
this.yPosition = 0;
}
}
void newPage()
{
document.newPage();
yPosition = pageSize.getTop(topMargin);
}
void merge(PdfReader reader) throws IOException
{
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
for (int page = 1; page <= reader.getNumberOfPages(); page++)
{
merge(reader, parser, page);
}
}
void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
{
TextMarginFinder finder = parser.processContent(page, new TextMarginFinder());
Rectangle pageSizeToImport = reader.getPageSize(page);
float heightToImport = finder.getHeight();
float maxHeight = pageSize.getHeight() - topMargin - bottomMargin;
if (heightToImport > maxHeight)
{
throw new IllegalArgumentException(String.format("Page %s content too large; height: %s, limit: %s.", page, heightToImport, maxHeight));
}
if (heightToImport > yPosition - pageSize.getBottom(bottomMargin))
{
newPage();
}
else if (!writer.isPageEmpty())
{
heightToImport += gap;
}
yPosition -= heightToImport;
PdfImportedPage importedPage = writer.getImportedPage(reader, page);
writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.getLly() - pageSizeToImport.getBottom()));
}
Document document = null;
PdfWriter writer = null;
float yPosition = 0;
final Rectangle pageSize;
final float topMargin;
final float bottomMargin;
final float gap;
}
If you have a list of PdfReader
instances inputs
, you can merge them like this into an OutputStream output
:
PdfDenseMergeTool tool = new PdfDenseMergeTool(PageSize.A4, 18, 18, 5);
tool.merge(output, inputs);
This creates a merged document using an A4 page size, a top and bottom margin of 18/72" each and a gap between contents of different PDF pages of 5/72".
The comments
The iText TextMarginFinder
(used in the PdfDenseMergeTool
above) only considers text. If other content types also are to be considered, this class has to be extended somewhat.
Each PDF has just a few lines, perhaps a table or an image, but I want the end result in one page.
If the tables contain decorations reaching above or below the text content (e.g. lines or colored backgrounds), you should use a larger gap value. Unfortunately the parsing framework used by the TextMarginFinder
does not forward vector graphics commands to the finder.
If the images are bitmap images, the TextMarginFinder
should be extended by implementing its renderImage
method to take the image area into account, too.
Also, some of the PDFs may contain fields, so I'd like to keep those fields in the resulting combined PDF as well.
If AcroForm fields are also to be considered, you have to
- extend the rectangle represented by the
TextMarginFinder
to also include the visualization rectangles of the widget annotations, and - extend the
PdfDenseMergeTool.merge(PdfReader, PdfReaderContentParser, int)
method to also copy those widget annotations.
Update
I wrote above
Unfortunately the parsing framework used by the
TextMarginFinder
does not forward vector graphics commands to the finder.
Meanwhile (in version 5.5.6) that parsing framework has been extended to also forward vector graphics commands.
If you replace the line
TextMarginFinder finder = parser.processContent(page, new TextMarginFinder());
by
MarginFinder finder = parser.processContent(page, new MarginFinder());
using the MarginFinder
class presented at the bottom of this answer, all content is considered, not merely text.
回答2:
For those of you who want the above code in C#, here you go.
using System;
using System.Collections.Generic;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace Test.WebService.Support {
public class PDFMerge {
private Rectangle PageSize;
private float TopMargin;
private float BottomMargin;
private float Gap;
private Document Document = null;
private PdfWriter Writer = null;
private float YPosition = 0;
public PDFMerge(Rectangle size, float top, float bottom, float gap) {
this.PageSize = size;
this.TopMargin = top;
this.BottomMargin = bottom;
this.Gap = gap;
} // PDFMerge
public void Merge(MemoryStream outputStream, List<PdfReader> inputs) {
try {
this.OpenDocument(outputStream);
foreach (PdfReader reader in inputs) {
this.Merge(reader);
}
} finally {
this.CloseDocument();
}
} // Merge
private void Merge(PdfReader reader) {
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
for (int p = 1; p <= reader.NumberOfPages; p++) {
this.Merge(reader, parser, p);
}
} // Merge
private void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex) {
TextMarginFinder Finder = parser.ProcessContent(pageIndex, new TextMarginFinder());
Rectangle PageSizeToImport = reader.GetPageSize(pageIndex);
float HeightToImport = Finder.GetHeight();
float MaxHeight = PageSize.Height - TopMargin - BottomMargin;
if (HeightToImport > MaxHeight) {
throw new ArgumentException(string.Format("Page {0} content too large; height: {1}, limit: {2}.", pageIndex, HeightToImport, MaxHeight));
}
if (HeightToImport > YPosition - PageSize.GetBottom(BottomMargin)) {
this.NewPage();
} else if (!Writer.PageEmpty) {
HeightToImport += Gap;
}
YPosition -= HeightToImport;
PdfImportedPage ImportedPage = Writer.GetImportedPage(reader, pageIndex);
Writer.DirectContent.AddTemplate(ImportedPage, 0, YPosition - (Finder.GetLly() - PageSizeToImport.Bottom));
} // Merge
private void OpenDocument(MemoryStream outputStream) {
Document Document = new Document(PageSize, 36, 36, this.TopMargin, BottomMargin);
PdfWriter Writer = PdfWriter.GetInstance(Document, outputStream);
Document.Open();
this.Document = Document;
this.Writer = Writer;
this.NewPage();
} // OpenDocument
private void CloseDocument() {
try {
Document.Close();
} finally {
this.Document = null;
this.Writer = null;
this.YPosition = 0;
}
} // CloseDocument
private void NewPage() {
Document.NewPage();
YPosition = PageSize.GetTop(TopMargin);
} // NewPage
}
}