iTextSharp is producing a corrupt PDF

2019-02-10 01:58发布

问题:

The code snippet below returns a corrupt PDF document; however, if I return mergedDocument instead, it always returns a valid PDF. mergedDocument is based on a PDF file I created using Word, whereas completedDocument is generated entirely programmatically. The code "works" in that it throws no exceptions. Why is iTextSharp creating a corrupt PDF?

// Builds one combined PDF in memory: for every event item, the organisation's
// template form is filled in and flattened, and its pages are appended to a
// PdfCopy that writes into streamCompleted.
byte[] completedDocument = null;            
using (MemoryStream streamCompleted = new MemoryStream())
{
    using (Document document = new Document())
    {                    
        PdfCopy copy = new PdfCopy(document, streamCompleted);
        document.Open();
        copy.Open();                    

        foreach (var item in eventItems)
        {
            byte[] mergedDocument = null;
            // The template is looked up by the item's OrganisationID token.
            PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
            using (MemoryStream streamTemplate = new MemoryStream())
            {
                using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
                {
                    // Fill only the form fields whose name matches one of the item's tokens.
                    foreach (var token in item.DataTokens)
                    {
                        if (stamper.AcroFields.Fields.Any(fld => fld.Key == token.Key.ToString()))
                        {
                            stamper.AcroFields.SetField(token.Key.ToString(), token.Value);
                        }
                    }
                    stamper.FormFlattening = true;
                    // Presumably keeps streamTemplate usable after the stamper is disposed,
                    // since it is read right after this using block.
                    stamper.Writer.CloseStream = false;
                }

                // Copy the stamped (filled and flattened) PDF out of the stream.
                mergedDocument = new byte[streamTemplate.Length];
                streamTemplate.Position = 0;
                streamTemplate.Read(mergedDocument, 0, (int)streamTemplate.Length);
            }
            // Re-open the stamped bytes so their pages can be imported into the copy.
            reader = new PdfReader(mergedDocument);

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                document.SetPageSize(PageSize.A4);
                copy.AddPage(copy.GetImportedPage(reader, i));
            }
        }
        // NOTE(review): streamCompleted is read here while `document` and `copy` are
        // still open (the Document's using block only ends on the next brace), so the
        // writer has not been closed yet. Per the answer below, this is why the
        // returned bytes form a corrupt PDF.
        completedDocument = new byte[streamCompleted.Length];
        streamCompleted.Position = 0;
        streamCompleted.Read(completedDocument, 0, (int)streamCompleted.Length);
    }                
}
return completedDocument;

回答1:

You need to close the document and copy objects to flush the PDF writing buffer. This, however, causes some problems when trying to read the stream into an array. The fix for that is to use the ToArray() method of the MemoryStream which still works on closed streams. The changes I made have comments on them.

        // Merges one filled-in, flattened copy of the organisation's template per
        // event item into a single PDF and returns the result as a byte array.
        byte[] completedDocument = null;
        using (MemoryStream streamCompleted = new MemoryStream())
        {
            using (Document document = new Document())
            {
                PdfCopy copy = new PdfCopy(document, streamCompleted);
                document.Open();
                copy.Open();

                foreach (var item in eventItems)
                {
                    byte[] mergedDocument = null;
                    // The template is looked up by the item's OrganisationID token.
                    PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
                    using (MemoryStream streamTemplate = new MemoryStream())
                    {
                        using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
                        {
                            // Fill only the form fields whose name matches one of the item's tokens.
                            foreach (var token in item.DataTokens)
                            {
                                string fieldName = token.Key.ToString();
                                if (stamper.AcroFields.Fields.Any(fld => fld.Key == fieldName))
                                {
                                    stamper.AcroFields.SetField(fieldName, token.Value);
                                }
                            }
                            stamper.FormFlattening = true;
                            stamper.Writer.CloseStream = false;
                        }
                        //Copy the stream's bytes
                        mergedDocument = streamTemplate.ToArray();
                    }
                    // Re-open the stamped bytes so their pages can be imported into the copy.
                    reader = new PdfReader(mergedDocument);

                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        document.SetPageSize(PageSize.A4);
                        copy.AddPage(copy.GetImportedPage(reader, i));
                    }
                }
                //Close the document and the copy once, AFTER every item has been added.
                //Closing inside the loop would finish the PDF after the first item and
                //leave later AddPage calls writing to an already-closed document.
                document.Close();
                copy.Close();

                //ToArray() can operate on closed streams
                completedDocument = streamCompleted.ToArray();
            }
        }
        return completedDocument;


回答2:

Also make sure your HTML doesn't contain any hr tags when converting HTML to PDF:

// Swap double quotes for single quotes, then strip every <hr> variant (any case,
// with or without attributes or a self-closing slash), not only "<hr />" and "<hr/>".
System.Text.RegularExpressions.Regex.Replace(hdnEditorText.Value.Replace("\"", "'"), @"<hr\b[^>]*>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase)