iTextSharp base64 embedded image in header not parsed

2019-09-09 07:42发布

问题:

Background:

I have an editor where I can edit the header and footer of a document in HTML and then merge them into the main document. Base64 embedded images work perfectly within the main document content, but in the headers or footers they vanish. For example, if the main document contains image1.png and its header contains image_header.png, image1 is shown but image_header is not. It looks to me as if the tag processor is not being applied to the elements within the page header HTML.

I have created a custom ImageTagProcessor (Below)

''' <summary>
''' Replacement for the stock XMLWorker &lt;img&gt; tag processor that also
''' understands base64 "data:image/..." URIs and image paths relative to the
''' application's base directory. Anything else is delegated to the base class.
''' </summary>
Public Class CustomImageTagProcessor
    Inherits iTextSharp.tool.xml.html.Image

    ''' <summary>
    ''' Produces the elements for a closing &lt;img&gt; tag.
    ''' </summary>
    ''' <param name="ctx">Worker context used to look up the HTML pipeline context.</param>
    ''' <param name="tag">The &lt;img&gt; tag; its "src" attribute selects the image source.</param>
    ''' <param name="currentContent">Content collected for the tag; only forwarded to the base class.</param>
    ''' <returns>
    ''' An empty list when "src" is missing or empty; a single-element list holding the
    ''' image chunk for data URIs and existing local files; otherwise the base class result.
    ''' </returns>
    Public Overrides Function [End](ctx As IWorkerContext, tag As Tag, currentContent As IList(Of IElement)) As IList(Of IElement)
        Dim src As String = String.Empty
        If Not tag.Attributes.TryGetValue(iTextSharp.tool.xml.html.HTML.Attribute.SRC, src) OrElse String.IsNullOrEmpty(src) Then
            Return New List(Of IElement)(1)
        End If

        If src.StartsWith("data:image/", StringComparison.OrdinalIgnoreCase) Then
            ' data:[<MIME-type>][;charset=<encoding>][;base64],<data>
            ' Everything after the first comma is the payload. The Char overload of
            ' IndexOf is an ordinal search, unlike the culture-sensitive String overload.
            Dim base64Data As String = src.Substring(src.IndexOf(","c) + 1)
            Dim imageBytes As Byte() = Convert.FromBase64String(base64Data)
            Return WrapImage(iTextSharp.text.Image.GetInstance(imageBytes), tag, ctx)
        End If

        ' Treat src as a path relative to the application base directory.
        ' The image is loaded straight from the path; the file is not read into
        ' memory separately first.
        Dim localPath As String = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, src)
        If File.Exists(localPath) Then
            Return WrapImage(iTextSharp.text.Image.GetInstance(localPath), tag, ctx)
        End If

        ' Not a data URI and not a local file: let the default processor handle it.
        Return MyBase.[End](ctx, tag, currentContent)
    End Function

    ''' <summary>
    ''' Applies the tag's CSS to the image, wraps it in a chunk, applies the CSS to
    ''' the chunk as well, and returns it as a single-element list.
    ''' </summary>
    Private Function WrapImage(image As iTextSharp.text.Image, tag As Tag, ctx As IWorkerContext) As List(Of IElement)
        Dim htmlPipelineContext As pipeline.html.HtmlPipelineContext = GetHtmlPipelineContext(ctx)
        Dim styledImage As iTextSharp.text.Image = DirectCast(GetCssAppliers().Apply(image, tag, htmlPipelineContext), iTextSharp.text.Image)

        Dim result As New List(Of IElement)()
        result.Add(GetCssAppliers().Apply(New Chunk(styledImage, 0, 0, True), tag, htmlPipelineContext))
        Return result
    End Function
End Class

and is connected by the following code in the main PDF generator library

' Start from the stock HTML tag processors and replace the <img> handler
' with the custom one.
Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
tagProcessors.RemoveProcessor(HTML.Tag.IMG)
tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor())

' Stylesheet: default CSS plus the application's pdf.css.
Dim cssResolver As ICSSResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(True)
cssResolver.AddCssFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/css/pdf.css"), True)

' Fonts: only those registered from the application's font directory.
Dim xmlFontProvider As New XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS)
xmlFontProvider.RegisterDirectory(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/fonts/"))

' HTML context wired to the font-aware CSS appliers and the custom tag factory.
Dim cssAppliers As CssAppliers = New CssAppliersImpl(xmlFontProvider)
Dim htmlContext As New HtmlPipelineContext(cssAppliers)
htmlContext.SetAcceptUnknown(True)
htmlContext.SetTagFactory(tagProcessors)

' Pipeline chain, built back to front: CSS resolver -> HTML -> PDF writer.
Dim pdf As New PdfWriterPipeline(document, writer)
Dim htmlp As New HtmlPipeline(htmlContext, pdf)
Dim css As New CssResolverPipeline(cssResolver, htmlp)

In my HeaderFooter class (inheriting from PdfPageEventHelper), a new instance of the class is instantiated with:

''' <summary>
''' Parses the header and footer HTML into element lists and builds the
''' tables that are drawn on each page.
''' </summary>
''' <param name="headerHTML">HTML fragment for the page header.</param>
''' <param name="footerHTML">HTML fragment for the page footer.</param>
Public Sub New(ByVal headerHTML As String, ByVal footerHTML As String)
    MyBase.New()

    '< Other code not related >

    ' Use a separate handler per fragment. Sharing one handler would leave
    ' headerElements and footerElements pointing at the same collection if
    ' PdfElementsHandler accumulates elements across parses
    ' (NOTE(review): presumably it does - confirm against PdfElementsHandler).
    Dim headerHandler As New PdfElementsHandler()
    Using sr As TextReader = New StringReader(Me.HeaderHTML)
        XMLWorkerHelper.GetInstance().ParseXHtml(headerHandler, sr)
    End Using
    headerElements = headerHandler.elements

    Dim footerHandler As New PdfElementsHandler()
    Using sr As TextReader = New StringReader(Me.FooterHTML)
        XMLWorkerHelper.GetInstance().ParseXHtml(footerHandler, sr)
    End Using
    footerElements = footerHandler.elements

    ' BuildElements always returns a fresh table, so no placeholder tables are needed.
    headerTable = BuildElements(headerElements, "header")
    footerTable = BuildElements(footerElements, "footer")

End Sub

''' <summary>
''' Wraps the parsed elements in a single-cell, full-width table.
''' </summary>
''' <param name="tableElements">Elements to place inside the table's only cell.</param>
''' <param name="type">"header" or "footer"; selects which HTML source must be non-empty.</param>
''' <returns>
''' A one-column table. It holds one row with all elements when the HTML for the
''' requested type is non-empty, and no rows otherwise.
''' </returns>
Private Function BuildElements(tableElements As ElementList, type As String) As PdfPTable

        Dim wrapperTable As New PdfPTable({1}) With {
            .HorizontalAlignment = Element.ALIGN_LEFT
        }

        ' Borderless, unpadded cell that carries the content.
        Dim contentCell As New PdfPCell() With {
            .Padding = 0,
            .UseBorderPadding = False,
            .Border = 0
        }

        ' Pick the HTML source for the requested section; any other type leaves
        ' it as Nothing, so no row is added.
        Dim sectionHtml As String = Nothing
        Select Case type
            Case "header"
                sectionHtml = HeaderHTML
            Case "footer"
                sectionHtml = FooterHTML
        End Select

        If Not String.IsNullOrEmpty(sectionHtml) Then
            For Each item As IElement In tableElements
                contentCell.AddElement(item)
            Next

            wrapperTable.Rows.Add(New PdfPRow({contentCell}))
        End If

        wrapperTable.WidthPercentage = 100
        Return wrapperTable
    End Function

A debug Step through of the headerElements after ParseXHTML shows:

1 Table (correct)
1 Row (correct)
2 Cells (correct)
Cell[0] Empty (not correct; there should be an image element in here, parsed from an <img src="data:image/png;base64,xxxxxx..." /> HTML element)
Cell[1] Contains composite text elements (correct)

My OnEndPage event looks like :

''' <summary>
''' Draws the pre-built header and footer tables onto the finished page.
''' </summary>
''' <param name="writer">Writer for the document; supplies the direct content layer and page number.</param>
''' <param name="document">Document being written; supplies the page size and margins.</param>
Public Overrides Sub OnEndPage(ByVal writer As PdfWriter, ByVal document As Document)
        'MyBase.OnEndPage(writer, document)
        Dim pageSize As Rectangle = document.PageSize

        ' No tag-processor or HTML-context setup here: the header and footer are
        ' already PdfPTable instances at this point, so nothing in this method
        ' parses HTML.

        document.SetMargins(MarginLeft, MarginRight, MarginTop, MarginBottom)

        If Me.UsesHeader OrElse Me.UsesFooter Then
            Dim ct As New ColumnText(writer.DirectContent)

            If Me.UsesHeader Then
                'Create the header rectangle
                Dim headerRect As New Rectangle(0, document.PageSize.Height, document.PageSize.Width, CalculatedHeaderHeight)

                headerRect.Left += MarginLeft
                headerRect.Right -= MarginRight ' document.RightMargin
                headerRect.Top += MarginTop ' document.TopMargin
                headerRect.Bottom -= MarginBottom ' document.BottomMargin

                ' A "first page only" header is drawn on page 1 only; every other
                ' header type is drawn on every page.
                If HeaderType <> EnumHeaderDisplayType.FirstPageOnly OrElse writer.PageNumber = 1 Then
                    ct.SetSimpleColumn(headerRect)
                    ct.AddElement(headerTable)
                    ct.Go()
                End If
            End If

            If Me.UsesFooter Then
                Dim footerRect As New Rectangle(0, 0, pageSize.Width, CalculatedFooterHeight)
                footerRect.BorderWidth = 0
                footerRect.Left += document.LeftMargin
                footerRect.Right -= document.RightMargin
                footerRect.Top += CalculatedFooterHeight
                footerRect.Bottom += document.BottomMargin
                ct.SetSimpleColumn(footerRect)
                ct.AddElement(footerTable)
                ct.Go()
            End If

        End If

    End Sub

So I think that the custom image tag processor needs to be applied at this stage, but I cannot see where within OnEndPage I can use it.

回答1:

So I figured out the answer to this. Pipeline processing for the main document did not apply to the writers that built the headers and footers.

I had to set up the tag processors, CSS resolvers, fonts and pipelines within the page event as well. Once this was done, the images were processed, and the bytes of the processed images were applied to the main document on every page event (OnEndPage).

so my code became

' Header: parse the header HTML through the custom pipeline into a fresh
' element list, then take the height of the table's single row.
If Me.UsesHeader Then
    headerElements = New ElementList()
    headerTable = SetTable(headerElements, Me.HeaderHTML)
    CalculatedHeaderHeight = headerTable.Rows(0).MaxHeights
End If

' Footer: same steps with the footer HTML.
If Me.UsesFooter Then
    footerElements = New ElementList()
    footerTable = SetTable(footerElements, Me.FooterHTML)
    CalculatedFooterHeight = footerTable.Rows(0).MaxHeights
End If

where footerElements and headerElements are of type ElementList and the SetTable function is :

''' <summary>
''' Parses an HTML fragment through an XMLWorker pipeline that uses the custom
''' image tag processor, collects the resulting elements, and wraps them in a
''' single-cell, full-width table.
''' </summary>
''' <param name="elements">List that receives the parsed elements; it is also read back to fill the table cell.</param>
''' <param name="htmlcode">HTML fragment to parse.</param>
''' <returns>A one-column table with one row containing every element in <paramref name="elements"/>.</returns>
Public Function SetTable(ByVal elements As ElementList, ByVal htmlcode As String) As PdfPTable

        ' Stock tag processors with the <img> handler swapped for the custom one.
        Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
        tagProcessors.RemoveProcessor(HTML.Tag.IMG) ' remove the default processor
        tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor()) ' use our new processor

        Dim cssResolver As ICSSResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(True)
        cssResolver.AddCssFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/css/pdf.css"), True)

        'Setup Fonts
        Dim xmlFontProvider As XMLWorkerFontProvider = New XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS)
        xmlFontProvider.RegisterDirectory(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/fonts/"))

        Dim cssAppliers As CssAppliers = New CssAppliersImpl(xmlFontProvider)

        Dim htmlContext As HtmlPipelineContext = New HtmlPipelineContext(cssAppliers)
        htmlContext.SetAcceptUnknown(True)
        htmlContext.SetTagFactory(tagProcessors)

        ' The pipeline ends in an element handler, not a PDF writer, so the parsed
        ' elements are collected into the supplied list.
        Dim pdf As ElementHandlerPipeline = New ElementHandlerPipeline(elements, Nothing)
        Dim htmlp As HtmlPipeline = New HtmlPipeline(htmlContext, pdf)
        Dim css As CssResolverPipeline = New CssResolverPipeline(cssResolver, htmlp)

        Dim worker As XMLWorker = New XMLWorker(css, True)
        Dim p As XMLParser = New XMLParser(worker)

        Dim holderTable As PdfPTable = New PdfPTable({1})

        holderTable.HorizontalAlignment = Element.ALIGN_LEFT

        Dim holderCell As New PdfPCell()
        holderCell.Padding = 0
        holderCell.UseBorderPadding = False
        holderCell.Border = 0

        ' Parse from the string's characters directly. Going through ASCII bytes
        ' would replace every non-ASCII character in the HTML with "?".
        Using htmlReader As New StringReader(htmlcode)
            p.Parse(htmlReader)
        End Using

        For Each el As IElement In elements
            holderCell.AddElement(el)
        Next

        Dim holderRow As New PdfPRow({holderCell})
        holderTable.Rows.Add(holderRow)
        holderTable.WidthPercentage = 100

        Return holderTable

    End Function

And this solved my problem