Background:
I have an editor where I can edit the header and footer of a document in HTML and then merge them into the main document. Base64 Embedded images work perfectly within the main document content, but in the Headers or footers they vanish (so if I have a main document with image1.png in and this document has a header with image_header.png in, image1 will be showing, image_header won't). What it looks like to me is that the tag processor is not being applied to the elements within the PageHeader html.
I have created a custom ImageTagProcessor (Below)
Public Class CustomImageTagProcessor
Inherits iTextSharp.tool.xml.html.Image
Public Overrides Function [End](ctx As IWorkerContext, tag As Tag, currentContent As IList(Of IElement)) As IList(Of IElement)
Dim attributes As IDictionary(Of String, String) = tag.Attributes
Dim src As String = String.Empty
If Not attributes.TryGetValue(iTextSharp.tool.xml.html.HTML.Attribute.SRC, src) Then
Return New List(Of IElement)(1)
End If
If String.IsNullOrEmpty(src) Then
Return New List(Of IElement)(1)
End If
If src.StartsWith("data:image/", StringComparison.InvariantCultureIgnoreCase) Then
' data:[<MIME-type>][;charset=<encoding>][;base64],<data>
Dim base64Data As String = src.Substring(src.IndexOf(",") + 1)
Dim imagedata As Byte() = Convert.FromBase64String(base64Data)
Dim image As iTextSharp.text.Image = iTextSharp.text.Image.GetInstance(imagedata)
Dim list As List(Of IElement) = New List(Of IElement)()
Dim htmlPipelineContext As pipeline.html.HtmlPipelineContext = GetHtmlPipelineContext(ctx)
list.Add(GetCssAppliers().Apply(New Chunk(DirectCast(GetCssAppliers().Apply(image, tag, htmlPipelineContext), iTextSharp.text.Image), 0, 0, True), tag, htmlPipelineContext))
Return list
Else
If File.Exists(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, src)) Then
Dim imagedata As Byte() = File.ReadAllBytes(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, src))
Dim image As iTextSharp.text.Image = iTextSharp.text.Image.GetInstance(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, src))
Dim list As List(Of IElement) = New List(Of IElement)()
Dim htmlPipelineContext As pipeline.html.HtmlPipelineContext = GetHtmlPipelineContext(ctx)
list.Add(GetCssAppliers().Apply(New Chunk(DirectCast(GetCssAppliers().Apply(image, tag, htmlPipelineContext), iTextSharp.text.Image), 0, 0, True), tag, htmlPipelineContext))
Return list
End If
Return MyBase.[End](ctx, tag, currentContent)
End If
End Function
End Class
and is connected by the following code in the main PDF generator library
Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
tagProcessors.RemoveProcessor(HTML.Tag.IMG) ' remove the default processor
tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor()) ' use our new processor
'Setup CSS
Dim cssResolver As ICSSResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(True)
cssResolver.AddCssFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/css/pdf.css"), True)
'Setup Fonts
Dim xmlFontProvider As XMLWorkerFontProvider = New XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS)
xmlFontProvider.RegisterDirectory(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/fonts/"))
Dim cssAppliers As CssAppliers = New CssAppliersImpl(xmlFontProvider)
Dim htmlContext As HtmlPipelineContext = New HtmlPipelineContext(cssAppliers)
htmlContext.SetAcceptUnknown(True)
htmlContext.SetTagFactory(tagProcessors)
'Setup Pipelines
Dim pdf As PdfWriterPipeline = New PdfWriterPipeline(document, writer)
Dim htmlp As HtmlPipeline = New HtmlPipeline(htmlContext, pdf)
Dim css As CssResolverPipeline = New CssResolverPipeline(cssResolver, htmlp)
In my HeaderFooter class (Inheriting from PdfPageEventHelper
) a new instance of the class in instanciated with:
Public Sub New(ByVal headerHTML As String, ByVal footerHTML As String)
MyBase.New()
'< Other code not related >
Dim pdfElHandler As New PdfElementsHandler()
Using sr As TextReader = New StringReader(Me.HeaderHTML)
XMLWorkerHelper.GetInstance().ParseXHtml(pdfElHandler, sr)
End Using
headerElements = pdfElHandler.elements
Using sr As TextReader = New StringReader(Me.FooterHTML)
XMLWorkerHelper.GetInstance().ParseXHtml(pdfElHandler, sr)
End Using
footerElements = pdfElHandler.elements
headerTable = New PdfPTable(1)
headerTable = BuildElements(headerElements, "header")
footerTable = New PdfPTable(1)
footerTable = BuildElements(footerElements, "footer")
End Sub
Private Function BuildElements(tableElements As ElementList, type As String) As PdfPTable
Dim holderTable As New PdfPTable({1})
holderTable.HorizontalAlignment = Element.ALIGN_LEFT
Dim holderCell As New PdfPCell()
holderCell.Padding = 0
holderCell.UseBorderPadding = False
holderCell.Border = 0
If type = "header" Then
If Not String.IsNullOrEmpty(HeaderHTML) Then
For Each el As IElement In tableElements
holderCell.AddElement(el)
Next
Dim holderRow As New PdfPRow({holderCell})
holderTable.Rows.Add(holderRow)
End If
End If
If type = "footer" Then
If Not String.IsNullOrEmpty(FooterHTML) Then
For Each el As IElement In tableElements
holderCell.AddElement(el)
Next
Dim holderRow As New PdfPRow({holderCell})
holderTable.Rows.Add(holderRow)
End If
End If
holderTable.WidthPercentage = 100
Return holderTable
End Function
A debug Step through of the headerElements after ParseXHTML shows:
1 Table (correct)
1 Row (correct)
2 Cells (correct)
Cell[0] Empty (not correct, there should be an image Element in here, parsed from an <img src="data:image/png;base64,xxxxxx... html element
Cell[1] Contains composite text elements (correct)
My OnEndPage
event looks like :
Public Overrides Sub OnEndPage(ByVal writer As PdfWriter, ByVal document As Document)
'MyBase.OnEndPage(writer, document)
Dim pageSize As Rectangle = document.PageSize
Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
tagProcessors.RemoveProcessor(HTML.Tag.IMG) ' remove the default processor
tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor()) ' use our new processor
Dim htmlContext As HtmlPipelineContext = New HtmlPipelineContext(Nothing)
htmlContext.SetAcceptUnknown(True)
htmlContext.SetTagFactory(tagProcessors)
Dim FinalMarginTop, FinalMarginBottom As Single
FinalMarginTop = Me.MarginTop
FinalMarginBottom = Me.MarginBottom
document.SetMargins(MarginLeft, MarginRight, MarginTop, MarginBottom)
If Me.UsesHeader Or Me.UsesFooter Then
Dim under As PdfContentByte = writer.DirectContent
Dim ct As New ColumnText(writer.DirectContent)
If Me.UsesHeader Then
'Create the header rectangle
Dim headerRect As New Rectangle(0, document.PageSize.Height, document.PageSize.Width, CalculatedHeaderHeight)
headerRect.Left += MarginLeft
headerRect.Right -= MarginRight ' document.RightMargin
headerRect.Top += MarginTop ' document.TopMargin
headerRect.Bottom -= MarginBottom ' document.BottomMargin
If HeaderType = EnumHeaderDisplayType.FirstPageOnly Then
If writer.PageNumber = 1 Then
ct.SetSimpleColumn(headerRect)
ct.AddElement(headerTable)
ct.Go()
FinalMarginTop = MarginTop
End If
Else
ct.SetSimpleColumn(headerRect)
ct.AddElement(headerTable)
ct.Go()
FinalMarginTop = CalculatedHeaderHeight + MarginTop
End If
End If
If Me.UsesFooter Then
Dim footerRect As New Rectangle(0, 0, pageSize.Width, CalculatedFooterHeight)
footerRect.BorderWidth = 0
footerRect.Left += document.LeftMargin
footerRect.Right -= document.RightMargin
footerRect.Top += CalculatedFooterHeight
footerRect.Bottom += document.BottomMargin
ct.SetSimpleColumn(footerRect)
ct.AddElement(footerTable)
ct.Go()
FinalMarginBottom = CalculatedFooterHeight + MarginBottom
End If
End If
End Sub
So I think that the Custom Image Tag Processor Needs to be applied at this stage, but I cannot see where within OnEndPage I can use it.