How to customize font styles while creating a PDF from HTML

2019-08-12 23:47发布

问题:

I am creating a PDF file from an HTML file using the iText library, but the conversion applies some default CSS and does not produce the file in the required format. Can someone please suggest how I can customize font styles while converting with XMLWorkerHelper? Mostly I need to reduce the size of the characters written to the PDF.

Below is my code.
package com.highradius.converthtmltopdf;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.FontProvider;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfAWriter;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.css.CssFile;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;

/**
 * Converts an HTML file to PDF with iText's XML Worker and then dumps the
 * text of the generated PDF, page by page, into a plain-text file.
 *
 * <p>Both conversion methods are best-effort: any failure is reported on
 * standard error via {@code printStackTrace()} and is not propagated to the
 * caller.
 */
public class ConvertHtmlToPDF 
{
    /** Name of the HTML input file used by {@link #main(String[])}. */
    final static String HTML = "EMAIL_BODY_acc_102712_amount__20150611_201726170.html";
    /** Name of the PDF file produced by {@link #main(String[])}. */
    final static String PDF= "NewPdf1.pdf";
    /** Name of the text file that receives the text extracted from the PDF. */
    final static String TXT= "Text.txt";
    /** HTML input file; assigned in {@link #main(String[])}. */
    static File HtmlFile= null;
    /** PDF output file; assigned in {@link #main(String[])}. */
    static File PdfFile= null;

    /**
     * Renders the given HTML file into an A3-sized PDF.
     *
     * <p>Prints {@code "PDF Created!"} on success. On failure the stack trace
     * is printed and the method returns normally.
     *
     * @param HtmlFile the (X)HTML file to read
     * @param PdfFile  the PDF file to create or overwrite
     */
    protected void convertHtmlToPdf (File HtmlFile, File PdfFile ) {
        // The input is opened before the output so that a missing HTML file does
        // not leave an empty PDF behind. try-with-resources guarantees that both
        // streams are released even when parsing fails half-way.
        try (FileInputStream htmlIn = new FileInputStream(HtmlFile);
             FileOutputStream pdfOut = new FileOutputStream(PdfFile)) {
            FontFactory.registerDirectories();
            Document document = new Document();
            document.setPageCount(100);
            // NOTE(review): TXT is a file name, not a style class; this call looks
            // like a leftover and is kept only to preserve existing behavior.
            document.setHtmlStyleClass(TXT);
            document.setPageSize(PageSize.A3);
            PdfWriter writer = PdfWriter.getInstance(document, pdfOut);
            writer.setFullCompression();
            writer.setInitialLeading(12.5f);
            document.open();
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn);
            // A CSSResolver that was built after parsing, never handed to the
            // parser and cleared right away used to live here. It could not
            // influence the PDF, so it was removed. Custom CSS has to be given to
            // the parser itself.
            // NOTE(review): e.g. through a parseXHtml overload that accepts a CSS
            // stream - confirm against the XML Worker version in use.
            document.close();
            System.out.println( "PDF Created!" );
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of every page of a PDF and writes it to a text file,
     * one page after the other, each followed by a line separator.
     *
     * <p>On failure the stack trace is printed and the method returns normally.
     *
     * @param Pdf path of the PDF file to read
     * @param Txt path of the text file to create or overwrite
     */
    public  void convertPDFToText(String Pdf,String Txt){
        try{
            // Open the PDF first: if it cannot be read, the text file is left untouched.
            PdfReader pr=new PdfReader(Pdf);
            try (BufferedWriter bw=new BufferedWriter(new FileWriter(Txt))) {
                int pNum=pr.getNumberOfPages();
                // iText page numbers are 1-based.
                for(int page=1;page<=pNum;page++){
                    String text=PdfTextExtractor.getTextFromPage(pr, page);
                    bw.write(text);
                    bw.newLine();
                }
                bw.flush();
            } finally {
                // Release the reader's underlying file handle on every path.
                pr.close();
            }
        }catch(Exception e){e.printStackTrace();}

    }

    /**
     * Converts {@link #HTML} to {@link #PDF} and then extracts the text of that
     * PDF into {@link #TXT}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        ConvertHtmlToPDF chtp = new ConvertHtmlToPDF();
        HtmlFile= new File(HTML);
        PdfFile= new File(PDF);
        chtp.convertHtmlToPdf(HtmlFile, PdfFile);
        chtp.convertPDFToText(PDF, TXT);

    }
}
My HTML file:

<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
<meta name="Generator" content="Microsoft Word 14 (filtered medium)"></meta>
<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
    {font-family:Calibri;
    panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
    {font-family:Tahoma;
    panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
    {margin:0cm;
    margin-bottom:.0001pt;
    font-size:11.0pt;
    font-family:"Calibri","sans-serif";
    mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
    {mso-style-priority:99;
    color:blue;
    text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
    {mso-style-priority:99;
    color:purple;
    text-decoration:underline;}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
    {mso-style-priority:99;
    mso-style-link:"Balloon Text Char";
    margin:0cm;
    margin-bottom:.0001pt;
    font-size:8.0pt;
    font-family:"Tahoma","sans-serif";
    mso-fareast-language:EN-US;}
span.EmailStyle17
    {mso-style-type:personal-compose;
    font-family:"Calibri","sans-serif";
    color:windowtext;}
span.BalloonTextChar
    {mso-style-name:"Balloon Text Char";
    mso-style-priority:99;
    mso-style-link:"Balloon Text";
    font-family:"Tahoma","sans-serif";}
.MsoChpDefault
    {mso-style-type:export-only;
    font-family:"Calibri","sans-serif";
    mso-fareast-language:EN-US;}
@page WordSection1
    {size:612.0pt 792.0pt;
    margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
    {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-IE" link="blue" vlink="purple">
<div class="WordSection1">
<p class="MsoNormal">FYI<o:p></o:p></p>
<p class="MsoNormal"><o:p>&nbsp;</o:p></p>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" width="224" style="width:168.0pt;margin-left:-.75pt;border-collapse:collapse">
<tbody>
<tr style="height:15.0pt">
<td width="77" nowrap="" valign="bottom" style="width:58.0pt;background:gray;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><b><span style="color:white;mso-fareast-language:EN-IE">Number<o:p></o:p></span></b></p>
</td>
<td width="63" nowrap="" valign="bottom" style="width:47.0pt;background:gray;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><b><span style="color:white;mso-fareast-language:EN-IE">Currency<o:p></o:p></span></b></p>
</td>
<td width="84" nowrap="" valign="bottom" style="width:63.0pt;background:gray;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><b><span style="color:white;mso-fareast-language:EN-IE">Balance Due<o:p></o:p></span></b></p>
</td>
</tr>
<tr style="height:15.0pt">
<td width="77" nowrap="" valign="bottom" style="width:58.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:black;mso-fareast-language:EN-IE">3140552178<o:p></o:p></span></p>
</td>
<td width="63" nowrap="" valign="bottom" style="width:47.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><span style="color:black;mso-fareast-language:EN-IE">EUR<o:p></o:p></span></p>
</td>
<td width="84" nowrap="" valign="bottom" style="width:63.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:red;mso-fareast-language:EN-IE">-1,786.38
</span><span style="color:black;mso-fareast-language:EN-IE"><o:p></o:p></span></p>
</td>
</tr>
<tr style="height:15.0pt">
<td width="77" nowrap="" valign="bottom" style="width:58.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:black;mso-fareast-language:EN-IE">3700081975<o:p></o:p></span></p>
</td>
<td width="63" nowrap="" valign="bottom" style="width:47.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><span style="color:black;mso-fareast-language:EN-IE">EUR<o:p></o:p></span></p>
</td>
<td width="84" nowrap="" valign="bottom" style="width:63.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:red;mso-fareast-language:EN-IE">-1,005.00
</span><span style="color:black;mso-fareast-language:EN-IE"><o:p></o:p></span></p>
</td>
</tr>
<tr style="height:15.0pt">
<td width="77" nowrap="" valign="bottom" style="width:58.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:black;mso-fareast-language:EN-IE">3700081976<o:p></o:p></span></p>
</td>
<td width="63" nowrap="" valign="bottom" style="width:47.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal"><span style="color:black;mso-fareast-language:EN-IE">EUR<o:p></o:p></span></p>
</td>
<td width="84" nowrap="" valign="bottom" style="width:63.0pt;padding:0cm 5.4pt 0cm 5.4pt;height:15.0pt">
<p class="MsoNormal" align="right" style="text-align:right"><span style="color:red;mso-fareast-language:EN-IE">-1,005.00
</span><span style="color:black;mso-fareast-language:EN-IE"><o:p></o:p></span></p>
</td>
</tr>
</div>
</body>
</html>


Output:

CurrencBalance
Number y Due
31405521
78 EUR -1,786.38
37000819
75 EUR -1,005.00
37000819
76 EUR -1,005.00
31405121
97 EUR 131.05
31405121
98 EUR 702.88

I need the text of every td cell to stay on a single line, without any line break inside a cell.

回答1:

Your HTML looks as if it was created by Word. It contains plenty of unnecessary stuff. Some of the unnecessary stuff is also unwanted in the context of XML Worker, so I had to trim away the things that prevent iText and XML Worker from rendering the HTML to PDF.

Take a look at table11.html to see what I did with the HTML. Then run ParseHtmlTable11 to convert the HTML to PDF. The result will be html_table_11.pdf:

In this screen shot, you see the original HTML rendered in a browser to the left and the PDF as created by iText / XML Worker from this HTML to the right. I've also opened the Fonts tab of the Document properties to show that Calibri and Calibri-Bold are used as fonts.



标签: java css itext