I've already gone through every iText topic related to Arabic characters on Stack Overflow, but didn't find an answer for this one.
I need to convert an HTML file into a PDF, but this HTML contains both English and Arabic characters.
When I display the HTML in Notepad++ or in any browser there is no problem and I can see the Arabic characters properly, but when I use the following program to convert it into a PDF, I can't figure out a way to display the Arabic characters; I only get "?" instead:
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import org.apache.commons.io.IOUtils;
import com.itextpdf.text.Document;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
public class Test2 {
    /**
     * Converts {@code C:\Test\test_arabic.html} into {@code C:\Test\Test.pdf}
     * using XMLWorker, with fonts taken from {@code C:\Windows\Fonts}.
     *
     * <p>The HTML is read, re-encoded and parsed with one and the same charset
     * (UTF-8). Encoding the intermediate string with the platform default
     * charset instead would replace every character the default charset cannot
     * represent (e.g. Arabic on a Cp1252 system) before the parser sees it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Charset utf8 = Charset.forName("UTF-8");
        // try-with-resources closes both file streams even when the conversion fails.
        try (FileInputStream in = new FileInputStream(new File(
                "C:\\Test\\test_arabic.html"));
                OutputStream file = new FileOutputStream(new File("C:\\Test\\Test.pdf"))) {
            String k = IOUtils.toString(in, utf8);
            Document document = new Document();
            PdfWriter writer = PdfWriter.getInstance(document, file);
            // Must match the charset passed to parseXHtml below.
            InputStream htmlIn = new ByteArrayInputStream(k.getBytes(utf8));
            document.open();
            XMLWorkerHelper helper = XMLWorkerHelper.getInstance();
            FontFactory.getFontImp().registerDirectory("C:\\Windows\\Fonts");
            FontFactory.getFontImp().defaultEncoding = BaseFont.IDENTITY_H;
            // NOTE(review): the fourth argument is the CSS stream; `in` has already
            // been read to its end above, so it contributes no CSS rules here.
            // Confirm whether a real stylesheet stream was intended.
            helper.parseXHtml(writer, document, htmlIn, in, utf8,
                    FontFactory.getFontImp());
            document.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Here is my sample html file :
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
<meta name="language" content="ar-SA" />
<title>My arabic html</title>
</head>
<body>
<font size="1">
<table width="700" style='font-family:Verdana; font-size:20px; color:blue'>
<tr>
<td align="left">ADVICE</td>
<td dir="rtl" lang="ar-SA"><p align='right' style='font-family:Traditional Arabic;'> إشعار </p></td>
</tr>
</table>
<table width="700" style='font-size:16px; color:white; background-color:gray'>
<tr>
<td align="left">Foreign Exchange</td>
<td dir="rtl" lang="ar-SA"><p align='right' style='font-family:Traditional Arabic;'> تبادل العملات الأجنبية </p></td>
</tr>
</table>
</font>
</body>
</html>
Does anyone know how to do that ?
I also tried converting my HTML into a byte array using a W3C document and iTextRender, but without success.
Edit : I now use the code provided by Vahidn (thanks a lot again)
A small follow-up, because I'm still struggling with the alignment now.
It seems that the align="left" does not work with arabic and runDirection RTL.
Here is my sample html :
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<meta name="language" content="ar-SA" />
<title>Confirmation Notice</title>
</head>
<body>
<font size="1">
<table width="700" style="font-family:Verdana; font-size:20px; color:white; background-color:blue">
<tr>
<td width="350" align="right">ADVICE</td>
<td width="350" align="left" dir="rtl" lang="ar-SA">
<p style="font-family:traditional arabic;">
<b>إشعار</b>
</p>
</td>
</tr>
<tr>
<td width="350" align="right">Islamic Return Account</td>
<td width="350" dir="rtl" lang="ar-SA" align="left">
<p style="font-family:traditional arabic;">
<b>حساب العائد الإسلامي</b>
</p>
</td>
</tr>
</table>
</font>
</body>
</html>
But the Arabic column never aligns to the left. align="center" works, though...
Any idea ?
Thanks a lot
Thanks for your help
I solved this issue using iTextSharp (C# version). Here you can find it: http://www.dotnettips.info/file/userfile?name=XMLWorkerRTLsample.cs
The attached sample needs a small modification as well:
/// <summary>
/// Collects the elements produced for one parsed HTML node into the shared
/// paragraph. The children of a <c>PdfDiv</c> are added individually instead of
/// the div itself; every element is passed through
/// <c>fixNestedTablesRunDirection</c> before it is added.
/// </summary>
/// <param name="htmlElement">Writable to collect; ignored unless it is a <c>WritableElement</c>.</param>
public void Add(IWritable htmlElement)
{
    var writable = htmlElement as WritableElement;
    if (writable == null)
    {
        return;
    }

    foreach (var item in writable.Elements())
    {
        var container = item as PdfDiv;
        if (container == null)
        {
            // Plain element: add it as-is.
            fixNestedTablesRunDirection(item);
            _paragraph.Add(item);
            continue;
        }

        // Div: unwrap and add each child separately.
        foreach (var child in container.Content)
        {
            fixNestedTablesRunDirection(child);
            _paragraph.Add(child);
        }
    }
}
Sorry for the delay Samy, here is my code (the htmlString is my example above) :
// Builds a PDF in memory from htmlString: the HTML is parsed with XMLWorker,
// the resulting elements are gathered by ElementsCollector, and the gathered
// paragraph is placed in a one-column table that is added to the document.
Charset CHARSET_UTF8 = Charset.forName("UTF-8");
ByteArrayOutputStream baos = null;
try {
// The PDF bytes are written to this in-memory buffer.
baos = new ByteArrayOutputStream();
Document pdfDoc = new Document();
PdfWriter writer = PdfWriter.getInstance(pdfDoc, baos);
writer.setRgbTransparencyBlending(true);
pdfDoc.open();
// Pipeline: CSS resolver -> HTML pipeline -> element handler (elementsHandler).
StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver();
ElementsCollector elementsHandler = new ElementsCollector();
// Fonts for the CSS appliers come from UnicodeFontProvider.
HtmlPipelineContext htmlContext = new HtmlPipelineContext(new CssAppliersImpl(
new UnicodeFontProvider()));
htmlContext.charSet(CHARSET_UTF8);
htmlContext.setAcceptUnknown(true).autoBookmark(true)
.setTagFactory(Tags.getHtmlTagProcessorFactory());
// The ElementHandlerPipeline is given the collector and no next pipeline (null).
CssResolverPipeline pipeline = new CssResolverPipeline(cssResolver, new HtmlPipeline(htmlContext,
new ElementHandlerPipeline(elementsHandler, null)));
XMLWorker worker = new XMLWorker(pipeline, true);
XMLParser parser = new XMLParser();
parser.addListener(worker);
// Parsing feeds the collector; nothing is written to pdfDoc at this point.
parser.parse(new StringReader(htmlString));
// Wrap the collected paragraph in a borderless, full-width, single-cell table.
PdfPTable mainTable = new PdfPTable(1);
mainTable.setWidthPercentage(100);
PdfPCell cell = new PdfPCell();
cell.setBorder(0);
// NOTE(review): content is added with addElement below; confirm that the
// cell-level horizontal alignment still has an effect in that mode.
cell.setHorizontalAlignment(Element.ALIGN_LEFT);
cell.addElement(elementsHandler.getParagraph());
mainTable.addCell(cell);
pdfDoc.add(mainTable);
pdfDoc.close();
// (The snippet ends here; the catch/finally that closes the try block is not shown.)
ElementsCollector :
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPRow;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.ElementHandler;
import com.itextpdf.tool.xml.Writable;
import com.itextpdf.tool.xml.html.pdfelement.NoNewLineParagraph;
import com.itextpdf.tool.xml.pipeline.WritableElement;
/**
 * {@link ElementHandler} that gathers every element emitted by the XMLWorker
 * pipeline into a single left-aligned {@link Paragraph}, switching table cells
 * that contain text in an "arabic" font family to right-to-left run direction.
 */
public class ElementsCollector implements ElementHandler {
    /** Family-name fragment (lower case) that marks a font as Arabic. */
    private static final String ARABIC_FAMILY_MARKER = "arabic";

    private final Paragraph _paragraph;

    /** Creates a collector backed by an empty, left-aligned paragraph. */
    public ElementsCollector() {
        _paragraph = new Paragraph();
        _paragraph.setAlignment(Element.ALIGN_LEFT);
    }

    /**
     * @return the paragraph holding everything collected so far
     */
    public Paragraph getParagraph() {
        return _paragraph;
    }

    /**
     * Adds the elements of {@code htmlElement} to the collected paragraph.
     * The children of a {@link NoNewLineParagraph} are added one by one rather
     * than the wrapper itself.
     *
     * @param htmlElement writable produced by the pipeline; {@code null} and
     *        writables that are not a {@link WritableElement} are ignored
     */
    @Override
    public void add(Writable htmlElement) {
        // instanceof (not a blind cast) so other Writable kinds are skipped
        // instead of raising ClassCastException.
        if (!(htmlElement instanceof WritableElement)) {
            return;
        }
        WritableElement writableElement = (WritableElement) htmlElement;
        for (Element element : writableElement.elements()) {
            if (element instanceof NoNewLineParagraph) {
                for (Element child : (NoNewLineParagraph) element) {
                    fixNestedTablesRunDirection(child);
                    _paragraph.add(child);
                }
            } else {
                fixNestedTablesRunDirection(element);
                _paragraph.add(element);
            }
        }
    }

    /**
     * If {@code element} is a table, sets right-to-left run direction on every
     * cell that contains an element rendered in an Arabic font, and turns a
     * right-aligned paragraph in such a cell into a left-aligned one.
     *
     * @param element element to inspect; anything that is not a
     *        {@link PdfPTable} (including {@code null}) is left untouched
     */
    private void fixNestedTablesRunDirection(Element element) {
        if (!(element instanceof PdfPTable)) {
            return;
        }
        PdfPTable table = (PdfPTable) element;
        for (PdfPRow row : table.getRows()) {
            if (row == null) {
                continue;
            }
            for (PdfPCell cell : row.getCells()) {
                // The cell array can contain null slots (e.g. positions covered
                // by a spanning cell), so guard before dereferencing.
                if (cell == null || cell.getCompositeElements() == null) {
                    continue;
                }
                for (Element item : cell.getCompositeElements()) {
                    if (!usesArabicFont(item)) {
                        continue;
                    }
                    cell.setRunDirection(PdfWriter.RUN_DIRECTION_RTL);
                    // NOTE(review): presumably compensates for alignment being
                    // mirrored under RTL run direction -- confirm against iText.
                    if (item instanceof Paragraph
                            && ((Paragraph) item).getAlignment() == Element.ALIGN_RIGHT) {
                        ((Paragraph) item).setAlignment(Element.ALIGN_LEFT);
                    }
                }
            }
        }
    }

    /**
     * Tells whether any chunk of {@code item} uses a font whose family name
     * contains "arabic" (case-insensitive, locale-independent).
     */
    private static boolean usesArabicFont(Element item) {
        if (item == null) {
            return false;
        }
        List<Chunk> chunks = item.getChunks();
        if (chunks == null) {
            return false;
        }
        for (Chunk chunk : chunks) {
            Font font = chunk.getFont();
            if (font == null) {
                continue;
            }
            String family = font.getFamilyname();
            // Locale.ROOT: the default-locale toLowerCase() would map 'I' to a
            // dotless i under a Turkish locale and miss "ARABIC".
            if (family != null
                    && family.toLowerCase(Locale.ROOT).contains(ARABIC_FAMILY_MARKER)) {
                return true;
            }
        }
        return false;
    }
}
and UnicodeFontProvider :
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.Paths;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.FontFactoryImp;
import com.itextpdf.text.pdf.BaseFont;
/**
 * Font provider that resolves every named font with {@link BaseFont#IDENTITY_H}
 * encoding and embedding, so that non-Latin text (e.g. Arabic) can be rendered.
 */
public class UnicodeFontProvider extends FontFactoryImp {
    /**
     * Registers the system fonts with the shared {@link FontFactory}.
     * On Windows the {@code %SystemRoot%\fonts} directory is registered; when
     * {@code SystemRoot} is not set (non-Windows systems) iText's own list of
     * likely font directories is used instead of failing with a
     * {@link NullPointerException}.
     */
    public UnicodeFontProvider() {
        String root = System.getenv("SystemRoot");
        if (root != null && !root.isEmpty()) {
            Path path = Paths.get(root, "fonts");
            FontFactory.getFontImp().registerDirectory(path.toString());
        } else {
            // TODO test on non-Windows systems; only verified on Windows so far
            FontFactory.registerDirectories();
        }
    }

    /**
     * Returns the requested font in Unicode (Identity-H) encoding, embedded.
     * Overrides the {@link FontFactoryImp} lookup; the {@code encoding},
     * {@code embedded} and {@code cached} arguments are ignored.
     *
     * @param fontname family name to look up; when {@code null} or blank an
     *        {@link Font.FontFamily#UNDEFINED} font is returned instead
     * @param encoding ignored, Identity-H is always used
     * @param embedded ignored, the font is always embedded
     * @param size font size
     * @param style font style flags
     * @param color font color
     * @param cached ignored
     * @return the resolved font, never looked up with a missing name
     */
    public Font getFont(String fontname, String encoding, boolean embedded, float size, int style,
            BaseColor color, boolean cached) {
        // Only fall back to an undefined font when NO usable name was supplied;
        // a real name must go through the Identity-H lookup below.
        if (fontname == null || fontname.trim().isEmpty()) {
            return new Font(Font.FontFamily.UNDEFINED, size, style, color);
        }
        return FontFactory.getFont(fontname, BaseFont.IDENTITY_H, BaseFont.EMBEDDED, size, style, color);
    }
}