I'm trying to extract text with coordinates from a PDF file using PDFBox.
I combined some methods and information found on the internet (including Stack Overflow), but the problem is that the coordinates don't seem to be right. When I try to use the coordinates to draw a rectangle on top of the text, for example, the rectangle is painted somewhere else.
This is my code (please don't judge the style; it was written very quickly just to test):
import java.util.List;
import org.apache.pdfbox.text.TextPosition;
* @author samue
public class TextLine {
public List<TextPosition> textPositions = null;
public String text = "";
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
* @author samue
public class myStripper extends PDFTextStripper {
public myStripper() throws IOException
protected void startPage(PDPage page) throws IOException
startOfLine = true;
protected void writeLineSeparator() throws IOException
startOfLine = true;
public String getText(PDDocument doc) throws IOException
lines = new ArrayList<TextLine>();
return super.getText(doc);
protected void writeWordSeparator() throws IOException
TextLine tmpline = null;
tmpline = lines.get(lines.size() - 1);
tmpline.text += getWordSeparator();
protected void writeString(String text, List<TextPosition> textPositions) throws IOException
TextLine tmpline = null;
if (startOfLine) {
tmpline = new TextLine();
tmpline.text = text;
tmpline.textPositions = textPositions;
} else {
tmpline = lines.get(lines.size() - 1);
tmpline.text += text;
if (startOfLine)
startOfLine = false;
super.writeString(text, textPositions);
boolean startOfLine = true;
public ArrayList<TextLine> lines = null;
Click event handler of the AWT button:
private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {
// TODO add your handling code here:
try {
File file = new File("C:\\Users\\samue\\Desktop\\mwb_I_201711.pdf");
PDDocument doc = PDDocument.load(file);
myStripper stripper = new myStripper();
stripper.setStartPage(1); // fix it to first page just to test it
TextLine line = stripper.lines.get(1); // the line i want to paint on
float minx = -1;
float maxx = -1;
for (TextPosition pos: line.textPositions)
if (pos == null)
if (minx == -1 || pos.getTextMatrix().getTranslateX() < minx) {
minx = pos.getTextMatrix().getTranslateX();
if (maxx == -1 || pos.getTextMatrix().getTranslateX() > maxx) {
maxx = pos.getTextMatrix().getTranslateX();
TextPosition firstPosition = line.textPositions.get(0);
TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);
float x = minx;
float y = firstPosition.getTextMatrix().getTranslateY();
float w = (maxx - minx) + lastPosition.getWidth();
float h = lastPosition.getHeightDir();
PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0), PDPageContentStream.AppendMode.APPEND, false);
contentStream.addRect(x, y, w, h);
File fileout = new File("C:\\Users\\samue\\Desktop\\pdfbox.pdf");
} catch (Exception ex) {
Any suggestions? What am I doing wrong?