This question already has answers here:
Closed 4 years ago.
I'm trying to load a matplotlib object into reportlab.
Here is my code:
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image
from matplotlib import pyplot as plt
def __get_img_data():
    """Return the current pyplot figure rendered as PNG bytes.

    The figure is rendered straight into an in-memory buffer, so no
    temporary file is created on disk and nothing has to be deleted
    afterwards.

    Returns:
        The binary PNG data of the plot.
    """
    import io  # local import: the surrounding snippet does not import io

    buf = io.BytesIO()
    # A buffer has no file extension to infer the format from, so the format
    # is given explicitly.
    plt.savefig(buf, format='png')
    return buf.getvalue()
def get_plot():
    """Plot the data and return the rendered image data.

    Returns:
        Whatever ``__get_img_data()`` returns for the plot.
    """
    try:
        # HERE I PLOT SOME STUFF
        return __get_img_data()
    finally:
        # Close the figure even when plotting or rendering raises, so that
        # open figures do not accumulate.
        plt.close()
class NumberedCanvas(canvas.Canvas):
    """Canvas subclass that is handed to ``doc.build`` as ``canvasmaker``."""

    def __init__(self, *args, **kwargs):
        """Forward every constructor argument to ``canvas.Canvas``.

        A canvas maker is instantiated by the document template with the
        output file and page settings, so the constructor has to accept
        those arguments and initialise the base class with them.
        """
        # Explicit base-class call rather than super(): it behaves the same
        # on Python 2 and Python 3.
        canvas.Canvas.__init__(self, *args, **kwargs)
class ReportTemplate:
    """Report builder; ``get_data`` embeds the plot image in the story."""

    def __init__(self):
        pass

    def _header_footer(self, canvas, doc):
        """Page callback passed to ``doc.build`` for first and later pages."""
        pass

    def get_data(self):
        """Assemble the story, embed the plot image and build the document.

        The image bytes from ``get_plot()`` are wrapped in an in-memory
        buffer and given to ``Image`` directly, so nothing is written to a
        temporary file and no file can be deleted before the document is
        actually built.
        """
        import io  # local import: the surrounding snippet does not import io

        elements = []
        elements.append('hello')
        ## HERE I WANT TO ADD THE IMAGE
        # Image accepts a file-like object in place of a filename.
        imgdata = io.BytesIO(get_plot())
        # NOTE(review): usable_width, doc and obj are not defined in the
        # visible code; presumably they come from the elided parts.
        im = Image(imgdata, width=usable_width, height=usable_width)
        elements.append(im)
        ######
        doc.build(elements,
                  onFirstPage=self._header_footer,
                  onLaterPages=self._header_footer,
                  canvasmaker=NumberedCanvas)
        # blah blah
        return obj
My goal is to insert the plot image into the report.
This works fine but I do not want to write to a temporary file.
I tried installing PIL because I've read that some people do this with PIL's image library, but as soon as I install PIL, another part of my code breaks due to incompatible Pillow versions.
pdfrw documentation sucks
The sole reason the pdfrw example discussed in the first answer to this question is a bit clunky is because the pdfrw documentation sucks badly. Due to the sucky doc, that example's author @Larry-Meyn used the vectorpdf extension for rst2pdf as a starting point, and that extension is not really documented either, and has to deal with the quirks of rst2pdf as well as pdfrw (and is more general than you need, in that it can let rst2pdf display an arbitrary rectangle from an arbitrary page of a preexisting PDF). It's amazing that Larry managed to make it work at all, and my hat's off to him.
I am perfectly qualified to say this, because I am the author of pdfrw and made a few contributions to rst2pdf, including that vectorpdf extension.
But you probably want to use pdfrw anyway
I wasn't really paying attention to stackoverflow until a month ago, and pdfrw itself languished for a few years, but I'm here now, and I think it would behoove you to take another look at pdfrw, even though the documentation still sucks.
Why? Because if you output to a png file, your image will be rasterized, and if you use pdfrw, it will remain in vector format, which means that it will look nice at any scale.
So I modified your answer's png example
Your png example wasn't quite a complete program -- the parameters to doc.build weren't defined, styles wasn't defined, it was missing a few imports, etc. But it was close enough to glean the intent and get it working.
Edit -- I just noticed that this example was actually a modified version of Larry's example, so that example is still very valuable because it's a bit more full-featured than this in some ways.
After I fixed those issues and got some output, I added an option to be able to use png or pdf, so you can see the difference. The program below will create two different PDF files, and you can compare the results for yourself.
import cStringIO
from matplotlib import pyplot as plt
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image, Flowable
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet
from pdfrw import PdfReader, PdfDict
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
# Module-level sample stylesheet and the 'Normal' style looked up from it.
styles = getSampleStyleSheet()
style = styles['Normal']
def form_xo_reader(imgdata):
    """Read a one-page PDF and return ``pagexobj`` applied to that page.

    ``imgdata`` is passed straight to ``PdfReader``. The single-element
    unpacking raises ``ValueError`` unless the document has exactly one page.
    """
    [only_page] = PdfReader(imgdata).pages
    return pagexobj(only_page)
class PdfImage(Flowable):
    """Flowable that draws either a pdfrw form XObject or a raster image.

    ``img_data`` is either a ``PdfDict`` (drawn with ``doForm``) or any
    object that ``canv.drawImage`` accepts.
    """

    def __init__(self, img_data, width=200, height=200):
        self.img_width = width
        self.img_height = height
        self.img_data = img_data

    def wrap(self, width, height):
        """Return the size given at construction; the arguments are ignored."""
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """Draw the image on ``canv`` at (x, y), honouring ``hAlign``.

        When ``_sW`` is positive and the instance has an ``hAlign``
        attribute, x is shifted by half of ``_sW`` (centre) or all of it
        (right).

        Raises:
            ValueError: if ``hAlign`` is not a recognised alignment value.
        """
        # These constants were referenced without being imported, which
        # raised NameError as soon as the alignment branch was entered.
        from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT

        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5 * _sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        img = self.img_data
        if isinstance(img, PdfDict):
            # float() keeps the scale factors from being truncated by
            # Python 2 integer division when both operands are integers.
            xscale = float(self.img_width) / float(img.BBox[2])
            yscale = float(self.img_height) / float(img.BBox[3])
            canv.translate(x, y)
            canv.scale(xscale, yscale)
            canv.doForm(makerl(canv, img))
        else:
            canv.drawImage(img, x, y, self.img_width, self.img_height)
        canv.restoreState()
def make_report(outfn, use_pdfrw):
    """Write a demo PDF to ``outfn`` with a plot between two paragraph runs.

    Args:
        outfn: Output file name passed to ``SimpleDocTemplate``.
        use_pdfrw: When true, the plot is saved as PDF and read with
            ``form_xo_reader``; otherwise it is saved as PNG and read with
            ``ImageReader``.
    """
    # Local import: io.BytesIO is the binary in-memory buffer available on
    # both Python 2 and Python 3, unlike the Python-2-only cStringIO.
    import io

    fig = plt.figure(figsize=(4, 3))
    try:
        plt.plot([1, 2, 3, 4], [1, 4, 9, 26])
        plt.ylabel('some numbers')
        imgdata = io.BytesIO()
        fig.savefig(imgdata, format='pdf' if use_pdfrw else 'png')
    finally:
        # The rendered bytes are all that is needed from here on; closing
        # the figure keeps repeated calls from accumulating open figures.
        plt.close(fig)
    imgdata.seek(0)
    reader = form_xo_reader if use_pdfrw else ImageReader
    image = reader(imgdata)

    doc = SimpleDocTemplate(outfn)
    normal = styles["Normal"]
    img = PdfImage(image, width=200, height=200)

    def numbered_paragraphs():
        # Builds fresh flowables on every call; the same run of text is
        # placed both before and after the image.
        flowables = []
        for i in range(10):
            bogustext = ("Paragraph number %s. " % i)
            flowables.append(Paragraph(bogustext, normal))
            flowables.append(Spacer(1, 0.2 * inch))
        return flowables

    story = [Spacer(0, inch)]
    story.extend(numbered_paragraphs())
    story.append(img)
    story.extend(numbered_paragraphs())
    doc.build(story)
# Produce both variants so the raster and vector results can be compared.
make_report(outfn="hello_png.pdf", use_pdfrw=False)
make_report(outfn="hello_pdf.pdf", use_pdfrw=True)
What are the downsides to this approach?
The first obvious downside is that there is now a requirement for pdfrw, but that's available from PyPI.
The next downside is that if you are putting a lot of matplotlib plots into a document, I think this technique will replicate resources such as fonts, because I don't believe that reportlab is smart enough to notice the duplicates.
I believe this problem can be solved by outputting all your plots to different pages of a single PDF. I haven't actually tried that with matplotlib, but pdfrw is perfectly capable of converting each page of an existing pdf to a separate flowable.
So if you have a lot of plots and it's making your final PDF too big, you could look into that, or just try one of the PDF optimizers out there and see if it helps. In any case, that's a different problem for a different day.
I found 2 solutions:
1: using a package called pdfrw:
Is there a matplotlib flowable for ReportLab?
2: a simpler cleaner way:
class PdfImage(Flowable):
    """Flowable that draws ``img_data`` at a fixed width and height."""

    def __init__(self, img_data, width=200, height=200):
        self.img_width = width
        self.img_height = height
        self.img_data = img_data

    def wrap(self, width, height):
        """Return the size given at construction; the arguments are ignored."""
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """Draw the image on ``canv`` at (x, y), honouring ``hAlign``.

        When ``_sW`` is positive and the instance has an ``hAlign``
        attribute, x is shifted by half of ``_sW`` (centre) or all of it
        (right).

        Raises:
            ValueError: if ``hAlign`` is not a recognised alignment value.
        """
        # These constants were referenced without being imported, which
        # raised NameError as soon as the alignment branch was entered.
        from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT

        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5 * _sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        canv.drawImage(self.img_data, x, y, self.img_width, self.img_height)
        canv.restoreState()
def make_report():
    """Write ``hello.pdf`` with a PNG plot between two paragraph runs."""
    # Local import: io.BytesIO is the binary in-memory buffer available on
    # both Python 2 and Python 3, unlike the Python-2-only cStringIO.
    import io

    fig = plt.figure(figsize=(4, 3))
    try:
        plt.plot([1, 2, 3, 4], [1, 4, 9, 26])
        plt.ylabel('some numbers')
        imgdata = io.BytesIO()
        fig.savefig(imgdata, format='png')
    finally:
        # The rendered bytes are all that is needed from here on; closing
        # the figure keeps repeated calls from accumulating open figures.
        plt.close(fig)
    imgdata.seek(0)
    image = ImageReader(imgdata)

    doc = SimpleDocTemplate("hello.pdf")
    style = styles["Normal"]
    img = PdfImage(image, width=200, height=200)

    def numbered_paragraphs():
        # Builds fresh flowables on every call; the same run of text is
        # placed both before and after the image.
        flowables = []
        for i in range(10):
            bogustext = ("Paragraph number %s. " % i)
            flowables.append(Paragraph(bogustext, style))
            flowables.append(Spacer(1, 0.2 * inch))
        return flowables

    story = [Spacer(0, inch)]
    story.extend(numbered_paragraphs())
    story.append(img)
    story.extend(numbered_paragraphs())
    # NOTE(review): myFirstPage, myLaterPages and PageNumCanvas are not
    # defined in the visible code; presumably they come from elided parts.
    doc.build(story, onFirstPage=myFirstPage, onLaterPages=myLaterPages,
              canvasmaker=PageNumCanvas)