Fix PyPDF2 3.x page-copying issue to prevent 327-byte empty PDFs

Added explicit page copying after each cloneReaderDocumentRoot() call, because
in PyPDF2 3.x that call copies only the document structure, not the content pages.

🤖 assisted by claude
This commit is contained in:
Ernad Husremovic 2025-09-02 19:31:11 +02:00
parent c3d53c6a4d
commit 3f19943cec
3 changed files with 510 additions and 11 deletions

View file

@ -237,8 +237,8 @@ class OdooPdfFileReader(PdfFileReader):
if not file_path:
return []
for i in range(0, len(file_path), 2):
attachment = file_path[i+1].getObject()
yield (attachment["/F"], attachment["/EF"]["/F"].getObject().getData())
attachment = file_path[i+1].get_object()
yield (attachment["/F"], attachment["/EF"]["/F"].get_object().get_data())
except Exception:
# malformed pdf (i.e. invalid xref page)
return []
@ -281,10 +281,10 @@ class OdooPdfFileWriter(PdfFileWriter):
})
if self._root_object.get('/Names') and self._root_object['/Names'].get('/EmbeddedFiles'):
names_array = self._root_object["/Names"]["/EmbeddedFiles"]["/Names"]
names_array.extend([attachment.getObject()['/F'], attachment])
names_array.extend([attachment.get_object()['/F'], attachment])
else:
names_array = ArrayObject()
names_array.extend([attachment.getObject()['/F'], attachment])
names_array.extend([attachment.get_object()['/F'], attachment])
embedded_files_names_dictionary = DictionaryObject()
embedded_files_names_dictionary.update({
@ -359,7 +359,7 @@ class OdooPdfFileWriter(PdfFileWriter):
icc_profile_file_data = compress(icc_profile.read())
icc_profile_stream_obj = DecodedStreamObject()
icc_profile_stream_obj.setData(icc_profile_file_data)
icc_profile_stream_obj.set_data(icc_profile_file_data)
icc_profile_stream_obj.update({
NameObject("/Filter"): NameObject("/FlateDecode"),
NameObject("/N"): NumberObject(3),
@ -389,9 +389,9 @@ class OdooPdfFileWriter(PdfFileWriter):
fonts = {}
# First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
for page in pages:
for font in page.getObject()['/Resources']['/Font'].values():
for descendant in font.getObject()['/DescendantFonts']:
fonts[descendant.idnum] = descendant.getObject()
for font in page.get_object()['/Resources']['/Font'].values():
for descendant in font.get_object()['/DescendantFonts']:
fonts[descendant.idnum] = descendant.get_object()
# Then for each font, rewrite the width array with the information taken directly from the font file.
# The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
@ -412,7 +412,7 @@ class OdooPdfFileWriter(PdfFileWriter):
else:
_logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')
outlines = self._root_object['/Outlines'].getObject()
outlines = self._root_object['/Outlines'].get_object()
outlines[NameObject('/Count')] = NumberObject(1)
# Set odoo as producer
@ -434,7 +434,7 @@ class OdooPdfFileWriter(PdfFileWriter):
footer = b'<?xpacket end="w"?>'
metadata = b'%s%s%s' % (header, metadata_content, footer)
file_entry = DecodedStreamObject()
file_entry.setData(metadata)
file_entry.set_data(metadata)
file_entry.update({
NameObject("/Type"): NameObject("/Metadata"),
NameObject("/Subtype"): NameObject("/XML"),
@ -455,7 +455,7 @@ class OdooPdfFileWriter(PdfFileWriter):
:return:
'''
file_entry = DecodedStreamObject()
file_entry.setData(attachment['content'])
file_entry.set_data(attachment['content'])
file_entry.update({
NameObject("/Type"): NameObject("/EmbeddedFile"),
NameObject("/Params"):