mirror of
https://github.com/bringout/oca-edi.git
synced 2026-04-18 17:52:06 +02:00
Initial commit: OCA Edi packages (42 packages)
This commit is contained in:
commit
df976c03db
2184 changed files with 571602 additions and 0 deletions
48
odoo-bringout-oca-edi-pdf_helper/pdf_helper/utils.py
Normal file
48
odoo-bringout-oca-edi-pdf_helper/pdf_helper/utils.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Copyright 2015-2021 Akretion France
|
||||
# @author: Alexis de Lattre <alexis.delattre@akretion.com>
|
||||
# Copyright 2022 Camptocamp SA
|
||||
# @author: Simone Orsi <simahawk@gmail.com>
|
||||
# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).
|
||||
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from struct import error as StructError
|
||||
|
||||
from lxml import etree
|
||||
|
||||
try:
|
||||
from PyPDF2.errors import PdfReadError
|
||||
except ImportError:
|
||||
from PyPDF2.utils import PdfReadError
|
||||
|
||||
from odoo.tools.pdf import OdooPdfFileReader
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFParser:
    """Extract the XML attachments embedded in a PDF document.

    The constructor only stores the PDF content; the actual parsing is done
    each time :meth:`get_xml_files` is called.
    """

    def __init__(self, pdf_file):
        # Kept as given; it is wrapped in a ``BytesIO`` when parsed, so it is
        # expected to be a bytes-like object.
        self.pdf_file = pdf_file

    def get_xml_files(self):
        """Return the XML attachments found in the stored PDF content.

        Every attachment reported by the PDF reader is handed to
        ``etree.fromstring``. Attachments that fail to parse are skipped
        (a debug message is logged) instead of aborting the extraction.

        :returns: a dict like {$filename: $parsed_xml_file_obj}.
        """
        parsed = {}
        with BytesIO(self.pdf_file) as stream:
            reader = OdooPdfFileReader(stream, strict=False)

            # Walk through the files embedded in the PDF.
            # NOTE(review): the attachment payload is parsed with lxml's
            # default parser settings; if the PDFs can come from untrusted
            # parties, confirm those defaults are acceptable.
            for name, payload in reader.getAttachments():
                try:
                    root = etree.fromstring(payload)
                    parsed[name] = root
                except Exception:
                    # Best effort: anything that is not parseable XML is
                    # ignored on purpose.
                    _logger.debug("Non XML file found in PDF")
            if parsed:
                _logger.debug("Valid XML files found in PDF: %s", list(parsed))
        return parsed

    def get_xml_files_swallable_exceptions(self):
        """Return a tuple of exception classes.

        The method name suggests these are the errors a caller of
        :meth:`get_xml_files` may choose to swallow; this class does not use
        the tuple itself.

        :returns: ``(NotImplementedError, StructError, PdfReadError)``
        """
        swallowable = (NotImplementedError, StructError, PdfReadError)
        return swallowable
|
||||
Loading…
Add table
Add a link
Reference in a new issue