# Copyright 2017-2013 Camptocamp SA
# Copyright 2023 ACSONE SA/NV
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
from __future__ import annotations

import io
import logging
import mimetypes
import os
import re
import time
from contextlib import closing, contextmanager

import fsspec  # pylint: disable=missing-manifest-dependency
import psycopg2
from slugify import slugify  # pylint: disable=missing-manifest-dependency

import odoo
from odoo import _, api, fields, models
from odoo.exceptions import AccessError, UserError
from odoo.osv.expression import AND, OR, normalize_domain

from .strtobool import strtobool

_logger = logging.getLogger(__name__)


# Characters matching this pattern are replaced by slugify when building the
# filename stored in the filesystem storage.
REGEX_SLUGIFY = r"[^-a-z0-9_]+"

# Parses filenames built by ``IrAttachment._build_fs_filename``:
# <name>-<id>-<version><extension> (the extension includes its leading dot).
FS_FILENAME_RE_PARSER = re.compile(
    r"^(?P<name>.+)-(?P<id>\d+)-(?P<version>\d+)(?P<extension>\..+)$"
)


def is_true(strval):
    """Return the boolean meaning of ``strval``; a falsy value counts as "0"."""
    return bool(strtobool(strval or "0"))


def clean_fs(files):
    """Remove the given files from the local filesystem, best effort.

    :param files: iterable of full paths to delete. Paths that do not exist
        are skipped; a failure to unlink is logged and never raised.
    """
    _logger.info("cleaning old files from filestore")
    for full_path in files:
        if not os.path.exists(full_path):
            continue
        try:
            os.unlink(full_path)
        except OSError:
            # Harmless and needed for race conditions (IOError is an alias of
            # OSError in Python 3, so a single handler covers both).
            _logger.info(
                "_file_delete could not unlink %s", full_path, exc_info=True
            )


class IrAttachment(models.Model):
    _inherit = "ir.attachment"

    fs_filename = fields.Char(
        "File Name into the filesystem storage",
        help="The name of the file in the filesystem storage."
        "To preserve the mimetype and the meaning of the filename"
        "the filename is computed from the name and the extension",
        readonly=True,
    )

    internal_url = fields.Char(
        "Internal URL",
        compute="_compute_internal_url",
        help="The URL to access the file from the server.",
    )

    fs_url = fields.Char(
        "Filesystem URL",
        compute="_compute_fs_url",
        help="The URL to access the file from the filesystem storage.",
        store=True,
    )
    fs_url_path = fields.Char(
        "Filesystem URL Path",
        compute="_compute_fs_url_path",
        help="The path to access the file from the filesystem storage.",
    )
    fs_storage_code = fields.Char(
        "Filesystem Storage Code",
        related="fs_storage_id.code",
        store=True,
    )
    fs_storage_id = fields.Many2one(
        "fs.storage",
        "Filesystem Storage",
        compute="_compute_fs_storage_id",
        help="The storage where the file is stored.",
        store=True,
        ondelete="restrict",
    )

    @api.depends("name", "mimetype")
    def _compute_internal_url(self) -> None:
        """Compute the server URL (``/web/image`` or ``/web/content``)."""
        for rec in self:
            filename, extension = os.path.splitext(rec.name or "")
            # determine if the file is an image
            pfx = "/web/content"
            if rec.mimetype and rec.mimetype.startswith("image/"):
                pfx = "/web/image"

            if not extension:
                # guess_extension returns None for unknown mimetypes; fall back
                # to an empty suffix instead of the literal text "None".
                extension = mimetypes.guess_extension(rec.mimetype or "") or ""
            rec.internal_url = f"{pfx}/{rec.id}/{filename}{extension}"

    @api.depends("fs_filename")
    def _compute_fs_url(self) -> None:
        """Compute the URL of the file in the filesystem storage."""
        for rec in self:
            new_url = None
            actual_url = rec.fs_url or None
            if rec.fs_filename:
                new_url = self.env["fs.storage"]._get_url_for_attachment(rec)
            # ensure we compare value of same type and not None with False
            new_url = new_url or None
            if new_url != actual_url:
                rec.fs_url = new_url

    @api.depends("fs_filename")
    def _compute_fs_url_path(self) -> None:
        """Compute the URL path (without base URL) of the stored file."""
        for rec in self:
            rec.fs_url_path = None
            if rec.fs_filename:
                rec.fs_url_path = self.env["fs.storage"]._get_url_for_attachment(
                    rec, exclude_base_url=True
                )

    @api.depends("fs_filename")
    def _compute_fs_storage_id(self):
        """Set the storage from the code prefixing ``store_fname``."""
        for rec in self:
            if rec.store_fname:
                code = rec.store_fname.partition("://")[0]
                fs_storage = self.env["fs.storage"].sudo().get_by_code(code)
                # only write when the value changes
                if fs_storage != rec.fs_storage_id:
                    rec.fs_storage_id = fs_storage
            elif rec.fs_storage_id:
                rec.fs_storage_id = None

    @staticmethod
    def _is_storage_disabled(storage=None, log=True):
        """Return True if the ``DISABLE_ATTACHMENT_STORAGE`` env var is truthy.

        :param storage: optional storage code, only used in the logged message
        :param log: when True, log a warning if the storages are disabled
        """
        msg = _("Storages are disabled (see environment configuration).")
        if storage:
            msg = _("Storage '%s' is disabled (see environment configuration).") % (
                storage,
            )
        is_disabled = is_true(os.environ.get("DISABLE_ATTACHMENT_STORAGE"))
        if is_disabled and log:
            _logger.warning(msg)
        return is_disabled

    def _get_storage_force_db_config(self):
        """Return the "force DB" rules configured for the current storage."""
        return self.env["fs.storage"].get_force_db_for_default_attachment_rules(
            self._storage()
        )

    def _store_in_db_instead_of_object_storage_domain(self):
        """Return a domain for attachments that must be forced to DB

        Read the docstring of ``_store_in_db_instead_of_object_storage`` for
        more details.

        Used in ``force_storage_to_db_for_special_fields`` to find records
        to move from the object storage to the database.

        The domain must be inline with the conditions in
        ``_store_in_db_instead_of_object_storage``.
        """
        domain = []
        storage_config = self._get_storage_force_db_config()
        for mimetype_key, limit in storage_config.items():
            part = [("mimetype", "=like", "{}%".format(mimetype_key))]
            if limit:
                part = AND([part, [("file_size", "<=", limit)]])
            # OR simplifies to [(1, '=', 1)] if a domain being OR'ed is empty
            domain = OR([domain, part]) if domain else part
        return domain

    def _store_in_db_instead_of_object_storage(self, data, mimetype):
        """Return whether an attachment must be stored in db

        When we are using an Object Storage. This is sometimes required
        because the object storage is slower than the database/filesystem.

        Small images (128, 256) are used in Odoo in list / kanban views. We
        want them to be fast to read.
        They are generally < 50KB (default configuration) so they don't take
        that much space in database, but they'll be read much faster than from
        the object storage.

        The assets (application/javascript, text/css) are stored in database
        as well whatever their size is:

        * a database doesn't have thousands of them
        * of course better for performance
        * better portability of a database: when replicating a production
          instance for dev, the assets are included

        The configuration can be modified on the fs.storage record, in the
        field ``force_db_for_default_attachment_rules``, as a dictionary, for
        instance::

            {"image/": 51200, "application/javascript": 0, "text/css": 0}

        Where the key is the beginning of the mimetype to configure and the
        value is the limit in size below which attachments are kept in DB.
        0 means no limit.

        These limits are applied only if the storage is the default one for
        attachments (see ``_storage``).

        The conditions are also applied into the domain of the method
        ``_store_in_db_instead_of_object_storage_domain`` used to move records
        from a filesystem storage to the database.
        """
        if self._is_storage_disabled():
            return True
        storage_config = self._get_storage_force_db_config()
        for mimetype_key, limit in storage_config.items():
            if mimetype.startswith(mimetype_key):
                # the first matching rule decides; a falsy limit means no limit
                if not limit:
                    return True
                return len(data) <= limit
        return False

    def _get_datas_related_values(self, data, mimetype):
        """Compute the data-related values, forcing DB storage when required."""
        storage = self.env.context.get("storage_location") or self._storage()
        if data and storage in self._get_storage_codes():
            if self._store_in_db_instead_of_object_storage(data, mimetype):
                # compute the fields that depend on datas
                return {
                    "file_size": len(data),
                    "checksum": self._compute_checksum(data),
                    "index_content": self._index(data, mimetype),
                    "store_fname": False,
                    "db_datas": data,
                }
        # the mimetype is passed in the context for ``_storage_write_option``
        return super(
            IrAttachment, self.with_context(mimetype=mimetype)
        )._get_datas_related_values(data, mimetype)

    ###########################################################
    # Odoo methods that we override to use the object storage #
    ###########################################################

    @api.model
    def _storage(self):
        """Return the storage code to use for attachments."""
        # We check if a filesystem storage is configured for attachments
        storage = self.env["fs.storage"].get_default_storage_code_for_attachments()
        if not storage:
            # If not, we use the default storage configured into odoo
            storage = super()._storage()
        return storage

    @api.model_create_multi
    def create(self, vals_list):
        """
        Storage may depend on resource field, but the method calling _storage
        (_get_datas_related_values) does not take all vals, just the mimetype.
        The only way to give res_field and res_model to _storage method
        is to pass them into the context, and perform 1 create call per record
        to create.

        The returned recordset follows the order of ``vals_list``.
        """
        created = [None] * len(vals_list)
        no_model_positions = []
        vals_list_no_model = []
        for position, vals in enumerate(vals_list):
            if vals.get("res_model"):
                created[position] = super(
                    IrAttachment,
                    self.with_context(
                        attachment_res_model=vals.get("res_model"),
                        attachment_res_field=vals.get("res_field"),
                    ),
                ).create(vals)
            else:
                no_model_positions.append(position)
                vals_list_no_model.append(vals)
        # records without res_model need no specific context: create in batch
        atts = super().create(vals_list_no_model)
        for position, attachment in zip(no_model_positions, atts):
            created[position] = attachment
        # rebuild the recordset in the order of vals_list
        attachments = self.env["ir.attachment"]
        for attachment in created:
            attachments += attachment
        attachments._enforce_meaningful_storage_filename()
        return attachments

    def write(self, vals):
        """Write ``vals`` one record at a time with the resource context.

        :raises UserError: if data is written on several attachments with
            different mimetypes and no usable mimetype can be computed
        """
        if not self:
            return super().write(vals)
        if ("datas" in vals or "raw" in vals) and not (
            "name" in vals or "mimetype" in vals
        ):
            mimetype = self._compute_mimetype(vals)
            if mimetype and mimetype != "application/octet-stream":
                vals["mimetype"] = mimetype
            else:
                # When we write on an attachment, if the mimetype is not provided, it
                # will be computed from the name. The problem is that if you assign a
                # value to the field ``datas`` or ``raw``, the name is not provided
                # nor the mimetype, so the mimetype will be set to ``application/octet-
                # stream``.
                # We want to avoid this, so we take the mimetype of the first attachment
                # and we set it on all the attachments if they all have the same mimetype.
                # If they don't have the same mimetype, we raise an error.
                # OPW-3277070
                existing_mimetypes = self.mapped("mimetype")
                if len(set(existing_mimetypes)) == 1:
                    vals["mimetype"] = existing_mimetypes[0]
                else:
                    raise UserError(
                        _(
                            "You can't write on multiple attachments with different "
                            "mimetypes at the same time."
                        )
                    )
        for rec in self:
            # As when creating a new attachment, we must pass the res_field
            # and res_model into the context hence sadly we must perform 1 call
            # for each attachment
            super(
                IrAttachment,
                rec.with_context(
                    attachment_res_model=vals.get("res_model") or rec.res_model,
                    attachment_res_field=vals.get("res_field") or rec.res_field,
                ),
            ).write(vals)

        if "name" in vals:
            self._enforce_meaningful_storage_filename()

        return True

    @api.model
    def _file_read(self, fname):
        """Read ``fname`` from a filesystem storage or from the Odoo filestore."""
        if self._is_file_from_a_storage(fname):
            return self._storage_file_read(fname)
        else:
            return super()._file_read(fname)

    @api.model
    def _file_write(self, bin_data, checksum):
        """Write the data to the target location and return the store_fname."""
        location = self.env.context.get("storage_location") or self._storage()
        if location in self._get_storage_codes():
            filename = self._storage_file_write(bin_data)
        else:
            filename = super()._file_write(bin_data, checksum)
        return filename

    @api.model
    def _file_delete(self, fname) -> None:  # pylint: disable=missing-return
        """Delete ``fname`` unless an attachment still references it."""
        if self._is_file_from_a_storage(fname):
            cr = self.env.cr
            # using SQL to include files hidden through unlink or due to record
            # rules
            cr.execute(
                "SELECT COUNT(*) FROM ir_attachment WHERE store_fname = %s", (fname,)
            )
            count = cr.fetchone()[0]
            if not count:
                self._storage_file_delete(fname)
        else:
            super()._file_delete(fname)

    def _set_attachment_data(self, asbytes) -> None:  # pylint: disable=missing-return
        """Set the data, then give the stored files a meaningful filename."""
        super()._set_attachment_data(asbytes)
        self._enforce_meaningful_storage_filename()

    ##############################################
    # Internal methods to use the object storage #
    ##############################################
    @api.model
    def _storage_file_read(self, fname: str) -> bytes | None:
        """Read the file from the filesystem storage

        A read error is logged and an empty bytes string is returned.
        """
        fs, _storage, fname = self._fs_parse_store_fname(fname)
        try:
            with fs.open(fname, "rb") as f:
                return f.read()
        except IOError:
            _logger.info(
                "Error reading %s on storage %s", fname, _storage, exc_info=True
            )
        return b""

    def _storage_write_option(self, fs):
        """Return the extra keyword arguments to open ``fs`` files for writing.

        The mimetype found in the context is passed as ``ContentType`` when
        the root filesystem has an ``s3`` attribute.
        """
        mimetype = self.env.context.get("mimetype")
        if mimetype:
            root_fs = self.env["fs.storage"]._get_root_filesystem(fs)
            if hasattr(root_fs, "s3"):
                return {"ContentType": mimetype}
        return {}

    @api.model
    def _storage_file_write(self, bin_data: bytes) -> str:
        """Write the file to the filesystem storage

        :return: the store_fname, formatted as ``<storage code>://<path>``
        """
        storage = self.env.context.get("storage_location") or self._storage()
        fs = self._get_fs_storage_for_code(storage)
        path = self._get_fs_path(storage, bin_data)
        dirname = os.path.dirname(path)
        if not fs.exists(dirname):
            fs.makedirs(dirname)
        fname = f"{storage}://{path}"
        kwargs = self._storage_write_option(fs)
        with fs.open(path, "wb", **kwargs) as f:
            f.write(bin_data)
        # see _fs_mark_for_gc: a new file is always registered for the gc
        self._fs_mark_for_gc(fname)
        return fname

    @api.model
    def _storage_file_delete(self, fname):
        """Delete the file from the filesystem storage

        It's safe to use the fname (the store_fname) to delete the file because
        even if it's the full path to the file, the gc will only delete the file
        if they belong to the configured storage directory path.
        """
        self._fs_mark_for_gc(fname)

    @api.model
    def _get_fs_path(self, storage_code: str, bin_data: bytes) -> str:
        """Compute the path to store the file in the filesystem storage"""
        key = self.env.context.get("force_storage_key")
        if not key:
            key = self._compute_checksum(bin_data)
        if self.env["fs.storage"]._must_optimize_directory_path(storage_code):
            # Generate a unique directory path based on the file's hash
            key = os.path.join(key[:2], key[2:4], key)
        return key

    def _build_fs_filename(self):
        """Build the filename to store in the filesystem storage

        The filename is computed from the name, the extension and a version
        number. The version number is incremented each time we build a new
        filename. To know if a filename has already been built, we check if
        the fs_filename field is set. If it is set, we increment the version
        number. The version number is taken from the computed filename.

        The format of the filename is:
        <name>-<id>-<version>.<extension>
        """
        self.ensure_one()
        filename, extension = os.path.splitext(self.name or "")
        if not extension:
            # guess_extension returns None for unknown mimetypes; fall back to
            # an empty suffix instead of the literal text "None".
            extension = mimetypes.guess_extension(self.mimetype or "") or ""
        version = 0
        if self.fs_filename:
            parsed = self._parse_fs_filename(self.fs_filename)
            if parsed:
                version = parsed[2] + 1
        return "{}{}".format(
            slugify(
                "{}-{}-{}".format(filename, self.id, version),
                regex_pattern=REGEX_SLUGIFY,
            ),
            extension,
        )

    def _enforce_meaningful_storage_filename(self) -> None:
        """Enforce meaningful filename for files stored in the filesystem storage

        The filename of the file in the filesystem storage is computed from
        the mimetype and the name of the attachment. This method is called
        when an attachment is created to ensure that the filename of the file
        in the filesystem keeps the same meaning as the name of the attachment.

        Keeping the same meaning and mimetype is important to also ease to provide
        a meaningful and SEO friendly URL to the file in the filesystem storage.
        """
        for attachment in self:
            if not self._is_file_from_a_storage(attachment.store_fname):
                continue
            fs, storage, filename = attachment._get_fs_parts()

            if self.env["fs.storage"]._must_use_filename_obfuscation(storage):
                attachment.fs_filename = filename
                continue
            new_filename = attachment._build_fs_filename()
            # we must keep the same full path as the original filename
            new_filename_with_path = os.path.join(
                os.path.dirname(filename), new_filename
            )
            fs.rename(filename, new_filename_with_path)
            attachment.fs_filename = new_filename
            # we need to update the store_fname with the new filename by
            # calling the write method of the field since the write method
            # of ir_attachment prevent normal write on store_fname
            attachment._force_write_store_fname(f"{storage}://{new_filename_with_path}")
            self._fs_mark_for_gc(attachment.store_fname)

    def _force_write_store_fname(self, store_fname):
        """Force the write of the store_fname field

        The base implementation of the store_fname field prevent the write
        of the store_fname field. This method bypass this limitation by
        calling the write method of the field directly.
        """
        self._fields["store_fname"].write(self, store_fname)

    @api.model
    def _get_fs_storage_for_code(
        self,
        code: str,
    ) -> fsspec.AbstractFileSystem | None:
        """Return the filesystem for the given storage code

        :raises SystemError: if no filesystem is found for ``code``
        """
        fs = self.env["fs.storage"].get_fs_by_code(code)
        if not fs:
            raise SystemError(f"No Filesystem storage for code {code}")
        return fs

    @api.model
    def _fs_parse_store_fname(
        self, fname: str
    ) -> tuple[fsspec.AbstractFileSystem, str, str]:
        """Return the filesystem, the storage code and the path for the given fname

        :param fname: the fname to parse, formatted as ``<storage code>://<path>``
        """
        partition = fname.partition("://")
        storage_code = partition[0]
        fs = self._get_fs_storage_for_code(storage_code)
        fname = partition[2]
        return fs, storage_code, fname

    @api.model
    def _parse_fs_filename(self, filename: str) -> tuple[str, int, int, str] | None:
        """Parse the filename and return the name, id, version and extension

        The expected format is <name>-<id>-<version>.<extension>; None is
        returned for an empty or non matching filename.
        """
        if not filename:
            return None
        filename = os.path.basename(filename)
        match = FS_FILENAME_RE_PARSER.match(filename)
        if not match:
            return None
        name, res_id, version, extension = match.groups()
        return name, int(res_id), int(version), extension

    @api.model
    def _is_file_from_a_storage(self, fname):
        """Return True if ``fname`` starts with the code of an enabled storage."""
        if not fname:
            return False
        for storage_code in self._get_storage_codes():
            if self._is_storage_disabled(storage_code):
                continue
            uri = "{}://".format(storage_code)
            if fname.startswith(uri):
                return True
        return False

    @api.model
    def _fs_mark_for_gc(self, fname):
        """Mark the file for deletion

        The file will be deleted by the garbage collector if it's no more
        referenced by any attachment. We use a garbage collector to enforce
        the transaction mechanism between Odoo and the filesystem storage.
        Files are added to the garbage collector when:
        - each time a file is created in the filesystem storage
        - an attachment is deleted

        Whatever the result of the current transaction, the information of files
        marked for deletion is stored in the database.

        When the garbage collector is called, it will check if the file is still
        referenced by an attachment. If not, the file is physically deleted from
        the filesystem storage.

        If the creation of the attachment fails, since the file is marked for
        deletion when it's written into the filesystem storage, it will be
        deleted by the garbage collector.

        If the content of the attachment is updated, we always create a new file.
        This new file is marked for deletion and the old one too. If the
        transaction succeeds, the old file is deleted by the garbage collector
        since it's no more referenced by any attachment. If the transaction fails,
        the old file is not deleted since it's still referenced by the attachment
        but the new file is deleted since it's marked for deletion and not
        referenced.
        """
        self.env["fs.file.gc"]._mark_for_gc(fname)

    def _get_fs_parts(
        self,
    ) -> tuple[fsspec.AbstractFileSystem, str, str] | tuple[None, None, None]:
        """Return the filesystem, the storage code and the path for the current
        attachment"""
        if not self.store_fname:
            return None, None, None
        return self._fs_parse_store_fname(self.store_fname)

    def open(
        self,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        new_version=True,
        **kwargs,
    ) -> AttachmentFileLikeAdapter:
        """
        Return a file-like object from the filesystem storage where the attachment
        content is stored.

        In read mode, this method works for all attachments, even if the content
        is stored in the database or into the odoo filestore or a filesystem storage.

        The resultant instance must function correctly in a context ``with``
        block.

        (parameters are ignored in the case of the database storage).

        Parameters
        ----------
        mode: str like 'rb', 'w'
            See builtin ``open()``
        block_size: int
            Some indication of buffering - this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        new_version: bool
            If True, and mode is 'w', create a new version of the file.
            If False, and mode is 'w', overwrite the current version of the file.
            This flag is True by default to avoid data loss and ensure transaction
            mechanism between Odoo and the filesystem storage.
        encoding, errors, newline:
            passed on to TextIOWrapper for text mode

        Returns
        -------
        A file-like object

        TODO if open with 'w' in mode, we could use a buffered IO detecting that
        the content is modified and invalidating the attachment cache...
        """
        self.ensure_one()
        return AttachmentFileLikeAdapter(
            self,
            mode=mode,
            block_size=block_size,
            cache_options=cache_options,
            compression=compression,
            new_version=new_version,
            **kwargs,
        )

    @contextmanager
    def _do_in_new_env(self, new_cr=False):
        """Context manager that yields a new environment

        When ``new_cr`` is True the environment uses a new cursor, thus a new
        PG transaction, committed on success and rolled back on error.
        Otherwise a copy of the current environment is yielded.
        """
        if new_cr:
            registry = odoo.modules.registry.Registry.new(self.env.cr.dbname)
            with closing(registry.cursor()) as cr:
                try:
                    yield self.env(cr=cr)
                except Exception:
                    cr.rollback()
                    raise
                else:
                    # disable pylint error because this is a valid commit,
                    # we are in a new env
                    cr.commit()  # pylint: disable=invalid-commit
        else:
            # make a copy
            yield self.env()

    def _get_storage_codes(self):
        """Get the list of filesystem storage active in the system"""
        return self.env["fs.storage"].sudo().get_storage_codes()

    ################################
    # useful methods for migration #
    ################################

    def _move_attachment_to_store(self):
        """Rewrite the attachment data so that it lands in the current storage.

        :return: the full path of the former filestore file when the content
            came from a file, None otherwise
        """
        self.ensure_one()
        _logger.info("inspecting attachment %s (%d)", self.name, self.id)
        fname = self.store_fname
        # store_fname is falsy for content stored in database: guard the
        # partition so that the ``db_datas`` branch below stays reachable
        storage = (fname or "").partition("://")[0]
        if self._is_storage_disabled(storage):
            fname = False
        if fname:
            # migrating from filesystem filestore
            # or from the old 'store_fname' without the bucket name
            _logger.info("moving %s on the object storage", fname)
            self.write(
                {
                    "datas": self.datas,
                    # this is required otherwise the
                    # mimetype gets overridden with
                    # 'application/octet-stream'
                    # on assets
                    "mimetype": self.mimetype,
                }
            )
            _logger.info("moved %s on the object storage", fname)
            return self._full_path(fname)
        elif self.db_datas:
            _logger.info("moving on the object storage from database")
            self.write({"datas": self.datas})

    @api.model
    def force_storage(self):
        """Move the attachments to the configured storage (administrators only).

        :raises AccessError: if the current user is not an administrator
        """
        if not self.env["res.users"].browse(self.env.uid)._is_admin():
            raise AccessError(_("Only administrators can execute this action."))
        location = self.env.context.get("storage_location") or self._storage()
        if location not in self._get_storage_codes():
            return super().force_storage()
        self._force_storage_to_object_storage()

    @api.model
    def force_storage_to_db_for_special_fields(
        self, new_cr=False, storage: str | None = None
    ):
        """Migrate special attachments from Object Storage back to database

        The access to a file stored on the objects storage is slower
        than a local disk or database access. For attachments like
        image_small that are accessed in batch for kanban views, this
        is too slow. We store this type of attachment in the database.

        This method can be used when migrating a filestore where all the files,
        including the special files (assets, image_small, ...) have been pushed
        to the Object Storage and we want to write them back in the database.

        It is not called anywhere, but can be called by RPC or scripts.
        """
        if not storage:
            storage = self._storage()
        if self._is_storage_disabled(storage):
            _logger.warning(
                "Storage '%s' is disabled, skipping migration of attachments to DB",
                storage,
            )
            return
        if storage not in self._get_storage_codes():
            _logger.warning(
                "Storage '%s' is not configured, "
                "skipping migration of attachments to DB",
                storage,
            )
            return

        domain = AND(
            (
                normalize_domain(
                    [
                        ("store_fname", "=like", "{}://%".format(storage)),
                        # for res_field, see comment in
                        # _force_storage_to_object_storage
                        "|",
                        ("res_field", "=", False),
                        ("res_field", "!=", False),
                    ]
                ),
                normalize_domain(self._store_in_db_instead_of_object_storage_domain()),
            )
        )

        with self._do_in_new_env(new_cr=new_cr) as new_env:
            model_env = new_env["ir.attachment"].with_context(prefetch_fields=False)
            attachment_ids = model_env.search(domain).ids
            if not attachment_ids:
                return
            total = len(attachment_ids)
            start_time = time.time()
            _logger.info(
                "Moving %d attachments from %s to DB for fast access", total, storage
            )
            for current, attachment_id in enumerate(attachment_ids, start=1):
                # if we browse attachments outside of the loop, the first
                # access to 'datas' will compute all the 'datas' fields at
                # once, which means reading hundreds or thousands of files at
                # once, exhausting memory
                attachment = model_env.browse(attachment_id)
                # this write will read the datas from the Object Storage and
                # write them back in the DB (the logic for location to write is
                # in the 'datas' inverse computed field)
                # The mimetype is not passed here: when it is not given,
                # ``write`` computes it from the data and falls back on the
                # current mimetype of the attachment instead of
                # 'application/octet-stream'.
                attachment.write({"datas": attachment.datas})
                if current % 100 == 0 or total - current == 0:
                    _logger.info(
                        "attachment %s/%s after %.2fs",
                        current,
                        total,
                        time.time() - start_time,
                    )

    @api.model
    def _force_storage_to_object_storage(self, new_cr=False):
        """Move every attachment not yet in the target storage into it.

        :param new_cr: when True, the migration runs in a new cursor
        """
        _logger.info("migrating files to the object storage")
        storage = self.env.context.get("storage_location") or self._storage()
        if self._is_storage_disabled(storage):
            return
        # The weird "res_field = False OR res_field != False" domain
        # is required! It's because of an override of _search in ir.attachment
        # which adds ('res_field', '=', False) when the domain does not
        # contain 'res_field'.
        # https://github.com/odoo/odoo/blob/9032617120138848c63b3cfa5d1913c5e5ad76db/
        # odoo/addons/base/ir/ir_attachment.py#L344-L347
        domain = [
            "!",
            ("store_fname", "=like", "{}://%".format(storage)),
            "|",
            ("res_field", "=", False),
            ("res_field", "!=", False),
        ]
        # We do a copy of the environment so we can workaround the cache issue
        # below. We do not create a new cursor by default because it causes
        # serialization issues due to concurrent updates on attachments during
        # the installation
        with self._do_in_new_env(new_cr=new_cr) as new_env:
            model_env = new_env["ir.attachment"]
            ids = model_env.search(domain).ids
            files_to_clean = []
            for attachment_id in ids:
                try:
                    with new_env.cr.savepoint():
                        # check that no other transaction has
                        # locked the row, don't send a file to storage
                        # in that case
                        # The lock must be taken with the cursor that performs
                        # the write (new_env.cr): locking the row from another
                        # transaction of this process would block that write.
                        new_env.cr.execute(
                            "SELECT id "
                            "FROM ir_attachment "
                            "WHERE id = %s "
                            "FOR UPDATE NOWAIT",
                            (attachment_id,),
                            log_exceptions=False,
                        )

                        # This is a trick to avoid having the 'datas'
                        # function fields computed for every attachment on
                        # each iteration of the loop. The former issue
                        # being that it reads the content of the file of
                        # ALL the attachments on each loop.
                        new_env.clear()
                        attachment = model_env.browse(attachment_id)
                        path = attachment._move_attachment_to_store()
                        if path:
                            files_to_clean.append(path)
                except psycopg2.OperationalError:
                    _logger.error(
                        "Could not migrate attachment %s to S3", attachment_id
                    )

            # delete the files from the filesystem once we know the changes
            # have been committed in ir.attachment
            if files_to_clean:
                new_env.cr.commit()
                clean_fs(files_to_clean)


class AttachmentFileLikeAdapter(object):
    """
    This class is a wrapper class around the ir.attachment model. It is used to
    open the ir.attachment as a file and to read/write data to it.

    When the content of the file is stored into the odoo filestore or in a
    filesystem storage, this object allows you to read/write the content from
    the file in a direct way without having to read/write the whole file into
    memory. When the content of the file is stored into database, this content
    is read/written from/into a buffer in memory.

    Parameters
    ----------
    attachment : ir.attachment
        The attachment to open as a file.
    mode: str like 'rb', 'w'
        See builtin ``open()``
    block_size: int
        Some indication of buffering - this is a value in bytes
    cache_options : dict, optional
        Extra arguments to pass through to the cache.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    new_version: bool
        If True, and mode is 'w', create a new version of the file.
        If False, and mode is 'w', overwrite the current version of the file.
        This flag is False by default in this constructor, while
        ``ir.attachment.open()`` passes True by default to avoid data loss and
        ensure transaction mechanism between Odoo and the filesystem storage.
    encoding, errors, newline:
        passed on to TextIOWrapper for text mode

    You can use this class to adapt an attachment object as a file in 2 ways:
    * as a context manager wrapping the attachment object as a file
    * or as a normal utility class

    Examples

    >>> with AttachmentFileLikeAdapter(attachment, mode="rb") as f:
    ...     f.read()
    b'Hello World'
    # at the end of the context manager, the file is closed
    >>> f = AttachmentFileLikeAdapter(attachment, mode="rb")
    >>> f.read()
    b'Hello World'
    # you have to close the file manually
    >>> f.close()

    """

    def __init__(
        self,
        attachment: IrAttachment,
        mode: str = "rb",
        block_size: int | None = None,
        cache_options: dict | None = None,
        compression: str | None = None,
        new_version: bool = False,
        **kwargs,
    ):
        self._attachment = attachment
        self._mode = mode
        self._block_size = block_size
        self._cache_options = cache_options
        self._compression = compression
        self._new_version = new_version
        self._kwargs = kwargs

        # state attributes
        self._file: io.IOBase | None = None
        self._filesystem: fsspec.AbstractFileSystem | None = None
        self._new_store_fname: str | None = None

    @property
    def attachment(self) -> IrAttachment:
        """The attachment object the file is related to"""
        return self._attachment

    @property
    def mode(self) -> str:
        """The mode used to open the file"""
        return self._mode

    @property
    def block_size(self) -> int | None:
        """The block size used to open the file"""
        return self._block_size

    @property
    def cache_options(self) -> dict | None:
        """The cache options used to open the file"""
        return self._cache_options

    @property
    def compression(self) -> str | None:
        """The compression used to open the file"""
        return self._compression

    @property
    def new_version(self) -> bool:
        """Is the file open for a new version"""
        return self._new_version

    @property
    def kwargs(self) -> dict:
        """The extra keyword arguments passed when opening the file"""
        return self._kwargs

    @property
    def _is_open_for_modify(self) -> bool:
        """Is the file open for modification
        A file is open for modification if it is open for writing or appending
        """
        return "w" in self.mode or "a" in self.mode

    @property
    def _is_open_for_read(self) -> bool:
        """Is the file open for reading"""
        return "r" in self.mode

    @property
    def _is_stored_in_db(self) -> bool:
        """Is the file stored in database

        This compares the storage returned by ``attachment._storage()``
        with "db".
        """
        return self.attachment._storage() == "db"

    def __enter__(self) -> io.IOBase:
        """Called when entering the context manager

        Create the file object and return it.
        """
        # we call the attachment instance to get the file object
        self._file_open()
        return self._file

    def _file_open(self) -> io.IOBase:
        """Open the attachment content as a file-like object

        This method will initialize the following attributes:

        * _file: the file-like object.
        * _filesystem: filesystem object.
        * _new_store_fname: the new store_fname if the file is
            opened for a new version.

        :return: the opened file-like object (also stored in ``_file``)
        """
        new_store_fname = None
        if (
            self._is_open_for_read
            or (self._is_open_for_modify and not self.new_version)
            or self._is_stored_in_db
        ):
            # the current content is opened in place
            if self.attachment._is_file_from_a_storage(self.attachment.store_fname):
                fs, _storage, fname = self.attachment._get_fs_parts()
                filepath = fname
                filesystem = fs
            elif self.attachment.store_fname:
                filepath = self.attachment._full_path(self.attachment.store_fname)
                filesystem = fsspec.filesystem("file")
            else:
                # no store_fname: the content is exposed through an in-memory
                # filesystem, keyed by the attachment id
                filepath = f"{self.attachment.id}"
                filesystem = fsspec.filesystem("memory")
                if "a" in self.mode or self._is_open_for_read:
                    filesystem.pipe_file(filepath, self.attachment.db_datas)
            the_file = filesystem.open(
                filepath,
                mode=self.mode,
                block_size=self.block_size,
                cache_options=self.cache_options,
                compression=self.compression,
                **self.kwargs,
            )
        else:
            # mode='w' and new_version=True and storage != 'db'

            # We must create a new file with a new name. If we are in an
            # append mode, we must copy the content of the old file (or create
            # the new one by copy of the old one).
            # to not break the storage plugin mechanism, we'll use the
            # _file_write method to create the new empty file with a random
            # content and checksum to avoid collision.
            content = self._gen_random_content()
            checksum = self.attachment._compute_checksum(content)
            new_store_fname = self.attachment.with_context(
                attachment_res_model=self.attachment.res_model,
                attachment_res_field=self.attachment.res_field,
            )._file_write(content, checksum)
            if self.attachment._is_file_from_a_storage(new_store_fname):
                (
                    filesystem,
                    _storage,
                    new_filepath,
                ) = self.attachment._fs_parse_store_fname(new_store_fname)
                _fs, _storage, old_filepath = self.attachment._get_fs_parts()
            else:
                new_filepath = self.attachment._full_path(new_store_fname)
                old_filepath = self.attachment._full_path(self.attachment.store_fname)
                filesystem = fsspec.filesystem("file")
            if "a" in self.mode:
                filesystem.cp_file(old_filepath, new_filepath)
            the_file = filesystem.open(
                new_filepath,
                mode=self.mode,
                block_size=self.block_size,
                cache_options=self.cache_options,
                compression=self.compression,
                **self.kwargs,
            )
        self._filesystem = filesystem
        self._new_store_fname = new_store_fname
        self._file = the_file
        return the_file

    def _gen_random_content(self, size=256):
        """Generate a random content of size bytes"""
        return os.urandom(size)

    def _file_close(self):
        """Close the file-like object opened by _file_open

        When the file was open for modification, the attachment is updated
        with the new checksum and size (and the new store_fname for a new
        version).
        """
        if not self._file:
            return
        if not self._file.closed:
            self._file.flush()
            self._file.close()
        if self._is_open_for_modify:
            attachment_data = self._get_attachment_data()
            if (
                not (self.new_version and self._new_store_fname)
                and self._is_stored_in_db
            ):
                # the content was written in place for a DB storage: pass it
                # to the attachment through the ``raw`` field
                attachment_data["raw"] = self._file.getvalue()
            self.attachment.write(attachment_data)
            if self.new_version and self._new_store_fname:
                self.attachment._force_write_store_fname(self._new_store_fname)
            self.attachment._enforce_meaningful_storage_filename()
        self._ensure_cache_consistency()

    def _get_attachment_data(self) -> dict:
        """Return the values to write on the attachment for the written file."""
        ret = {}
        if self._file:
            file_path = self._file.path
            if hasattr(self._filesystem, "path"):
                # make the path relative to the path of the filesystem
                file_path = file_path.replace(self._filesystem.path, "")
                file_path = file_path.lstrip("/")
            ret["checksum"] = self._filesystem.checksum(file_path)
            ret["file_size"] = self._filesystem.size(file_path)
        # TODO index_content is too expensive to compute here or should be
        # configurable
        # data = self._file.read()
        # ret["index_content"] = self.attachment._index_content(data,
        #     self.attachment.mimetype, ret["checksum"])
        ret["index_content"] = b""

        return ret

    def _ensure_cache_consistency(self):
        """Ensure the cache consistency once the file is closed"""
        if self._is_open_for_modify and not self._is_stored_in_db:
            self.attachment.invalidate_recordset(fnames=["raw", "datas", "db_datas"])
            if (
                self.attachment.res_model
                and self.attachment.res_id
                and self.attachment.res_field
            ):
                # the content is also exposed as a field of the related record
                self.attachment.env[self.attachment.res_model].browse(
                    self.attachment.res_id
                ).invalidate_recordset(fnames=[self.attachment.res_field])

    def __exit__(self, *args):
        """Called when exiting the context manager.

        Close the file if it is not already closed.
        """
        self._file_close()

    def __getattr__(self, attr):
        """
        Forward all other attributes to the underlying file object.

        This method is required to make the object behave like a file object
        when the AttachmentFileLikeAdapter is used outside a context manager.

        .. code-block:: python

            f = AttachmentFileLikeAdapter(attachment)
            f.read()

        """
        # ``_file`` only reaches __getattr__ when it has not been set yet
        # (instance not initialized): fail instead of recursing forever.
        if attr == "_file":
            raise AttributeError(attr)
        # NOTE(review): ``close`` is forwarded to the underlying file as well,
        # so ``_file_close`` (which updates the attachment) only runs through
        # ``__exit__`` - confirm this is intended for write modes.
        if not self._file:
            self.__enter__()
        return getattr(self._file, attr)