oca-storage/odoo-bringout-oca-storage-fs_attachment/fs_attachment/models/ir_attachment.py
2025-08-29 15:43:06 +02:00

1153 lines
45 KiB
Python

# Copyright 2017-2013 Camptocamp SA
# Copyright 2023 ACSONE SA/NV
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html)
from __future__ import annotations
import io
import logging
import mimetypes
import os
import re
import time
from contextlib import closing, contextmanager
import fsspec # pylint: disable=missing-manifest-dependency
import psycopg2
from slugify import slugify # pylint: disable=missing-manifest-dependency
import odoo
from odoo import _, api, fields, models
from odoo.exceptions import AccessError, UserError
from odoo.osv.expression import AND, OR, normalize_domain
from .strtobool import strtobool
_logger = logging.getLogger(__name__)
REGEX_SLUGIFY = r"[^-a-z0-9_]+"
FS_FILENAME_RE_PARSER = re.compile(
r"^(?P<name>.+)-(?P<id>\d+)-(?P<version>\d+)(?P<extension>\..+)$"
)
def is_true(strval):
    """Interpret ``strval`` as a boolean flag.

    A falsy value (``None``, empty string, ...) is read as ``"0"``; any other
    value is handed to ``strtobool`` and the result is coerced to ``bool``.
    """
    flag = strval if strval else "0"
    return bool(strtobool(flag))
def clean_fs(files):
    """Remove the given files from the local filesystem (best effort).

    :param files: iterable of full paths to delete

    Paths that do not exist are skipped. A failing ``os.unlink`` is logged at
    info level and otherwise ignored, so one bad path never aborts the cleanup.
    """
    _logger.info("cleaning old files from filestore")
    for full_path in files:
        if not os.path.exists(full_path):
            continue
        try:
            os.unlink(full_path)
        except OSError:
            # Harmless and needed for race conditions.
            # ``IOError`` is an alias of ``OSError`` on Python 3, so a single
            # handler covers what used to be two identical ``except`` clauses.
            _logger.info("_file_delete could not unlink %s", full_path, exc_info=True)
class IrAttachment(models.Model):
    """Store ``ir.attachment`` content in ``fs.storage`` filesystem storages.

    Files written to a storage are referenced by a ``store_fname`` of the form
    ``<storage code>://<path>``. Attachments matching the "force db" rules of
    the storage are kept in the database instead.
    """

    _inherit = "ir.attachment"

    fs_filename = fields.Char(
        "File Name into the filesystem storage",
        help="The name of the file in the filesystem storage. "
        "To preserve the mimetype and the meaning of the filename "
        "the filename is computed from the name and the extension",
        readonly=True,
    )
    internal_url = fields.Char(
        "Internal URL",
        compute="_compute_internal_url",
        help="The URL to access the file from the server.",
    )
    fs_url = fields.Char(
        "Filesystem URL",
        compute="_compute_fs_url",
        help="The URL to access the file from the filesystem storage.",
        store=True,
    )
    fs_url_path = fields.Char(
        "Filesystem URL Path",
        compute="_compute_fs_url_path",
        help="The path to access the file from the filesystem storage.",
    )
    fs_storage_code = fields.Char(
        "Filesystem Storage Code",
        related="fs_storage_id.code",
        store=True,
    )
    fs_storage_id = fields.Many2one(
        "fs.storage",
        "Filesystem Storage",
        compute="_compute_fs_storage_id",
        help="The storage where the file is stored.",
        store=True,
        ondelete="restrict",
    )

    @api.depends("name", "mimetype")
    def _compute_internal_url(self) -> None:
        """Compute the Odoo URL (``/web/image`` or ``/web/content``) of the file.

        When the name has no extension, one is guessed from the mimetype. If
        nothing can be guessed, the URL is built without extension.
        """
        for rec in self:
            filename, extension = os.path.splitext(rec.name or "")
            # determine if the file is an image
            pfx = "/web/content"
            if rec.mimetype and rec.mimetype.startswith("image/"):
                pfx = "/web/image"
            if not extension and rec.mimetype:
                # guess_extension() returns None for unknown mimetypes; never
                # let the literal "None" leak into the URL.
                extension = mimetypes.guess_extension(rec.mimetype) or ""
            rec.internal_url = f"{pfx}/{rec.id}/{filename}{extension}"

    @api.depends("fs_filename")
    def _compute_fs_url(self) -> None:
        """Compute the URL of the file in its filesystem storage."""
        for rec in self:
            new_url = None
            actual_url = rec.fs_url or None
            if rec.fs_filename:
                new_url = self.env["fs.storage"]._get_url_for_attachment(rec)
            # ensure we compare value of same type and not None with False
            new_url = new_url or None
            if new_url != actual_url:
                rec.fs_url = new_url

    @api.depends("fs_filename")
    def _compute_fs_url_path(self) -> None:
        """Compute the URL of the file without the storage base URL."""
        for rec in self:
            rec.fs_url_path = None
            if rec.fs_filename:
                rec.fs_url_path = self.env["fs.storage"]._get_url_for_attachment(
                    rec, exclude_base_url=True
                )

    @api.depends("fs_filename")
    def _compute_fs_storage_id(self):
        """Link the attachment to the storage named by its ``store_fname``."""
        for rec in self:
            if rec.store_fname:
                code = rec.store_fname.partition("://")[0]
                fs_storage = self.env["fs.storage"].sudo().get_by_code(code)
                if fs_storage != rec.fs_storage_id:
                    rec.fs_storage_id = fs_storage
            elif rec.fs_storage_id:
                rec.fs_storage_id = None

    @staticmethod
    def _is_storage_disabled(storage=None, log=True):
        """Tell whether storages are disabled through the environment.

        :param storage: optional storage code, only used in the log message
        :param log: when True, a warning is logged if storages are disabled
        :return: True if ``DISABLE_ATTACHMENT_STORAGE`` is set to a true value
        """
        msg = _("Storages are disabled (see environment configuration).")
        if storage:
            msg = _("Storage '%s' is disabled (see environment configuration).") % (
                storage,
            )
        is_disabled = is_true(os.environ.get("DISABLE_ATTACHMENT_STORAGE"))
        if is_disabled and log:
            _logger.warning(msg)
        return is_disabled

    def _get_storage_force_db_config(self):
        """Return the "force db" rules configured for the current storage."""
        return self.env["fs.storage"].get_force_db_for_default_attachment_rules(
            self._storage()
        )

    def _store_in_db_instead_of_object_storage_domain(self):
        """Return a domain for attachments that must be forced to DB

        Read the docstring of ``_store_in_db_instead_of_object_storage`` for
        more details.

        Used in ``force_storage_to_db_for_special_fields`` to find records
        to move from the object storage to the database.

        The domain must be inline with the conditions in
        ``_store_in_db_instead_of_object_storage``.
        """
        domain = []
        storage_config = self._get_storage_force_db_config()
        for mimetype_key, limit in storage_config.items():
            part = [("mimetype", "=like", "{}%".format(mimetype_key))]
            if limit:
                part = AND([part, [("file_size", "<=", limit)]])
            # OR simplifies to [(1, '=', 1)] if a domain being OR'ed is empty
            domain = OR([domain, part]) if domain else part
        return domain

    def _store_in_db_instead_of_object_storage(self, data, mimetype):
        """Return whether an attachment must be stored in db

        When we are using an Object Storage. This is sometimes required
        because the object storage is slower than the database/filesystem.

        Small images (128, 256) are used in Odoo in list / kanban views. We
        want them to be fast to read.
        They are generally < 50KB (default configuration) so they don't take
        that much space in database, but they'll be read much faster than from
        the object storage.

        The assets (application/javascript, text/css) are stored in database
        as well whatever their size is:

        * a database doesn't have thousands of them
        * of course better for performance
        * better portability of a database: when replicating a production
          instance for dev, the assets are included

        The configuration can be modified on the fs.storage record, in the
        field ``force_db_for_default_attachment_rules``, as a dictionary, for
        instance::

            {"image/": 51200, "application/javascript": 0, "text/css": 0}

        Where the key is the beginning of the mimetype to configure and the
        value is the limit in size below which attachments are kept in DB.
        0 means no limit.

        These limits are applied only if the storage is the default one for
        attachments (see ``_storage``).

        The conditions are also applied into the domain of the method
        ``_store_in_db_instead_of_object_storage_domain`` used to move records
        from a filesystem storage to the database.
        """
        if self._is_storage_disabled():
            return True
        storage_config = self._get_storage_force_db_config()
        for mimetype_key, limit in storage_config.items():
            if mimetype.startswith(mimetype_key):
                if not limit:
                    return True
                # the first matching rule decides
                return len(data) <= limit
        return False

    def _get_datas_related_values(self, data, mimetype):
        """Compute the values derived from the content of the attachment.

        When the target storage is a filesystem storage and the "force db"
        rules apply, the content is kept in ``db_datas``. Otherwise the call
        is delegated to ``super`` with the mimetype pushed into the context.
        """
        storage = self.env.context.get("storage_location") or self._storage()
        if data and storage in self._get_storage_codes():
            if self._store_in_db_instead_of_object_storage(data, mimetype):
                # compute the fields that depend on datas
                return {
                    "file_size": len(data),
                    "checksum": self._compute_checksum(data),
                    "index_content": self._index(data, mimetype),
                    "store_fname": False,
                    "db_datas": data,
                }
        return super(
            IrAttachment, self.with_context(mimetype=mimetype)
        )._get_datas_related_values(data, mimetype)

    ###########################################################
    # Odoo methods that we override to use the object storage #
    ###########################################################

    @api.model
    def _storage(self):
        """Return the storage code to use for attachments."""
        # We check if a filesystem storage is configured for attachments
        storage = self.env["fs.storage"].get_default_storage_code_for_attachments()
        if not storage:
            # If not, we use the default storage configured into odoo
            storage = super()._storage()
        return storage

    @api.model_create_multi
    def create(self, vals_list):
        """
        Storage may depend on resource field, but the method calling _storage
        (_get_datas_related_values) does not take all vals, just the mimetype.
        The only way to give res_field and res_model to _storage method
        is to pass them into the context, and perform 1 create call per record
        to create.

        The returned recordset follows the order of ``vals_list``, whatever
        the way each record has been created.
        """
        created_ids = [None] * len(vals_list)
        no_model_indexes = []
        no_model_vals = []
        for index, vals in enumerate(vals_list):
            if vals.get("res_model"):
                attachment = super(
                    IrAttachment,
                    self.with_context(
                        attachment_res_model=vals.get("res_model"),
                        attachment_res_field=vals.get("res_field"),
                    ),
                ).create(vals)
                created_ids[index] = attachment.id
            else:
                no_model_indexes.append(index)
                no_model_vals.append(vals)
        batch = super().create(no_model_vals)
        # put the records created in batch back at their original position
        for index, attachment in zip(no_model_indexes, batch):
            created_ids[index] = attachment.id
        attachments = self.env["ir.attachment"].browse(created_ids)
        attachments._enforce_meaningful_storage_filename()
        return attachments

    def write(self, vals):
        """Write ``vals`` record by record, preserving the mimetype.

        :raises UserError: when content is written on several attachments with
            different mimetypes and no mimetype can be computed from ``vals``
        """
        if not self:
            return super().write(vals)
        if ("datas" in vals or "raw" in vals) and not (
            "name" in vals or "mimetype" in vals
        ):
            mimetype = self._compute_mimetype(vals)
            if mimetype and mimetype != "application/octet-stream":
                vals["mimetype"] = mimetype
            else:
                # When we write on an attachment, if the mimetype is not
                # provided, it will be computed from the name. The problem is
                # that if you assign a value to the field ``datas`` or ``raw``,
                # the name is not provided nor the mimetype, so the mimetype
                # will be set to ``application/octet-stream``.
                # We want to avoid this, so we take the mimetype of the first
                # attachment and we set it on all the attachments if they all
                # have the same mimetype. If they don't have the same mimetype,
                # we raise an error.
                # OPW-3277070
                current_mimetypes = self.mapped("mimetype")
                if len(set(current_mimetypes)) == 1:
                    vals["mimetype"] = current_mimetypes[0]
                else:
                    raise UserError(
                        _(
                            "You can't write on multiple attachments with different "
                            "mimetypes at the same time."
                        )
                    )
        for rec in self:
            # As when creating a new attachment, we must pass the res_field
            # and res_model into the context hence sadly we must perform 1 call
            # for each attachment
            super(
                IrAttachment,
                rec.with_context(
                    attachment_res_model=vals.get("res_model") or rec.res_model,
                    attachment_res_field=vals.get("res_field") or rec.res_field,
                ),
            ).write(vals)
        if "name" in vals:
            self._enforce_meaningful_storage_filename()
        return True

    @api.model
    def _file_read(self, fname):
        """Read ``fname`` from its filesystem storage or from the filestore."""
        if self._is_file_from_a_storage(fname):
            return self._storage_file_read(fname)
        return super()._file_read(fname)

    @api.model
    def _file_write(self, bin_data, checksum):
        """Write the content to the target storage and return its store_fname."""
        location = self.env.context.get("storage_location") or self._storage()
        if location in self._get_storage_codes():
            filename = self._storage_file_write(bin_data)
        else:
            filename = super()._file_write(bin_data, checksum)
        return filename

    @api.model
    def _file_delete(self, fname) -> None:  # pylint: disable=missing-return
        """Delete ``fname`` once no attachment references it anymore."""
        if self._is_file_from_a_storage(fname):
            cr = self.env.cr
            # using SQL to include files hidden through unlink or due to record
            # rules
            cr.execute(
                "SELECT COUNT(*) FROM ir_attachment WHERE store_fname = %s", (fname,)
            )
            count = cr.fetchone()[0]
            if not count:
                self._storage_file_delete(fname)
        else:
            super()._file_delete(fname)

    def _set_attachment_data(self, asbytes) -> None:  # pylint: disable=missing-return
        """Set the content, then give the stored file a meaningful name."""
        super()._set_attachment_data(asbytes)
        self._enforce_meaningful_storage_filename()

    ##############################################
    # Internal methods to use the object storage #
    ##############################################

    @api.model
    def _storage_file_read(self, fname: str) -> bytes | None:
        """Read the file from the filesystem storage

        An ``IOError`` is logged and an empty bytes string is returned.
        """
        fs, _storage, fname = self._fs_parse_store_fname(fname)
        try:
            with fs.open(fname, "rb") as f:
                return f.read()
        except IOError:
            _logger.info(
                "Error reading %s on storage %s", fname, _storage, exc_info=True
            )
        return b""

    def _storage_write_option(self, fs):
        """Return the extra keyword arguments to open a file for writing.

        When a mimetype is found in the context and the root filesystem of
        ``fs`` exposes an ``s3`` attribute, the mimetype is passed as
        ``ContentType``.
        """
        mimetype = self.env.context.get("mimetype")
        if mimetype:
            root_fs = self.env["fs.storage"]._get_root_filesystem(fs)
            if hasattr(root_fs, "s3"):
                return {"ContentType": mimetype}
        return {}

    @api.model
    def _storage_file_write(self, bin_data: bytes) -> str:
        """Write the file to the filesystem storage"""
        storage = self.env.context.get("storage_location") or self._storage()
        fs = self._get_fs_storage_for_code(storage)
        path = self._get_fs_path(storage, bin_data)
        dirname = os.path.dirname(path)
        if not fs.exists(dirname):
            fs.makedirs(dirname)
        fname = f"{storage}://{path}"
        kwargs = self._storage_write_option(fs)
        with fs.open(path, "wb", **kwargs) as f:
            f.write(bin_data)
        # every new file is registered in the gc, see ``_fs_mark_for_gc``
        self._fs_mark_for_gc(fname)
        return fname

    @api.model
    def _storage_file_delete(self, fname):
        """Delete the file from the filesystem storage

        It's safe to use the fname (the store_fname) to delete the file because
        even if it's the full path to the file, the gc will only delete the file
        if they belong to the configured storage directory path.
        """
        self._fs_mark_for_gc(fname)

    @api.model
    def _get_fs_path(self, storage_code: str, bin_data: bytes) -> str:
        """Compute the path to store the file in the filesystem storage"""
        key = self.env.context.get("force_storage_key")
        if not key:
            key = self._compute_checksum(bin_data)
        if self.env["fs.storage"]._must_optimize_directory_path(storage_code):
            # Generate a unique directory path based on the file's hash
            key = os.path.join(key[:2], key[2:4], key)
        return key

    def _build_fs_filename(self):
        """Build the filename to store in the filesystem storage

        The filename is computed from the name, the extension and a version
        number. The version number is incremented each time we build a new
        filename. To know if a filename has already been build, we check if
        the fs_filename field is set. If it is set, we increment the version
        number. The version number is taken from the computed filename.

        The format of the filename is:
        <slugified name>-<id>-<version>.<extension>

        When the name has no extension and none can be guessed from the
        mimetype, the filename is built without extension.
        """
        self.ensure_one()
        filename, extension = os.path.splitext(self.name)
        if not extension and self.mimetype:
            # guess_extension() returns None for unknown mimetypes; never let
            # the literal "None" leak into the filename.
            extension = mimetypes.guess_extension(self.mimetype) or ""
        version = 0
        if self.fs_filename:
            parsed = self._parse_fs_filename(self.fs_filename)
            if parsed:
                version = parsed[2] + 1
        return "{}{}".format(
            slugify(
                "{}-{}-{}".format(filename, self.id, version),
                regex_pattern=REGEX_SLUGIFY,
            ),
            extension,
        )

    def _enforce_meaningful_storage_filename(self) -> None:
        """Enforce meaningful filename for files stored in the filesystem storage

        The filename of the file in the filesystem storage is computed from
        the mimetype and the name of the attachment. This method is called
        when an attachment is created to ensure that the filename of the file
        in the filesystem keeps the same meaning as the name of the attachment.

        Keeping the same meaning and mimetype is important to also ease to provide
        a meaningful and SEO friendly URL to the file in the filesystem storage.
        """
        for attachment in self:
            if not self._is_file_from_a_storage(attachment.store_fname):
                continue
            fs, storage, filename = attachment._get_fs_parts()
            if self.env["fs.storage"]._must_use_filename_obfuscation(storage):
                attachment.fs_filename = filename
                continue
            new_filename = attachment._build_fs_filename()
            # we must keep the same full path as the original filename
            new_filename_with_path = os.path.join(
                os.path.dirname(filename), new_filename
            )
            fs.rename(filename, new_filename_with_path)
            attachment.fs_filename = new_filename
            # we need to update the store_fname with the new filename by
            # calling the write method of the field since the write method
            # of ir_attachment prevent normal write on store_fname
            attachment._force_write_store_fname(f"{storage}://{new_filename_with_path}")
            self._fs_mark_for_gc(attachment.store_fname)

    def _force_write_store_fname(self, store_fname):
        """Force the write of the store_fname field

        The base implementation of the store_fname field prevent the write
        of the store_fname field. This method bypass this limitation by
        calling the write method of the field directly.
        """
        self._fields["store_fname"].write(self, store_fname)

    @api.model
    def _get_fs_storage_for_code(
        self,
        code: str,
    ) -> fsspec.AbstractFileSystem | None:
        """Return the filesystem for the given storage code

        :raises SystemError: if no filesystem is found for ``code``
        """
        fs = self.env["fs.storage"].get_fs_by_code(code)
        if not fs:
            raise SystemError(f"No Filesystem storage for code {code}")
        return fs

    @api.model
    def _fs_parse_store_fname(
        self, fname: str
    ) -> tuple[fsspec.AbstractFileSystem, str, str]:
        """Return the filesystem, the storage code and the path for the given fname

        :param fname: the fname to parse, formatted as ``<storage code>://<path>``
        """
        partition = fname.partition("://")
        storage_code = partition[0]
        fs = self._get_fs_storage_for_code(storage_code)
        fname = partition[2]
        return fs, storage_code, fname

    @api.model
    def _parse_fs_filename(self, filename: str) -> tuple[str, int, int, str] | None:
        """Parse the filename and return the name, id, version and extension

        <name-without-extension>-<id>-<version>.<extension>

        ``None`` is returned when the filename is empty or does not match.
        """
        if not filename:
            return None
        filename = os.path.basename(filename)
        match = FS_FILENAME_RE_PARSER.match(filename)
        if not match:
            return None
        name, res_id, version, extension = match.groups()
        return name, int(res_id), int(version), extension

    @api.model
    def _is_file_from_a_storage(self, fname):
        """Tell whether ``fname`` targets one of the enabled storages."""
        if not fname:
            return False
        for storage_code in self._get_storage_codes():
            if self._is_storage_disabled(storage_code):
                continue
            uri = "{}://".format(storage_code)
            if fname.startswith(uri):
                return True
        return False

    @api.model
    def _fs_mark_for_gc(self, fname):
        """Mark the file for deletion

        The file will be deleted by the garbage collector if it's no more
        referenced by any attachment. We use a garbage collector to enforce
        the transaction mechanism between Odoo and the filesystem storage.

        Files are added to the garbage collector when:

        - each time a file is created in the filesystem storage
        - an attachment is deleted

        Whatever the result of the current transaction, the information of files
        marked for deletion is stored in the database.

        When the garbage collector is called, it will check if the file is still
        referenced by an attachment. If not, the file is physically deleted from
        the filesystem storage.

        If the creation of the attachment fails, since the file is marked for
        deletion when it's written into the filesystem storage, it will be
        deleted by the garbage collector.

        If the content of the attachment is updated, we always create a new file.
        This new file is marked for deletion and the old one too. If the transaction
        succeeds, the old file is deleted by the garbage collector since it's no
        more referenced by any attachment. If the transaction fails, the old file
        is not deleted since it's still referenced by the attachment but the new
        file is deleted since it's marked for deletion and not referenced.
        """
        self.env["fs.file.gc"]._mark_for_gc(fname)

    def _get_fs_parts(
        self,
    ) -> tuple[fsspec.AbstractFileSystem, str, str] | tuple[None, None, None]:
        """Return the filesystem, the storage code and the path for the current attachment"""
        if not self.store_fname:
            return None, None, None
        return self._fs_parse_store_fname(self.store_fname)

    def open(
        self,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        new_version=True,
        **kwargs,
    ) -> io.IOBase:
        """
        Return a file-like object from the filesystem storage where the attachment
        content is stored.

        In read mode, this method works for all attachments, even if the content
        is stored in the database or into the odoo filestore or a filesystem storage.

        The resultant instance must function correctly in a context ``with``
        block.

        (parameters are ignored in the case of the database storage).

        Parameters
        ----------
        mode: str like 'rb', 'w'
            See builtin ``open()``
        block_size: int
            Some indication of buffering - this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        new_version: bool
            If True, and mode is 'w', create a new version of the file.
            If False, and mode is 'w', overwrite the current version of the file.
            This flag is True by default to avoid data loss and ensure transaction
            mechanism between Odoo and the filesystem storage.
        encoding, errors, newline: passed on to TextIOWrapper for text mode

        Returns
        -------
        A file-like object

        TODO if open with 'w' in mode, we could use a buffered IO detecting that
        the content is modified and invalidating the attachment cache...
        """
        self.ensure_one()
        return AttachmentFileLikeAdapter(
            self,
            mode=mode,
            block_size=block_size,
            cache_options=cache_options,
            compression=compression,
            new_version=new_version,
            **kwargs,
        )

    @contextmanager
    def _do_in_new_env(self, new_cr=False):
        """Context manager that yields a new environment

        Using a new Odoo Environment thus a new PG transaction.

        :param new_cr: when True, a new cursor is opened; it is committed on
            success and rolled back if the body raises. When False, a copy of
            the current environment (same cursor) is yielded.
        """
        if new_cr:
            registry = odoo.modules.registry.Registry.new(self.env.cr.dbname)
            with closing(registry.cursor()) as cr:
                try:
                    yield self.env(cr=cr)
                except Exception:
                    cr.rollback()
                    raise
                else:
                    # disable pylint error because this is a valid commit,
                    # we are in a new env
                    cr.commit()  # pylint: disable=invalid-commit
        else:
            # make a copy
            yield self.env()

    def _get_storage_codes(self):
        """Get the list of filesystem storage active in the system"""
        return self.env["fs.storage"].sudo().get_storage_codes()

    ################################
    # useful methods for migration #
    ################################

    def _move_attachment_to_store(self):
        """Rewrite the content of the attachment so it lands in the storage.

        :return: the full path of the previous file when the attachment was
            stored in a file, otherwise None
        """
        self.ensure_one()
        _logger.info("inspecting attachment %s (%d)", self.name, self.id)
        fname = self.store_fname
        # store_fname is False for attachments stored in database: guard the
        # parsing so the ``db_datas`` branch below can be reached.
        storage = (fname or "").partition("://")[0]
        if self._is_storage_disabled(storage):
            fname = False
        if fname:
            # migrating from filesystem filestore
            # or from the old 'store_fname' without the bucket name
            _logger.info("moving %s on the object storage", fname)
            self.write(
                {
                    "datas": self.datas,
                    # this is required otherwise the
                    # mimetype gets overriden with
                    # 'application/octet-stream'
                    # on assets
                    "mimetype": self.mimetype,
                }
            )
            _logger.info("moved %s on the object storage", fname)
            return self._full_path(fname)
        elif self.db_datas:
            _logger.info("moving on the object storage from database")
            self.write({"datas": self.datas})

    @api.model
    def force_storage(self):
        """Move the attachments to the configured storage (administrators only).

        :raises AccessError: if the current user is not an administrator
        """
        if not self.env["res.users"].browse(self.env.uid)._is_admin():
            raise AccessError(_("Only administrators can execute this action."))
        location = self.env.context.get("storage_location") or self._storage()
        if location not in self._get_storage_codes():
            return super().force_storage()
        self._force_storage_to_object_storage()

    @api.model
    def force_storage_to_db_for_special_fields(
        self, new_cr=False, storage: str | None = None
    ):
        """Migrate special attachments from Object Storage back to database

        The access to a file stored on the objects storage is slower
        than a local disk or database access. For attachments like
        image_small that are accessed in batch for kanban views, this
        is too slow. We store this type of attachment in the database.

        This method can be used when migrating a filestore where all the files,
        including the special files (assets, image_small, ...) have been pushed
        to the Object Storage and we want to write them back in the database.

        It is not called anywhere, but can be called by RPC or scripts.

        :param new_cr: run the migration in a new cursor, see ``_do_in_new_env``
        :param storage: code of the storage to migrate from, defaults to the
            code returned by ``_storage``
        """
        if not storage:
            storage = self._storage()
        if self._is_storage_disabled(storage):
            _logger.warning(
                "Storage '%s' is disabled, skipping migration of attachments to DB",
                storage,
            )
            return
        if storage not in self._get_storage_codes():
            _logger.warning(
                "Storage '%s' is not configured, "
                "skipping migration of attachments to DB",
                storage,
            )
            return
        domain = AND(
            (
                normalize_domain(
                    [
                        ("store_fname", "=like", "{}://%".format(storage)),
                        # for res_field, see comment in
                        # _force_storage_to_object_storage
                        "|",
                        ("res_field", "=", False),
                        ("res_field", "!=", False),
                    ]
                ),
                normalize_domain(self._store_in_db_instead_of_object_storage_domain()),
            )
        )
        with self._do_in_new_env(new_cr=new_cr) as new_env:
            model_env = new_env["ir.attachment"].with_context(prefetch_fields=False)
            attachment_ids = model_env.search(domain).ids
            if not attachment_ids:
                return
            total = len(attachment_ids)
            start_time = time.time()
            _logger.info(
                "Moving %d attachments from %s to DB for fast access", total, storage
            )
            for current, attachment_id in enumerate(attachment_ids, start=1):
                # if we browse attachments outside of the loop, the first
                # access to 'datas' will compute all the 'datas' fields at
                # once, which means reading hundreds or thousands of files at
                # once, exhausting memory
                attachment = model_env.browse(attachment_id)
                # this write will read the datas from the Object Storage and
                # write them back in the DB (the logic for location to write is
                # in the 'datas' inverse computed field)
                # we need to write the mimetype too, otherwise it will be
                # overwritten with 'application/octet-stream' on assets. On each
                # write, the mimetype is recomputed if not given. If we don't
                # pass it nor the name, the mimetype will be set to the default
                # value 'application/octet-stream' on assets.
                attachment.write(
                    {"datas": attachment.datas, "mimetype": attachment.mimetype}
                )
                if current % 100 == 0 or total - current == 0:
                    _logger.info(
                        "attachment %s/%s after %.2fs",
                        current,
                        total,
                        time.time() - start_time,
                    )

    @api.model
    def _force_storage_to_object_storage(self, new_cr=False):
        """Move every attachment not yet in the target storage into it.

        :param new_cr: run the migration in a new cursor, see ``_do_in_new_env``
        """
        _logger.info("migrating files to the object storage")
        storage = self.env.context.get("storage_location") or self._storage()
        if self._is_storage_disabled(storage):
            return
        # The weird "res_field = False OR res_field != False" domain
        # is required! It's because of an override of _search in ir.attachment
        # which adds ('res_field', '=', False) when the domain does not
        # contain 'res_field'.
        # https://github.com/odoo/odoo/blob/9032617120138848c63b3cfa5d1913c5e5ad76db/
        # odoo/addons/base/ir/ir_attachment.py#L344-L347
        domain = [
            "!",
            ("store_fname", "=like", "{}://%".format(storage)),
            "|",
            ("res_field", "=", False),
            ("res_field", "!=", False),
        ]
        # We do a copy of the environment so we can workaround the cache issue
        # below. We do not create a new cursor by default because it causes
        # serialization issues due to concurrent updates on attachments during
        # the installation
        with self._do_in_new_env(new_cr=new_cr) as new_env:
            model_env = new_env["ir.attachment"]
            ids = model_env.search(domain).ids
            files_to_clean = []
            for attachment_id in ids:
                try:
                    with new_env.cr.savepoint():
                        # check that no other transaction has
                        # locked the row, don't send a file to storage
                        # in that case.
                        # The lock must be taken with the cursor that performs
                        # the write and owns the savepoint: with a new cursor,
                        # locking through ``self.env.cr`` would lock the row in
                        # another transaction than the one updating it.
                        new_env.cr.execute(
                            "SELECT id "
                            "FROM ir_attachment "
                            "WHERE id = %s "
                            "FOR UPDATE NOWAIT",
                            (attachment_id,),
                            log_exceptions=False,
                        )
                        # This is a trick to avoid having the 'datas'
                        # function fields computed for every attachment on
                        # each iteration of the loop. The former issue
                        # being that it reads the content of the file of
                        # ALL the attachments on each loop.
                        new_env.clear()
                        attachment = model_env.browse(attachment_id)
                        path = attachment._move_attachment_to_store()
                        if path:
                            files_to_clean.append(path)
                except psycopg2.OperationalError:
                    _logger.error(
                        "Could not migrate attachment %s to S3", attachment_id
                    )
            # delete the files from the filesystem once we know the changes
            # have been committed in ir.attachment
            if files_to_clean:
                new_env.cr.commit()
                clean_fs(files_to_clean)
class AttachmentFileLikeAdapter(object):
    """
    This class is a wrapper class around the ir.attachment model. It is used to
    open the ir.attachment as a file and to read/write data to it.

    When the content of the file is stored into the odoo filestore or in a
    filesystem storage, this object allows you to read/write the content from
    the file in a direct way without having to read/write the whole file into
    memory. When the content of the file is stored into database, this content
    is read/written from/into a buffer in memory.

    Parameters
    ----------
    attachment : ir.attachment
        The attachment to open as a file.
    mode: str like 'rb', 'w'
        See builtin ``open()``
    block_size: int
        Some indication of buffering - this is a value in bytes
    cache_options : dict, optional
        Extra arguments to pass through to the cache.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    new_version: bool
        If True, and mode is 'w', create a new version of the file.
        If False, and mode is 'w', overwrite the current version of the file.
        The default is False when the adapter is instantiated directly, while
        ``ir.attachment.open`` passes True by default to avoid data loss and
        ensure transaction mechanism between Odoo and the filesystem storage.
    encoding, errors, newline: passed on to TextIOWrapper for text mode

    You can use this class to adapt an attachment object as a file in 2 ways:

    * as a context manager wrapping the attachment object as a file
    * or as a normal utility class

    Examples

    >>> with AttachmentFileLikeAdapter(attachment, mode="rb") as f:
    ...     f.read()
    b'Hello World'
    # at the end of the context manager, the file is closed
    >>> f = AttachmentFileLikeAdapter(attachment, mode="rb")
    >>> f.read()
    b'Hello World'
    # you have to close the file manually
    >>> f.close()
    """

    def __init__(
        self,
        attachment: IrAttachment,
        mode: str = "rb",
        block_size: int | None = None,
        cache_options: dict | None = None,
        compression: str | None = None,
        new_version: bool = False,
        **kwargs,
    ):
        self._attachment = attachment
        self._mode = mode
        self._block_size = block_size
        self._cache_options = cache_options
        self._compression = compression
        self._new_version = new_version
        self._kwargs = kwargs
        # state attributes
        self._file: io.IOBase | None = None
        self._filesystem: fsspec.AbstractFileSystem | None = None
        self._new_store_fname: str | None = None

    @property
    def attachment(self) -> IrAttachment:
        """The attachment object the file is related to"""
        return self._attachment

    @property
    def mode(self) -> str:
        """The mode used to open the file"""
        return self._mode

    @property
    def block_size(self) -> int | None:
        """The block size used to open the file"""
        return self._block_size

    @property
    def cache_options(self) -> dict | None:
        """The cache options used to open the file"""
        return self._cache_options

    @property
    def compression(self) -> str | None:
        """The compression used to open the file"""
        return self._compression

    @property
    def new_version(self) -> bool:
        """Is the file open for a new version"""
        return self._new_version

    @property
    def kwargs(self) -> dict:
        """The extra keyword arguments passed to the filesystem when opening the file"""
        return self._kwargs

    @property
    def _is_open_for_modify(self) -> bool:
        """Is the file open for modification

        A file is open for modification if it is open for writing or appending
        """
        return "w" in self.mode or "a" in self.mode

    @property
    def _is_open_for_read(self) -> bool:
        """Is the file open for reading"""
        return "r" in self.mode

    @property
    def _is_stored_in_db(self) -> bool:
        """Is the file stored in database"""
        return self.attachment._storage() == "db"

    def __enter__(self) -> io.IOBase:
        """Called when entering the context manager

        Create the file object and return it.
        """
        # we call the attachment instance to get the file object
        self._file_open()
        return self._file

    def _file_open(self) -> None:
        """Open the attachment content as a file-like object

        This method will initialize the following attributes:

        * _file: the file-like object.
        * _filesystem: filesystem object.
        * _new_store_fname: the new store_fname if the file is
          opened for a new version.
        """
        new_store_fname = None
        if (
            self._is_open_for_read
            or (self._is_open_for_modify and not self.new_version)
            or self._is_stored_in_db
        ):
            if self.attachment._is_file_from_a_storage(self.attachment.store_fname):
                fs, _storage, fname = self.attachment._get_fs_parts()
                filepath = fname
                filesystem = fs
            elif self.attachment.store_fname:
                filepath = self.attachment._full_path(self.attachment.store_fname)
                filesystem = fsspec.filesystem("file")
            else:
                # no store_fname: the content is served from an in-memory
                # filesystem, seeded with ``db_datas`` when it must be read
                # or appended to
                filepath = f"{self.attachment.id}"
                filesystem = fsspec.filesystem("memory")
                if "a" in self.mode or self._is_open_for_read:
                    filesystem.pipe_file(filepath, self.attachment.db_datas)
            the_file = filesystem.open(
                filepath,
                mode=self.mode,
                block_size=self.block_size,
                cache_options=self.cache_options,
                compression=self.compression,
                **self.kwargs,
            )
        else:
            # mode='w' and new_version=True and storage != 'db'
            # We must create a new file with a new name. If we are in an
            # append mode, we must copy the content of the old file (or create
            # the new one by copy of the old one).
            # to not break the storage plugin mechanism, we'll use the
            # _file_write method to create the new empty file with a random
            # content and checksum to avoid collision.
            content = self._gen_random_content()
            checksum = self.attachment._compute_checksum(content)
            new_store_fname = self.attachment.with_context(
                attachment_res_model=self.attachment.res_model,
                attachment_res_field=self.attachment.res_field,
            )._file_write(content, checksum)
            if self.attachment._is_file_from_a_storage(new_store_fname):
                (
                    filesystem,
                    _storage,
                    new_filepath,
                ) = self.attachment._fs_parse_store_fname(new_store_fname)
                _fs, _storage, old_filepath = self.attachment._get_fs_parts()
            else:
                new_filepath = self.attachment._full_path(new_store_fname)
                old_filepath = self.attachment._full_path(self.attachment.store_fname)
                filesystem = fsspec.filesystem("file")
            if "a" in self.mode:
                filesystem.cp_file(old_filepath, new_filepath)
            the_file = filesystem.open(
                new_filepath,
                mode=self.mode,
                block_size=self.block_size,
                cache_options=self.cache_options,
                compression=self.compression,
                **self.kwargs,
            )
        self._filesystem = filesystem
        self._new_store_fname = new_store_fname
        self._file = the_file

    def _gen_random_content(self, size=256):
        """Generate a random content of size bytes"""
        return os.urandom(size)

    def _file_close(self):
        """Close the file-like object opened by _file_open

        When the file was open for modification, the attachment is updated
        with the values computed from the written file.
        """
        if not self._file:
            return
        if not self._file.closed:
            self._file.flush()
            self._file.close()
        if self._is_open_for_modify:
            attachment_data = self._get_attachment_data()
            if (
                not (self.new_version and self._new_store_fname)
                and self._is_stored_in_db
            ):
                attachment_data["raw"] = self._file.getvalue()
            self.attachment.write(attachment_data)
            if self.new_version and self._new_store_fname:
                self.attachment._force_write_store_fname(self._new_store_fname)
            self.attachment._enforce_meaningful_storage_filename()
        self._ensure_cache_consistency()

    def _get_attachment_data(self) -> dict:
        """Return the attachment values computed from the file.

        ``checksum`` and ``file_size`` are asked to the filesystem, only when
        a file has been opened. ``index_content`` is always set to ``b""``.
        """
        ret = {}
        if self._file:
            file_path = self._file.path
            if hasattr(self._filesystem, "path"):
                # make the path relative to the path of the filesystem
                file_path = file_path.replace(self._filesystem.path, "")
                file_path = file_path.lstrip("/")
            ret["checksum"] = self._filesystem.checksum(file_path)
            ret["file_size"] = self._filesystem.size(file_path)
        # TODO index_content is too expensive to compute here or should be configurable
        # data = self._file.read()
        # ret["index_content"] = self.attachment._index_content(data,
        #     self.attachment.mimetype, ret["checksum"])
        ret["index_content"] = b""
        return ret

    def _ensure_cache_consistency(self):
        """Ensure the cache consistency once the file is closed"""
        if self._is_open_for_modify and not self._is_stored_in_db:
            self.attachment.invalidate_recordset(fnames=["raw", "datas", "db_datas"])
        if (
            self.attachment.res_model
            and self.attachment.res_id
            and self.attachment.res_field
        ):
            self.attachment.env[self.attachment.res_model].browse(
                self.attachment.res_id
            ).invalidate_recordset(fnames=[self.attachment.res_field])

    def __exit__(self, *args):
        """Called when exiting the context manager.

        Close the file if it is not already closed.
        """
        self._file_close()

    def __getattr__(self, attr):
        """
        Forward all other attributes to the underlying file object.

        This method is required to make the object behave like a file object
        when the AttachmentFileLikeAdapter is used outside a context manager.

        .. code-block:: python

            f = AttachmentFileLikeAdapter(attachment)
            f.read()

        :raises AttributeError: if the adapter has not been initialized yet
        """
        # ``__getattr__`` is only called when the normal lookup fails. If
        # ``_file`` itself is missing (instance not initialized yet), reading
        # ``self._file`` below would call ``__getattr__`` again, endlessly.
        if "_file" not in self.__dict__:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            )
        if not self._file:
            self.__enter__()
        return getattr(self._file, attr)