18.0 vanilla

2026-04-20 11:52:04 +02:00 · 2025-10-03 18:06:50 +02:00 · 2025-10-03 18:06:50 +02:00 · 0a7ae8db93
commit 0a7ae8db93
parent d72e748793
337 changed files with 399651 additions and 232598 deletions
--- a/odoo-bringout-oca-ocb-base/odoo/tools/mail.py
+++ b/odoo-bringout-oca-ocb-base/odoo/tools/mail.py
@ -3,11 +3,11 @@

 import base64
 import collections
+import itertools
 import logging
 import random
 import re
 import socket
-import threading
 import time
 import email.utils
 from email.utils import getaddresses as orig_getaddresses
@ -20,10 +20,25 @@ from lxml import etree, html
 from lxml.html import clean, defs
 from werkzeug import urls

-import odoo
-from odoo.loglevels import ustr
 from odoo.tools import misc

+__all__ = [
+    "email_domain_extract",
+    "email_domain_normalize",
+    "email_normalize",
+    "email_normalize_all",
+    "email_split",
+    "encapsulate_email",
+    "formataddr",
+    "html2plaintext",
+    "html_normalize",
+    "html_sanitize",
+    "is_html_empty",
+    "parse_contact_from_email",
+    "plaintext2html",
+    "single_email_re",
+]
+
 _logger = logging.getLogger(__name__)


@ -44,12 +59,13 @@ else:
 safe_attrs = defs.safe_attrs | frozenset(
    ['style',
     'data-o-mail-quote', 'data-o-mail-quote-node',  # quote detection
-     'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translation-initial-sha', 'data-oe-nodeid',
-     'data-last-history-steps', 'data-oe-protected', 'data-oe-transient-content', 'data-width', 'data-height', 'data-scale-x', 'data-scale-y', 'data-x', 'data-y',
+     'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translation-source-sha', 'data-oe-nodeid',
+     'data-last-history-steps', 'data-oe-protected', 'data-embedded', 'data-embedded-editable', 'data-embedded-props', 'data-oe-version',
+     'data-oe-transient-content', 'data-behavior-props', 'data-prop-name', 'data-width', 'data-height', 'data-scale-x', 'data-scale-y', 'data-x', 'data-y',  # legacy editor
+     'data-oe-role', 'data-oe-aria-label',
     'data-publish', 'data-id', 'data-res_id', 'data-interval', 'data-member_id', 'data-scroll-background-ratio', 'data-view-id',
     'data-class', 'data-mimetype', 'data-original-src', 'data-original-id', 'data-gl-filter', 'data-quality', 'data-resize-width',
     'data-shape', 'data-shape-colors', 'data-file-name', 'data-original-mimetype',
-     'data-behavior-props', 'data-prop-name',  # knowledge commands
     'data-mimetype-before-conversion',
     ])
 SANITIZE_TAGS = {
@ -68,10 +84,12 @@ class _Cleaner(clean.Cleaner):
    _style_whitelist = [
        'font-size', 'font-family', 'font-weight', 'font-style', 'background-color', 'color', 'text-align',
        'line-height', 'letter-spacing', 'text-transform', 'text-decoration', 'text-decoration', 'opacity',
-        'float', 'vertical-align', 'display',
+        'float', 'vertical-align', 'display', 'object-fit',
        'padding', 'padding-top', 'padding-left', 'padding-bottom', 'padding-right',
        'margin', 'margin-top', 'margin-left', 'margin-bottom', 'margin-right',
        'white-space',
+        # appearance
+        'background-image', 'background-position', 'background-size', 'background-repeat', 'background-origin',
        # box model
        'border', 'border-color', 'border-radius', 'border-style', 'border-width', 'border-top', 'border-bottom',
        'height', 'width', 'max-width', 'min-width', 'min-height',
@ -86,6 +104,7 @@ class _Cleaner(clean.Cleaner):

    strip_classes = False
    sanitize_style = False
+    conditional_comments = True

    def __call__(self, doc):
        super(_Cleaner, self).__call__(doc)
@ -118,6 +137,24 @@ class _Cleaner(clean.Cleaner):
            else:
                del el.attrib['style']

+    def kill_conditional_comments(self, doc):
+        """Override the default behavior of lxml.
+
+        https://github.com/lxml/lxml/blob/e82c9153c4a7d505480b94c60b9a84d79d948efb/src/lxml/html/clean.py#L501-L510
+
+        In some use cases, e.g. templates used for mass mailing,
+        we send emails containing conditional comments targeting Microsoft Outlook,
+        to give special styling instructions.
+        https://github.com/odoo/odoo/pull/119325/files#r1301064789
+
+        Unsanitized HTML can lie within these conditional comments.
+        However, modern browsers treat them as plain comments,
+        so their content is never executed.
+        https://caniuse.com/sr_ie-features
+        """
+        if self.conditional_comments:
+            super().kill_conditional_comments(doc)
+

 def tag_quote(el):
    def _create_new_node(tag, text, tail=None, attrs=None):
@ -209,13 +246,23 @@ def tag_quote(el):
        # remove single node
        el.set('data-o-mail-quote-node', '1')
        el.set('data-o-mail-quote', '1')
-    if el.getparent() is not None and (el.getparent().get('data-o-mail-quote') or el.getparent().get('data-o-mail-quote-container')) and not el.getparent().get('data-o-mail-quote-node'):
-        el.set('data-o-mail-quote', '1')
+    if el.getparent() is not None and not el.getparent().get('data-o-mail-quote-node'):
+        if el.getparent().get('data-o-mail-quote'):
+            el.set('data-o-mail-quote', '1')
+        # only quoting the elements following the first quote in the container
+        # avoids issues with repeated calls to html_normalize
+        elif el.getparent().get('data-o-mail-quote-container'):
+            if (first_sibling_quote := el.getparent().find("*[@data-o-mail-quote]")) is not None:
+                siblings = el.getparent().getchildren()
+                quote_index = siblings.index(first_sibling_quote)
+                element_index = siblings.index(el)
+                if quote_index < element_index:
+                    el.set('data-o-mail-quote', '1')
    if el.getprevious() is not None and el.getprevious().get('data-o-mail-quote') and not el.text_content().strip():
        el.set('data-o-mail-quote', '1')


-def html_normalize(src, filter_callback=None):
+def html_normalize(src, filter_callback=None, output_method="html"):
    """ Normalize `src` for storage as an html field value.

    The string is parsed as an html tag soup, made valid, then decorated for
@ -228,27 +275,27 @@ def html_normalize(src, filter_callback=None):
    :param filter_callback: optional callable taking a single `etree._Element`
        document parameter, to be called during normalization in order to
        filter the output document
+    :param output_method: defines the output method to pass to `html.tostring`.
+        It defaults to 'html', but can also be 'xml' for xhtml output.
    """
-
    if not src:
        return src

-    src = ustr(src, errors='replace')
    # html: remove encoding attribute inside tags
-    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
-    src = doctype.sub(u"", src)
+    src = re.sub(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', "", src, flags=re.IGNORECASE | re.DOTALL)
+
+    src = src.replace('--!>', '-->')
+    src = re.sub(r'(<!-->|<!--->)', '<!-- -->', src)
+    # Outlook desktop adds unnecessary '<o:.*></o:.*>' tags, which are parsed
+    # into '<p></p>' and may alter the appearance (e.g. spacing) of the mail body
+    src = re.sub(r'</?o:.*?>', '', src)

    try:
-        src = src.replace('--!>', '-->')
-        src = re.sub(r'(<!-->|<!--->)', '<!-- -->', src)
-        # On the specific case of Outlook desktop it adds unnecessary '<o:.*></o:.*>' tags which are parsed
-        # in '<p></p>' which may alter the appearance (eg. spacing) of the mail body
-        src = re.sub(r'</?o:.*?>', '', src)
        doc = html.fromstring(src)
    except etree.ParserError as e:
        # HTML comment only string, whitespace only..
        if 'empty' in str(e):
-            return u""
+            return ""
        raise

    # perform quote detection before cleaning and class removal
@ -259,7 +306,7 @@ def html_normalize(src, filter_callback=None):
    if filter_callback:
        doc = filter_callback(doc)

-    src = html.tostring(doc, encoding='unicode')
+    src = html.tostring(doc, encoding='unicode', method=output_method)

    # this is ugly, but lxml/etree tostring want to put everything in a
    # 'div' that breaks the editor -> remove that
@ -272,7 +319,7 @@ def html_normalize(src, filter_callback=None):
    return src


-def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False, sanitize_style=False, sanitize_form=True, strip_style=False, strip_classes=False):
+def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False, sanitize_style=False, sanitize_form=True, sanitize_conditional_comments=True, strip_style=False, strip_classes=False, output_method="html"):
    if not src:
        return src

@ -286,6 +333,7 @@ def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=Fals
            'forms': sanitize_form,            # True = remove form tags
            'remove_unknown_tags': False,
            'comments': False,
+            'conditional_comments': sanitize_conditional_comments,   # True = remove conditional comments
            'processing_instructions': False
        }
        if sanitize_tags:
@ -311,7 +359,7 @@ def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=Fals
        return doc

    try:
-        sanitized = html_normalize(src, filter_callback=sanitize_handler)
+        sanitized = html_normalize(src, filter_callback=sanitize_handler, output_method=output_method)
    except etree.ParserError:
        if not silent:
            raise
@ -329,7 +377,8 @@ def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=Fals
 # HTML/Text management
 # ----------------------------------------------------------

-URL_REGEX = r'(\bhref=[\'"](?!mailto:|tel:|sms:)([^\'"]+)[\'"])'
+URL_SKIP_PROTOCOL_REGEX = r'mailto:|tel:|sms:'
+URL_REGEX = rf'''(\bhref=['"](?!{URL_SKIP_PROTOCOL_REGEX})([^'"]+)['"])'''
 TEXT_URL_REGEX = r'https?://[\w@:%.+&~#=/-]+(?:\?\S+)?'
 # retrieve inner content of the link
 HTML_TAG_URL_REGEX = URL_REGEX + r'([^<>]*>([^<>]+)<\/)?'
@ -392,20 +441,24 @@ def create_link(url, label):
    return f'<a href="{url}" target="_blank" rel="noreferrer noopener">{label}</a>'


-def html2plaintext(html, body_id=None, encoding='utf-8'):
+def html2plaintext(html, body_id=None, encoding='utf-8', include_references=True):
    """ From an HTML text, convert the HTML to plain text.
    If @param body_id is provided then this is the tag where the
    body (not necessarily <body>) starts.
+    :param include_references: If False, numbered references and
+        URLs for links and images will not be included.
    """
    ## (c) Fry-IT, www.fry-it.com, 2007
    ## <peter@fry-it.com>
    ## download here: http://www.peterbe.com/plog/html2plaintext
-
-    html = ustr(html)
-
-    if not html.strip():
+    if not (html and html.strip()):
        return ''

+    if isinstance(html, bytes):
+        html = html.decode(encoding)
+    else:
+        assert isinstance(html, str), f"expected str got {html.__class__.__name__}"
+
    tree = etree.fromstring(html, parser=etree.HTMLParser())

    if body_id is not None:
@ -416,28 +469,25 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
        tree = source[0]

    url_index = []
-    i = 0
-    for link in tree.findall('.//a'):
-        url = link.get('href')
-        if url:
-            i += 1
-            link.tag = 'span'
-            link.text = '%s [%s]' % (link.text, i)
-            url_index.append(url)
+    linkrefs = itertools.count(1)
+    if include_references:
+        for link in tree.findall('.//a'):
+            if url := link.get('href'):
+                link.tag = 'span'
+                link.text = f'{link.text} [{next(linkrefs)}]'
+                url_index.append(url)

-    for img in tree.findall('.//img'):
-        src = img.get('src')
-        if src:
-            i += 1
-            img.tag = 'span'
-            if src.startswith('data:'):
-                img_name = None   # base64 image
-            else:
-                img_name = re.search(r'[^/]+(?=\.[a-zA-Z]+(?:\?|$))', src)
-            img.text = '%s [%s]' % (img_name.group(0) if img_name else 'Image', i)
-            url_index.append(src)
+        for img in tree.findall('.//img'):
+            if src := img.get('src'):
+                img.tag = 'span'
+                if src.startswith('data:'):
+                    img_name = None  # base64 image
+                else:
+                    img_name = re.search(r'[^/]+(?=\.[a-zA-Z]+(?:\?|$))', src)
+                img.text = '%s [%s]' % (img_name[0] if img_name else 'Image', next(linkrefs))
+                url_index.append(src)

-    html = ustr(etree.tostring(tree, encoding=encoding))
+    html = etree.tostring(tree, encoding="unicode")
    # \r char is converted into &#13;, must remove it
    html = html.replace('&#13;', '')

@ -461,10 +511,10 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
    html = '\n'.join([x.strip() for x in html.splitlines()])
    html = html.replace('\n' * 2, '\n')

-    for i, url in enumerate(url_index):
-        if i == 0:
-            html += '\n\n'
-        html += ustr('[%s] %s\n') % (i + 1, url)
+    if url_index:
+        html += '\n\n'
+        for i, url in enumerate(url_index, start=1):
+            html += f'[{i}] {url}\n'

    return html.strip()

@ -482,7 +532,8 @@ def plaintext2html(text, container_tag=None):
        embedded into a ``<div>``
    :rtype: markupsafe.Markup
    """
-    text = misc.html_escape(ustr(text))
+    assert isinstance(text, str)
+    text = misc.html_escape(text)

    # 1. replace \n and \r
    text = re.sub(r'(\r\n|\r|\n)', '<br/>', text)
@ -508,7 +559,7 @@ def append_content_to_html(html, content, plaintext=True, preserve=False, contai
    """ Append extra content at the end of an HTML snippet, trying
        to locate the end of the HTML document (</body>, </html>, or
        EOF), and converting the provided content in html unless ``plaintext``
-        is False.
+        is ``False``.

        Content conversion can be done in two ways:

@ -529,17 +580,16 @@ def append_content_to_html(html, content, plaintext=True, preserve=False, contai
        :param str container_tag: tag to wrap the content into, defaults to `div`.
        :rtype: markupsafe.Markup
    """
-    html = ustr(html)
    if plaintext and preserve:
-        content = u'\n<pre>%s</pre>\n' % misc.html_escape(ustr(content))
+        content = '\n<pre>%s</pre>\n' % misc.html_escape(content)
    elif plaintext:
        content = '\n%s\n' % plaintext2html(content, container_tag)
    else:
        content = re.sub(r'(?i)(</?(?:html|body|head|!\s*DOCTYPE)[^>]*>)', '', content)
-        content = u'\n%s\n' % ustr(content)
+        content = '\n%s\n' % content
    # Force all tags to lowercase
    html = re.sub(r'(</?)(\w+)([ >])',
-        lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
+        lambda m: '%s%s%s' % (m[1], m[2].lower(), m[3]), html)
    insert_location = html.find('</body>')
    if insert_location == -1:
        insert_location = html.find('</html>')