19.0 vanilla

2026-04-20 09:12:04 +02:00 · 2026-03-09 09:30:27 +01:00 · 2026-03-09 09:30:27 +01:00 · 2d3ee4855a
commit 2d3ee4855a
parent d1963a3c3a
7430 changed files with 2687981 additions and 2965473 deletions
--- a/odoo-bringout-oca-ocb-mail/mail/tools/link_preview.py
+++ b/odoo-bringout-oca-ocb-mail/mail/tools/link_preview.py
@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+# Part of Odoo. See LICENSE file for full copyright and licensing details.
+import re
+from lxml import html
+import chardet
+import requests
+from urllib3.exceptions import LocationParseError
+
+
+def get_link_preview_from_url(url, request_session=None):
+    """
+    Get the Open Graph properties of an url. (https://ogp.me/)
+    If the url leads directly to an image mimetype, return
+    the url as preview image else retrieve the properties from
+    the html page.
+
+    Using a stream request to prevent loading the whole page
+    as those properties are declared in the <head> tag.
+
+    The request session is optional as in some cases using
+    a session could be beneficial performance wise
+    (e.g. a lot of url could have the same domain).
+
+    :param url: address to fetch.
+    :param request_session: optional object whose ``get`` method is used
+        instead of ``requests.get`` when it is truthy.
+    :return: ``False`` when the request fails, the response is not OK, the
+        ``Content-Type`` header is missing or is neither ``image/*`` nor
+        ``text/html``; otherwise a dict describing the preview (for images:
+        ``image_mimetype``, ``og_image``, ``source_url``; for html pages:
+        whatever ``get_link_preview_from_html`` returns).
+    """
+    # Some websites are blocking non browser user agent.
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0',
+        'Odoo-Link-Preview': 'True',  # Used to identify coming from the link previewer
+    }
+    http_get = request_session.get if request_session else requests.get
+    try:
+        response = http_get(url, timeout=3, headers=headers, allow_redirects=True, stream=True)
+    except (requests.exceptions.RequestException, LocationParseError):
+        return False
+    try:
+        if not response.ok or not response.headers.get('Content-Type'):
+            return False
+        content_type = response.headers['Content-Type']
+        if content_type.startswith('image/'):
+            return {
+                # Content-Type header can return a charset, but we just need
+                # the mimetype (eg: image/jpeg;charset=ISO-8859-1)
+                'image_mimetype': content_type.split(';')[0],
+                # If the url mimetype is already an image type, set url as preview image
+                'og_image': url,
+                'source_url': url,
+            }
+        if content_type.startswith('text/html'):
+            return get_link_preview_from_html(url, response)
+        return False
+    finally:
+        # The body is streamed and never read to the end here (not at all for
+        # images, only up to </head> for html), so the response has to be
+        # closed explicitly to release its connection.
+        response.close()
+
+def get_link_preview_from_html(url, response):
+    """
+    Retrieve the Open Graph properties from the html page. (https://ogp.me/)
+    Load the page with chunks of 8kb to prevent loading the whole
+    html when we only need the <head> tag content.
+    Fallback on the <title> tag if the html doesn't have
+    any Open Graph title property.
+
+    :param url: address the response was fetched from; echoed back as
+        ``source_url``.
+    :param response: streamed response exposing ``iter_content`` and
+        ``encoding``.
+    :return: ``False`` when no content was received, when the content cannot
+        be parsed as html, or when neither an ``og:title`` meta nor a
+        ``<title>`` tag is found; otherwise a dict with the keys
+        ``og_description``, ``og_image``, ``og_mimetype``, ``og_title``,
+        ``og_type``, ``og_site_name`` (``None`` for missing metas) and
+        ``source_url``.
+    """
+    # Local import: only needed for the parser exception type.
+    from lxml import etree
+
+    content = b""
+    for chunk in response.iter_content(chunk_size=8192):
+        content += chunk
+        # Only the tail of the buffer is searched: earlier bytes were already
+        # scanned on previous iterations, the overlap covers a closing tag
+        # split across two chunks.
+        pos = content.find(b'</head>', -8196 * 2)
+        # Stop reading once all the <head> data is found
+        if pos != -1:
+            content = content[:pos + 7]
+            break
+
+    if not content:
+        return False
+
+    # chardet reports {"encoding": None} when it cannot guess, so the default
+    # has to be applied with `or` rather than through dict.get().
+    encoding = response.encoding or chardet.detect(content).get("encoding") or "utf-8"
+    try:
+        decoded_content = content.decode(encoding)
+    except (UnicodeDecodeError, LookupError, TypeError):
+        # LookupError: the server advertised a charset Python does not know.
+        decoded_content = content.decode("utf-8", errors="ignore")
+
+    try:
+        try:
+            tree = html.fromstring(decoded_content)
+        except ValueError:
+            # Retry without the leading XML declaration (<?xml ... ?>).
+            decoded_content = re.sub(
+                r"^<\?xml[^>]+\?>\s*", "", decoded_content, flags=re.IGNORECASE
+            )
+            tree = html.fromstring(decoded_content)
+    except etree.ParserError:
+        # Raised for documents lxml considers empty (e.g. whitespace only).
+        return False
+
+    def first_meta_content(og_property):
+        """Return the content of the first matching <meta property=...>, or None."""
+        values = tree.xpath(f'//meta[@property="{og_property}"]/@content')
+        return values[0] if values else None
+
+    og_title = tree.xpath('//meta[@property="og:title"]/@content')
+    if og_title:
+        og_title = og_title[0]
+    else:
+        # Fallback on the <title> tag if it exists
+        title_node = tree.find('.//title')
+        if title_node is None:
+            return False
+        og_title = title_node.text
+
+    return {
+        'og_description': first_meta_content('og:description'),
+        'og_image': first_meta_content('og:image'),
+        'og_mimetype': first_meta_content('og:image:type'),
+        'og_title': og_title,
+        'og_type': first_meta_content('og:type'),
+        'og_site_name': first_meta_content('og:site_name'),
+        'source_url': url,
+    }