From fda8f4639b7c16e5f7dfe82a8cf6cac4e368ed4e Mon Sep 17 00:00:00 2001 From: Jean-Paul Smets <jp@nexedi.com> Date: Mon, 26 Mar 2007 18:45:39 +0000 Subject: [PATCH] Added base support based on HTML content. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13679 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/ERP5/Document/Document.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py index ece12199a4..698d7fbc5c 100644 --- a/product/ERP5/Document/Document.py +++ b/product/ERP5/Document/Document.py @@ -380,6 +380,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE) body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL) title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL) + base_parser = re.compile('<base[^>]*href=[\'"](.*?)[\'"][^>]*>', re.IGNORECASE + re.DOTALL) # Declarative security security = ClassSecurityInfo() @@ -1134,13 +1135,12 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): Returns the content base URL based on the actual content or on its URL. """ - # XXX TODO - try to retrieve base URL from content - # If no base_url defined, define the base URL from our URL base_url = self.asURL() base_url_list = base_url.split('/') if len(base_url_list): - if base_url_list[-1]: + if base_url_list[-1] and base_url_list[-1].find('.') > 0: # Cut the trailing part in http://www.some.site/at/trailing.html + # but not in http://www.some.site/at base_url = '/'.join(base_url_list[:-1]) return base_url -- 2.30.9