From fda8f4639b7c16e5f7dfe82a8cf6cac4e368ed4e Mon Sep 17 00:00:00 2001
From: Jean-Paul Smets <jp@nexedi.com>
Date: Mon, 26 Mar 2007 18:45:39 +0000
Subject: [PATCH] Added base support based on HTML content.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13679 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5/Document/Document.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py
index ece12199a4..698d7fbc5c 100644
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -380,6 +380,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
   href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE)
   body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL)
   title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL)
+  base_parser = re.compile('<base[^>]*href=[\'"](.*?)[\'"][^>]*>', re.IGNORECASE + re.DOTALL)
 
   # Declarative security
   security = ClassSecurityInfo()
@@ -1134,13 +1135,12 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
       Returns the content base URL based on the actual content or
       on its URL.
     """
-    # XXX TODO - try to retrieve base URL from content
-    # If no base_url defined, define the base URL from our URL
     base_url = self.asURL()
     base_url_list = base_url.split('/')
     if len(base_url_list):
-      if base_url_list[-1]:
+      if base_url_list[-1] and base_url_list[-1].find('.') > 0:
         # Cut the trailing part in http://www.some.site/at/trailing.html
+        # but not in http://www.some.site/at
         base_url = '/'.join(base_url_list[:-1])
     return base_url
 
-- 
2.30.9