From 7722657ff963f921fb04b852bf3197f0a82aef6d Mon Sep 17 00:00:00 2001 From: Tristan Cavelier <tristan.cavelier@nexedi.com> Date: Wed, 1 Jun 2016 10:47:36 +0000 Subject: [PATCH] erp5_web: make html hreferenced objects implicit successors --- .../erp5_web/Base_normalizeUrlPathname.py | 35 +++++++ .../erp5_web/Base_normalizeUrlPathname.xml | 62 ++++++++++++ .../WebPage_extractReferredObjectDict.py | 94 +++++++++++++++++++ .../WebPage_extractReferredObjectDict.xml | 62 ++++++++++++ .../WebPage_getImplicitSuccessorValueList.py | 17 ++++ .../WebPage_getImplicitSuccessorValueList.xml | 66 +++++++++++++ 6 files changed, 336 insertions(+) create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.py create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.xml create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.py create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.xml create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.py create mode 100644 bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.xml diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.py b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.py new file mode 100644 index 0000000000..572065f796 --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.py @@ -0,0 +1,35 @@ +""" + The default parameter values reproduce the default browser behavior for URL normalization + + - if keep_empty is True: `a//` -> `a//` else `a//` -> `a/`; + - if keep_single_dot is False: `./a` -> `a` else `./a` -> `./a`; + - if keep_double_dot is False: `/../a` -> `/a` else `/../a` -> `/../a` + - if keep_trailing_slash is True: `/a/` -> `/a/` else `/a/` -> `/a` +"""
+outer_component_list = [] +inner_component_list = [] +suffix_list = [] +dont_keep_empty = not keep_empty +dont_keep_single_dot = not keep_single_dot +dont_keep_double_dot = not keep_double_dot +starts_with_slash = False +if pathname[:1] == "/": + pathname = pathname[1:] + starts_with_slash = True +if pathname[-1:] == "/": + pathname = pathname[:-1] + if keep_trailing_slash: + suffix_list.append("") +component_list = pathname.split("/") +for component in component_list: + if component == ".." and dont_keep_double_dot: + if inner_component_list: + inner_component_list.pop() + else: + outer_component_list.append("..") + elif not (component == "" and dont_keep_empty or + component == "." and dont_keep_single_dot): + inner_component_list.append(component) +if starts_with_slash: + return "/" + "/".join(inner_component_list + suffix_list) +return "/".join(outer_component_list + inner_component_list + suffix_list) diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.xml b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.xml new file mode 100644 index 0000000000..0daeb2b4f0 --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_normalizeUrlPathname.xml @@ -0,0 +1,62 @@ +<?xml version="1.0"?> +<ZopeData> + <record id="1" aka="AAAAAAAAAAE="> + <pickle> + <global name="PythonScript" module="Products.PythonScripts.PythonScript"/> + </pickle> + <pickle> + <dictionary> + <item> + <key> <string>Script_magic</string> </key> + <value> <int>3</int> </value> + </item> + <item> + <key> <string>_bind_names</string> </key> + <value> + <object> + <klass> + <global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/> + </klass> + <tuple/> + <state> + <dictionary> + <item> + <key> <string>_asgns</string> </key> + <value> + <dictionary> + <item> + <key> <string>name_container</string> </key> + <value> <string>container</string> </value> + </item> + <item> + <key> <string>name_context</string> 
</key> + <value> <string>context</string> </value> + </item> + <item> + <key> <string>name_m_self</string> </key> + <value> <string>script</string> </value> + </item> + <item> + <key> <string>name_subpath</string> </key> + <value> <string>traverse_subpath</string> </value> + </item> + </dictionary> + </value> + </item> + </dictionary> + </state> + </object> + </value> + </item> + <item> + <key> <string>_params</string> </key> + <value> <string>pathname, keep_empty=True, keep_single_dot=False, keep_double_dot=False, keep_trailing_slash=True</string> </value> + </item> + <item> + <key> <string>id</string> </key> + <value> <string>Base_normalizeUrlPathname</string> </value> + </item> + </dictionary> + </pickle> + </record> +</ZopeData> diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.py b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.py new file mode 100644 index 0000000000..7a50326071 --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.py @@ -0,0 +1,94 @@ +from zExceptions import Unauthorized +portal = context.getPortalObject() + +href_object_dict = {} +def main(): + for part in context.Base_parseHtml(context.getTextContent("").decode("utf-8")): + handleHtmlPart(part) + return href_object_dict + +def handleHtmlTag(tag, attrs): + #if tag == "base": and "href" in attrs: # should not exist in safe-html + # NotImplemented + if tag == "object": + for i in range(len(attrs)): + if attrs[i][0] == "data": + handleHref(attrs[i][1]) + elif tag == "style": + # for style tags, next data will always be the entire text until </style> + on_next_data[0] = handleCss + else: + for i in range(len(attrs)): + if attrs[i][0] in ("src", "href"): + handleHref(attrs[i][1]) + +on_next_data = [lambda x: x] +def handleHtmlPart(part): + part_type = part[0] + if part_type in ("starttag", "startendtag"): + return handleHtmlTag(part[1], part[2]) + if 
part_type == "data": + if on_next_data[0] is None: + return part[1] + on_next_data[0](part[1]) + on_next_data[0] = None + return None + +def handleHref(href): + # handles "base_url/document_module/id" + # handles "base_url/R-Document.Reference" + # handles "base_url/R-Document.Reference/view" + if not isHrefAUrl(href): + return href + try: + obj = traverseHref(href, allow_method=False) + except (KeyError, Unauthorized): + obj = None + href_object_dict[href] = obj + +def handleCss(data): + for part in context.Base_parseCssForUrl(data): + if part[0] == "url": + handleHref(part[2]) + +def isHrefAUrl(href): + return href.startswith("https://") or href.startswith("http://") or not href.split(":", 1)[0].isalpha() + +def traverseHref(url, allow_method=True, allow_hash=False): + base_obj, relative_path = prepareHrefTraverse(url, allow_hash=allow_hash) + obj = base_obj.restrictedTraverse(relative_path) + if allow_method or obj is None: + return obj + try: + obj.getUid() + except AttributeError: + obj = base_obj.restrictedTraverse("/".join(relative_path.split("/")[:-1])) + return obj + +site_object_dict = context.ERP5Site_getWebSiteDomainDict() +base_url_root_object = getattr(context, "getWebSiteValue", str)() or portal +base_url_object = context +assert base_url_object.getRelativeUrl().startswith(base_url_root_object.getRelativeUrl()) +base_url = base_url_object.getRelativeUrl()[len(base_url_root_object.getRelativeUrl()):] +if not base_url.startswith("/"): + base_url = "/" + base_url + +normalize_kw = {"keep_empty": False, "keep_trailing_slash": False} +def prepareHrefTraverse(url, allow_hash=False): + url = url.split("?")[0] + if not allow_hash: + url = url.split("#")[0] + if url.startswith("https://") or url.startswith("http://") or url.startswith("//"): # absolute url possibly on other sites + site_url = "/".join(url.split("/", 3)[:3]) + domain = url.split("/", 3)[2] + site_object = site_object_dict[domain] + relative_path = url[len(site_url):] + relative_path = 
(relative_path[1:] if relative_path[:1] == "/" else relative_path) + relative_path = context.Base_normalizeUrlPathname("/" + relative_path, **normalize_kw)[1:] + return site_object, str(relative_path) + if url.startswith("/"): # absolute path, relative url + return base_url_root_object, str(context.Base_normalizeUrlPathname(url, **normalize_kw)[1:]) + # relative path + return base_url_root_object, str(context.Base_normalizeUrlPathname(base_url + "/" + url, **normalize_kw)[1:]) + +return main() diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.xml b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.xml new file mode 100644 index 0000000000..8a35d8572d --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_extractReferredObjectDict.xml @@ -0,0 +1,62 @@ +<?xml version="1.0"?> +<ZopeData> + <record id="1" aka="AAAAAAAAAAE="> + <pickle> + <global name="PythonScript" module="Products.PythonScripts.PythonScript"/> + </pickle> + <pickle> + <dictionary> + <item> + <key> <string>Script_magic</string> </key> + <value> <int>3</int> </value> + </item> + <item> + <key> <string>_bind_names</string> </key> + <value> + <object> + <klass> + <global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/> + </klass> + <tuple/> + <state> + <dictionary> + <item> + <key> <string>_asgns</string> </key> + <value> + <dictionary> + <item> + <key> <string>name_container</string> </key> + <value> <string>container</string> </value> + </item> + <item> + <key> <string>name_context</string> </key> + <value> <string>context</string> </value> + </item> + <item> + <key> <string>name_m_self</string> </key> + <value> <string>script</string> </value> + </item> + <item> + <key> <string>name_subpath</string> </key> + <value> <string>traverse_subpath</string> </value> + </item> + </dictionary> + </value> + </item> + </dictionary> + </state> + </object> + </value> + </item> + <item> + <key> 
<string>_params</string> </key> + <value> <string></string> </value> + </item> + <item> + <key> <string>id</string> </key> + <value> <string>WebPage_extractReferredObjectDict</string> </value> + </item> + </dictionary> + </pickle> + </record> +</ZopeData> diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.py b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.py new file mode 100644 index 0000000000..fb1a339e0a --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.py @@ -0,0 +1,17 @@ +""" + Called by WebPage.getImplicitSuccessorValueList + + `reference_list` is list of dicts containing reference and/or version + and/or language and maybe some more things. But this implementation + just ignores it. + + It extracts href and their according objects from this ERP5 instance, + and returns the list of uniq objects. +""" +uid_set = set() +for obj in context.WebPage_extractReferredObjectDict().values(): + if obj is not None: + uid_set.add(obj.getUid()) +if uid_set: + return context.portal_catalog(uid=list(uid_set)) +return () diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.xml b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.xml new file mode 100644 index 0000000000..03c404e5a1 --- /dev/null +++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_getImplicitSuccessorValueList.xml @@ -0,0 +1,66 @@ +<?xml version="1.0"?> +<ZopeData> + <record id="1" aka="AAAAAAAAAAE="> + <pickle> + <global name="PythonScript" module="Products.PythonScripts.PythonScript"/> + </pickle> + <pickle> + <dictionary> + <item> + <key> <string>Script_magic</string> </key> + <value> <int>3</int> </value> + </item> + <item> + <key> <string>_bind_names</string> </key> + <value> + <object> + <klass> + <global name="NameAssignments" 
module="Shared.DC.Scripts.Bindings"/> + </klass> + <tuple/> + <state> + <dictionary> + <item> + <key> <string>_asgns</string> </key> + <value> + <dictionary> + <item> + <key> <string>name_container</string> </key> + <value> <string>container</string> </value> + </item> + <item> + <key> <string>name_context</string> </key> + <value> <string>context</string> </value> + </item> + <item> + <key> <string>name_m_self</string> </key> + <value> <string>script</string> </value> + </item> + <item> + <key> <string>name_subpath</string> </key> + <value> <string>traverse_subpath</string> </value> + </item> + </dictionary> + </value> + </item> + </dictionary> + </state> + </object> + </value> + </item> + <item> + <key> <string>_params</string> </key> + <value> <string>reference_list=None</string> </value> + </item> + <item> + <key> <string>id</string> </key> + <value> <string>WebPage_getImplicitSuccessorValueList</string> </value> + </item> + <item> + <key> <string>title</string> </key> + <value> <string>Get referenced by us objects</string> </value> + </item> + </dictionary> + </pickle> + </record> +</ZopeData> -- 2.30.9