Commit d32a1124 authored by Sven Franck's avatar Sven Franck Committed by Xiaowu Zhang

erp5_corporate_identity: improve slideshow rendering including displaying...

erp5_corporate_identity: improve slideshow rendering including displaying legacy presentation as slideshow
parent a8b888ae
"""
================================================================================
Try to convert old OpenOffice presentations into slideshows
================================================================================
"""
# uses cloudooo to convert odp/sxi to html (quite buggy) and then salvages the
# result into a slideshow html, which is passed on as remote_content to the
# slideshow renderer
# kw-parameters (* default)
# ------------------------------------------------------------------------------
import re
blank = ''
flags = re.MULTILINE|re.DOTALL|re.IGNORECASE
def getHeaderSlideTitle(my_doc):
    """Return the title of my_doc wrapped in an <h1> element."""
    title = my_doc.getTitle()
    return ''.join(('<h1>', title, '</h1>'))
def getSlideList(content):
    """Return the inner markup of every <html>...</html> page in content."""
    html_page = re.compile(r'<html>(.*?)</html>', flags)
    return html_page.findall(content)
def getKey(item):
    """Sort key: the first element of item converted to an integer."""
    slide_number = item[0]
    return int(slide_number)
# -------------------------------- Setup ---------------------------------------
# Main body of the script: for a "Presentation" document, convert its data to
# HTML through portal_transforms, regroup the converted pages into
# <section> slides and hand the result to WebPage_viewAsSlideshowWIP as the
# "remote_content" keyword argument.
# NOTE(review): indentation was lost in this view, so the nesting of the
# statements below (in particular where the final two lines sit) must be
# confirmed against the real file.
if context.getPortalType() in ["Presentation"]:
portal = context.getPortalObject()
mimetype = 'text/html'
content_type = context.getContentType()
# convert the stored file (content_type) into text/html
raw_data = portal.portal_transforms.convertToData(mimetype, str(context.getData() or ""), context=context, mimetype=content_type)
if raw_data is None:
raise ValueError("Failed to convert to %r" % mimetype)
if context.REQUEST is not None:
context.REQUEST.RESPONSE.setHeader("Content-Type", mimetype)
# get a list of slides
content = getSlideList(raw_data)
# every slide is in the raw_data twice, once with the title and image as text,
# once with the slide content without title. All slides are mixed randomly, so
# we need to find out which slide contains what and then put them in their
# correct order. We do this by extracting the links in the slide's navigation
# bar. This bar has a switch to change from image to text slides with the
# current slide number, so <a href="text3">Text</a> switches from Graphic
# slide 3 to Text slide 3. We use this to identify the current slide
if len(content) > 0:
# slideshow collects [slide number as string, markup] pairs
slideshow = []
output = blank
for slide in content:
# NOTE(review): re.search() returns None when a page has no <center> block,
# in which case .group() raises AttributeError - confirm every converted
# page carries a navigation bar
slide_nav = re.search(r'<center>(.*?)</center>', slide, flags=flags).group()
slide_nav_link_list = re.findall(r'<a(.*?)</a>', slide_nav, flags=flags)
for link in slide_nav_link_list:
# the header slide. Contains header and extracted text from image
if re.search(r'>Graphic', link, flags=flags):
# the number in the link target (e.g. "text3." / "img3.") is the slide number
pointer = re.search(r'(text|img)([0-9]*)\.', link, flags=flags)
if pointer is not None:
# only the <h1>...</h1> part of this page is kept
slide_header = re.search(r'<h1>(.*)?</h1>', slide, flags=flags).group()
slideshow.append([str(pointer.group(2)), slide_header])
# the content slide. Contains image and notes
if re.search(r'>Text', link, flags=flags):
pointer = re.search(r'(text|img)([0-9]*)\.', link, flags=flags)
if pointer is not None:
slideshow.append([str(pointer.group(2)), slide])
# time to sort and add first slide header in case missing
# NOTE(review): getKey() calls int() on the captured number; the pattern
# [0-9]* can capture an empty string, which int() rejects - confirm links
# always carry a number
slideshow = sorted(slideshow, key=getKey)
if '<h1' not in slideshow[0][1]:
slideshow.insert(0, ["0", getHeaderSlideTitle(context)])
output = ""
section_start = '<section>'
section_end = '</section>'
# slideshow will contain <header>, <content>, <header>, <content>...
# so we need to go through it two slides at a time to assemble
# slides
slide_iter = iter(slideshow)
for slide in slide_iter:
slide_1st = slide
# NOTE(review): next() raises StopIteration if slideshow holds an odd number
# of entries
slide_2nd = next(slide_iter)
# we don't know whether header is on first or second position
if '<h1' not in slide_1st[1]:
go_1st = slide_2nd[1]
go_2nd = slide_1st[1]
else:
go_1st = slide_1st[1]
go_2nd = slide_2nd[1]
# drop everything from <head> up to the end of the navigation bar
go_2nd = go_2nd.replace(re.search(r'<head>.*?</center><br>', go_2nd, flags=flags).group(), blank)
# the notes become a <details> element that is closed where </body> was
go_2nd = go_2nd.replace("<h3>Notes:</h3><br>", '<details open="open">')
go_2nd = go_2nd.replace("</body>", "</details>")
output = output + section_start + go_1st + go_2nd + section_end
# pass the assembled slides on to the slideshow renderer
# NOTE(review): args and kw are not defined anywhere in the visible script -
# confirm they are declared as script parameters
kw["remote_content"] = output
return context.WebPage_viewAsSlideshowWIP(*args, **kw)
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Presentation_viewAsSlideshow</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -21,24 +21,27 @@ MAIN FILE: generate presentation in different output formats
# display_note: display slide notes (1) or not (0)*
# display_svg: display svg-images as svg or png*
# ------
# flag_ooo: convert legacy odp, sxi formats (not active)
# remote_content: convert legacy odp, sxi formats (not active)
import re
from base64 import b64encode
blank = ''
flags = re.MULTILINE|re.DOTALL|re.IGNORECASE
details_separator = '</section><section class="ci-notes-continue"><section><h1>cont.</h1></section>'
pref = context.getPortalObject().portal_preferences
# ------------------ HTML cleanup/converter methods ----------------------------
def getHeaderSlideTitle(my_doc):
    """Return the title of my_doc wrapped in an <h1> element."""
    title = my_doc.getTitle()
    return ''.join(('<h1>', title, '</h1>'))
def getSlideList(my_content):
    """Return the inner markup of each <section> element in my_content.

    Matching is non-greedy, so a match ends at the first closing
    </section> tag that follows the opening tag.
    """
    section_pattern = re.compile(r'<section[^>]*?>(.*?)</section>', re.S)
    return [found.group(1) for found in section_pattern.finditer(my_content)]
#def getSectionSlideList(my_content):
# return re.findall(r'(<section[^>]*?>.*?</section>)', my_content, re.S)
# https://regex101.com/r/8F8GTx/1/
def getSlideDetailsList(my_content):
    """Return every outer <section> that nests a <section> and ends in </details>.

    Each item is the complete matched markup, from the outer opening tag to
    the </section> that follows the closing </details>.
    """
    nested_slide = re.compile(
        r'<section.*?>\s?<section>.*?</details>\s?</section>', re.S
    )
    return [found.group(0) for found in nested_slide.finditer(my_content)]
......@@ -51,6 +54,54 @@ def getDetailsList(my_slide):
#def getNestedSection(my_content):
# return my_content.find("<section") > -1
# please look the other direction until we can use beautifulsoup
def getSlideFront(my_content):
    """Return the markup that goes on the slide itself, or None.

    The first <img .../> found in my_content wins. Without an image, the
    first tag in my_content is looked up and the first complete element of
    that tag name (opening tag through its closing tag) is returned.
    Returns None when nothing suitable is found, e.g. for an empty slide.
    """
    # is there an image on the slide?
    # (search the parameter, not a variable of the calling scope)
    img = re.search(r'(<img.*?/>)', my_content, flags=flags)
    if img:
        return img.group()
    # is there another tag on the slide?
    tag = re.search(r'<(.*?)( |>)', my_content, flags=flags)
    if tag:
        # the tag name is spliced into a pattern, so neutralise any regex
        # metacharacters it may contain (e.g. "?xml")
        key = re.escape(tag.group(1))
        element = re.search(r'(<%s.*?</%s>)' % (key, key), my_content, flags=flags)
        if element:
            return element.group()
    # empty slide
    return None
# opinionated
# TODO h1: chapter, h2:slide ?
def setH1AndH2AsSlideHeaders(my_content):
    """Turn every <h2> element into an <h1> so both act as slide headers.

    Opening and closing tags are rewritten separately; matching follows the
    shared regex flags and the replacement is always lower case.
    """
    promoted = re.sub(r'<h2', '<h1', my_content, flags=flags)
    return re.sub(r'\/h2>', '/h1>', promoted, flags=flags)
def removePlaceholders(my_content):
    """Strip ${...} substitution placeholders from my_content.

    Each placeholder is matched non-greedily up to its own closing brace, so
    text sitting between two placeholders on the same line is kept. A
    placeholder never spans a line break.
    """
    # cheap exit for the common case of content without any placeholder
    if '${' not in my_content:
        return my_content
    return re.sub(r'\$\{.*?\}', blank, my_content)
def removeComments(my_content):
    """Delete every HTML comment (<!-- ... -->) from my_content."""
    stripped = my_content
    # comments are located in the untouched input, then removed one by one
    for found in re.finditer(r'(<!--.*?-->)', my_content, flags=flags):
        stripped = stripped.replace(found.group(1), blank)
    return stripped
def removeImageWrappers(my_content):
    """Unwrap centered paragraphs, keeping only what they contain.

    Every <p style="text-align: center;">...</p> is replaced by its inner
    markup.
    """
    unwrapped = my_content
    wrapper = re.compile(r'(<p style=\"text-align: center;\">(.*?)</p>)', flags)
    for found in wrapper.finditer(my_content):
        unwrapped = unwrapped.replace(found.group(1), found.group(2))
    return unwrapped
def removeLineBreaks(my_content):
    """Return my_content with all line feeds and carriage returns removed."""
    flattened = my_content
    for line_break in ('\n', '\r'):
        flattened = flattened.replace(line_break, '')
    return flattened
def splitMultipleDetails(my_content):
for slide in getSlideDetailsList(my_content):
detail_list = getDetailsList(slide)
......@@ -86,118 +137,101 @@ def splitMultipleDetails(my_content):
def removeEmptyDetails(my_content):
    """Drop <details> elements that are empty or hold only a space or &nbsp;."""
    empty_variants = (
        '<details open="open"></details>',
        '<details></details>',
        '<details open=""></details>',
        '<details>&nbsp;</details>',
        '<details> </details>',
    )
    content = my_content
    for variant in empty_variants:
        content = content.replace(variant, blank)
    return content
def getPageList(my_content):
    """Return the inner markup of each <html>...</html> document in my_content."""
    page_pattern = re.compile(r'<html>(.*?)</html>', re.S)
    return [page.group(1) for page in page_pattern.finditer(my_content)]
def getPageTitle(my_full_page):
    """Return the text of the first <title> element, or None.

    The title must be non-empty and must not contain a line break to be
    found.
    """
    found = re.search('<title>(.+?)</title>', my_full_page)
    if found is None:
        return None
    return found.group(1)
def getPageContent(my_full_page):
    """Return what follows the single "</center><br>" marker, or None.

    The closing </body> tag is removed from the returned part. None is
    returned unless the marker occurs exactly once.
    """
    parts = my_full_page.split("</center><br>")
    if len(parts) != 2:
        return None
    return parts[1].replace("</body>", blank)
def addSlideContent(my_content, my_notes):
    """Build one slide: my_content followed by my_notes in an open <details>."""
    slide_body = '<section>' + my_content
    slide_notes = '<details open="open">' + my_notes
    return slide_body + slide_notes + '</details></section>'
# Put the converted pages into presentation order.
# my_page_list: list of page markup strings.
# Returns a list of (title or slide number, page content, label) tuples:
# numbered "Slide N" pages sorted by N, with the page kept in
# page_tuple_first prepended and the page kept in page_tuple_last appended
# when they were found.
# NOTE(review): indentation was lost in this view; the nesting of the
# if/elif/else chain below must be confirmed against the real file.
def sortContent(my_page_list):
try:
page_content_list = []
page_tuple_first = None
page_tuple_last = None
for page in my_page_list:
# NOTE(review): getPageTitle() returns None when no <title> is found, which
# would make the .find() calls below fail - confirm every page has a title
page_title = getPageTitle(page)
# Note cloudooo default html transformation mixes slide order. dirty fix
if page_title.find("Commercial") > -1:
page_content = getPageContent(page)
if page_content.find("<center>") > -1:
# NOTE(review): this tuple is stored as the last page but labelled "first";
# the label is not read inside this function - confirm it is intended
page_tuple_last = (page_title, page_content, "first")
elif page_title.find("ERP5") > -1:
page_content = getPageContent(page)
if page_content.find("<center>") > -1:
# NOTE(review): stored as the first page but labelled "last" - see above
page_tuple_first = (page_title, page_content, "last")
else:
page_content = getPageContent(page)
if page_title.find("Slide") > -1:
# titles of the form "Slide N" carry the slide position
slide_number = int(page_title.replace("Slide ", ""))
page_content_list.append((slide_number, page_content, None))
else:
if page_content.find("<center>") > -1:
page_tuple_first = (page_title, page_content, "first")
# numbered slides in ascending order, then the special pages around them
sort_content_list = sorted(page_content_list, key=lambda page_foo: page_foo[0])
if page_tuple_last is not None:
sort_content_list.append(page_tuple_last)
if page_tuple_first is not None:
sort_content_list = [page_tuple_first] + sort_content_list
return sort_content_list
# any error is passed on to the caller unchanged
except Exception as e:
raise e
def addLastSlide(my_last_slide):
    """Return the text content of the preferred closing slide, or blank.

    Nothing is added when my_last_slide contains exactly two "<div"
    occurrences, when no closing-slide URL is set in the preferences, or
    when the URL does not resolve to a usable document.
    """
    if my_last_slide.count("<div") == 2:
        return blank
    closing_slide_url = pref.getPreferredCorporateIdentityTemplateSlideLastSlideRelativeUrl()
    if not closing_slide_url:
        return blank
    closing_slide = doc.restrictedTraverse(closing_slide_url)
    if not closing_slide:
        return blank
    return closing_slide.getTextContent()
# -------------------------- Setup ---------------------------------------------
# working alias for the document the script is called on
doc = context
# prefix taken from the preferences, "Slideshow." when none is set
doc_prefix = pref.getPreferredCorporateIdentityTemplateSlideDocumentPrefix() or "Slideshow."
doc_converted_content = None
doc_upgraded_content = None
doc_slide_iter = None
# rendering options read from the keyword arguments, each with a default
doc_format = kw.get('format') or 'html'
doc_display_notes = int(kw.get('display_note') or 0)
doc_display_svg = kw.get('display_svg') or 'png'
doc_download = int(kw.get('document_download') or 0)
doc_save = int(kw.get('document_save') or 0)
# NOTE(review): doc_ooo is assigned twice in a row; the second assignment
# (remote_content) overrides the first (flag_ooo). This looks like the old
# and the new line of a change shown together - confirm which one applies
doc_ooo = int(kw.get('flag_ooo') or 0)
doc_ooo = kw.get('remote_content') or None
# remote content, when given, takes precedence over the document's own text
doc_content = doc_ooo or doc.getTextContent()
# list of <section> bodies found in the content, or None when there are none
doc_is_slideshow = getSlideList(doc_content) or None
override_logo_reference = kw.get('override_logo_reference', None)
override_source_organisation_title = kw.get("override_source_organisation_title", None)
override_batch_mode = kw.get('batch_mode')
override_source_person_title = None
# ---------- backward compatibility with legacy odp/sxi presentations ----------
# note: this has to come first to convert file into html and then continue
# Legacy path: convert the data of a "Presentation" document to HTML and
# rebuild it as <section> slides in doc_converted_content.
# NOTE(review): indentation was lost in this view; confirm the nesting of
# the statements below against the real file.
if doc_ooo:
doc_portal = doc.getPortalObject()
if doc.getPortalType() in ["Presentation"]:
raw_data = doc_portal.portal_transforms.convertToData(
"text/html",
str(doc.getData() or blank),
context=context,
mimetype=doc.getContentType()
)
if raw_data is None:
raise ValueError("Failed to convert to %r" % "text/html")
# got something
page_list = getPageList(raw_data)
if len(page_list) > 0:
# (title or number, content, label) tuples in presentation order
page_content = sortContent(page_list)
doc_converted_content = blank
for slide in page_content:
if slide[1].find("<center>") > -1:
# what precedes "<h3>Notes:</h3>" is the slide, what follows are its notes
slide_content_list = slide[1].split("<h3>Notes:</h3>")
if len(slide_content_list) != 2:
# no single notes marker: keep the page as is, without notes
slide_content = slide[1]
slide_notes = blank
else:
slide_content = slide_content_list[0]
slide_content = slide_content.replace("<center>", "")
slide_content = slide_content.replace("</center>", "")
slide_notes = slide_content_list[1]
doc_converted_content += addSlideContent(slide_content, slide_notes)
# --------------------- Convert any page into a slideshow ----------------------
# Note: mileage varies depending on the cleanliness of the HTML page
# When the content holds no <section> slides, build them from the page:
# every <h1> (and former <h2>) starts a new slide, the first element after
# it stays on the slide and the remainder goes into <details>.
# NOTE(review): indentation was lost in this view; confirm the nesting of
# the statements below against the real file.
if doc_is_slideshow is None:
# clean the page before splitting it up
doc_upgraded_content = removePlaceholders(doc_content)
doc_upgraded_content = removeComments(doc_upgraded_content)
doc_upgraded_content = removeImageWrappers(doc_upgraded_content)
doc_upgraded_content = setH1AndH2AsSlideHeaders(doc_upgraded_content)
doc_upgraded_content = removeLineBreaks(doc_upgraded_content)
doc_content = blank
last_slide_content = blank
section_start = '<section>'
details_start = '<details open="open">'
details_end = '</details>'
section_end = '</section>'
# separate by <h1>, these will be our slide headers
# (the capturing group keeps the headers in the resulting list)
fake_slide_list = re.split(r'(<h1.*?/h1>)', doc_upgraded_content, flags=flags)
# insert page title if first element isn't a <h1>
if '<h1' not in fake_slide_list[0]:
fake_slide_list.insert(0, getHeaderSlideTitle(doc))
# opinionated add of a "Thank you" slide if the last slide doesn't
# contain the default two <div> columns
last_slide_content = addLastSlide(fake_slide_list[-1])
# fake_slide_list will be <h1>,<content>,<h1>,<content> so we need to go
# over two items at a time
doc_slide_iter = iter(fake_slide_list)
for x in doc_slide_iter:
slide_header = x
# remove whitespace so we don't end up with empty <details>
# NOTE(review): next() raises StopIteration when the list holds an odd
# number of items
slide_content = " ".join(next(doc_slide_iter).split())
# build slides assuming the first element after the header is on the slide
# (an img, a paragraph, a list, whatever). The rest goes into details. If
# there is an img on the slide, move it to the top
slide_front = getSlideFront(slide_content)
if slide_front:
slide_content = slide_content.replace(slide_front, blank)
else:
slide_front = blank
# build a new doc from slides
# NOTE(review): the statement below ends in a line-continuation backslash
# that is directly followed by a comment line - fragile, confirm it is
# intended
doc_content = doc_content + section_start + slide_header + slide_front \
+ details_start + slide_content + details_end + section_end \
# other case: we have a slideshow, doc_is_slideshow contains the slides
else:
last_slide_content = addLastSlide(doc_is_slideshow[-1])
# add last slide if required
doc_content = doc_content + last_slide_content
# -------------------------- Document Parameters ------------------------------
# NOTE(review): doc_content is assigned twice in a row; the first assignment
# replaces it with the cleaned doc_dirty_content, the second cleans that
# result again. This looks like the old and the new lines of a change shown
# together - confirm which ones apply
doc_dirty_content = doc_converted_content or doc.getTextContent()
doc_content = removeEmptyDetails(doc_dirty_content)
doc_content = removeEmptyDetails(doc_content)
# the short title is preferred over the full title
doc_title = doc.getShortTitle() or doc.getTitle()
doc_language = doc.getLanguage()
doc_description = doc.getDescription()
......@@ -271,9 +305,9 @@ for image in re.findall('(<img.*?/>)', doc_content):
#
# for link in re.findall('(<a.*?<\/a>)', document_content):
# doc_content = doc_content.replace(link, doc.WebPage_validateLink(link_string=link, link_toc=true))
#
# ------------- backwards compatibility with old slideshow ---------------------
# ------------- backcompat: old slideshow -------------------------------------
# requires to wrap content of slides that contain <details> into nested
# <section> tags. Done here, after book, because it adds more complexity
if getDetails(doc_content) > -1:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment