extension.erp5.JupyterCompile.py 44.4 KB
Newer Older
1
# -*- coding: utf-8 -*-
2 3 4
from matplotlib.figure import Figure
from IPython.core.display import DisplayObject
from IPython.lib.display import IFrame
5
from cStringIO import StringIO
6 7
from erp5.portal_type import Image
from types import ModuleType
8
from ZODB.serialize import ObjectWriter
9
import cPickle
10
import sys
11
import traceback
12
import ast
13
import base64
14
import json
15
import transaction
16
import Acquisition
17
import astor
18
import importlib
19
from erp5.component.module.Log import log
20

21 22 23 24 25 26 27 28 29 30
# Display matplotlib figure automatically like
# the original python kernel
import matplotlib
import matplotlib.pyplot as plt
from IPython.core.pylabtools import print_figure
from IPython.core.display import _pngxy
from ipykernel.jsonutil import json_clean, encode_images
import threading
display_data_wrapper_lock = threading.Lock()

31 32
# Well known unserializable types
from Record import Record
33 34 35 36
# Types whose instances are never copied into the stored notebook variables.
well_known_unserializable_type_tuple = (ModuleType, Record)
# ZBigArray may not be available
try:
  from wendelin.bigarray.array_zodb import ZBigArray
except ImportError:
  pass
else:
  # FIXME ZBigArrays are regular ZODB objects and must be serializable
  # FIXME the bug is probably in CanSerialize()
  # FIXME -> see https://lab.nexedi.com/nexedi/erp5/commit/5fb16acd#note_33582 for details
  well_known_unserializable_type_tuple = \
    well_known_unserializable_type_tuple + (ZBigArray,)
43

44 45
def Base_executeJupyter(self, python_expression=None, reference=None, \
                        title=None, request_reference=False, **kw):
  """
    Execute `python_expression` inside the Data Notebook identified by
    `reference` and return the outcome for the Jupyter frontend.

    Parameters:
      python_expression -- code of the cell to run; a false value is treated
                           as an empty string.
      reference         -- reference of the Data Notebook to use. The notebook
                           is created (with `title`) when no notebook with
                           this reference is found in the catalog.
      title             -- title given to a newly created Data Notebook.
      request_reference -- True or the string 'True' to only list the existing
                           notebooks instead of running code.
      kw                -- only `store_history` (default True) is read; when
                           true a Data Notebook Line is added for the cell.

    Returns:
      - a plain message string when the feature is disabled, the user is not
        authorized or no reference was given;
      - a list of {'reference', 'title'} dicts when request_reference is set;
      - otherwise a JSON string describing the execution result.
  """
  # Check if implementation is enabled
  if not self.getPortalObject().ERP5Site_isDataNotebookEnabled():
    return "The synchronous and unrestricted implementation is not enabled on the server"
  # Check permissions for current user and display message to non-authorized user
  if not self.Base_checkPermission('portal_components', 'Manage Portal'):
    return "You are not authorized to access the script"

  # Convert the request_reference argument to its boolean value. The argument
  # may be the string 'True' or an actual boolean; anything else means False.
  request_reference = request_reference is True or request_reference == 'True'

  # Return python dictionary with title and reference of all notebooks
  # for request_reference=True
  if request_reference:
    data_notebook_list = self.portal_catalog(portal_type='Data Notebook')
    notebook_detail_list = [{'reference': obj.getReference(), \
                             'title': obj.getTitle()} for obj in data_notebook_list]
    return notebook_detail_list

  if not reference:
    message = "Please set or use reference for the notebook you want to use"
    return message

  # Take python_expression as '' for empty code from jupyter frontend
  if not python_expression:
    python_expression = ''

  # Get Data Notebook with the specific reference
  data_notebook = self.portal_catalog.getResultValue(
                         portal_type='Data Notebook',
                         reference=reference)

  # Create new Data Notebook if reference doesn't match with any from existing ones
  if not data_notebook:
    notebook_module = self.getDefaultModule(portal_type='Data Notebook')
    data_notebook = notebook_module.DataNotebookModule_addDataNotebook(
                                      title=title,
                                      reference=reference,
                                      batch_mode=True)

  # By default, store_history is True
  store_history = kw.get('store_history', True)
  data_notebook_line = None
  if store_history:
    # Add new Data Notebook Line to the Data Notebook
    data_notebook_line = data_notebook.DataNotebook_addDataNotebookLine(
                                       notebook_code=python_expression,
                                       batch_mode=True)

  # Gets the context associated to the data notebook being used
  old_notebook_context = data_notebook.getNotebookContext()
  if not old_notebook_context:
    old_notebook_context = self.Base_createNotebookContext()

  # Pass all to code Base_runJupyter external function which would execute the code
  # and returns a dict of result
  final_result = displayDataWrapper(lambda:Base_runJupyterCode(self, python_expression, old_notebook_context))

  new_notebook_context = final_result['notebook_context']

  result = {
    u'code_result': final_result['result_string'],
    u'print_result': final_result['print_result'],
    u'displayhook_result': final_result['displayhook_result'],
    u'ename': final_result['ename'],
    u'evalue': final_result['evalue'],
    u'traceback': final_result['traceback'],
    u'status': final_result['status'],
    u'mime_type': final_result['mime_type'],
    u'extra_data_list': final_result['extra_data_list'],
  }

  # Updates the context in the notebook with the resulting context of code
  # execution.
  data_notebook.setNotebookContext(new_notebook_context)

  # We try to commit, but the notebook context property may have variables that
  # cannot be serialized into the ZODB and couldn't be captured by our code yet.
  # In this case we abort the transaction and warn the user about it. Unfortunately,
  # the exception raised when this happens doesn't help to know exactly which
  # object caused the problem, so we cannot tell the user what to fix.
  try:
    transaction.commit()
  except transaction.interfaces.TransactionError as e:
    transaction.abort()
    exception_dict = getErrorMessageForException(self, e, new_notebook_context)
    result.update(exception_dict)
    return json.dumps(result)

  # Catch exception while serializing the result to be passed to jupyter frontend
  # and in case of error put code_result as None and status as 'error' which would
  # be shown by Jupyter frontend
  try:
    serialized_result = json.dumps(result)
  except UnicodeDecodeError:
    result = {
      u'code_result': None,
      u'print_result': None,
      u'displayhook_result': None,
      u'ename': u'UnicodeDecodeError',
      u'evalue': None,
      u'traceback': None,
      u'status': u'error',
      u'mime_type': result['mime_type']}
    serialized_result = json.dumps(result)

  if data_notebook_line is not None:
    data_notebook_line.edit(
      notebook_code_result = result['code_result'],
      mime_type = result['mime_type'])

  return serialized_result
158 159


160 161 162 163 164 165 166 167 168
def mergeTracebackListIntoResultDict(result_dict, error_result_dict_list):
  """
    Fold the errors collected while running setup functions into result_dict.

    For every entry of error_result_dict_list its 'traceback' value is
    appended (as one element) to result_dict['traceback'], which is turned
    into a list first when it is None, and result_dict['status'] takes the
    entry's 'status'. result_dict is modified in place and returned; an empty
    error list leaves it untouched.
  """
  if not error_result_dict_list:
    return result_dict

  if result_dict['traceback'] is None:
    result_dict['traceback'] = []

  for setup_error in error_result_dict_list:
    result_dict['traceback'].append(setup_error['traceback'])
    # The status of the last error wins.
    result_dict['status'] = setup_error['status']
  return result_dict

169 170

def matplotlib_pre_run():
  """
    Prepare matplotlib before a code cell runs: switch interactive mode off,
    apply the rc parameters below and clear the current figure.
  """
  matplotlib.interactive(False)
  cell_rc_dict = {
    'figure.figsize': (6.0,4.0),
    'figure.facecolor': (1,1,1,0),
    'figure.edgecolor': (1,1,1,0),
    'font.size': 10,
    'figure.dpi': 72,
    'figure.subplot.bottom' : .125,
  }
  for rc_name in cell_rc_dict:
    matplotlib.rcParams[rc_name] = cell_rc_dict[rc_name]
  plt.gcf().clear()

def matplotlib_post_run(data_list):
  """
    Render the current matplotlib figure as PNG, clear it and, when
    print_figure produced data, append a display entry (image data plus
    width/height metadata) to data_list.
  """
  # Always try to get the current figure.
  # This is not efficient, but we can support any libraries
  # that use matplotlib.
  current_figure = plt.gcf()
  png_data = print_figure(current_figure, fmt='png')
  current_figure.clear()
  if png_data is None:
    return
  width, height = _pngxy(png_data)
  image_data = encode_images({'image/png':png_data})
  image_metadata = {'image/png':dict(width=width, height=height)}
  data_list.append(json_clean(dict(data=image_data, metadata=image_metadata)))

class Displayhook(object):
  """
    Temporary replacement for sys.displayhook that records the last displayed
    value instead of printing it.

    `result` is None until a non-None value is displayed. It then holds either
    a {'data': {'text/html': ...}, 'metadata': {}} dict, when the value
    provides `_repr_html_`, or the repr() string of the value.
  """

  def __init__(self):
    # Defined up front so that `result` can be read and post_run() called
    # even when pre_run() was never invoked.
    self.result = None
    self.old_hook = None

  def hook(self, value):
    """Record `value` as the last displayed result; None is ignored."""
    if value is None:
      return
    if getattr(value, '_repr_html_', None) is not None:
      self.result = {'data':{'text/html':value._repr_html_()}, 'metadata':{}}
    else:
      self.result = repr(value)

  def pre_run(self):
    """Install hook() as sys.displayhook and reset the recorded result."""
    self.old_hook = sys.displayhook
    sys.displayhook = self.hook
    self.result = None

  def post_run(self):
    """Restore the display hook saved by pre_run(), if there is one."""
    if self.old_hook is not None:
      sys.displayhook = self.old_hook

displayhook = Displayhook()

def displayDataWrapper(function):
  """
    Run `function` with the custom display hook installed and matplotlib
    prepared, serialized by display_data_wrapper_lock.

    `function` takes no argument and must return a dict. The returned dict
    gets an 'extra_data_list' entry holding the list it already provided (or
    a new one), to which matplotlib_post_run() was given the chance to append
    the rendered figure.
  """
  with display_data_wrapper_lock:
    extra_data_list = []
    # pre run
    displayhook.pre_run()
    try:
      # Inside the try block so that the display hook is restored even if
      # the matplotlib preparation itself fails.
      matplotlib_pre_run()
      result = function()
      extra_data_list = result.get('extra_data_list', [])
    finally:
      # post run
      displayhook.post_run()
      matplotlib_post_run(extra_data_list)
  result['extra_data_list'] = extra_data_list
  return result

228
def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
  """
    Function to execute jupyter code and update the context dictionary.
    Code execution depends on 'interactivity', a.k.a , if the ast.node object has
    ast.Expr instance (valid for expressions) or not.

    old_notebook_context should contain both variables dict and setup functions.
    Here, setup dict is {key: value} pair of setup function names and another dict,
    which contains the function's alias and code, as string. These functions
    should be executed before `jupyter_code` to properly create the required
    environment.

    For example:
    old_notebook_context =  {
      'setup': {
        'numpy setup': {
          'func_name': 'numpy_setup_function',
          'code': ...
        }
      },
      'variables': {
        'my_variable': 1
      }
    }

    The behaviour would be similar to that of jupyter notebook:-
    ( https://github.com/ipython/ipython/blob/master/IPython/core/interactiveshell.py#L2954 )
    Example:

      code1 = '''
      23
      print 23 #Last node not an expression, interactivity = 'none'
      '''
      out1 = '23'

      code2 = '''
      123
      12 #Last node an expression, interactivity = 'last'
      '''
      out2 = '12'

    Returns a dict with the keys 'result_string', 'print_result',
    'displayhook_result', 'notebook_context', 'status', 'mime_type', 'evalue',
    'ename' and 'traceback'. When parsing or executing the code fails, the
    dict built by getErrorMessageForException() is returned instead (merged
    with the setup function errors when the failure happens while executing
    the cell), and the current transaction is aborted if user code may
    already have run.

    old_notebook_context is updated in place: its 'setup' and 'variables'
    entries reflect the state after the execution.
  """
  mime_type = 'text/plain'
  status = u'ok'
  ename, evalue, tb_list = None, None, None

  # Other way would be to use all the globals variables instead of just an empty
  # dictionary, but that might hamper the speed of exec or eval.
  # Something like -- user_context = globals(); user_context['context'] = self;
  user_context = {}
  output = ''

  # Saving the initial globals dict so as to compare it after code execution
  globals_dict = globals()
  notebook_context = old_notebook_context

  setup_error_return_dict_list = []

  # Execute only if jupyter_code is not empty
  if jupyter_code:
    # Create ast parse tree
    try:
      ast_node = ast.parse(jupyter_code)
    except Exception as e:
      # It's not necessary to abort the current transaction here 'cause the
      # user's code wasn't executed at all yet.
      return getErrorMessageForException(self, e, notebook_context)

    # Fixing "normal" imports and detecting environment object usage
    import_fixer = ImportFixer()
    print_fixer = PrintFixer()
    environment_collector = EnvironmentParser()
    ast_node = import_fixer.visit(ast_node)

    # Whenever we have new imports we need to warn the user about the
    # environment
    if (import_fixer.warning_module_names != []):
      warning = ("print ('"
                 "WARNING: You imported from the modules %s without "
                 "using the environment object, which is not recomended. "
                 "Your import was automatically converted to use such method. "
                 "The setup functions were named as *module*_setup. "
                 "')") % (', '.join(import_fixer.warning_module_names))
      tree = ast.parse(warning)
      tree.body[0].lineno = ast_node.body[-1].lineno+5
      ast_node.body.append(tree.body[0])

    ast_node = print_fixer.visit(ast_node)
    ast.fix_missing_locations(ast_node)

    # The collector also raises errors when environment.define and undefine
    # calls are made incorrectly, so we need to capture them to propagate
    # to Jupyter for rendering.
    try:
      ast_node = environment_collector.visit(ast_node)
    except (EnvironmentDefinitionError, EnvironmentUndefineError) as e:
      transaction.abort()
      return getErrorMessageForException(self, e, notebook_context)

    # Get the node list from the parsed tree
    nodelist = ast_node.body

    # Handle case for empty nodelist(in case of comments as jupyter_code).
    # Nothing is executed then, and the stored context must be left alone:
    # the user context was never populated, so comparing it against the
    # stored variables would wrongly drop all of them.
    if nodelist:
      # If the last node is an expression (ast.Expr) it is run with 'single'
      # mode (interactivity 'last') so that its value reaches the display
      # hook. Every other node is run with 'exec' mode.
      if isinstance(nodelist[-1], ast.Expr):
        to_run_exec, to_run_interactive = nodelist[:-1], nodelist[-1:]
      else:
        to_run_exec, to_run_interactive = nodelist, []

      # Variables used at the display hook to get the proper form to display
      # the last returning variable of any code cell.
      display_data = {'result': '',
                      'mime_type': None}

      # This is where one part of the  display magic happens. We create an
      # instance of ProcessorList and add each of the built-in processors.
      # The classes which each of them are responsible for rendering are defined
      # in the classes themselves.
      # The customized display hook will automatically use the processor
      # of the matching class to decide how the object should be displayed.
      processor_list = ProcessorList()
      processor_list.addProcessor(IPythonDisplayObjectProcessor)
      processor_list.addProcessor(MatplotlibFigureProcessor)
      processor_list.addProcessor(ERP5ImageProcessor)
      processor_list.addProcessor(IPythonDisplayObjectProcessor)

      # Putting necessary variables in the `exec` calls context and storing
      inject_variable_dict = {
        'context': self,
        'environment': Environment(),
        '_display_data': display_data,
        '_processor_list': processor_list,
        '_volatile_variable_list': [],
        '_print': CustomPrint()}
      user_context.update(inject_variable_dict)
      user_context.update(notebook_context['variables'])

      # Getting the environment setup defined in the current code cell
      current_setup_dict = environment_collector.getEnvironmentSetupDict()
      current_var_dict = environment_collector.getEnvironmentVarDict()

      # Removing old setup from the setup functions
      removed_setup_message_list = []
      for func_alias in environment_collector.getEnvironmentRemoveList():
        if func_alias not in notebook_context['setup']:
          transaction.abort()
          undefine_error_message = "EnvironmentUndefineError: Trying to remove non existing function/variable from environment: '%s'\n" % func_alias
          result = {
            'result_string': undefine_error_message,
            'print_result': {"data":{"text/plain":undefine_error_message}, "metadata":{}},
            'displayhook_result': None,
            'notebook_context': notebook_context,
            'status': 'ok',
            'mime_type': 'text/plain',
            'evalue': None,
            'ename': None,
            'traceback': None}
          return result

        func_name = notebook_context['setup'][func_alias]['func_name']
        del notebook_context['setup'][func_alias]
        try:
          del user_context[func_alias]
        except KeyError:
          pass
        removed_setup_message = (
          "%s (%s) was removed from the setup list. "
          "Variables it may have added to the context and are not pickleable "
          "were automatically removed.\n"
        ) % (func_name, func_alias)
        removed_setup_message_list.append(removed_setup_message)

      # Removing all the setup functions if user call environment.clearAll()
      if environment_collector.clearAll():
        for key in list(notebook_context['setup'].keys()):
          del notebook_context['setup'][key]

      # Running all the setup functions that we got
      failed_setup_key_list = []
      for key, value in notebook_context['setup'].iteritems():
        try:
          code = compile(value['code'], '<string>', 'exec')
          exec(code, user_context, user_context)
        # An error happened, so we show the user the stacktrace along with a
        # note that the exception happened in a setup function's code.
        except Exception as e:
          failed_setup_key_list.append(key)
          if value['func_name'] in user_context:
            del user_context[value['func_name']]
          error_return_dict = getErrorMessageForException(self, e, notebook_context)
          additional_information = "An error happened when trying to run the one of your setup functions:"
          error_return_dict['traceback'].insert(0, additional_information)
          setup_error_return_dict_list.append(error_return_dict)
      # Failing setup functions are dropped, after the iteration is over.
      for failed_setup_key in failed_setup_key_list:
        del notebook_context['setup'][failed_setup_key]

      # Iterating over environment.define calls captured by the environment collector
      # that are functions and saving them as setup functions.
      for func_name, data in current_setup_dict.iteritems():
        # The names returned by the setup function are registered as volatile
        # only when it really returned a non empty dict; a setup function
        # returning nothing must not fail.
        setup_string = (
          "%s\n"
          "_result = %s()\n"
          "if _result and isinstance(_result, dict):\n"
          "    globals().update(_result)\n"
          "    _volatile_variable_list += _result.keys()\n"
          "del %s, _result\n"
        ) % (data['code'], func_name, func_name)
        notebook_context['setup'][data['alias']] = {
          "func_name": func_name,
          "code": setup_string}

      # Iterating over environment.define calls captured by the environment collector
      # that are simple variables and saving them in the setup.
      for variable, value, in current_var_dict.iteritems():
        setup_string = "%s = %s\n" % (variable, repr(value))
        notebook_context['setup'][variable] = {
          'func_name': variable,
          'code': setup_string}
        # Register the variable name itself (not its characters) as volatile.
        user_context['_volatile_variable_list'].append(variable)

      if environment_collector.showEnvironmentSetup():
        inject_variable_dict['_print'].write("%s\n" % str(notebook_context['setup']))

      # Execute the nodes with 'exec' mode
      for node in to_run_exec:
        mod = ast.Module([node])
        code = compile(mod, '<string>', "exec")
        try:
          exec(code, user_context, user_context)
        except Exception as e:
          # Abort the current transaction. As a consequence, the notebook lines
          # are not added if an exception occurs.
          transaction.abort()
          return mergeTracebackListIntoResultDict(getErrorMessageForException(self, e, notebook_context),
                                                  setup_error_return_dict_list)

      # Execute the interactive nodes with 'single' mode
      for node in to_run_interactive:
        mod = ast.Interactive([node])
        try:
          code = compile(mod, '<string>', 'single')
          exec(code, user_context, user_context)
        except Exception as e:
          # Abort the current transaction. As a consequence, the notebook lines
          # are not added if an exception occurs.
          transaction.abort()
          return mergeTracebackListIntoResultDict(getErrorMessageForException(self, e, notebook_context),
                                                  setup_error_return_dict_list)

      mime_type = display_data['mime_type'] or mime_type
      inject_variable_dict['_print'].write("\n".join(removed_setup_message_list) + display_data['result'])

      # Saves a list of all the variables we injected into the user context and
      # shall be deleted before saving the context.
      volatile_variable_list = current_setup_dict.keys() + inject_variable_dict.keys() + user_context.get('_volatile_variable_list', [])
      volatile_variable_list.append('__builtins__')

      # Iterate over a copy: unserializable entries are deleted on the way.
      for key, val in list(user_context.items()):
        if key in globals_dict or key in volatile_variable_list:
          continue
        if isinstance(val, well_known_unserializable_type_tuple):
          continue
        if canSerialize(val):
          notebook_context['variables'][key] = val
        else:
          del user_context[key]
          message = (
            "Cannot serialize the variable named %s whose value is %s, "
            "thus it will not be stored in the context. "
            "You should move it's definition to a function and "
            "use the environment object to load it.\n"
          ) % (key, val)
          inject_variable_dict['_print'].write(message)

      # Deleting from the variable storage the keys that are not in the user
      # context anymore (i.e., variables that are deleted by the user).
      for key in list(notebook_context['variables'].keys()):
        if not key in user_context:
          del notebook_context['variables'][key]

      output = inject_variable_dict['_print'].getCapturedOutputString()

  displayhook_result = {"data":{}, "metadata":{}}
  if displayhook.result is not None:
    if isinstance(displayhook.result, str):
      displayhook_result["data"]["text/plain"] = displayhook.result
    elif isinstance(displayhook.result, dict):
      displayhook_result = displayhook.result
  result = {
    'result_string': output,
    'print_result': {"data":{"text/plain":output}, "metadata":{}},
    'displayhook_result': displayhook_result,
    'notebook_context': notebook_context,
    'status': status,
    'mime_type': mime_type,
    'evalue': evalue,
    'ename': ename,
    'traceback': tb_list}
  return mergeTracebackListIntoResultDict(result, setup_error_return_dict_list)
541 542


543 544 545 546 547 548 549 550
class EnvironmentUndefineError(TypeError):
  """Raised when an `environment.undefine` call has an invalid argument."""
  pass


class EnvironmentDefinitionError(TypeError):
  """Raised when an `environment.define` call has invalid arguments."""
  pass


551
def canSerialize(obj):
  """
    Tell whether `obj` can be stored in the notebook context.

    - Containers (list, tuple, dict, set, frozenset) are serializable when all
      their elements (keys and values for a dict) are.
    - Objects providing both `__getstate__` and `_p_jar` are checked with
      ZODB's ObjectWriter.
    - Anything else must survive a cPickle dump/load round trip.

    Returns True or False; failures of the checks themselves are reported as
    False instead of being raised.
  """
  container_type_tuple = (list, tuple, dict, set, frozenset)

  # if object is a container, we need to check its elements for presence of
  # objects that cannot be put inside the zodb
  if isinstance(obj, container_type_tuple):
    if isinstance(obj, dict):
      return all(
        canSerialize(key) and canSerialize(value)
        for key, value in obj.iteritems())
    return all(canSerialize(element) for element in obj)

  # if obj is an object and implements __getstate__, ZODB.serialize can check
  # if we can store it
  if hasattr(obj, '__getstate__') and hasattr(obj, '_p_jar'):
    # Need to unwrap the variable, otherwise we get a TypeError, because
    # objects cannot be pickled while inside an acquisition wrapper.
    unwrapped_obj = Acquisition.aq_base(obj)
    try:
      writer = ObjectWriter(unwrapped_obj)
    except:
      # Ignore any exceptions, otherwise Jupyter becomes permanent unusable state.
      return False
    for pending_obj in writer:
      try:
        writer.serialize(pending_obj)
      # Because writer.serialize() relies on the implementation of
      # __getstate__ of the object, all errors can happen, so the
      # "except all" is necessary here.
      except:
        return False
    return True

  # If cannot serialize object with ZODB.serialize, try with cPickle
  # Only a dump of the object is not enough. Dumping and trying to
  # load it will properly raise errors in all possible situations,
  # for example: if the user defines a dict with an object of a class
  # that he created the dump will still work, but the load will fail.
  try:
    cPickle.loads(cPickle.dumps(obj))
  # The round trip runs the object's own pickling hooks and imports its class
  # again, so it may fail with more than PicklingError (UnpicklingError,
  # ImportError, ...). Any failure simply means "not serializable"; letting
  # it propagate would break the execution of the whole code cell.
  except Exception:
    return False
  return True
606 607


608
class CustomPrint(object):
  """
    File-like object collecting everything written to it, so that the output
    can be read back as one string.
  """

  def __init__(self):
    self.captured_output_list = []

  def write(self, *args):
    """Store every positional argument, in order."""
    self.captured_output_list.extend(args)

  def getCapturedOutputString(self):
    """Return all the stored pieces concatenated."""
    return ''.join(self.captured_output_list)
618

619 620

class PrintFixer(ast.NodeTransformer):
  """
    AST transformer redirecting every `print` statement to the object named
    `_print` in the execution context.
  """

  def visit_Print(self, node):
    # Equivalent to rewriting `print x` into `print >>_print, x`.
    node.dest = ast.Name(id="_print", ctx=ast.Load())
    return node
626

627 628 629 630 631 632

class EnvironmentParser(ast.NodeTransformer):
  """
    EnvironmentParser class is an AST transformer that walks in the abstract
    code syntax tree to find calls to `define`, `undefine`, `clearAll` and
    `showSetup` on a variable named `environment`.

    The `define` call should receive a function, which will have its code
    stored as string in `self.environment_setup_dict`. If kw args are
    provided, the variables definition will be stored in
    self.environment_var_dict.

    The `undefine` call records the given alias in
    self.environment_remove_list.

    Invalid calls raise EnvironmentDefinitionError or
    EnvironmentUndefineError. The tree itself is never modified.
  """

  def __init__(self):
    self.environment_setup_dict = {}
    self.environment_var_dict = {}
    self.environment_remove_list = []
    self.function_dict = {}
    self.environment_clear_all = False
    self.show_environment_setup = False

  def visit_FunctionDef(self, node):
    """
      Stores all the function nodes in a dictionary to be accessed later when
      we detect they are used as parameters for an `environment.define` call.
    """
    self.function_dict[node.name] = node
    return node

  def visit_Expr(self, node):
    """
      Visits expression statements and, when they are a call to a method of
      `environment`, records what the call asks for.
    """
    value = node.value
    if not isinstance(value, ast.Call):
      return node
    function = value.func
    if not isinstance(function, ast.Attribute):
      return node
    target = function.value
    if not isinstance(target, ast.Name) or target.id != 'environment':
      return node

    method_name = function.attr
    if method_name == 'define':
      if value.keywords:
        self._collectVariableDefinition(value)
      else:
        self._collectFunctionDefinition(value)
    elif method_name == 'undefine':
      self._collectUndefine(value)
    elif method_name == 'clearAll':
      self.environment_clear_all = True
    elif method_name == 'showSetup':
      self.show_environment_setup = True
    return node

  def _collectFunctionDefinition(self, call_node):
    """Handles `environment.define(function, 'alias')`."""
    if not len(call_node.args) == 2:
      raise EnvironmentDefinitionError('environment.define calls receive 2 arguments')

    self._ensureType(
      obj=call_node.args[0],
      klass=ast.Name,
      error_message='Type mismatch. environment.define receives a function as first argument.'
    )

    self._ensureType(
      obj=call_node.args[1],
      klass=ast.Str,
      error_message='Type mismatch. environment.define receives a string as second argument.'
    )

    func_name = call_node.args[0].id
    func_alias = call_node.args[1].s
    # Only functions visited so far are known; report anything else with the
    # error type the caller handles instead of leaking a KeyError.
    if func_name not in self.function_dict:
      raise EnvironmentDefinitionError(
        "environment.define received '%s', which is not a function defined "
        "before the call." % func_name)
    self.environment_setup_dict[func_name] = {
      "code": astor.to_source(self.function_dict[func_name]),
      "alias": func_alias
    }

  def _collectVariableDefinition(self, call_node):
    """Handles `environment.define(name=value, ...)`."""
    # The value can be a number, string or name. We need to handle
    # them separately. This dict trick was used to avoid the very
    # ugly if.
    node_value_dict = {
      ast.Num: lambda node: str(node.n),
      ast.Str: lambda node: node.s,
      ast.Name: lambda node: node.id
    }
    for keyword in call_node.keywords:
      value_reader = node_value_dict.get(type(keyword.value))
      if value_reader is None:
        raise EnvironmentDefinitionError(
          'Type mismatch. environment.define receives only numbers, strings '
          'or names as keyword arguments.')
      self.environment_var_dict[keyword.arg] = value_reader(keyword.value)

  def _collectUndefine(self, call_node):
    """Handles `environment.undefine('alias')`."""
    error_message = 'Type mismatch. environment.undefine receives only a string as argument.'
    # A call without argument is reported like any other invalid call.
    if not call_node.args:
      raise EnvironmentUndefineError(error_message)
    self._ensureType(
      obj=call_node.args[0],
      klass=ast.Str,
      call_type='undefine',
      error_message=error_message
    )
    self.environment_remove_list.append(call_node.args[0].s)

  def _ensureType(self, obj=None, klass=None, error_message=None, call_type='define'):
    """
      Raises EnvironmentUndefineError (call_type 'undefine') or
      EnvironmentDefinitionError (any other call_type) with error_message
      when obj is not an instance of klass.
    """
    if isinstance(obj, klass):
      return
    if call_type == 'undefine':
      error_class = EnvironmentUndefineError
    else:
      error_class = EnvironmentDefinitionError
    raise error_class(error_message)

  def clearAll(self):
    return self.environment_clear_all

  def showEnvironmentSetup(self):
    return self.show_environment_setup

  def getEnvironmentSetupDict(self):
    return self.environment_setup_dict

  def getEnvironmentVarDict(self):
    return self.environment_var_dict

  def getEnvironmentRemoveList(self):
    return self.environment_remove_list


class Environment(object):
  """
   Placeholder object used to receive calls on an object named `environment`
   inside the user context. Its methods do nothing at run time: the calls are
   tracked by the EnvironmentParser instead.
  """

  def define(self, *args, **kwargs):
    """Accept any arguments and do nothing."""
    pass

  def undefine(self, name):
    """Accept a name and do nothing."""
    pass

  def clearAll(self):
    """Do nothing."""
    pass

  def showSetup(self):
    """Do nothing."""
    pass
766

767 768 769

class ImportFixer(ast.NodeTransformer):
  """
   The ImportFixer class is responsible for fixing "normal" imports that users
   might try to execute.

   It will automatically replace them with the proper usage of the environment
   object using AST manipulation.
  """

  def __init__(self):
    # Name bound by an import located inside a function -> name of that
    # function. Imports binding one of these names are left untouched.
    self.import_func_dict = {}
    # Module names of every import that has been rewritten, used to warn the
    # user about them.
    self.warning_module_names = []

  def visit_FunctionDef(self, node):
    """
      Processes function definition nodes. We want to store a list of all the
      imports that are inside functions, because they do not affect the outer
      user context, thus do not imply in any un-pickleable variable being added
      there.

      Both `import x` and `from x import y` are recorded, at any nesting depth
      inside the function (e.g. under an `if` or a `try`), so that none of
      them gets rewritten when the function body is visited.
    """
    for child in ast.walk(node):
      if isinstance(child, (ast.Import, ast.ImportFrom)):
        for alias in child.names:
          self.import_func_dict[self._getBoundName(alias)] = node.name
    return self.generic_visit(node)

  def visit_ImportFrom(self, node):
    """
     Fixes `from y import x` statements in the same way `import y` is fixed.
    """
    return self.visit_Import(node)

  def visit_Import(self, node):
    """
    This function replaces `normal` imports by creating AST nodes to define
    an environment function which sets up the module(s) and returns them to be
    merged with the user context.

    Returns the node unchanged when every name it binds was recorded as a
    function-local import, otherwise a list made of the generated setup
    function and the `environment.define` call registering it.

    The import is executed once beforehand, so an import error is raised
    immediately instead of when the environment is set up.
    """
    from_module = getattr(node, "module", None)

    if from_module is not None:
      # case when 'from <module_name> import <something>'
      root_module_names = [from_module]
      if node.names[0].name == '*':
        # case when "from <module_name> import *"
        bound_names = self._getStarImportNames(from_module)
        test_import_string = "from %s import *" % from_module
        result_name = "%s_ALL" % from_module
      else:
        # case when "from <module_name> import a as b, c as d, ..."
        bound_names = [self._getBoundName(alias) for alias in node.names]
        test_import_string = "from %s import %s" % (
          from_module, self._formatAliases(node.names))
        result_name = '_'.join(bound_names)
    else:
      # case when "import a", "import a as b" or "import a, b as c, ..."
      root_module_names = [alias.name for alias in node.names]
      bound_names = [self._getBoundName(alias) for alias in node.names]
      test_import_string = "import %s" % self._formatAliases(node.names)
      result_name = '_'.join(bound_names)

    final_module_names = [name for name in bound_names
                          if name not in self.import_func_dict]
    if not final_module_names:
      return node

    # try to import module before it is added to environment
    # this way if user tries to import non existent module Exception
    # is immediately raised and doesn't block next Jupyter cell execution.
    # A throw-away namespace is used so the imported names cannot overwrite
    # the variables of this method.
    exec(test_import_string, {})

    setup_function_name = "%s_setup" % result_name.replace('.', '_dot_')
    setup_function = self.newEmptyFunction(setup_function_name)
    setup_function.body = [node, self.newReturnDict(final_module_names)]
    environment_set = self.newEnvironmentSetCall(setup_function_name)
    for root_module_name in root_module_names:
      self.newImportWarningCall(root_module_name, setup_function_name)
    return [setup_function, environment_set]

  def _getBoundName(self, alias):
    """
      Return the name an import alias binds: its `asname` when given, its
      `name` otherwise.
    """
    return alias.asname or alias.name

  def _formatAliases(self, alias_list):
    """
      Return the source text of a list of import aliases, for example
      `a as b, c`.
    """
    part_list = []
    for alias in alias_list:
      if alias.asname:
        part_list.append("%s as %s" % (alias.name, alias.asname))
      else:
        part_list.append(alias.name)
    return ', '.join(part_list)

  def _getStarImportNames(self, module_name):
    """
      Return the names bound by `from module_name import *`: the module's
      `__all__` when it defines one, every attribute not starting with an
      underscore otherwise.
    """
    module = importlib.import_module(module_name)
    public_names = getattr(module, '__all__', None)
    if public_names is None:
      public_names = [name for name in module.__dict__
                      if not name.startswith('_')]
    return list(public_names)

  def newEmptyFunction(self, func_name):
    """
      Return an AST function definition node for a function named `func_name`
      whose body is a single `pass`.
    """
    func_body = "def %s(): pass" % func_name
    return ast.parse(func_body).body[0]

  def newReturnDict(self, module_names):
    """
      Return an AST node for a `return` statement returning a dict with one
      key per entry of `module_names`: the key is the name as a string and the
      value is the variable of the same name. For dotted names only the part
      before the first dot is used, as this is the name `import a.b` binds.
    """
    item_list = []
    for name in module_names:
      base_name = name.split('.', 1)[0]
      item_list.append("'%s': %s" % (base_name, base_name))
    return ast.parse("return {%s}" % ', '.join(item_list)).body[0]

  def newEnvironmentSetCall(self, func_name):
    """
      Return an AST expression node representing an `environment.define` call
      receiving `func_name` (as an expression) and `'func_name'` (as string).
    """
    code_string = "environment.define(%s, '%s')" % (func_name, func_name)
    tree = ast.parse(code_string)
    return tree.body[0]

  def newImportWarningCall(self, module_name, function_name):
    """
      Adds a new module to the warning to the user about the importing of new
      modules. `function_name` is accepted but not used.
    """
    self.warning_module_names.append(module_name)
950

951

952 953
def renderAsHtml(self, renderable_object):
  '''
    renderAsHtml will render its parameter as HTML by using the matching
    display processor for that class. Some processors can be found in this
    file.

    The rendered result is written to the `_print` object of the calling
    kernel frame and the mime type of that frame's `display_data` is set to
    'text/html'. Nothing is returned.
  '''
  # Ugly frame hack to access the processor list defined in the body of the
  # kernel's code, where `exec` is called.
  #
  # At this point the stack should be, from top to the bottom:
  #
  #   5. ExternalMethod Patch call
  #   4. Base_runJupyterCode frame (where we want to change variable)
  #   3. exec call to run the user's code
  #   2. ExternalMethod Patch call through `context.Base_renderAsHtml` in the notebook
  #   1. renderAsHtml frame (where the function is)
  #
  # So sys._getframe(3) is enough to get us up into the frame we want.
  #
  # NOTE: the lookup must stay directly in this function; wrapping it in a
  # helper would add a frame and shift the depth.
  compile_jupyter_locals = sys._getframe(3).f_locals
  processor = compile_jupyter_locals['processor_list'].getProcessorFor(renderable_object)
  # The mime type returned by the processor is discarded: the output is
  # always flagged as HTML.
  result, _ = processor(renderable_object).process()
  compile_jupyter_locals['inject_variable_dict']['_print'].write(result)
  compile_jupyter_locals['display_data']['mime_type'] = 'text/html'
977

978
def getErrorMessageForException(self, exception, notebook_context):
  '''
    getErrorMessageForException receives an Exception object and a context for
    code execution (notebook_context) and will return a dict as Jupyter
    requires for error rendering.

    The error value and the traceback are taken from the exception currently
    being handled (`sys.exc_info()`), while the error name comes from the
    class of `exception`. When no exception is being handled, `exception`
    itself is used for the error value.
  '''
  value = sys.exc_info()[1]
  if value is None:
    # Called outside of an `except` block: without this fallback the error
    # value would be rendered as the string 'None'.
    value = exception
  # format_exc() ends with a newline, so the last item of the split is an
  # empty string that is dropped.
  traceback_text = traceback.format_exc().split('\n')[:-1]
  return {
    'status': 'error',
    'result_string': None,
    'print_result': None,
    'displayhook_result': None,
    'notebook_context': notebook_context,
    'mime_type': 'text/plain',
    'evalue': str(value),
    'ename': exception.__class__.__name__,
    'traceback': traceback_text
  }

998
def createNotebookContext(self):
  """
  Function to create an empty notebook context.

  Returns a new dict holding two new empty dicts, under the 'variables' and
  'setup' keys.
  """
  return {'variables': {}, 'setup': {}}
1003

1004 1005 1006 1007 1008 1009 1010
class ObjectProcessor(object):
  '''
    Basic object processor that stores the first parameter of the constructor
    in the `subject` attribute and stores the target classes and modules for
    that processor.
  '''
  # Classes and module names handled by the processor. Subclasses override
  # them with lists; None means the processor declares none.
  TARGET_CLASSES=None
  TARGET_MODULES=None

  @classmethod
  def getTargetClasses(cls):
    '''Return the TARGET_CLASSES attribute of the class.'''
    return cls.TARGET_CLASSES

  @classmethod
  def getTargetModules(cls):
    '''Return the TARGET_MODULES attribute of the class.'''
    return cls.TARGET_MODULES

  def __init__(self, something):
    # Object to be rendered by the processor.
    self.subject = something

class MatplotlibFigureProcessor(ObjectProcessor):
  '''
    MatplotlibFigureProcessor handles the rich display of
    matplotlib.figure.Figure objects. It displays them using an img tag with
    the inline png image encoded as base64.
  '''
  TARGET_CLASSES=[Figure,]
  TARGET_MODULES=['matplotlib.pyplot',]

  def process(self):
    '''
      Save the subject as PNG into an in-memory buffer and return a tuple
      made of the img tag embedding it and the 'text/html' mime type.
    '''
    image_io = StringIO()
    self.subject.savefig(image_io, format='png')
    image_io.seek(0)
    return self._getImageHtml(image_io), 'text/html'

  def _getImageHtml(self, image_io):
    '''
      Return an img tag, followed by a line break, whose source is the
      base64 encoded content of `image_io` as a PNG data URI.
    '''
    return '<img src="data:image/png;base64,%s" /><br />' % base64.b64encode(image_io.getvalue())
1040

1041 1042 1043 1044 1045 1046 1047
class ERP5ImageProcessor(ObjectProcessor):
  '''
   ERP5ImageProcessor handles the rich display of ERP5's image_module object.
   It gets the image data and content type and uses them to create a proper
   img tag.
  '''
  TARGET_CLASSES=[Image,]

  def process(self):
    '''
      Return a tuple made of an img tag embedding the subject's data as a
      base64 data URI of the subject's content type, followed by a line
      break, and the 'text/html' mime type.
    '''
    from base64 import b64encode
    figure_data = b64encode(self.subject.getData())
    mime_type = self.subject.getContentType()
    return '<img src="data:%s;base64,%s" /><br />' % (mime_type, figure_data), 'text/html'
1054

1055 1056 1057
class IPythonDisplayObjectProcessor(ObjectProcessor):
  '''
    IPythonDisplayObjectProcessor handles the display of all objects from the
    IPython.display module, including: Audio, IFrame, YouTubeVideo, VimeoVideo,
    ScribdDocument, FileLink, and FileLinks.

    All these objects have the `_repr_html_` method, which is used by the class
    to render them.
  '''
  TARGET_CLASSES=[DisplayObject, IFrame]

  def process(self):
    '''
      Return a tuple made of the subject's `_repr_html_()` output followed by
      a line break, and the 'text/html' mime type.
    '''
    html_repr = self.subject._repr_html_()
    return html_repr + '<br />', 'text/html'
1069 1070 1071 1072 1073

class GenericProcessor(ObjectProcessor):
  '''
    Generic processor to render objects as string.
  '''

  def process(self):
    '''
      Return a tuple made of `str()` of the subject and the 'text/plain'
      mime type.
    '''
    return str(self.subject), 'text/plain'
1077

1078 1079 1080 1081 1082 1083 1084
class ProcessorList(object):
  '''
    ProcessorList is responsible to store all the processors in a dict using
    the classes they handle as the key. Subclasses of these classes will have
    the same processor of the eigen class. This means that the order of adding
    processors is important, as some classes' processors may be overwritten in
    some situations.

    The `getProcessorFor` method uses `something.__class__` and not
    `type(something)` because using the latter on objects returned by portal
    catalog queries will return an AcquisitionWrapper type instead of the
    object's real class.
  '''

  def __init__(self, default=GenericProcessor):
    # Class or module name -> processor class.
    self.processors = {}
    # Processor used when nothing matches. The `default` argument used to be
    # ignored in favour of a hard-coded GenericProcessor.
    self.default_processor = default

  def addProcessor(self, processor):
    '''
      Register `processor` for each of its target classes, for the direct
      subclasses these classes have at registration time, and for each of its
      target module names. Existing entries for the same keys are replaced.
    '''
    classes = processor.getTargetClasses()
    modules = processor.getTargetModules()

    if classes:
      for klass in classes:
        self.processors[klass] = processor
        for subclass in klass.__subclasses__():
          self.processors[subclass] = processor

    if modules:
      for module in modules:
        self.processors[module] = processor

  def getProcessorFor(self, something):
    '''
      Return the processor class to use for `something`.

      Modules are looked up by their `__name__`. Other objects are looked up
      by their class and then by each of its base classes in method
      resolution order, so indirect subclasses and subclasses created after
      the registration are handled too. The default processor is returned
      when nothing matches.
    '''
    if isinstance(something, ModuleType):
      return self.processors.get(something.__name__, self.default_processor)

    klass = something.__class__
    # Classes without `__mro__` (Python 2 old-style classes) are only looked
    # up by exact class.
    for candidate in getattr(klass, '__mro__', (klass,)):
      if candidate in self.processors:
        return self.processors[candidate]
    return self.default_processor
1115

1116

1117
def storeIFrame(self, html, key):
  """
  Store `html` under `key` in the `erp5_pivottable_frame_cache` cache of
  `portal_caches` and return True.
  """
  self.portal_caches.erp5_pivottable_frame_cache.set(key, html)
  return True

1121

1122 1123
# WARNING!
#
# This is a highly experimental PivotTableJs integration which does not follow
# ERP5 JavaScript standards and it will be refactored to use JIO and RenderJS.
#
1127
def erp5PivotTableUI(self, df):
  """
  Build a PivotTable.js page showing `df`, store it with `storeIFrame` under
  the SHA-512 hex digest of the page, and return an IPython IFrame pointing
  to `Base_displayPivotTableFrame` with that key.

  `df` must provide a `to_csv()` method returning the data as CSV text. The
  host of the URL is the first entry of the `HTTP_X_FORWARDED_HOST` header of
  the request, so a KeyError is raised when that header is missing.
  """
  from IPython.display import IFrame
  # Available in both Python 2 and 3, unlike cgi.escape / html.escape.
  from xml.sax.saxutils import escape
  template = """
  <!DOCTYPE html>
  <html>
    <head>
      <title>PivotTable.js</title>

      <!-- external libs from cdnjs -->
      <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.css">
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.11.4/jquery-ui.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery-csv/0.71/jquery.csv-0.71.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.10/c3.min.js"></script>

      <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/pivot.min.css">
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/pivot.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/d3_renderers.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/c3_renderers.min.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/pivottable/2.0.2/export_renderers.min.js"></script>

      <style>
        body {font-family: Verdana;}
        .node {
         border: solid 1px white;
         font: 10px sans-serif;
         line-height: 12px;
         overflow: hidden;
         position: absolute;
         text-indent: 2px;
        }
        .c3-line, .c3-focused {stroke-width: 3px !important;}
        .c3-bar {stroke: white !important; stroke-width: 1;}
        .c3 text { font-size: 12px; color: grey;}
        .tick line {stroke: white;}
        .c3-axis path {stroke: grey;}
        .c3-circle { opacity: 1 !important; }
      </style>
    </head>
    <body>
      <script type="text/javascript">
        $(function(){
          if(window.location != window.parent.location)
            $("<a>", {target:"_blank", href:""})
              .text("[pop out]").prependTo($("body"));

          $("#output").pivotUI(
            $.csv.toArrays($("#output").text()),
            {
              renderers: $.extend(
                $.pivotUtilities.renderers,
                $.pivotUtilities.c3_renderers,
                $.pivotUtilities.d3_renderers,
                $.pivotUtilities.export_renderers
                ),
              hiddenAttributes: [""]
            }
          ).show();
         });
      </script>
      <div id="output" style="display: none;">%s</div>
    </body>
  </html>
  """
  # The CSV ends up as the text content of the #output element, so `&`, `<`
  # and `>` coming from the data must be escaped to keep the page well formed
  # and to prevent the data from injecting markup. The script reads it back
  # with `.text()`, which returns the unescaped characters.
  html_string = template % escape(df.to_csv())
  from hashlib import sha512
  key = sha512(html_string).hexdigest()
  storeIFrame(self, html_string, key)
  iframe_host = self.REQUEST['HTTP_X_FORWARDED_HOST'].split(',')[0]
  url = "https://%s/erp5/Base_displayPivotTableFrame?key=%s" % (iframe_host, key)
  return IFrame(src=url, width='100%', height='500')
1199

1200 1201 1202 1203 1204 1205 1206 1207
def Base_checkExistingReference(self, reference):
  """
  Return True when the catalog finds a 'Data Notebook' with the given
  `reference` owned by the authenticated user, False otherwise.
  """
  user_name = self.portal_membership.getAuthenticatedMember().getUserName()
  notebook = self.portal_catalog.getResultValue(
    owner=user_name,
    portal_type='Data Notebook',
    reference=reference)
  return notebook is not None