From a254bf509f54c9910d8cffb39285d16ea135a45c Mon Sep 17 00:00:00 2001
From: Tatuya Kamada <tatuya@nexedi.com>
Date: Wed, 9 Sep 2020 10:38:02 +0200
Subject: [PATCH] patches/Restricted: Allow numpy and pandas

Most parts of this patch come from wendelin, plus the tests and some fixes.
---
 .../test.erp5.testRestrictedPythonSecurity.py | 165 +++++++++++++++++-
 product/ERP5Type/patches/Restricted.py        |  81 +++++++++
 2 files changed, 245 insertions(+), 1 deletion(-)

diff --git a/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py b/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
index fd97cd9e5d..d2b6d61894 100644
--- a/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
+++ b/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
@@ -35,8 +35,9 @@ from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase
 from Products.ERP5Type.tests.utils import createZODBPythonScript
 from Products.ERP5Type.tests.utils import removeZODBPythonScript
 from Products.ERP5Type.patches.Restricted import allow_class_attribute
+from Products.ERP5Type.patches.Restricted import (pandas_black_list, dataframe_black_list, series_black_list)
 from AccessControl import Unauthorized
-
+from AccessControl.ZopeGuards import Unauthorized as ZopeGuardsUnauthorized
 
 class TestRestrictedPythonSecurity(ERP5TypeTestCase):
   """
@@ -453,6 +454,168 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
         expected="ok"
     )
 
+  def testNumpy(self):  # numpy dtype / timedelta64 attributes must be readable
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import numpy as np
+      return [x for x in (np.dtype('int32').name, np.timedelta64(1, 'D').nbytes)]
+      '''),
+      expected=["int32", 8]  # dtype name, then nbytes of the timedelta64
+    )
+
+  def testNdarrayWrite(self):  # item assignment into an ndarray must be permitted
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import numpy as np
+      z = np.array([[1,2],[3,4]])
+      z[0][0] = 99
+      return z[0][0]
+      '''),
+      expected=99  # the value written by the script is read back
+    )
+
+  def testPandasSeries(self):  # a Series can be built and one of its methods called
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      return pd.Series([1,2,3]).tolist()
+      '''),
+      expected=[1,2,3]  # same values that were fed to pd.Series
+    )
+
+  def testPandasTimestamp(self):  # attribute access on pd.Timestamp must be permitted
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      return pd.Timestamp('2020-01').year
+      '''),
+      expected=2020  # year of the '2020-01' timestamp
+    )
+
+  def testPandasDatetimeIndex(self):  # .name of a datetime-based index must be readable
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'date':['2020-01-01','2020-03-01']})
+      df['date'] = pd.to_datetime(df['date'])
+      df.set_index('date', inplace=True)
+      return str(df.index.name)
+      '''),
+      expected='date'  # name of the column that became the index
+    )
+
+  def testPandasMultiIndex(self):  # two-column set_index; index .names must be readable
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'a':[1,2],'b':[3,4],'c':[5,6]})
+      df2 = df.set_index(['a','b'],drop=True)
+      return list(df2.index.names)
+      '''),
+      expected=['a','b']  # the two columns passed to set_index
+    )
+
+  def testPandasIndex(self):  # single-column set_index; index .names must be readable
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'a':[1,2],'b':[3,4]})
+      df2 = df.set_index(['a'],drop=True)
+      return list(df2.index.names)
+      '''),
+      expected=['a']  # the single column passed to set_index
+    )
+
+  def testPandasGroupBy(self):
+    # Exercises pandas.core.groupby.DataFrameGroupBy and SeriesGroupBy
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df2 = pd.DataFrame({'id':[1,1,2],'quantity':[3,4,5],'price':[6,7,8]})
+      return list(df2.groupby(['id'])['quantity'].agg('sum'))
+      '''),
+      expected=[7,5]  # quantity summed per id: 3+4 for id 1, 5 for id 2
+    )
+
+  def testPandasLocIndexer(self):  # boolean-mask selection through df.loc must be permitted
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'a':[1,2],'b':[3,4]})
+      return df.loc[df['a'] == 1]['b'][0]
+      '''),
+      expected=3  # 'b' value of the row where 'a' == 1
+    )
+
+  def testPandasDataFrameWrite(self):  # assignment through df.iloc must be permitted
+    self.createAndRunScript(
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'a':[1,2], 'b':[3,4]})
+      df.iloc[0, 0] = 999
+      return df['a'][0]
+      '''),
+      expected=999  # the value written by the script is read back
+    )
+
+  def testPandasIORead(self):  # pandas.read_* module functions must be refused
+    self.assertRaises(Unauthorized,
+      self.createAndRunScript,
+      textwrap.dedent('''
+      import pandas as pd
+      pd.read_csv('testPandasIORead.csv')
+      '''))
+
+    # Check that every name listed in pandas_black_list is refused as well
+    for read_method in pandas_black_list:
+      self.assertRaises(Unauthorized,
+        self.createAndRunScript,
+        textwrap.dedent('''
+        import pandas as pd
+        read_method = pd.{read_method}
+        read_method('testPandasIORead.data')
+        '''.format(read_method=read_method)))
+
+  def testPandasDataFrameIOWrite(self):  # DataFrame.to_* writers must be refused
+    self.assertRaises(ZopeGuardsUnauthorized,
+      self.createAndRunScript,
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.DataFrame({'a':[1,2,3]})
+      df.to_csv('testPandasDataFrameIOWrite.csv')
+      '''))
+
+    # Check that every name listed in dataframe_black_list is refused as well
+    for write_method in dataframe_black_list:
+      self.assertRaises(ZopeGuardsUnauthorized,
+        self.createAndRunScript,
+        textwrap.dedent('''
+        import pandas as pd
+        df = pd.DataFrame(columns=['a','b'],data=[[1,2]])
+        write_method = df.{write_method}
+        write_method('testPandasDataFrameIOWrite.data')
+        '''.format(write_method=write_method)))
+
+  def testPandasSeriesIOWrite(self):  # Series.to_* writers must be refused
+    self.assertRaises(ZopeGuardsUnauthorized,
+      self.createAndRunScript,
+      textwrap.dedent('''
+      import pandas as pd
+      df = pd.Series([4,5,6])
+      df.to_csv('testPandasSeriesIOWrite.csv')
+      '''))
+
+    # Check that every name listed in series_black_list is refused as well
+    for write_method in series_black_list:
+      self.assertRaises(ZopeGuardsUnauthorized,
+        self.createAndRunScript,
+        textwrap.dedent('''
+        import pandas as pd
+        df = pd.Series([4,5,6])
+        write_method = df.{write_method}
+        write_method('testPandasSeriesIOWrite.data')
+        '''.format(write_method=write_method)))
+
 
 def test_suite():
   suite = unittest.TestSuite()
diff --git a/product/ERP5Type/patches/Restricted.py b/product/ERP5Type/patches/Restricted.py
index ba06940c7a..c3a643543c 100644
--- a/product/ERP5Type/patches/Restricted.py
+++ b/product/ERP5Type/patches/Restricted.py
@@ -394,3 +394,84 @@ del member_id, member
 from random import SystemRandom
 allow_type(SystemRandom)
 ModuleSecurityInfo('os').declarePublic('urandom')
+
+#
+# backport from wendelin
+#
+# we need to allow access to numpy's internal types
+import pandas as pd
+import numpy as np
+allow_module('numpy')
+allow_module('numpy.lib.recfunctions')
+for dtype in (
+    'int8', 'int16', 'int32', 'int64',
+    'uint8', 'uint16', 'uint32', 'uint64',
+    'float16', 'float32', 'float64', 'complex64', 'complex128'):
+  z = np.array([0], dtype=dtype)  # plain array: allow element type, then array type
+  allow_type(type(z[0]))
+  allow_type(type(z))
+  # structured array with a single field 'f0' of this dtype
+  sz = np.array([(0,)], dtype=[('f0', dtype)])
+  allow_type(type(sz[0]))
+  allow_type(type(sz))
+  # record array built from the same structured data
+  rz = np.rec.array(np.array([(0,)], dtype=[('f0', dtype)]))
+  allow_type(type(rz[0]))
+  allow_type(type(rz))
+
+allow_type(np.timedelta64)
+allow_type(type(np.c_))  # the type of the np.c_ object
+allow_type(type(np.dtype('int16')))  # the type of dtype objects
+
+allow_module('pandas')
+
+allow_type(pd.Series)
+allow_type(pd.Timestamp)
+allow_type(pd.DatetimeIndex)
+# XXX: pd.DataFrame has its own security, so allow_type stays disabled until we can fully integrate it
+#allow_type(pd.DataFrame)
+allow_type(pd.MultiIndex)
+allow_type(pd.indexes.range.RangeIndex)
+allow_type(pd.indexes.numeric.Int64Index)
+allow_type(pd.core.groupby.DataFrameGroupBy)
+allow_type(pd.core.groupby.SeriesGroupBy)
+allow_class(pd.DataFrame)  # DataFrame goes through allow_class rather than allow_type (see XXX above)
+
+def restrictedMethod(s,name):  # builds a stand-in callable for attribute `name`; `s` is unused here
+  def dummyMethod(*args, **kw):  # accepts any call signature
+    raise Unauthorized(name)  # always refuses, reporting the attribute name
+  return dummyMethod
+
+# Note: the black_list method names below were compiled for pandas 0.19.2
+series_black_list = ['to_csv', 'to_json', 'to_pickle', 'to_hdf',
+                     'to_sql', 'to_msgpack']
+series_black_list_dict = {m: restrictedMethod for m in series_black_list}  # name -> factory, not called here
+ContainerAssertions[pd.Series] = _check_access_wrapper(pd.Series,
+                                                       series_black_list_dict)
+# Module-level pandas readers: declared private on the 'pandas' module.
+pandas_black_list = ['read_csv', 'read_json', 'read_pickle', 'read_hdf', 'read_fwf',
+                     'read_excel', 'read_html', 'read_msgpack',
+                     'read_gbq', 'read_sas', 'read_stata']
+ModuleSecurityInfo('pandas').declarePrivate(*pandas_black_list)
+# DataFrame writers: mapped to restrictedMethod the same way as for Series.
+dataframe_black_list = ['to_csv', 'to_json', 'to_pickle', 'to_hdf',
+                        'to_excel', 'to_html', 'to_sql', 'to_msgpack',
+                        'to_latex', 'to_gbq', 'to_stata']
+dataframe_black_list_dict = {m: restrictedMethod for m in dataframe_black_list}  # name -> factory, not called here
+ContainerAssertions[pd.DataFrame] = _check_access_wrapper(
+                                      pd.DataFrame, dataframe_black_list_dict)
+
+# The 'safetype' lookup used by RestrictedPython's full_write_guard lives
+# in a closure cell of that function.  Fetch the dict behind it once and
+# register the numpy and pandas types restricted code may write to
+# (arrays, records, DataFrame, Series, indexes and loc/iloc indexers).
+from RestrictedPython.Guards import full_write_guard
+safetype_dict = full_write_guard.func_closure[1].cell_contents.__self__
+for writable_type in (
+    np.ndarray, np.core.records.recarray, np.core.records.record,
+    pd.DataFrame, pd.Series, pd.tseries.index.DatetimeIndex,
+    pd.core.indexing._iLocIndexer, pd.core.indexing._LocIndexer,
+    pd.MultiIndex, pd.Index):
+  safetype_dict[writable_type] = True
+# Do not leave the helper names behind in this module's namespace.
+del safetype_dict, writable_type
-- 
2.30.9