From a254bf509f54c9910d8cffb39285d16ea135a45c Mon Sep 17 00:00:00 2001 From: Tatuya Kamada <tatuya@nexedi.com> Date: Wed, 9 Sep 2020 10:38:02 +0200 Subject: [PATCH] patches/Restricted: Allow numpy and pandas Most parts of this patch come from wendelin, plus the tests and some fix. --- .../test.erp5.testRestrictedPythonSecurity.py | 165 +++++++++++++++++- product/ERP5Type/patches/Restricted.py | 81 +++++++++ 2 files changed, 245 insertions(+), 1 deletion(-) diff --git a/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py b/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py index fd97cd9e5d..d2b6d61894 100644 --- a/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py +++ b/bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py @@ -35,8 +35,9 @@ from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase from Products.ERP5Type.tests.utils import createZODBPythonScript from Products.ERP5Type.tests.utils import removeZODBPythonScript from Products.ERP5Type.patches.Restricted import allow_class_attribute +from Products.ERP5Type.patches.Restricted import (pandas_black_list, dataframe_black_list, series_black_list) from AccessControl import Unauthorized - +from AccessControl.ZopeGuards import Unauthorized as ZopeGuardsUnauthorized class TestRestrictedPythonSecurity(ERP5TypeTestCase): """ @@ -453,6 +454,168 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase): expected="ok" ) + def testNumpy(self): + self.createAndRunScript( + textwrap.dedent(''' + import numpy as np + return [x for x in (np.dtype('int32').name, np.timedelta64(1, 'D').nbytes)] + '''), + expected=["int32", 8] + ) + + def testNdarrayWrite(self): + self.createAndRunScript( + textwrap.dedent(''' + import numpy as np + z = np.array([[1,2],[3,4]]) + z[0][0] = 99 + return z[0][0] + '''), + expected=99 + ) + + def 
testPandasSeries(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + return pd.Series([1,2,3]).tolist() + '''), + expected=[1,2,3] + ) + + def testPandasTimestamp(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + return pd.Timestamp('2020-01').year + '''), + expected=2020 + ) + + def testPandasDatetimeIndex(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'date':['2020-01-01','2020-03-01']}) + df['date'] = pd.to_datetime(df['date']) + df.set_index('date', inplace=True) + return str(df.index.name) + '''), + expected='date' + ) + + def testPandasMultiIndex(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'a':[1,2],'b':[3,4],'c':[5,6]}) + df2 = df.set_index(['a','b'],drop=True) + return list(df2.index.names) + '''), + expected=['a','b'] + ) + + def testPandasIndex(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'a':[1,2],'b':[3,4]}) + df2 = df.set_index(['a'],drop=True) + return list(df2.index.names) + '''), + expected=['a'] + ) + + def testPandasGroupBy(self): + # test pandas.core.groupby.DataFrameGroupBy,SeriesGroupBy + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df2 = pd.DataFrame({'id':[1,1,2],'quantity':[3,4,5],'price':[6,7,8]}) + return list(df2.groupby(['id'])['quantity'].agg('sum')) + '''), + expected=[7,5] + ) + + def testPandasLocIndexer(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'a':[1,2],'b':[3,4]}) + return df.loc[df['a'] == 1]['b'][0] + '''), + expected=3 + ) + + def testPandasDataFrameWrite(self): + self.createAndRunScript( + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'a':[1,2], 'b':[3,4]}) + df.iloc[0, 0] = 999 + return df['a'][0] + '''), + expected=999 + ) + + def testPandasIORead(self): + self.assertRaises(Unauthorized, + 
self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + pd.read_csv('testPandasIORead.csv') + ''')) + + # Test the black_list configuration validity + for read_method in pandas_black_list: + self.assertRaises(Unauthorized, + self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + read_method = pd.{read_method} + read_method('testPandasIORead.data') + '''.format(read_method=read_method))) + + def testPandasDataFrameIOWrite(self): + self.assertRaises(ZopeGuardsUnauthorized, + self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame({'a':[1,2,3]}) + df.to_csv('testPandasDataFrameIOWrite.csv') + ''')) + + # Test the black_list configuration validity + for write_method in dataframe_black_list: + self.assertRaises(ZopeGuardsUnauthorized, + self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + df = pd.DataFrame(columns=['a','b'],data=[[1,2]]) + write_method = df.{write_method} + write_method('testPandasDataFrameIOWrite.data') + '''.format(write_method=write_method))) + + def testPandasSeriesIOWrite(self): + self.assertRaises(ZopeGuardsUnauthorized, + self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + df = pd.Series([4,5,6]) + df.to_csv('testPandasSeriesIOWrite.csv') + ''')) + + # Test the black_list configuration validity + for write_method in series_black_list: + self.assertRaises(ZopeGuardsUnauthorized, + self.createAndRunScript, + textwrap.dedent(''' + import pandas as pd + df = pd.Series([4,5,6]) + write_method = df.{write_method} + write_method('testPandasSeriesIOWrite.data') + '''.format(write_method=write_method))) + def test_suite(): suite = unittest.TestSuite() diff --git a/product/ERP5Type/patches/Restricted.py b/product/ERP5Type/patches/Restricted.py index ba06940c7a..c3a643543c 100644 --- a/product/ERP5Type/patches/Restricted.py +++ b/product/ERP5Type/patches/Restricted.py @@ -394,3 +394,84 @@ del member_id, member from random import SystemRandom 
allow_type(SystemRandom) ModuleSecurityInfo('os').declarePublic('urandom') + +# +# backport from wendelin +# +# we need to allow access to numpy's internal types +import pandas as pd +import numpy as np +allow_module('numpy') +allow_module('numpy.lib.recfunctions') +for dtype in ('int8', 'int16', 'int32', 'int64', \ + 'uint8', 'uint16', 'uint32', 'uint64', \ + 'float16', 'float32', 'float64', \ + 'complex64', 'complex128'): + z = np.array([0,], dtype = dtype) + allow_type(type(z[0])) + allow_type(type(z)) + + sz = np.array([(0,)], dtype = [('f0', dtype)]) + allow_type(type(sz[0])) + allow_type(type(sz)) + + rz = np.rec.array(np.array([(0,)], dtype = [('f0', dtype)])) + allow_type(type(rz[0])) + allow_type(type(rz)) + +allow_type(np.timedelta64) +allow_type(type(np.c_)) +allow_type(type(np.dtype('int16'))) + +allow_module('pandas') + +allow_type(pd.Series) +allow_type(pd.Timestamp) +allow_type(pd.DatetimeIndex) +# XXX: pd.DataFrame has its own security thus disable until we can fully integrate it +#allow_type(pd.DataFrame) +allow_type(pd.MultiIndex) +allow_type(pd.indexes.range.RangeIndex) +allow_type(pd.indexes.numeric.Int64Index) +allow_type(pd.core.groupby.DataFrameGroupBy) +allow_type(pd.core.groupby.SeriesGroupBy) +allow_class(pd.DataFrame) + +def restrictedMethod(s,name): + def dummyMethod(*args, **kw): + raise Unauthorized(name) + return dummyMethod + +# Note: These black_list methods are for pandas 0.19.2 +series_black_list = ['to_csv', 'to_json', 'to_pickle', 'to_hdf', + 'to_sql', 'to_msgpack'] +series_black_list_dict = {m: restrictedMethod for m in series_black_list} +ContainerAssertions[pd.Series] = _check_access_wrapper(pd.Series, + series_black_list_dict) + +pandas_black_list = ['read_csv', 'read_json', 'read_pickle', 'read_hdf', 'read_fwf', + 'read_excel', 'read_html', 'read_msgpack', + 'read_gbq', 'read_sas', 'read_stata'] +ModuleSecurityInfo('pandas').declarePrivate(*pandas_black_list) + +dataframe_black_list = ['to_csv', 'to_json', 'to_pickle', 
'to_hdf', + 'to_excel', 'to_html', 'to_sql', 'to_msgpack', + 'to_latex', 'to_gbq', 'to_stata'] +dataframe_black_list_dict = {m: restrictedMethod for m in dataframe_black_list} +ContainerAssertions[pd.DataFrame] = _check_access_wrapper( + pd.DataFrame, dataframe_black_list_dict) + +# Modify 'safetype' dict in full_write_guard function +# of RestrictedPython (closure) directly to allow +# write access to ndarray and pandas DataFrame. +from RestrictedPython.Guards import full_write_guard +full_write_guard.func_closure[1].cell_contents.__self__[np.ndarray] = True +full_write_guard.func_closure[1].cell_contents.__self__[np.core.records.recarray] = True +full_write_guard.func_closure[1].cell_contents.__self__[np.core.records.record] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.DataFrame] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.Series] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.tseries.index.DatetimeIndex] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.core.indexing._iLocIndexer] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.core.indexing._LocIndexer] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.MultiIndex] = True +full_write_guard.func_closure[1].cell_contents.__self__[pd.Index] = True -- 2.30.9