Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
erp5
Commits
1cbff971
Commit
1cbff971
authored
May 19, 2022
by
Levin Zimmermann
Browse files
Options
Browse Files
Download
Plain Diff
Allow patched pandas.read_* in restricted Python
See merge request
nexedi/erp5!1615
parents
85317472
4360dbc6
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
233 additions
and
16 deletions
+233
-16
bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
...rtal_components/test.erp5.testRestrictedPythonSecurity.py
+143
-8
product/ERP5Type/Pandas.py
product/ERP5Type/Pandas.py
+84
-0
product/ERP5Type/patches/Restricted.py
product/ERP5Type/patches/Restricted.py
+6
-8
No files found.
bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
View file @
1cbff971
...
...
@@ -25,6 +25,7 @@
#
##############################################################################
import
json
import
os.path
import
tempfile
import
textwrap
...
...
@@ -572,14 +573,6 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
)
def
testPandasIORead
(
self
):
self
.
assertRaises
(
Unauthorized
,
self
.
createAndRunScript
,
'''
import pandas as pd
pd.read_csv('testPandasIORead.csv')
'''
)
# Test the black_list configuration validity
for
read_method
in
pandas_black_list
:
self
.
assertRaises
(
...
...
@@ -635,6 +628,148 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
write_method('testPandasSeriesIOWrite.data')
'''
.
format
(
write_method
=
write_method
))
def _assertPandasRestrictedReadFunctionIsEqualTo(
  self, read_function, read_argument, expected_data_frame_init
):
  """
  Hand createAndRunScript a script comparing the result of
  pd.<read_function>(<read_argument>) with
  pd.DataFrame(<expected_data_frame_init>), with expected=True.

  The three arguments are source snippets which are pasted verbatim
  into the generated script.
  """
  script_template = '''
import pandas as pd
expected_data_frame = pd.DataFrame({expected_data_frame_init})
return pd.{read_function}({read_argument}).equals(expected_data_frame)
'''
  script = script_template.format(
    read_argument=read_argument,
    read_function=read_function,
    expected_data_frame_init=expected_data_frame_init,
  )
  # The script returns the outcome of DataFrame.equals.
  self.createAndRunScript(script, expected=True)
def testPandasRestrictedReadFunctionProhibitedInput(self):
  """
  Check that the patched pandas read_* functions reject every input
  which is not a string.
  """
  script_template = '''
import pandas as pd
{preparation}
pd.{pandas_read_function}({prohibited_input})
'''
  # Each entry: (statement run before the call, expression given to the reader)
  prohibited_case_tuple = (
    ('', 100),
    ('from StringIO import StringIO', 'StringIO("[1, 2, 3]")'),
  )
  for pandas_read_function in ("read_json", "read_csv", "read_fwf"):
    for preparation, prohibited_input in prohibited_case_tuple:
      script = script_template.format(
        prohibited_input=prohibited_input,
        pandas_read_function=pandas_read_function,
        preparation=preparation,
      )
      self.assertRaises(
        ZopeGuardsUnauthorized,
        self.createAndRunScript,
        script,
      )
def testPandasReadFwf(self):
  """
  Check the data frames produced by the restricted read_fwf for a few
  representative string inputs.
  """
  # Each entry: (argument source passed to read_fwf, DataFrame init source)
  case_tuple = (
    # Normal input should be correctly handled
    (r'"100\n200"', r"[[200]], columns=['100']"),
    # Ensure monkey patch passes keyword arguments to patched function
    (r'"1020\n3040", widths=[2, 2]', r"[[30, 40]], columns=['10', '20']"),
    # A string containing an url or file path should be handled as if
    # it were plain data
    (
      r'"file://path/to/fwf/file.fwf"',
      r"[], columns=['file://path/to/fwf/file.fwf']",
    ),
  )
  for read_argument, expected_data_frame_init in case_tuple:
    self._assertPandasRestrictedReadFunctionIsEqualTo(
      "read_fwf", read_argument, expected_data_frame_init
    )
def testPandasReadCSV(self):
  """
  Check the data frames produced by the restricted read_csv for a few
  representative string inputs.
  """
  # Each entry: (argument source passed to read_csv, DataFrame init source)
  case_tuple = (
    # Normal input should be correctly handled
    (
      r'"11,2,300\n50.5,99,hello"',
      r"[[50.5, 99, 'hello']], columns='11 2 300'.split(' ')",
    ),
    # Ensure monkey patch passes keyword arguments to patched function
    (r'"a;b", sep=";"', r"[], columns=['a', 'b']"),
    # A string containing an url or file path should be handled as if
    # it were normal csv content
    (
      r'"https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv"',
      r"[], columns=['https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv']",
    ),
    (
      r'"file://path/to/csv/file.csv"',
      r"[], columns=['file://path/to/csv/file.csv']",
    ),
  )
  for read_argument, expected_data_frame_init in case_tuple:
    self._assertPandasRestrictedReadFunctionIsEqualTo(
      "read_csv", read_argument, expected_data_frame_init
    )
def testPandasReadJsonParsesInput(self):
  """
  Check the data frames produced by the restricted read_json for valid
  json strings.
  """
  # Each entry: (argument source passed to read_json, DataFrame init source)
  case_tuple = (
    # Normal input should be correctly handled
    ('"[1, 2, 3]"', "[1, 2, 3]"),
    (
      '\'{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}\'',
      '{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}',
    ),
    # Ensure monkey patch passes keyword arguments to patched function
    (r'"[1, 2, 3]\n[4, 5, 6]", lines=True', "[[1, 2, 3], [4, 5, 6]]"),
  )
  for read_argument, expected_data_frame_init in case_tuple:
    self._assertPandasRestrictedReadFunctionIsEqualTo(
      "read_json", read_argument, expected_data_frame_init
    )
  # URLs, etc. should raise a ValueError
  # (see testPandasReadJsonProhibitsMalicousString)
def testPandasReadJsonProhibitsMalicousString(self):
  """
  Check that file paths, urls and other strings which are not json
  make the restricted read_json raise a ValueError.
  """
  # Write a valid json file which a non-patched read_json could read.
  test_file_path = ".testPandasReadJson.json"
  json_test_data = [1, 2, 3]
  with open(test_file_path, 'w') as json_file:
    json_file.write(json.dumps(json_test_data))
  self.addCleanup(os.remove, test_file_path)

  # Make sure the file really exists and holds the expected data.
  self.assertTrue(os.path.isfile(test_file_path))
  with open(test_file_path, "r") as json_file:
    stored_data = json.load(json_file)
  self.assertEqual(json_test_data, stored_data)

  malicous_input_tuple = (
    # If pandas read this as an URL it would raise an URLError.
    # Because it tries to read it as a json string instead, it
    # raises a ValueError.
    "https://test-url.com/test-name.json",
    "file://path/to/json/file.json",
    # This would not raise any error if the pandas read function
    # were not patched.
    test_file_path,
    # Gibberish should also raise a ValueError
    "Invalid-string",
  )
  script_template = '''
import pandas as pd
pd.read_json("{}")
'''
  for malicous_input in malicous_input_tuple:
    self.assertRaises(
      ValueError,
      self.createAndRunScript,
      script_template.format(malicous_input),
    )
def
test_suite
():
suite
=
unittest
.
TestSuite
()
...
...
product/ERP5Type/Pandas.py
0 → 100644
View file @
1cbff971
##############################################################################
#
# Copyright (c) 2012 Nexedi SARL and Contributors. All Rights Reserved.
# Levin Zimmermann <levin.zimmermann@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
Restricted pandas module.
From restricted python, use "import pandas" (see patches/Restricted.py).
"""
from
pandas
import
*
# Add restricted versions of IO functions
import
six
as
_six
from
AccessControl.ZopeGuards
import
Unauthorized
as
_ZopeGuardsUnauthorized
if
_six
.
PY2
:
from
StringIO
import
StringIO
as
_StringIO
else
:
from
io
import
StringIO
as
_StringIO
def _addRestrictedPandasReadFunction(function_name):
  """
  Publish a restricted variant of ``pandas.<function_name>`` under the
  same name in this module's globals.

  The variant only accepts a builtin ``str`` as first argument and hands
  it to the original reader wrapped in a StringIO buffer; every further
  positional and keyword argument is forwarded unchanged.

  function_name -- name of the pandas reader to wrap, e.g. "read_csv"

  The generated function raises Unauthorized (AccessControl.ZopeGuards)
  when its first argument is not exactly of type ``str``.
  """
  original_function = getattr(__import__('pandas'), function_name)

  def Pandas_read(data_string, *args, **kwargs):
    # Strict: don't use 'isinstance', only allow builtin str objects
    # (subclasses of str are rejected as well).
    if type(data_string) is not str:
      raise _ZopeGuardsUnauthorized(
        "Parsing object '%s' of type '%s' is prohibited!"
        % (data_string, type(data_string))
      )
    # The reader receives a buffer, never the raw string: the text is
    # parsed as data instead of being treated as a file path or an url.
    return original_function(_StringIO(data_string), *args, **kwargs)

  disclaimer = """\n
Disclaimer:
This function has been patched by ERP5 for zope sandbox usage.
Only objects of type 'str' are valid inputs, file paths, files,
urls, etc. are prohibited or ignored.
"""
  # __doc__ is None when the wrapped function has no docstring (for
  # instance when docstrings are stripped with "python -OO"); fall back
  # to an empty string so that the concatenation cannot raise TypeError.
  Pandas_read.__doc__ = (original_function.__doc__ or "") + disclaimer
  globals()[function_name] = Pandas_read
def _addRestrictedPandasReadFunctionTuple():
  """
  Call _addRestrictedPandasReadFunction for every pandas reader name
  listed below.
  """
  restricted_reader_name_tuple = (
    "read_json",
    # "read_html", # needs installation of additional dependency: html5lib
    "read_csv",
    "read_fwf",
    # "read_xml", # only available for pandas version >= 1.3.0
  )
  for reader_name in restricted_reader_name_tuple:
    _addRestrictedPandasReadFunction(reader_name)


# Register the restricted readers when this module is imported.
_addRestrictedPandasReadFunctionTuple()
\ No newline at end of file
product/ERP5Type/patches/Restricted.py
View file @
1cbff971
...
...
@@ -371,6 +371,7 @@ MNAME_MAP = {
'calendar'
:
'Products.ERP5Type.Calendar'
,
'collections'
:
'Products.ERP5Type.Collections'
,
'six'
:
'Products.ERP5Type.Six'
,
'pandas'
:
'Products.ERP5Type.Pandas'
,
}
for
alias
,
real
in
six
.
iteritems
(
MNAME_MAP
):
assert
'.'
not
in
alias
,
alias
# TODO: support this
...
...
@@ -478,23 +479,20 @@ def restrictedMethod(s,name):
raise
Unauthorized
(
name
)
return
dummyMethod
try
:
import
pandas
as
pd
except
ImportError
:
pass
else
:
allow_module
(
'pandas'
)
allow_type
(
pd
.
Series
)
allow_type
(
pd
.
Timestamp
)
allow_type
(
pd
.
DatetimeIndex
)
# XXX: pd.DataFrame has its own security thus disable
# until we can fully integrate it
#allow_type(pd.DataFrame)
allow_type
(
pd
.
MultiIndex
)
allow_type
(
pd
.
indexes
.
range
.
RangeIndex
)
allow_type
(
pd
.
indexes
.
numeric
.
Int64Index
)
allow_type
(
pd
.
core
.
groupby
.
DataFrameGroupBy
)
allow_type
(
pd
.
core
.
groupby
.
SeriesGroupBy
)
allow_class
(
pd
.
DataFrame
)
# Note: These black_list methods are for pandas 0.19.2
...
...
@@ -503,10 +501,10 @@ else:
ContainerAssertions
[
pd
.
Series
]
=
_check_access_wrapper
(
pd
.
Series
,
dict
.
fromkeys
(
series_black_list
,
restrictedMethod
))
pandas_black_list
=
(
'read_
csv'
,
'read_json'
,
'read_
pickle'
,
'read_hdf'
,
'read_
fwf'
,
'read_
excel'
,
'read_html'
,
'read_msgpack'
,
pandas_black_list
=
(
'read_pickle'
,
'read_hdf'
,
'read_excel'
,
'read_html'
,
'read_msgpack'
,
'read_gbq'
,
'read_sas'
,
'read_stata'
)
ModuleSecurityInfo
(
'pandas'
).
declarePrivate
(
*
pandas_black_list
)
ModuleSecurityInfo
(
MNAME_MAP
[
'pandas'
]
).
declarePrivate
(
*
pandas_black_list
)
dataframe_black_list
=
(
'to_csv'
,
'to_json'
,
'to_pickle'
,
'to_hdf'
,
'to_excel'
,
'to_html'
,
'to_sql'
,
'to_msgpack'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment