Commit 12434484 authored by Jérome Perrin's avatar Jérome Perrin

tmp: replace MdxEngine executor with the simple wendelin example

Based on https://github.com/abilian/olapy/tree/simple_example_wendelin ,
but using directly numpy arrays for simplicity.
parent 3911da79
...@@ -77,60 +77,10 @@ class MdxEngine: ...@@ -77,60 +77,10 @@ class MdxEngine:
@classmethod @classmethod
def get_cubes_names(cls): def get_cubes_names(cls):
""":return: list cubes name that exists in cubes folder (under ~/olapy-data/cubes) and postgres database (if connected).""" """:return: list cubes name that exists in cubes folder (under ~/olapy-data/cubes) and postgres database (if connected)."""
# get csv files folders (cubes)
# toxworkdir does not expanduser properly under tox
# surrended with try, except and PASS so we continue getting cubes from different
# sources (db, csv...) without interruption
if 'OLAPY_PATH' in os.environ:
home_directory = os.environ.get('OLAPY_PATH')
elif cls.DATA_FOLDER is not None:
home_directory = os.path.dirname(cls.DATA_FOLDER)
elif RUNNING_TOX:
home_directory = os.environ.get('HOME_DIR')
else:
home_directory = expanduser("~")
olapy_data_location = os.path.join(home_directory, 'olapy-data')
# surrended with try, except and PASS so we continue getting cubes from different
# sources (db, csv...) without interruption
cubes_location = os.path.join(olapy_data_location, cls.CUBE_FOLDER)
try:
MdxEngine.csv_files_cubes = [
file for file in os.listdir(cubes_location)
if os.path.isdir(os.path.join(cubes_location, file))
]
except Exception:
type, value, traceback = sys.exc_info()
print('Error opening %s: %s' % (value.filename, value.strerror))
print('no csv folders')
pass
# get postgres databases
# surrended with try, except and PASS so we continue getting cubes from different
# sources (db, csv...) without interruption
try:
db = MyDB(db_config_file_path=olapy_data_location)
# TODO this work only with postgres
result = db.engine.execute(
'SELECT datname FROM pg_database WHERE datistemplate = false;')
available_tables = result.fetchall()
# cursor.execute("""SELECT datname FROM pg_database
# WHERE datistemplate = false;""")
MdxEngine.postgres_db_cubes = [
database[0] for database in available_tables
]
except Exception:
type, value, traceback = sys.exc_info()
print('Error opening %s: %s' % (value.filename, value.strerror))
print('no database connexion')
pass
return MdxEngine.csv_files_cubes + MdxEngine.postgres_db_cubes # wendelin
# ( we need to return at least one cube )
return ["ERP5", ]
def _get_default_cube_directory(self): def _get_default_cube_directory(self):
...@@ -163,26 +113,9 @@ class MdxEngine: ...@@ -163,26 +113,9 @@ class MdxEngine:
:return: dict with key as table name and DataFrame as value :return: dict with key as table name and DataFrame as value
""" """
config_file_parser = ConfigParser(self.cube_path) # wendelin
tables = {} from ...mdx.wendelin_olapy.wendelin_integration import loard_tables
tables = loard_tables()
if self.client == 'excel' and config_file_parser.config_file_exist(
client_type=self.
client) and self.cube in config_file_parser.get_cubes_names(
client_type=self.client):
# for web (config file) we need only star_schema_dataframes, not all tables
for cubes in config_file_parser.construct_cubes():
# TODO working with cubes.source == 'csv'
if cubes.source == 'postgres':
tables = _load_table_config_file(self, cubes)
elif self.cube in self.csv_files_cubes:
tables = _load_tables_csv_files(self)
elif self.cube in self.postgres_db_cubes:
tables = _load_tables_db(self)
return tables return tables
def get_measures(self): def get_measures(self):
...@@ -215,32 +148,10 @@ class MdxEngine: ...@@ -215,32 +148,10 @@ class MdxEngine:
:param cube_name: cube name with which we want to generate a star schema model :param cube_name: cube name with which we want to generate a star schema model
:return: star schema DataFrame :return: star schema DataFrame
""" """
fusion = None # wendelin
config_file_parser = ConfigParser(self.cube_path) from ...mdx.wendelin_olapy.wendelin_integration import merge
if config_file_parser.config_file_exist( fusion = merge()
self. return fusion
client) and self.cube in config_file_parser.get_cubes_names(
client_type=self.client):
for cubes in config_file_parser.construct_cubes(self.client):
# TODO cubes.source == 'csv'
if cubes.source == 'postgres':
# TODO one config file (I will try to merge dimensions between them in web part)
if self.client == 'web':
fusion = _construct_web_star_schema_config_file(self,
cubes)
else:
fusion = _construct_star_schema_config_file(self,
cubes)
elif self.cube in self.csv_files_cubes:
fusion = _construct_star_schema_csv_files(self)
elif self.cube in self.postgres_db_cubes:
fusion = _construct_star_schema_db(self)
return fusion[[
col for col in fusion.columns if col.lower()[-3:] != '_id'
]]
def get_all_tables_names(self, ignore_fact=False): def get_all_tables_names(self, ignore_fact=False):
""" """
......
import pandas as pd
import numpy as np
def mock_big_array():
    """Return hard-coded sample arrays standing in for real cube data.

    :return: dict with a ``'Facts'`` array (id, measure) and a
        ``'dimension'`` array (id, two label columns)
    """
    # facts table: one (id, value) pair per row
    fact_rows = (
        (1, 1000),
        (2, 5222),
        (3, 20000),
        (4, 66666),
        (5, 888888),
        (6, 777777),
        (7, 333333),
        (8, 1111111),
    )
    # dimension table: one (id, label, label) triple per row
    dimension_rows = (
        (1, 'data1', 'data8'),
        (2, 'data4', 'data9'),
        (3, 'data3', 'data9'),
        (4, 'data4', 'data5'),
        (5, 'data3', 'data2'),
        (6, 'data4', 'data5'),
        (7, 'data1', 'data2'),
        (8, 'data4', 'data5'),
    )
    arrays = {}
    arrays['Facts'] = np.array(fact_rows)
    arrays['dimension'] = np.array(dimension_rows)
    return arrays
def to_dataframe(ndarray, dim_type='dimension'):
    """Convert a 2-D array whose first column is a row id into a DataFrame.

    Every row of ``ndarray`` becomes one DataFrame row. Column names are
    generated, not read from the array: the first column is named ``id`` and
    the remaining ones ``measure0``, ``measure1``, ... when ``dim_type`` is
    ``'facts'``, or ``column0``, ``column1``, ... for any other value.

    :param ndarray: two-dimensional numpy array; column 0 must be convertible to int
    :param dim_type: ``'facts'`` names value columns ``measureN``; anything else gives ``columnN``
    :return: DataFrame holding all rows of ``ndarray`` with the ``id`` column cast to int
    """
    prefix = 'measure' if dim_type == 'facts' else 'column'
    # Take the column count from the shape rather than from the first row, so
    # an array with zero rows still yields a correctly labelled (empty) frame.
    value_column_count = ndarray.shape[1] - 1
    # generated names: ['id', '<prefix>0', '<prefix>1', ...]
    columns = ['id'] + [prefix + str(idx) for idx in range(value_column_count)]
    df = pd.DataFrame(data=ndarray, columns=columns)
    # ids may arrive as strings (e.g. an array mixing ints and labels), so
    # normalise them to integers.
    df['id'] = df['id'].astype(int)
    return df
def loard_tables():
    """Build the facts and dimension DataFrames from the mocked arrays.

    The ``id`` column is stripped from both frames before they are returned.

    :return: dict with keys ``'Facts'`` and ``'Dim'`` mapping to DataFrames
    """
    source = mock_big_array()
    frames = {
        'Facts': to_dataframe(source['Facts'], dim_type='facts'),
        'Dim': to_dataframe(source['dimension']),
    }
    # the id column is only a join key, so it is not exposed in the tables
    return {name: frame.drop(columns='id') for name, frame in frames.items()}
def merge():
    """Join the mocked facts and dimension tables on ``id``.

    :return: merged DataFrame without the ``id`` column
    """
    # TODO: reuse the tables built by loard_tables() instead of re-mocking them here
    source = mock_big_array()
    facts = to_dataframe(source['Facts'], dim_type='facts')
    dim = to_dataframe(source['dimension'])
    joined = facts.merge(dim, on='id')
    # the join key is dropped once the tables are merged
    return joined.drop(columns='id')
# Then use loard_tables() and merge() directly in olapy's execute module:
# line 152: load_tables() -> call loard_tables() to get all dimensions and facts
#
# line 207: get_star_schema_dataframe() -> call merge() to get the merged dataframe
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment