Implement explicit (and currently hard-coded) left-joins on related keys,...

Implement explicit (and currently hard-coded) left-joins on related keys, along with explicit natural joins of simple external tables (like full_text)

git-svn-id: https://svn.erp5.org/repos/public/erp5/sandbox/catalog_join@41966 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent d882a46b
......@@ -27,11 +27,16 @@
#
##############################################################################
import re
from zLOG import LOG, WARNING, INFO
from interfaces.column_map import IColumnMap
from zope.interface.verify import verifyClass
from zope.interface import implements
from SQLCatalog import profiler_decorator
from Products.ZSQLCatalog.interfaces.column_map import IColumnMap
from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
from Products.ZSQLCatalog.TableDefinition import (PlaceHolderTableDefinition,
TableAlias,
InnerJoin)
DEFAULT_GROUP_ID = None
......@@ -43,12 +48,14 @@ MAPPING_TRACE = False
# currently, it's not possible because related_key_dict is indexed by related key name, which makes 'source_title_1' lookup fail. It should be indexed by group (probably).
# TODO: rename all "related_key" references into "virtual_column"
# Matches a trailing SQL alias: whitespace, "AS" (case-insensitive), one
# whitespace character, then one or more characters other than ")" up to the
# end of a line.
# Written as a raw string so that "\s" reaches the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
# NOTE(review): because of re.MULTILINE, "$" also matches before inner
# newlines, so an alias ending an inner line of a multi-line override is
# matched as well -- confirm this is intended.
re_sql_as = re.compile(r"\s+AS\s[^)]+$", re.IGNORECASE | re.MULTILINE)
class ColumnMap(object):
implements(IColumnMap)
@profiler_decorator
def __init__(self, catalog_table_name=None):
def __init__(self, catalog_table_name=None, table_override_map=None):
self.catalog_table_name = catalog_table_name
# Key: group
# Value: set of column names
......@@ -85,6 +92,10 @@ class ColumnMap(object):
self.straight_join_table_list = []
self.left_join_table_list = []
self.join_query_list = []
self.table_override_map = table_override_map or {}
self.table_definition = PlaceHolderTableDefinition()
# We need to keep track of the original definition to do inner joins on it
self._inner_table_definition = self.table_definition
@profiler_decorator
def registerColumn(self, raw_column, group=DEFAULT_GROUP_ID, simple_query=None):
......@@ -397,6 +408,8 @@ class ColumnMap(object):
table_alias_number_dict[alias_table_name] = table_alias_number
self.resolveTable(table_name, alias, group=group)
# now that we have all aliases, calculate inner joins
self._calculateInnerJoins()
if MAPPING_TRACE:
# Key: group
# Value: 2-tuple
......@@ -481,7 +494,7 @@ class ColumnMap(object):
@profiler_decorator
def _addJoinTable(self, table_name, group=DEFAULT_GROUP_ID):
"""
Declare given table as requiring to be joined with catalog table.
Declare given table as requiring to be joined with catalog table on uid.
table_name (string)
Table name.
......@@ -504,10 +517,109 @@ class ColumnMap(object):
for (group, table_name) in self.join_table_set]
def getStraightJoinTableList(self):
  """Return a shallow copy of self.straight_join_table_list."""
  # XXX this function is unused and should be removed
  return list(self.straight_join_table_list)
def getLeftJoinTableList(self):
  """Return a shallow copy of self.left_join_table_list."""
  # XXX this function is unused and should be removed
  return list(self.left_join_table_list)
def _getTableOverride(self, table_name):
# self.table_override_map is a dictionary mapping table names to
# strings containing aliases of arbitrary table definitions
# (including subselects). So we split the alias and discard it
# since we do our own aliasing.
table_override_w_alias = self.table_override_map.get(table_name)
if table_override_w_alias is None:
return table_name
# XXX move the table aliasing cleanup to EntireQuery class, so we
# don't need SQL syntax knowledge in ColumnMap. Normalise the AS
# sql keyword to remove the last aliasing in the string if present. E.g.:
#
# '(SELECT sub_catalog.*
# FROM catalog AS sub_catalog
# WHERE sub_catalog.parent_uid=183) AS catalog'
#
# becomes:
#
# '(SELECT sub_catalog.*
# FROM catalog AS sub_catalog
# WHERE sub_catalog.parent_uid=183)'
table_override, removed = re_sql_as.subn('', table_override_w_alias)
assert removed < 2, ('More than one table aliasing was removed from %r' %
table_override_w_alias)
if removed:
LOG('ColumnMap', WARNING,
'Table overrides should not contain aliasing: %r' % table_override)
return table_override
def makeTableAliasDefinition(self, table_name, table_alias):
  """Make a table alias, giving a chance to ColumnMap to override
  the original table definition with another expression.

  table_name (string)
    Table name, passed through self._getTableOverride before use.
  table_alias (string)
    Alias to give to the (possibly overridden) table expression.

  Returns a TableAlias. Raises AssertionError when either the resolved
  table expression or the alias is false.
  """
  effective_table = self._getTableOverride(table_name)
  assert effective_table and table_alias, (
    "table_name (%r) and table_alias (%r) "
    "must both be defined" % (effective_table, table_alias))
  return TableAlias(effective_table, table_alias)
def _setMinimalTableDefinition(self):
""" Set a minimal table definition: the main catalog alias
We don't do this at __init__ because we have neither the catalog
table name nor its intended alias at that point.
"""
inner_def = self._inner_table_definition
if inner_def.table_definition is None:
try:
catalog_table_alias = self.getCatalogTableAlias()
except KeyError:
return False
inner_def.replace(self.makeTableAliasDefinition(self.catalog_table_name,
catalog_table_alias))
return True
def getTableDefinition(self):
  """Return self.table_definition, or None when the minimal table
  definition could not be set."""
  if not self._setMinimalTableDefinition():
    return None
  return self.table_definition
def addJoin(self, join_definition, condition):
  """Replace the current table_definition with a new one, assuming
  it is a join definition, and replacing its left side with the
  previous table definition.

  Effectively, this method wraps the current table definition within
  the received join_definition.

  join_definition
    Join definition whose left_tabledef must still be None.
  condition
    Not used by this method; kept so existing callers keep working.

  Raises AssertionError when the minimal table definition cannot be set
  or when join_definition already has a left side.
  """
  # Call outside of the assert statement: the call has a side effect
  # which must still happen when assertions are disabled.
  minimal_definition_set = self._setMinimalTableDefinition()
  assert minimal_definition_set
  assert join_definition.left_tabledef is None, join_definition.left_tabledef
  join_definition.left_tabledef = self.table_definition
  self.table_definition = join_definition
# def getFinalTableDefinition(self):
# self._calculateInnerJoins()
# return self.getTableDefinition()
def _calculateInnerJoins(self):
  """Wrap the inner table definition in one InnerJoin per entry of
  self.join_table_set, each joined on uid with the catalog table alias.
  """
  self._setMinimalTableDefinition()
  catalog_alias = self.getCatalogTableAlias()
  # XXX ColumnMap shouldn't have SQL knowledge
  uid_join_pattern = '`%s`.`uid` = `%s`.`uid`'
  for group, joined_table in self.join_table_set:
    joined_alias = self.getTableAlias(joined_table, group=group)
    joined_definition = self.makeTableAliasDefinition(joined_table,
                                                      joined_alias)
    # XXX: perhaps refactor some of the code below to do:
    # self._inner_table_definition.addInnerJoin(TableAlias(...),
    #                                           condition=(...))
    placeholder = self._inner_table_definition
    wrapped = InnerJoin(placeholder.table_definition,
                        joined_definition,
                        condition=uid_join_pattern % (joined_alias,
                                                      catalog_alias))
    placeholder.replace(wrapped)
verifyClass(IColumnMap, ColumnMap)
......@@ -78,7 +78,9 @@ class EntireQuery(object):
# XXX: should we provide a way to register column map as a separate
# method or do it here ?
# Column Map was not built yet, do it.
self.column_map = column_map = ColumnMap(catalog_table_name=self.catalog_table_name)
column_map = ColumnMap(catalog_table_name=self.catalog_table_name,
table_override_map=self.from_expression)
self.column_map = column_map
for extra_column in self.extra_column_list:
table, column = extra_column.replace('`', '').split('.')
if table != self.catalog_table_name:
......@@ -145,29 +147,34 @@ class EntireQuery(object):
None, ) * (3 - len(order_by)))
self.order_by_list = new_order_by_list
# generate SQLExpression from query
sql_expression_list = [self.query.asSQLExpression(sql_catalog, column_map, only_group_columns)]
sql_expression_list = [self.query.asSQLExpression(sql_catalog,
column_map,
only_group_columns)]
# generate join expression based on column_map.getJoinTableAliasList
append = sql_expression_list.append
for join_query in column_map.iterJoinQueryList():
append(join_query.asSQLExpression(sql_catalog, column_map, only_group_columns))
join_table_list = column_map.getJoinTableAliasList()
if len(join_table_list):
# XXX: Is there any special rule to observe when joining tables ?
# Maybe we could check which column is a primary key instead of
# hardcoding "uid".
where_pattern = '`%s`.`uid` = `%%s`.`uid`' % \
(column_map.getCatalogTableAlias(), )
# XXX: It would cleaner from completeness point of view to use column
# mapper to render column, but makes code much more complex to just do
# a simple text rendering. If there is any reason why we should have
# those column in the mapper, then we should use the clean way.
append(SQLExpression(self, where_expression=' AND '.join(
where_pattern % (x, ) for x in join_table_list
)))
#append = sql_expression_list.append
# for join_query in column_map.iterJoinQueryList():
# append(join_query.asSQLExpression(sql_catalog, column_map, only_group_columns))
# print "@@@ jql: %r, jtal: %r" % (column_map.join_query_list,
# column_map.getJoinTableAliasList())
# join_table_list = column_map.getJoinTableAliasList()
# if len(join_table_list):
# # XXX: Is there any special rule to observe when joining tables ?
# # Maybe we could check which column is a primary key instead of
# # hardcoding "uid".
# where_pattern = '`%s`.`uid` = `%%s`.`uid`' % \
# (column_map.getCatalogTableAlias(), )
# # XXX: It would cleaner from completeness point of view to use column
# # mapper to render column, but makes code much more complex to just do
# # a simple text rendering. If there is any reason why we should have
# # those column in the mapper, then we should use the clean way.
# append(SQLExpression(self, where_expression=' AND '.join(
# where_pattern % (x, ) for x in join_table_list
# )))
self.from_expression = column_map.getTableDefinition()
self.sql_expression_list = sql_expression_list
return SQLExpression(
self,
table_alias_dict=column_map.getTableAliasDict(),
table_alias_dict=None, # column_map.getTableAliasDict(),
from_expression=self.from_expression,
order_by_list=self.order_by_list,
group_by_list=self.group_by_list,
......
......@@ -166,7 +166,7 @@ class SQLExpression(object):
@profiler_decorator
def getFromExpression(self):
"""
Returns a string.
Returns a TableDefinition stored in one of the from_expressions or None
If there are nested SQLExpression, it checks that they either don't
define any from_expression or the exact same from_expression. Otherwise,
......@@ -175,7 +175,7 @@ class SQLExpression(object):
result = self.from_expression
for sql_expression in self.sql_expression_list:
from_expression = sql_expression.getFromExpression()
if None not in (result, from_expression):
if from_expression not in (result, None):
message = 'I don\'t know how to merge from_expressions'
if DEBUG:
message = message + '. I was created by %r, and I am working on %r (%r) out of [%s]' % (
......@@ -184,6 +184,8 @@ class SQLExpression(object):
sql_expression.query,
', '.join('%r (%r)' % (x, x.query) for x in self.sql_expression_list))
raise ValueError, message
if result is not None:
result.checkTableAliases()
return result
@profiler_decorator
......@@ -385,20 +387,31 @@ class SQLExpression(object):
SQL_SELECT_ALIAS_FORMAT % (column, alias)
for alias, column in self.getSelectDict().iteritems())
@profiler_decorator
def asSQLExpressionDict(self):
def getFromTableList(self):
table_alias_dict = self.getTableAliasDict()
if not table_alias_dict:
return None
from_table_list = []
append = from_table_list.append
for alias, table in table_alias_dict.iteritems():
append((SQL_TABLE_FORMAT % (alias, ), SQL_TABLE_FORMAT % (table, )))
from_expression_dict = self.getFromExpression()
if from_expression_dict is not None:
from_expression = SQL_LIST_SEPARATOR.join(
from_expression_dict.get(table, '`%s` AS `%s`' % (table, alias))
for alias, table in table_alias_dict.iteritems())
else:
from_expression = None
return from_table_list
@profiler_decorator
def asSQLExpressionDict(self):
from_expression = self.getFromExpression()
from_table_list = self.getFromTableList()
assert None in (from_expression,
from_table_list), ("Cannot return both a from_expression "
"and a from_table_list")
if from_expression is not None:
from_expression = from_expression.render()
# from_expression_dict = from_expression
# from_expression = SQL_LIST_SEPARATOR.join(
# from_expression_dict.get(table, '`%s` AS `%s`' % (table, alias))
# for alias, table in table_alias_dict.iteritems())
# else:
# from_expression = None
return {
'where_expression': self.getWhereExpression(),
'order_by_expression': self.getOrderByExpression(),
......
......@@ -37,12 +37,15 @@ from Products.ZSQLCatalog.interfaces.search_key import IRelatedKey
from zope.interface.verify import verifyClass
from zope.interface import implements
from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
from Products.ZSQLCatalog.TableDefinition import TableAlias, InnerJoin, LeftJoin
BACKWARD_COMPATIBILITY = True
RELATED_QUERY_SEPARATOR = "\nAND -- related query separator\n"
class RelatedKey(SearchKey):
"""
This SearchKey handles searched on virtual columns of RelatedKey type.
This SearchKey handles searches on virtual columns of RelatedKey type.
It generates joins required by the virtual column to reach the actual
column to compare, plus a regular query on that column if needed.
"""
......@@ -117,9 +120,9 @@ class RelatedKey(SearchKey):
group = column_map.registerRelatedKey(related_column, self.real_column)
# Each table except last one must be registered to their own group, so that
# the same table can be used multiple time (and aliased multiple times)
# in the same related key. The last one must be register to related key
# in the same related key. The last one must be register to the related key
# "main" group (ie, the value of the "group" variable) to be the same as
# the ta ble used in join_condition.
# the table used in join_condition.
if table_alias_list is not None:
assert len(self.table_list) == len(table_alias_list)
for table_position in xrange(len(self.table_list) - 1):
......@@ -145,6 +148,23 @@ class RelatedKey(SearchKey):
column_map.registerCatalog()
return group
def stitchJoinDefinition(self, table_alias_list, join_query_list, column_map):
  """Build the table definition joining all tables of table_alias_list.

  table_alias_list (list of (alias, table) 2-tuples)
  join_query_list (list)
    Join conditions; expected to hold one entry less than
    table_alias_list.
  column_map
    Used to build each table alias definition.

  Returns the single table alias definition when there is no condition,
  otherwise a chain of nested InnerJoins.
  """
  last_alias, last_table = table_alias_list[-1]
  last_definition = column_map.makeTableAliasDefinition(last_table,
                                                        last_alias)
  if not join_query_list:
    # nothing to do, just return the table alias
    assert len(table_alias_list) == 1
    return last_definition
  # create an InnerJoin of the last element of the alias list with
  # a chain of InnerJoins of the rest of the list conditioned on
  # the last element of the join_query_list
  inner_chain = self.stitchJoinDefinition(table_alias_list[:-1],
                                          join_query_list[:-1],
                                          column_map)
  return InnerJoin(inner_chain, last_definition, join_query_list[-1])
@profiler_decorator
def buildSQLExpression(self, sql_catalog, column_map, only_group_columns, group):
"""
......@@ -170,18 +190,39 @@ class RelatedKey(SearchKey):
table_alias_list = [(getTableAlias(related_table, group=getRelatedKeyGroup(index, group)), related_table)
for (index, related_table) in enumerate(related_table_list)]
# table alias for destination table
table_alias_list.append((getTableAlias(destination_table, group=group), destination_table))
table_alias_list.append((getTableAlias(destination_table, group=group),
destination_table))
# map aliases to use in ZSQLMethod.
table_alias_dict = dict(('table_%s' % (index, ), table_alias[0])
for (index, table_alias) in enumerate(table_alias_list))
table_alias_dict = dict(('table_%s' % (index, ), table_alias)
for (index, (table_alias, table_name))
in enumerate(table_alias_list))
assert len(table_alias_list) == len(table_alias_dict)
query_table=column_map.getCatalogTableAlias()
rendered_related_key = related_key(
query_table=column_map.getCatalogTableAlias(),
query_table=query_table,
RELATED_QUERY_SEPARATOR=RELATED_QUERY_SEPARATOR,
src__=1,
**table_alias_dict)
join_condition_list = rendered_related_key.split(RELATED_QUERY_SEPARATOR)
assert len(join_condition_list) == len(table_alias_list), """
A related key must return the same number of querying conditions as the
tables it relates
""".strip()
# add a left join on this related key, based on the inner-join of the
# related key tables.
query_table_join_condition = join_condition_list.pop()
right = self.stitchJoinDefinition(table_alias_list,
join_condition_list,
column_map)
table_def = LeftJoin(None,
right,
condition=query_table_join_condition)
column_map.addJoin(table_def, condition=query_table_join_condition)
return None # XXX decide what to do with the comment below
# Important:
# Former catalog separated join condition from related query.
# Example:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment