Merge branch 'config-ids'

e276dcd5 · Bram Schoenmakers · 9d15ef05 · b769f4cd · e276dcd5 · e276dcd5
Commit e276dcd5 authored Jul 26, 2017 by Bram Schoenmakers
9 changed files
--- a/test/test_list_command.py
+++ b/test/test_list_command.py
--- a/test/test_list_format.py
+++ b/test/test_list_format.py
--- a/topydo.conf
+++ b/topydo.conf
@@ -7,6 +7,7 @@ default_command             = ls
 colors                      = auto
 ; identifiers can be 'linenumber' or 'text'
 identifiers                 = linenumber
+identifier_alphabet         = 0123456789abcdefghijklmnopqrstuvwxyz
 backup_count                = 5

 [add]

--- a/topydo/lib/Config.py
+++ b/topydo/lib/Config.py
@@ -20,6 +20,7 @@ import os
 import re
 import shlex

+from collections import OrderedDict
 from itertools import accumulate
 from string import ascii_lowercase

@@ -70,6 +71,7 @@ class _Config:
                'filename': 'todo.txt',
                'archive_filename': 'done.txt',
                'identifiers': 'linenumber',
+                'identifier_alphabet': '0123456789abcdefghijklmnopqrstuvwxyz',
                'backup_count': '5',
            },

@@ -474,6 +476,12 @@ class _Config:

        return shlex.split(result)

+    def identifier_alphabet(self):
+        """ Returns the configured ID alphabet as a list of unique characters. """
+        configured = self.cp.get('topydo', 'identifier_alphabet')
+
+        # drop repeated characters, keeping first-seen order for determinism
+        return list(OrderedDict.fromkeys(configured))

 def config(p_path=None, p_overrides=None):
    """

--- a/topydo/lib/HashListValues.py
+++ b/topydo/lib/HashListValues.py
@@ -21,48 +21,115 @@ value of each item.

 from hashlib import sha1

+from topydo.lib.Config import config
+
+_DEFAULT_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyz'
+
+# Two-dimensional lookup table: the first key is the length of the configured
+# alphabet, the second key is the width of the ID. Each value is a prime below
+# len(alphabet) ** width, so every hash value fits in an ID of that width.
 _TABLE_SIZES = {
-    # we choose a large table size to reduce the chance of collisions.
-    3: 46649,   # largest prime under zzz_36
-    4: 1679609  # largest prime under zzzz_36
+    10: {3: 997, 4: 9973, 5: 99991, 6: 999983},
+    11: {3: 1327, 4: 14639, 5: 161047, 6: 1771559},
+    12: {3: 1723, 4: 20731, 5: 248827, 6: 2985979},
+    13: {3: 2179, 4: 28559, 5: 371291, 6: 4826797},
+    14: {3: 2741, 4: 38393, 5: 537811, 6: 7529519},
+    15: {3: 3373, 4: 50599, 5: 759371, 6: 11390593},
+    16: {3: 4093, 4: 65521, 5: 1048573},
+    17: {3: 4909, 4: 83497, 5: 1419839},
+    18: {3: 5827, 4: 104971, 5: 1889561},
+    19: {3: 6857, 4: 130307, 5: 2476081},
+    20: {3: 7993, 4: 159979, 5: 3199997},
+    21: {3: 9257, 4: 194479, 5: 4084081},
+    22: {3: 10639, 4: 234239, 5: 5153623},
+    23: {3: 12163, 4: 279823, 5: 6436327},
+    24: {3: 13807, 4: 331769, 5: 7962607},
+    25: {3: 15619, 4: 390581, 5: 9765619},
+    26: {3: 17573, 4: 456959, 5: 11881357},
+    27: {3: 19681, 4: 531383, 5: 14348891},
+    28: {3: 21943, 4: 614639, 5: 17210353},
+    29: {3: 24379, 4: 707279, 5: 20511143},
+    30: {3: 26993, 4: 809993, 5: 24299981},
+    31: {3: 29789, 4: 923513, 5: 28629149},
+    32: {3: 32749, 4: 1048573},
+    33: {3: 35933, 4: 1185907},
+    34: {3: 39301, 4: 1336333},
+    35: {3: 42863, 4: 1500619},
+    36: {3: 46649, 4: 1679609},
+    37: {3: 50651, 4: 1874153},
+    38: {3: 54869, 4: 2085133},
+    39: {3: 59281, 4: 2313439},
+    40: {3: 63997, 4: 2559989},
+    41: {3: 68917, 4: 2825759},
+    42: {3: 74077, 4: 3111679},
+    43: {3: 79493, 4: 3418799},
+    44: {3: 85159, 4: 3748079},
+    45: {3: 91121, 4: 4100611},
+    46: {3: 97327, 4: 4477453},
+    47: {3: 103813, 4: 4879669},
+    48: {3: 110587, 4: 5308379},
+    49: {3: 117643, 4: 5764799},
+    50: {3: 124991, 4: 6249989},
 }


-def _to_base36(p_value):
-    """
-    Converts integer to base36 string.
+class _TableSizeException(Exception):
+    pass

-    Based on answer on
-    https://stackoverflow.com/questions/1181919/python-base-36-encoding
+def _get_table_size(p_alphabet, p_num):
    """
-    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
+    Returns a tuple (ID width, table size). The table size is a prime number
+    that depends on the alphabet used and the number of items that need to be
+    hashed. It is more than 100 times larger than the number of items to be
+    hashed, to reduce the chance of collisions.

-    base36 = ''
-    while p_value:
-        p_value, i = divmod(p_value, 36)
-        base36 = alphabet[i] + base36
-
-    return base36 or alphabet[0]
+    Raises _TableSizeException when no table fits: the alphabet is too small,
+    too large (10 to 50 characters are supported) or there are too many items.
+    """
+    try:
+        for width, size in sorted(_TABLE_SIZES[len(p_alphabet)].items()):
+            if p_num < size * 0.01:
+                return width, size
+    except KeyError:
+        pass

+    raise _TableSizeException('Could not find appropriate table size for given alphabet')

 def hash_list_values(p_list, p_key=lambda i: i):  # pragma: no branch
    """
    Calculates a unique value for each item in the list, these can be used as
    identifiers.

-    The value is based on hashing an item using the p_hash function.
+    The value is based on hashing an item using the p_key function.

    Suitable for lists not larger than approx. 16K items.

    Returns a tuple with the status and a list of tuples where each item is
    combined with the ID.
    """
+    def to_base(p_alphabet, p_value):
+        """
+        Converts integer to text ID with characters from the given alphabet.
+
+        Based on answer at
+        https://stackoverflow.com/questions/1181919/python-base-36-encoding
+        """
+        result = ''
+        while p_value:
+            p_value, i = divmod(p_value, len(p_alphabet))
+            result = p_alphabet[i] + result
+
+        return result or p_alphabet[0]
+
    result = []
    used = set()
+    alphabet = config().identifier_alphabet()

-    # choose a larger key size if there's >1% chance of collision
-    size = _TABLE_SIZES[3] \
-        if len(p_list) < _TABLE_SIZES[3] * 0.01 else _TABLE_SIZES[4]
+    try:
+        _, size = _get_table_size(alphabet, len(p_list))
+    except _TableSizeException:
+        alphabet = _DEFAULT_ALPHABET
+        _, size = _get_table_size(alphabet, len(p_list))

    for item in p_list:
        # obtain the to-be-hashed value
@@ -78,6 +145,19 @@ def hash_list_values(p_list, p_key=lambda i: i):  # pragma: no branch
            hash_value = (hash_value + 1) % size

        used.add(hash_value)
-        result.append((item, _to_base36(hash_value)))
+        result.append((item, to_base(alphabet, hash_value)))

    return result
+
+def max_id_length(p_num):
+    """
+    Returns the width of the IDs that are handed out when p_num items are
+    assigned an ID. Used for padding in lists.
+    """
+    alphabet = config().identifier_alphabet()
+
+    try:
+        return _get_table_size(alphabet, p_num)[0]
+    except _TableSizeException:
+        # no table for the configured alphabet: use the default alphabet
+        return _get_table_size(_DEFAULT_ALPHABET, p_num)[0]
--- a/topydo/lib/ListFormat.py
+++ b/topydo/lib/ListFormat.py
@@ -162,9 +162,9 @@ class ListFormatParser(object):
            # todo ID
            'i': lambda t: str(self.todolist.number(t)),

-            # todo ID pre-filled with 1 or 2 spaces if its length is <3
-            'I': lambda t: _filler(str(self.todolist.number(t)), 3),
-
+            # todo ID, padded with spaces
+            'I': lambda t: _filler(str(self.todolist.number(t)),
+                self.todolist.max_id_length()),

            # list of tags (spaces) without hidden ones and due: and t:
            'k': lambda t: ' '.join([u'{}:{}'.format(tag, value)
@@ -179,8 +179,9 @@ class ListFormatParser(object):
            # line number
            'n': lambda t: str(self.todolist.linenumber(t)),

-            # line number, pre-filled with 1 or 2 spaces if its length <3
-            'N': lambda t: _filler(str(self.todolist.linenumber(t)), 3),
+            # line number, padded with spaces
+            'N': lambda t: _filler(str(self.todolist.linenumber(t)),
+                self.todolist.max_id_length()),

            # priority
            'p': lambda t: t.priority() if t.priority() else '',
@@ -206,8 +207,9 @@ class ListFormatParser(object):
            # unique text ID
            'u': lambda t: self.todolist.uid(t),

-            # unique text ID, pre-filled with 1 or 2 spaces if its length <3
-            'U': lambda t: _filler(self.todolist.uid(t), 3),
+            # unique text ID, padded with spaces
+            'U': lambda t: _filler(self.todolist.uid(t),
+                self.todolist.max_id_length()),

            # absolute completion date
            'x': lambda t: 'x ' + t.completion_date().isoformat() if t.is_completed() else '',

--- a/topydo/lib/TodoListBase.py
+++ b/topydo/lib/TodoListBase.py
@@ -18,12 +18,13 @@
 A list of todo items.
 """

+import math
 import re
 from datetime import date

 from topydo.lib import Filter
 from topydo.lib.Config import config
-from topydo.lib.HashListValues import hash_list_values
+from topydo.lib.HashListValues import hash_list_values, max_id_length
 from topydo.lib.printers.PrettyPrinter import PrettyPrinter
 from topydo.lib.Todo import Todo
 from topydo.lib.View import View
@@ -275,6 +276,19 @@ class TodoListBase(object):
        else:
            return self.linenumber(p_todo)

+    def max_id_length(self):
+        """
+        Returns the maximum length of a todo ID, used for formatting purposes.
+        """
+        if config().identifiers() == "text":
+            return max_id_length(len(self._todos))
+
+        # Line numbers run from 1 up to the number of todos, so count the
+        # digits of that number; ceil(log10(n)) is one short for 1, 10, 100...
+        count = len(self._todos)
+        return len(str(count)) if count else 0
+
+
    def _update_todo_ids(self):
        # the idea is to have a hash that is independent of the position of the
        # todo. Use the text (without tags) of the todo to keep the id as

--- a/topydo/ui/columns/TodoListWidget.py
+++ b/topydo/ui/columns/TodoListWidget.py
@@ -16,6 +16,7 @@

 import urwid

+from topydo.lib.HashListValues import max_id_length
 from topydo.lib.Utils import translate_key_to_config
 from topydo.ui.columns.TodoWidget import TodoWidget

@@ -89,6 +90,7 @@ class TodoListWidget(urwid.LineBox):
        with this list.
        """
        old_focus_position = self.todolist.focus
+        id_length = max_id_length(self.view.todolist.count())

        del self.todolist[:]

@@ -99,7 +101,7 @@ class TodoListWidget(urwid.LineBox):
                self.todolist.append(urwid.Divider('-'))

            for todo in todos:
-                todowidget = TodoWidget.create(todo)
+                todowidget = TodoWidget.create(todo, id_length)
                todowidget.number = self.view.todolist.number(todo)
                self.todolist.append(todowidget)
                self.todolist.append(urwid.Divider('-'))

--- a/topydo/ui/columns/TodoWidget.py
+++ b/topydo/ui/columns/TodoWidget.py
@@ -53,7 +53,7 @@ def _markup(p_todo, p_focus):


 class TodoWidget(urwid.WidgetWrap):
-    def __init__(self, p_todo):
+    def __init__(self, p_todo, p_id_width=4):
        # clients use this to associate this widget with the given todo item
        self.todo = p_todo

@@ -101,7 +101,7 @@ class TodoWidget(urwid.WidgetWrap):
        self.columns = urwid.Columns(
            [
                (1, self.progress_bar),
-                (4, self.id_widget),
+                (p_id_width, self.id_widget),
                (3, priority_widget),
                ('weight', 1, self.text_widget),
            ],
@@ -159,7 +159,7 @@ class TodoWidget(urwid.WidgetWrap):
    cache = {}

    @classmethod
-    def create(p_class, p_todo):
+    def create(p_class, p_todo, p_id_width=4):
        """
        Creates a TodoWidget instance for the given todo. Widgets are
        cached, the same object is returned for the same todo item.
@@ -187,7 +187,7 @@ class TodoWidget(urwid.WidgetWrap):
            if parent_progress_may_have_changed(p_todo):
                widget.update_progress()
        else:
-            widget = p_class(p_todo)
+            widget = p_class(p_todo, p_id_width)
            p_class.cache[source] = widget

        return widget