Commit e276dcd5 authored by Bram Schoenmakers

Merge branch 'config-ids'

parents 9d15ef05 b769f4cd
This diff is collapsed.
This diff is collapsed.
...@@ -7,6 +7,7 @@ default_command = ls ...@@ -7,6 +7,7 @@ default_command = ls
colors = auto colors = auto
; identifiers can be 'linenumber' or 'text' ; identifiers can be 'linenumber' or 'text'
identifiers = linenumber identifiers = linenumber
identifier_alphabet = 0123456789abcdefghijklmnopqrstuvwxyz
backup_count = 5 backup_count = 5
[add] [add]
......
...@@ -20,6 +20,7 @@ import os ...@@ -20,6 +20,7 @@ import os
import re import re
import shlex import shlex
from collections import OrderedDict
from itertools import accumulate from itertools import accumulate
from string import ascii_lowercase from string import ascii_lowercase
...@@ -70,6 +71,7 @@ class _Config: ...@@ -70,6 +71,7 @@ class _Config:
'filename': 'todo.txt', 'filename': 'todo.txt',
'archive_filename': 'done.txt', 'archive_filename': 'done.txt',
'identifiers': 'linenumber', 'identifiers': 'linenumber',
'identifier_alphabet': '0123456789abcdefghijklmnopqrstuvwxyz',
'backup_count': '5', 'backup_count': '5',
}, },
...@@ -474,6 +476,12 @@ class _Config: ...@@ -474,6 +476,12 @@ class _Config:
return shlex.split(result) return shlex.split(result)
def identifier_alphabet(self):
    """
    Returns the characters that may be used in text based todo IDs.

    The value of the 'identifier_alphabet' option in the 'topydo' section is
    read and returned as a list of single characters. Characters that occur
    more than once are only kept at the position of their first occurrence.
    """
    alphabet = self.cp.get('topydo', 'identifier_alphabet')

    # OrderedDict.fromkeys() deduplicates the characters while preserving
    # their order, which keeps the resulting alphabet deterministic.
    return list(OrderedDict.fromkeys(alphabet))
def config(p_path=None, p_overrides=None): def config(p_path=None, p_overrides=None):
""" """
......
...@@ -21,48 +21,115 @@ value of each item. ...@@ -21,48 +21,115 @@ value of each item.
from hashlib import sha1 from hashlib import sha1
from topydo.lib.Config import config
_DEFAULT_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyz'

# A two-dimensional lookup table: the first dimension is the length of the
# configured alphabet, the second dimension is the width of the ID.
# The values are prime numbers that are used as the size of the hash table.
#
# Invariant: every size must be smaller than (alphabet length) ** width,
# otherwise a hash value may need more characters than the width that is
# reported for it. The entries for 14/5 and 22/4 used to violate this
# (573811 > 14 ** 5 and 245239 > 22 ** 4) and have been corrected.
_TABLE_SIZES = {
    10: {3: 997, 4: 9973, 5: 99991, 6: 999983},
    11: {3: 1327, 4: 14639, 5: 161047, 6: 1771559},
    12: {3: 1723, 4: 20731, 5: 248827, 6: 2985979},
    13: {3: 2179, 4: 28559, 5: 371291, 6: 4826797},
    14: {3: 2741, 4: 38393, 5: 537811, 6: 7529519},
    15: {3: 3373, 4: 50599, 5: 759371, 6: 11390593},
    16: {3: 4093, 4: 65521, 5: 1048573},
    17: {3: 4909, 4: 83497, 5: 1419839},
    18: {3: 5827, 4: 104971, 5: 1889561},
    19: {3: 6857, 4: 130307, 5: 2476081},
    20: {3: 7993, 4: 159979, 5: 3199997},
    21: {3: 9257, 4: 194479, 5: 4084081},
    22: {3: 10639, 4: 234239, 5: 5153623},
    23: {3: 12163, 4: 279823, 5: 6436327},
    24: {3: 13807, 4: 331769, 5: 7962607},
    25: {3: 15619, 4: 390581, 5: 9765619},
    26: {3: 17573, 4: 456959, 5: 11881357},
    27: {3: 19681, 4: 531383, 5: 14348891},
    28: {3: 21943, 4: 614639, 5: 17210353},
    29: {3: 24379, 4: 707279, 5: 20511143},
    30: {3: 26993, 4: 809993, 5: 24299981},
    31: {3: 29789, 4: 923513, 5: 28629149},
    32: {3: 32749, 4: 1048573},
    33: {3: 35933, 4: 1185907},
    34: {3: 39301, 4: 1336333},
    35: {3: 42863, 4: 1500619},
    36: {3: 46649, 4: 1679609},
    37: {3: 50651, 4: 1874153},
    38: {3: 54869, 4: 2085133},
    39: {3: 59281, 4: 2313439},
    40: {3: 63997, 4: 2559989},
    41: {3: 68917, 4: 2825759},
    42: {3: 74077, 4: 3111679},
    43: {3: 79493, 4: 3418799},
    44: {3: 85159, 4: 3748079},
    45: {3: 91121, 4: 4100611},
    46: {3: 97327, 4: 4477453},
    47: {3: 103813, 4: 4879669},
    48: {3: 110587, 4: 5308379},
    49: {3: 117643, 4: 5764799},
    50: {3: 124991, 4: 6249989},
}


class _TableSizeException(Exception):
    """
    Raised when no hash table size is available for the given alphabet
    length and number of items.
    """
    pass


def _get_table_size(p_alphabet, p_num):
    """
    Returns a tuple (width, size), where size is a prime number that is
    suitable as the hash table size and width is the number of characters an
    ID takes for that size.

    The size is dependent on the alphabet used, and the number of items that
    need to be hashed. The table size is more than 100 times larger than the
    number of items to be hashed, to avoid collisions. The smallest width
    that satisfies this is chosen.

    When the alphabet is too small or too large, or when there are too many
    items for the given alphabet, then _TableSizeException is raised.
    Currently an alphabet of 10 to 50 characters is supported.
    """
    # an unsupported alphabet length has no candidates at all
    candidates = _TABLE_SIZES.get(len(p_alphabet), {})

    for width, size in sorted(candidates.items()):
        if p_num < size * 0.01:
            return width, size

    raise _TableSizeException('Could not find appropriate table size for given alphabet')
def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch
""" """
Calculates a unique value for each item in the list, these can be used as Calculates a unique value for each item in the list, these can be used as
identifiers. identifiers.
The value is based on hashing an item using the p_hash function. The value is based on hashing an item using the p_key function.
Suitable for lists not larger than approx. 16K items. Suitable for lists not larger than approx. 16K items.
Returns a tuple with the status and a list of tuples where each item is Returns a tuple with the status and a list of tuples where each item is
combined with the ID. combined with the ID.
""" """
def to_base(p_alphabet, p_value):
    """
    Converts a non-negative integer to a text ID with characters from the
    given alphabet.

    The length of the alphabet is used as the base, the character at index
    i of the alphabet represents digit i. The value 0 results in the first
    character of the alphabet. A negative value raises a ValueError, since
    the conversion would never terminate for it.

    Based on answer at
    https://stackoverflow.com/questions/1181919/python-base-36-encoding
    """
    if p_value < 0:
        raise ValueError('Cannot convert a negative value to a text ID')

    base = len(p_alphabet)

    # digits are produced least significant first
    digits = []
    while p_value:
        p_value, i = divmod(p_value, base)
        digits.append(p_alphabet[i])

    return ''.join(reversed(digits)) or p_alphabet[0]
result = [] result = []
used = set() used = set()
alphabet = config().identifier_alphabet()
# choose a larger key size if there's >1% chance of collision try:
size = _TABLE_SIZES[3] \ _, size = _get_table_size(alphabet, len(p_list))
if len(p_list) < _TABLE_SIZES[3] * 0.01 else _TABLE_SIZES[4] except _TableSizeException:
alphabet = _DEFAULT_ALPHABET
_, size = _get_table_size(alphabet, len(p_list))
for item in p_list: for item in p_list:
# obtain the to-be-hashed value # obtain the to-be-hashed value
...@@ -78,6 +145,19 @@ def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch ...@@ -78,6 +145,19 @@ def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch
hash_value = (hash_value + 1) % size hash_value = (hash_value + 1) % size
used.add(hash_value) used.add(hash_value)
result.append((item, _to_base36(hash_value))) result.append((item, to_base(alphabet, hash_value)))
return result return result
def max_id_length(p_num):
    """
    Returns the number of characters a text ID takes when p_num items are
    assigned an ID. Used for padding in lists.

    The width is looked up for the configured identifier alphabet. When no
    table size is available for that alphabet, the width for the default
    alphabet is returned instead.
    """
    try:
        return _get_table_size(config().identifier_alphabet(), p_num)[0]
    except _TableSizeException:
        return _get_table_size(_DEFAULT_ALPHABET, p_num)[0]
...@@ -162,9 +162,9 @@ class ListFormatParser(object): ...@@ -162,9 +162,9 @@ class ListFormatParser(object):
# todo ID # todo ID
'i': lambda t: str(self.todolist.number(t)), 'i': lambda t: str(self.todolist.number(t)),
# todo ID pre-filled with 1 or 2 spaces if its length is <3 # todo ID, padded with spaces
'I': lambda t: _filler(str(self.todolist.number(t)), 3), 'I': lambda t: _filler(str(self.todolist.number(t)),
self.todolist.max_id_length()),
# list of tags (spaces) without hidden ones and due: and t: # list of tags (spaces) without hidden ones and due: and t:
'k': lambda t: ' '.join([u'{}:{}'.format(tag, value) 'k': lambda t: ' '.join([u'{}:{}'.format(tag, value)
...@@ -179,8 +179,9 @@ class ListFormatParser(object): ...@@ -179,8 +179,9 @@ class ListFormatParser(object):
# line number # line number
'n': lambda t: str(self.todolist.linenumber(t)), 'n': lambda t: str(self.todolist.linenumber(t)),
# line number, pre-filled with 1 or 2 spaces if its length <3 # line number, padded with spaces
'N': lambda t: _filler(str(self.todolist.linenumber(t)), 3), 'N': lambda t: _filler(str(self.todolist.linenumber(t)),
self.todolist.max_id_length()),
# priority # priority
'p': lambda t: t.priority() if t.priority() else '', 'p': lambda t: t.priority() if t.priority() else '',
...@@ -206,8 +207,9 @@ class ListFormatParser(object): ...@@ -206,8 +207,9 @@ class ListFormatParser(object):
# unique text ID # unique text ID
'u': lambda t: self.todolist.uid(t), 'u': lambda t: self.todolist.uid(t),
# unique text ID, pre-filled with 1 or 2 spaces if its length <3 # unique text ID, padded with spaces
'U': lambda t: _filler(self.todolist.uid(t), 3), 'U': lambda t: _filler(self.todolist.uid(t),
self.todolist.max_id_length()),
# absolute completion date # absolute completion date
'x': lambda t: 'x ' + t.completion_date().isoformat() if t.is_completed() else '', 'x': lambda t: 'x ' + t.completion_date().isoformat() if t.is_completed() else '',
......
...@@ -18,12 +18,13 @@ ...@@ -18,12 +18,13 @@
A list of todo items. A list of todo items.
""" """
import math
import re import re
from datetime import date from datetime import date
from topydo.lib import Filter from topydo.lib import Filter
from topydo.lib.Config import config from topydo.lib.Config import config
from topydo.lib.HashListValues import hash_list_values from topydo.lib.HashListValues import hash_list_values, max_id_length
from topydo.lib.printers.PrettyPrinter import PrettyPrinter from topydo.lib.printers.PrettyPrinter import PrettyPrinter
from topydo.lib.Todo import Todo from topydo.lib.Todo import Todo
from topydo.lib.View import View from topydo.lib.View import View
...@@ -275,6 +276,19 @@ class TodoListBase(object): ...@@ -275,6 +276,19 @@ class TodoListBase(object):
else: else:
return self.linenumber(p_todo) return self.linenumber(p_todo)
def max_id_length(self):
    """
    Returns the maximum length of a todo ID, used for formatting purposes.

    For text identifiers the length follows from the number of todos and
    the configured alphabet. Otherwise it is the number of decimal digits of
    the number of todos, or 0 when there are no todos.
    """
    if config().identifiers() == "text":
        return max_id_length(len(self._todos))

    # Count the digits of the highest number directly. ceil(log10(n)) is one
    # too small for 1, 10, 100, ... and is subject to floating point errors
    # (math.log(1000, 10) is slightly below 3).
    # NOTE(review): assumes the highest line number equals the number of
    # todos -- confirm against linenumber().
    count = len(self._todos)
    return len(str(count)) if count else 0
def _update_todo_ids(self): def _update_todo_ids(self):
# the idea is to have a hash that is independent of the position of the # the idea is to have a hash that is independent of the position of the
# todo. Use the text (without tags) of the todo to keep the id as # todo. Use the text (without tags) of the todo to keep the id as
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
import urwid import urwid
from topydo.lib.HashListValues import max_id_length
from topydo.lib.Utils import translate_key_to_config from topydo.lib.Utils import translate_key_to_config
from topydo.ui.columns.TodoWidget import TodoWidget from topydo.ui.columns.TodoWidget import TodoWidget
...@@ -89,6 +90,7 @@ class TodoListWidget(urwid.LineBox): ...@@ -89,6 +90,7 @@ class TodoListWidget(urwid.LineBox):
with this list. with this list.
""" """
old_focus_position = self.todolist.focus old_focus_position = self.todolist.focus
id_length = max_id_length(self.view.todolist.count())
del self.todolist[:] del self.todolist[:]
...@@ -99,7 +101,7 @@ class TodoListWidget(urwid.LineBox): ...@@ -99,7 +101,7 @@ class TodoListWidget(urwid.LineBox):
self.todolist.append(urwid.Divider('-')) self.todolist.append(urwid.Divider('-'))
for todo in todos: for todo in todos:
todowidget = TodoWidget.create(todo) todowidget = TodoWidget.create(todo, id_length)
todowidget.number = self.view.todolist.number(todo) todowidget.number = self.view.todolist.number(todo)
self.todolist.append(todowidget) self.todolist.append(todowidget)
self.todolist.append(urwid.Divider('-')) self.todolist.append(urwid.Divider('-'))
......
...@@ -53,7 +53,7 @@ def _markup(p_todo, p_focus): ...@@ -53,7 +53,7 @@ def _markup(p_todo, p_focus):
class TodoWidget(urwid.WidgetWrap): class TodoWidget(urwid.WidgetWrap):
def __init__(self, p_todo): def __init__(self, p_todo, p_id_width=4):
# clients use this to associate this widget with the given todo item # clients use this to associate this widget with the given todo item
self.todo = p_todo self.todo = p_todo
...@@ -101,7 +101,7 @@ class TodoWidget(urwid.WidgetWrap): ...@@ -101,7 +101,7 @@ class TodoWidget(urwid.WidgetWrap):
self.columns = urwid.Columns( self.columns = urwid.Columns(
[ [
(1, self.progress_bar), (1, self.progress_bar),
(4, self.id_widget), (p_id_width, self.id_widget),
(3, priority_widget), (3, priority_widget),
('weight', 1, self.text_widget), ('weight', 1, self.text_widget),
], ],
...@@ -159,7 +159,7 @@ class TodoWidget(urwid.WidgetWrap): ...@@ -159,7 +159,7 @@ class TodoWidget(urwid.WidgetWrap):
cache = {} cache = {}
@classmethod @classmethod
def create(p_class, p_todo): def create(p_class, p_todo, p_id_width=4):
""" """
Creates a TodoWidget instance for the given todo. Widgets are Creates a TodoWidget instance for the given todo. Widgets are
cached, the same object is returned for the same todo item. cached, the same object is returned for the same todo item.
...@@ -187,7 +187,7 @@ class TodoWidget(urwid.WidgetWrap): ...@@ -187,7 +187,7 @@ class TodoWidget(urwid.WidgetWrap):
if parent_progress_may_have_changed(p_todo): if parent_progress_may_have_changed(p_todo):
widget.update_progress() widget.update_progress()
else: else:
widget = p_class(p_todo) widget = p_class(p_todo, p_id_width)
p_class.cache[source] = widget p_class.cache[source] = widget
return widget return widget
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment