Commit e276dcd5 authored by Bram Schoenmakers

Merge branch 'config-ids'

parents 9d15ef05 b769f4cd
This diff is collapsed.
This diff is collapsed.
......@@ -7,6 +7,7 @@ default_command = ls
colors = auto
; identifiers can be 'linenumber' or 'text'
identifiers = linenumber
identifier_alphabet = 0123456789abcdefghijklmnopqrstuvwxyz
backup_count = 5
[add]
......
......@@ -20,6 +20,7 @@ import os
import re
import shlex
from collections import OrderedDict
from itertools import accumulate
from string import ascii_lowercase
......@@ -70,6 +71,7 @@ class _Config:
'filename': 'todo.txt',
'archive_filename': 'done.txt',
'identifiers': 'linenumber',
'identifier_alphabet': '0123456789abcdefghijklmnopqrstuvwxyz',
'backup_count': '5',
},
......@@ -474,6 +476,12 @@ class _Config:
return shlex.split(result)
def identifier_alphabet(self):
    """
    Returns the characters that may be used for textual todo identifiers.

    The value is read from the 'identifier_alphabet' option in the 'topydo'
    section. A character that occurs more than once is reduced to its first
    occurrence, so the order written in the configuration is preserved and
    the result is deterministic.

    Returns a list of single-character strings.
    """
    alphabet = self.cp.get('topydo', 'identifier_alphabet')

    # Deduplicate the characters of the alphabet. OrderedDict.fromkeys()
    # keeps the first occurrence of each character in its original position,
    # without building an intermediate list of (character, None) pairs.
    return list(OrderedDict.fromkeys(alphabet))
def config(p_path=None, p_overrides=None):
"""
......
......@@ -21,48 +21,115 @@ value of each item.
from hashlib import sha1
from topydo.lib.Config import config
_DEFAULT_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyz'
# a two-dimensional lookup table, the first dimension is the length of the
# configured alphabet, the second dimension is the width of the ID
# The values are prime numbers that are used for populating the hash table.
_TABLE_SIZES = {
    # we choose a large table size to reduce the chance of collisions.
    #
    # Outer key: number of characters in the alphabet. Inner key: width of
    # the ID. Every size has to stay below len(alphabet) ** width, otherwise
    # a hash value can need more characters than the width promises.
    #
    # Superseded flat layout (36-character alphabet only), kept for reference:
    #   3: 46649,   # largest prime under zzz_36
    #   4: 1679609  # largest prime under zzzz_36
    10: {3: 997, 4: 9973, 5: 99991, 6: 999983},
    11: {3: 1327, 4: 14639, 5: 161047, 6: 1771559},
    12: {3: 1723, 4: 20731, 5: 248827, 6: 2985979},
    13: {3: 2179, 4: 28559, 5: 371291, 6: 4826797},
    # 14 ** 5 == 537824, so the five-wide table must stay below that.
    14: {3: 2741, 4: 38393, 5: 537811, 6: 7529519},
    15: {3: 3373, 4: 50599, 5: 759371, 6: 11390593},
    16: {3: 4093, 4: 65521, 5: 1048573},
    17: {3: 4909, 4: 83497, 5: 1419839},
    18: {3: 5827, 4: 104971, 5: 1889561},
    19: {3: 6857, 4: 130307, 5: 2476081},
    20: {3: 7993, 4: 159979, 5: 3199997},
    21: {3: 9257, 4: 194479, 5: 4084081},
    # 22 ** 4 == 234256, so the four-wide table must stay below that.
    22: {3: 10639, 4: 234239, 5: 5153623},
    23: {3: 12163, 4: 279823, 5: 6436327},
    24: {3: 13807, 4: 331769, 5: 7962607},
    25: {3: 15619, 4: 390581, 5: 9765619},
    26: {3: 17573, 4: 456959, 5: 11881357},
    27: {3: 19681, 4: 531383, 5: 14348891},
    28: {3: 21943, 4: 614639, 5: 17210353},
    29: {3: 24379, 4: 707279, 5: 20511143},
    30: {3: 26993, 4: 809993, 5: 24299981},
    31: {3: 29789, 4: 923513, 5: 28629149},
    32: {3: 32749, 4: 1048573},
    33: {3: 35933, 4: 1185907},
    34: {3: 39301, 4: 1336333},
    35: {3: 42863, 4: 1500619},
    36: {3: 46649, 4: 1679609},
    37: {3: 50651, 4: 1874153},
    38: {3: 54869, 4: 2085133},
    39: {3: 59281, 4: 2313439},
    40: {3: 63997, 4: 2559989},
    41: {3: 68917, 4: 2825759},
    42: {3: 74077, 4: 3111679},
    43: {3: 79493, 4: 3418799},
    44: {3: 85159, 4: 3748079},
    45: {3: 91121, 4: 4100611},
    46: {3: 97327, 4: 4477453},
    47: {3: 103813, 4: 4879669},
    48: {3: 110587, 4: 5308379},
    49: {3: 117643, 4: 5764799},
    50: {3: 124991, 4: 6249989},
}
def _to_base36(p_value):
    """
    Converts integer to base36 string.

    Based on answer on
    https://stackoverflow.com/questions/1181919/python-base-36-encoding
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

    base36 = ''
    while p_value:
        p_value, i = divmod(p_value, 36)
        base36 = alphabet[i] + base36

    return base36 or alphabet[0]


class _TableSizeException(Exception):
    """ Raised when no table size fits the alphabet and the item count. """


def _get_table_size(p_alphabet, p_num):
    """
    Returns a (width, size) tuple for the hash table: the width of the IDs
    and the prime number that is suitable as the table size. The size is
    dependent on the alphabet used, and the number of items that need to be
    hashed. The table size is more than 100 times larger than the number of
    items to be hashed, to avoid collisions. The smallest width that meets
    this requirement is chosen.

    When the alphabet is too small or too large, or when there are too many
    items for any width known for the alphabet, then _TableSizeException is
    raised. The supported alphabet lengths are the keys of _TABLE_SIZES.
    """
    # An alphabet length without an entry simply has no candidates, which
    # ends in the same exception as a list that is too long.
    candidates = _TABLE_SIZES.get(len(p_alphabet), {})

    for width, size in sorted(candidates.items()):
        # integer form of "p_num < 1% of size", avoids float rounding
        if p_num * 100 < size:
            return width, size

    raise _TableSizeException('Could not find appropriate table size for given alphabet')
def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch
"""
Calculates a unique value for each item in the list, these can be used as
identifiers.
The value is based on hashing an item using the p_hash function.
The value is based on hashing an item using the p_key function.
Suitable for lists not larger than approx. 16K items.
Returns a tuple with the status and a list of tuples where each item is
combined with the ID.
"""
def to_base(p_alphabet, p_value):
    """
    Converts integer to text ID with characters from the given alphabet.

    The length of the alphabet is the base of the conversion and the
    character at index 0 acts as the zero digit, which is also the result
    for the value 0.

    Raises ValueError for a negative value, and for a non-zero value when
    the alphabet has fewer than two characters: the conversion cannot
    terminate in those cases.

    Based on answer at
    https://stackoverflow.com/questions/1181919/python-base-36-encoding
    """
    base = len(p_alphabet)

    if p_value < 0:
        raise ValueError('Cannot convert a negative value to a text ID')

    if p_value and base < 2:
        raise ValueError('Alphabet needs at least two characters')

    # collect the digits least significant first, reverse them once at the
    # end instead of prepending to a string in every iteration
    digits = []
    while p_value:
        p_value, i = divmod(p_value, base)
        digits.append(p_alphabet[i])

    return ''.join(reversed(digits)) or p_alphabet[0]
result = []
used = set()
alphabet = config().identifier_alphabet()
# choose a larger key size if there's >1% chance of collision
size = _TABLE_SIZES[3] \
if len(p_list) < _TABLE_SIZES[3] * 0.01 else _TABLE_SIZES[4]
try:
_, size = _get_table_size(alphabet, len(p_list))
except _TableSizeException:
alphabet = _DEFAULT_ALPHABET
_, size = _get_table_size(alphabet, len(p_list))
for item in p_list:
# obtain the to-be-hashed value
......@@ -78,6 +145,19 @@ def hash_list_values(p_list, p_key=lambda i: i): # pragma: no branch
hash_value = (hash_value + 1) % size
used.add(hash_value)
result.append((item, _to_base36(hash_value)))
result.append((item, to_base(alphabet, hash_value)))
return result
def max_id_length(p_num):
    """
    Returns the width of the text IDs that are handed out for a list with
    p_num items. Used for padding in lists.

    The width is looked up for the configured identifier alphabet. When no
    table size is available for that alphabet, the width for the default
    alphabet is returned instead.
    """
    try:
        width = _get_table_size(config().identifier_alphabet(), p_num)[0]
    except _TableSizeException:
        # configured alphabet is not usable, fall back to the default one
        width = _get_table_size(_DEFAULT_ALPHABET, p_num)[0]

    return width
......@@ -162,9 +162,9 @@ class ListFormatParser(object):
# todo ID
'i': lambda t: str(self.todolist.number(t)),
# todo ID pre-filled with 1 or 2 spaces if its length is <3
'I': lambda t: _filler(str(self.todolist.number(t)), 3),
# todo ID, padded with spaces
'I': lambda t: _filler(str(self.todolist.number(t)),
self.todolist.max_id_length()),
# list of tags (spaces) without hidden ones and due: and t:
'k': lambda t: ' '.join([u'{}:{}'.format(tag, value)
......@@ -179,8 +179,9 @@ class ListFormatParser(object):
# line number
'n': lambda t: str(self.todolist.linenumber(t)),
# line number, pre-filled with 1 or 2 spaces if its length <3
'N': lambda t: _filler(str(self.todolist.linenumber(t)), 3),
# line number, padded with spaces
'N': lambda t: _filler(str(self.todolist.linenumber(t)),
self.todolist.max_id_length()),
# priority
'p': lambda t: t.priority() if t.priority() else '',
......@@ -206,8 +207,9 @@ class ListFormatParser(object):
# unique text ID
'u': lambda t: self.todolist.uid(t),
# unique text ID, pre-filled with 1 or 2 spaces if its length <3
'U': lambda t: _filler(self.todolist.uid(t), 3),
# unique text ID, padded with spaces
'U': lambda t: _filler(self.todolist.uid(t),
self.todolist.max_id_length()),
# absolute completion date
'x': lambda t: 'x ' + t.completion_date().isoformat() if t.is_completed() else '',
......
......@@ -18,12 +18,13 @@
A list of todo items.
"""
import math
import re
from datetime import date
from topydo.lib import Filter
from topydo.lib.Config import config
from topydo.lib.HashListValues import hash_list_values
from topydo.lib.HashListValues import hash_list_values, max_id_length
from topydo.lib.printers.PrettyPrinter import PrettyPrinter
from topydo.lib.Todo import Todo
from topydo.lib.View import View
......@@ -275,6 +276,19 @@ class TodoListBase(object):
else:
return self.linenumber(p_todo)
def max_id_length(self):
    """
    Returns the maximum length of a todo ID, used for formatting purposes.

    With text identifiers the length follows from the number of todos and
    the identifier alphabet. Otherwise it is the number of decimal digits
    in the number of todos, and 0 for an empty list.
    """
    count = len(self._todos)

    if config().identifiers() == "text":
        return max_id_length(count)

    # Count the digits directly. ceil(log10(count)) is one short for exact
    # powers of ten (10 -> 1, 100 -> 2) and is sensitive to float rounding.
    # NOTE(review): assumes the highest line number equals the number of
    # todos, like the logarithm based calculation did -- confirm.
    return len(str(count)) if count else 0
def _update_todo_ids(self):
# the idea is to have a hash that is independent of the position of the
# todo. Use the text (without tags) of the todo to keep the id as
......
......@@ -16,6 +16,7 @@
import urwid
from topydo.lib.HashListValues import max_id_length
from topydo.lib.Utils import translate_key_to_config
from topydo.ui.columns.TodoWidget import TodoWidget
......@@ -89,6 +90,7 @@ class TodoListWidget(urwid.LineBox):
with this list.
"""
old_focus_position = self.todolist.focus
id_length = max_id_length(self.view.todolist.count())
del self.todolist[:]
......@@ -99,7 +101,7 @@ class TodoListWidget(urwid.LineBox):
self.todolist.append(urwid.Divider('-'))
for todo in todos:
todowidget = TodoWidget.create(todo)
todowidget = TodoWidget.create(todo, id_length)
todowidget.number = self.view.todolist.number(todo)
self.todolist.append(todowidget)
self.todolist.append(urwid.Divider('-'))
......
......@@ -53,7 +53,7 @@ def _markup(p_todo, p_focus):
class TodoWidget(urwid.WidgetWrap):
def __init__(self, p_todo):
def __init__(self, p_todo, p_id_width=4):
# clients use this to associate this widget with the given todo item
self.todo = p_todo
......@@ -101,7 +101,7 @@ class TodoWidget(urwid.WidgetWrap):
self.columns = urwid.Columns(
[
(1, self.progress_bar),
(4, self.id_widget),
(p_id_width, self.id_widget),
(3, priority_widget),
('weight', 1, self.text_widget),
],
......@@ -159,7 +159,7 @@ class TodoWidget(urwid.WidgetWrap):
cache = {}
@classmethod
def create(p_class, p_todo):
def create(p_class, p_todo, p_id_width=4):
"""
Creates a TodoWidget instance for the given todo. Widgets are
cached, the same object is returned for the same todo item.
......@@ -187,7 +187,7 @@ class TodoWidget(urwid.WidgetWrap):
if parent_progress_may_have_changed(p_todo):
widget.update_progress()
else:
widget = p_class(p_todo)
widget = p_class(p_todo, p_id_width)
p_class.cache[source] = widget
return widget
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment