diff --git a/.gitignore b/.gitignore index 07996c07c8a935e80d3b2f1d14cbdfd7f02864d0..8b88dbb58be5aec5b83361484153dee3def8cfb3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +**/.hypothesis/ /.mypy_cache/ /build/ /dist/ diff --git a/README.md b/README.md index 98a6b6643779b5fadf10d347914bd54f75353bcf..0bc9c3756db13b0f0dcf908e238ac675867996fd 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,16 @@ actual code can then be found in the [_src/ilp_keyboard_layout_optimization_ sub We included [a bash script _pull_and_optimize.sh_ ](https://git.tu-berlin.de/blutub3d/ilp_keyboard_layout_optimization/-/blob/main/pull_and_optimize.sh) in our codebase to streamline a remote development workflow. We work on the code on a -computer, that is well equipped for that task. The committed and pushed code then +computer, that is well-equipped for that task. The committed and pushed code then gets processed on another machine, which uses this script, to update its code base -and run the parameters handed over. It is designed to have the Python script name -for execution with a Python interpreter and PySCIPOpt as the only parameter, e.g. +and run the parameters handed over. It is designed to be called without parameters +to execute the _optimize_ module of the [latest version released on Test.PyPI.org +](https://test.pypi.org/project/ilp-keyboard-layout-optimization/). ```shell -$ ./pull_and_optimize.sh ilp_optimize.py +$ ./pull_and_optimize.sh ``` The execution requires the Docker image of our repository [docker_pyscipopt -](https://github.com/BjoernLudwigPTB/docker_pyscipopt) to be built in advance but it +](https://github.com/BjoernLudwigPTB/docker_pyscipopt) to be built in advance, but it could be easily adapted for a local installation of the SCIP Optimization Suite. \ No newline at end of file diff --git a/dev-requirements.in b/dev-requirements.in index 4982ff8cc31d04a06d02cb2839b5c05d2fbcbaa6..dcf4dd60d3b060e766f7c043a304c95b8e293aee 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -1,8 +1,9 @@ -c requirements.txt +black build -twine +hypothesis mypy -black pylint pytest +twine diff --git a/dev-requirements.txt b/dev-requirements.txt index 4f402bbe76d991b295ffa2926ed4681f02262348..6f203abb14dbd812f2d236c46142f2ce2d719d0c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -7,8 +7,10 @@ astroid==2.9.3 # via pylint attrs==21.4.0 - # via pytest -black==21.12b0 + # via + # hypothesis + # pytest +black==22.1.0 # via -r dev-requirements.in bleach==4.1.0 # via readme-renderer @@ -18,7 +20,7 @@ certifi==2021.10.8 # via requests cffi==1.15.0 # via cryptography -charset-normalizer==2.0.10 +charset-normalizer==2.0.11 # via requests click==8.0.3 # via black @@ -28,9 +30,11 @@ cryptography==36.0.1 # via secretstorage docutils==0.18.1 # via readme-renderer +hypothesis==6.36.1 + # via -r dev-requirements.in idna==3.3 # via requests -importlib-metadata==4.10.1 +importlib-metadata==4.11.0 # via # keyring # twine @@ -65,7 +69,7 @@ pep517==0.12.0 # via build pkginfo==1.8.2 # via twine -platformdirs==2.4.1 +platformdirs==2.5.0 # via # black # pylint @@ -79,9 +83,9 @@ pygments==2.11.2 # via readme-renderer pylint==2.12.2 # via -r dev-requirements.in -pyparsing==3.0.6 +pyparsing==3.0.7 # via packaging -pytest==6.2.5 +pytest==7.0.0 # via -r dev-requirements.in readme-renderer==32.0 # via twine @@ -97,26 +101,27 @@ secretstorage==3.3.1 # via keyring six==1.16.0 # via bleach +sortedcontainers==2.4.0 + # via hypothesis toml==0.10.2 - # via - # pylint - # pytest -tomli==1.2.3 + # via pylint +tomli==2.0.1 # via # black # build # mypy # pep517 + # pytest tqdm==4.62.3 # via twine -twine==3.7.1 +twine==3.8.0 # via -r dev-requirements.in typing-extensions==4.0.1 - # via - # black - # mypy + # via mypy urllib3==1.26.8 - # via requests + # via + # requests + # twine webencodings==0.5.1 # via bleach wrapt==1.13.3 diff --git a/docker/Dockerfile b/docker/Dockerfile index 1f18f658fc05262b4f563dad4c7e9fa21e9f6f2c..b70efcd183d00ac2bdec0f82b75250426938a665 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,10 +1,19 @@ -FROM pyscipopt:4.0.0 +FROM pyscipopt:4.0.0 AS optimizer USER root RUN python -m pip install --upgrade \ pip \ - -i https://test.pypi.org/simple/ \ - ilp-keyboard-layout-optimization + ilp-keyboard-layout-optimization USER user + +FROM optimizer as tester + +USER root + +RUN python -m pip install --upgrade \ + ilp-keyboard-layout-optimization[test] + +USER user + diff --git a/pull_and_optimize.sh b/pull_and_optimize.sh index 2f718bb008819e9a3066e4b64d9206e0d0a7a5c5..fb2aafe1b6c85600c82d18e6603441a636fa5108 100755 --- a/pull_and_optimize.sh +++ b/pull_and_optimize.sh @@ -2,9 +2,19 @@ # This script was written to streamline a remote development workflow. We work on the # code on a computer, that is well equipped for that task. The committed and pushed # code then gets processed on another machine, which uses this script, to update its -# code base and run the parameters handed over. It is designed to have the script -# name for execution with a Python interpreter and PySCIPOpt as the only parameter, e.g. -# $ ./pull_and_optimize.sh ilp_optimize.py +# code base and run the parameters handed over. It is designed to be called without +# parameters to execute the optimize module of the latest version released on +# Test.PyPI.org. +# +# $ ./pull_and_optimize.sh +# +# Alternatively you could invoke any other command in the Python interpreter by +# appending any command, normally appended to 'python <YOUR_COMMAND>' to the script. +# i.e. +# +# +# $ ./pull_and_optimize.sh -m pytest +# # The execution requires the Docker image of our repository # https://github.com/BjoernLudwigPTB/docker_pyscipopt to be built in advance. SCRIPT_PATH="${BASH_SOURCE}" @@ -15,10 +25,9 @@ while [ -L "${SCRIPT_PATH}" ]; do done SCRIPT_PATH="$(readlink -f "${SCRIPT_PATH}")" SCRIPT_DIR="$(cd -P "$(dirname -- "${SCRIPT_PATH}")" >/dev/null 2>&1 && pwd)" -set -x -cd ${SCRIPT_DIR} -git pull -docker build -t ilp_keyboard_layout_optimization:latest docker/ +cd ${SCRIPT_DIR} && \ +git pull && \ +docker build --no-cache -t ilp_keyboard_layout_optimization:latest docker/ && \ docker run -it --rm ilp_keyboard_layout_optimization \ - -m ilp_keyboard_layout_optimization.optimize + ${1:--m ilp_keyboard_layout_optimization.optimize} diff --git a/setup.cfg b/setup.cfg index 482a06144f521a148b778b17cf63afe032d40474..34bc04a03a21dc5b94ec56075e89f30b47245f05 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = ilp_keyboard_layout_optimization -version = 0.0.2a7 +version = 0.0.2a9 description = The QAP variant of keyboard layout optimization, i.e. character to key assignments long_description = file: README.md @@ -41,3 +41,8 @@ python_requires = >=3.10 [options.packages.find] where = src + +[options.extras_require] +test = + hypothesis + pytest diff --git a/src/ilp_keyboard_layout_optimization/data_aquisition/__init__.py b/src/ilp_keyboard_layout_optimization/data_aquisition/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ilp_keyboard_layout_optimization/data_aquisition/chars.py b/src/ilp_keyboard_layout_optimization/data_aquisition/chars.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc435beb49630e9f53f4e0e18fd264049581dd7 --- /dev/null +++ b/src/ilp_keyboard_layout_optimization/data_aquisition/chars.py @@ -0,0 +1,78 @@ +"""This module contains the class providing a unified interface for character sets""" + +__all__ = ["Chars"] + +import string +from itertools import product +from typing import Optional, Tuple, Union + +from ..type_aliases import Bigram, CharTuple + + +class Chars: + """A unified interface to a collection of characters and corresponding bigrams + + Parameters + ---------- + chars : str or CharTupel, optional + A string of concatenated (special) characters or a CharTupel of single + characters, that are supposed to be considered. Defaults to the most common + letters, numbers and punctuation in German texts. + """ + + _chars: str + _monos: CharTuple + _bis: Tuple[Bigram] + + def __init__(self, chars: Optional[Union[str, CharTuple]] = None): + if chars is None: + self._chars = ( + string.ascii_lowercase + + string.ascii_uppercase + + string.digits + + string.punctuation + + "üöäÜÖÄß–…" + ) + else: + self.chars = chars + + @property + def chars(self) -> str: + return self._chars + + @chars.setter + def chars(self, chars: Union[str, CharTuple]): + if isinstance(chars, str): + self._chars = chars + else: # isinstance(chars, CharTuple): + self._chars = "".join(char for char in chars) + try: + del self._monos + except AttributeError: + pass + try: + del self._bis + except AttributeError: + pass + + @property + def monos(self) -> CharTuple: + try: + return self._monos + except AttributeError: + self._monos = self._str2char_tuple(self.chars) + return self._monos + + @staticmethod + def _str2char_tuple(char_str: str) -> CharTuple: + return tuple(char for char in char_str) + + @property + def bis(self): + try: + return self._bis + except AttributeError: + self._bis = tuple( + "".join(bigram_tuple) for bigram_tuple in product(self.chars, repeat=2) + ) + return self._bis diff --git a/src/ilp_keyboard_layout_optimization/ilp.py b/src/ilp_keyboard_layout_optimization/ilp.py index 033724a9e4d1d4201e14e3b74864a5a711159502..0c03cca4b72a685f055a6aefec1b528d1bb605e4 100644 --- a/src/ilp_keyboard_layout_optimization/ilp.py +++ b/src/ilp_keyboard_layout_optimization/ilp.py @@ -4,13 +4,13 @@ from typing import Iterable from pyscipopt import Model, quicksum -from .types import ( - CharKeyPair, - CharKeyQuadruple, - CharTuple, - KeyTuple, +from .data_aquisition.chars import Chars +from .type_aliases import ( + CharPosPair, + CharPosQuadruple, LinCosts, LinVars, + PosTuple, QuadCosts, QuadVars, ) @@ -20,122 +20,123 @@ class KeyboardOptimization: """Instances of this class represent instances of the keyboard layout QAP The IP variant of an optimization of character to key assignments can be modeled - as a so called quadratic assignment problem (QAP). The task is to assign a set of + as a so-called quadratic assignment problem (QAP). The task is to assign a set of characters to a set of keys on a keyboard. The way in which they should be arranged has to meet certain criteria such as for instance: characters that are - often typed after one another should not be assigned to keys, that are supposed + often typed after one another should not be assigned to positions, that are supposed to be pressed by the same finger. Parameters ---------- chars : CharTuple the (special) characters to be assign - keys: KeyTuple - the keys to which we want to assign the (special) characters + poss: PosTuple + the positions to which we want to assign the (special) characters """ - chars: CharTuple - keys: KeyTuple + chars: Chars + poss: PosTuple - def __init__(self, chars: CharTuple, keys: KeyTuple): - assert len(chars) == len(keys) + def __init__(self, chars: Chars, poss: PosTuple): + assert len(chars.monos) == len(poss) self.chars = chars - self.keys = keys - self.char_key_assigns: LinVars = {} - self.quad_char_key_assigns: QuadVars = {} - self.char_key_costs: LinCosts = {} - self.quad_char_key_costs: QuadCosts = {} + self.poss = poss + self.char_pos_assigns: LinVars = {} + self.quad_char_pos_assigns: QuadVars = {} + self.char_pos_costs: LinCosts = {} + self.quad_char_pos_costs: QuadCosts = {} self.model: Model = Model("Keyboard Layout Optimization") def set_up_model(self, char_key_costs: LinCosts, quad_char_key_costs: QuadCosts): """Set up all the variables and initialize the costs for the SCIP model""" for (char, key) in self.char_key_assigns_keys: - self.char_key_assigns[char, key] = self.model.addVar( + self.char_pos_assigns[char, key] = self.model.addVar( name=f"{key}={char}", vtype="B" ) for (char, char_2, key, key_2) in self.quad_char_key_assigns_keys: - self.quad_char_key_assigns[char, char_2, key, key_2] = self.model.addVar( + self.quad_char_pos_assigns[char, char_2, key, key_2] = self.model.addVar( name=f"{key}={char}_and_{key_2}={char_2}", vtype="C", lb=0, ub=1 ) - assert len(quad_char_key_costs) == len(self.quad_char_key_assigns) - assert len(char_key_costs) == len(self.char_key_assigns) - self.char_key_costs = char_key_costs - self.quad_char_key_costs = quad_char_key_costs + assert len(quad_char_key_costs) == len(self.quad_char_pos_assigns) + assert len(char_key_costs) == len(self.char_pos_assigns) + self.char_pos_costs = char_key_costs + self.quad_char_pos_costs = quad_char_key_costs constr = {} - for char in self.chars: + for char in self.chars.monos: self.model.addCons( - quicksum(self.char_key_assigns[char, key] for key in self.keys) == 1, + quicksum(self.char_pos_assigns[char, key] for key in self.poss) == 1, f"AllCharacterAssignedOnce({char})", ) for (key, key_2) in self.key_pairs: self.model.addCons( quicksum( - self.quad_char_key_assigns[char, char_2, key, key_2] - for char_2 in self.chars + self.quad_char_pos_assigns[char, char_2, key, key_2] + for char_2 in self.chars.monos if char_2 != char ) - <= self.char_key_assigns[char, key], - f"QuadCharacterAssignedLEQThanLocation({char},{key},{key_2})", + <= self.char_pos_assigns[char, key], + f"QuadCharacterAssignedLEQThanPosition({char},{key},{key_2})", ) self.model.addCons( quicksum( - self.quad_char_key_assigns[char_2, char, key_2, key] - for char_2 in self.chars + self.quad_char_pos_assigns[char_2, char, key_2, key] + for char_2 in self.chars.monos if char_2 != char ) - <= self.char_key_assigns[char, key], - f"QuadCharacterAssignedLEQThanSecondLocation({char},{key_2},{key})", + <= self.char_pos_assigns[char, key], + f"QuadCharacterAssignedLEQThanSecondPosition({char},{key_2},{key})", ) - for char_2 in self.chars: - for key in self.keys: + for char_2 in self.chars.monos: + for key in self.poss: if char_2 != char: self.model.addCons( quicksum( - self.quad_char_key_assigns[char, char_2, key, key_2] - for key_2 in self.keys + self.quad_char_pos_assigns[char, char_2, key, key_2] + for key_2 in self.poss if key_2 != key ) - <= self.char_key_assigns[char, key], + <= self.char_pos_assigns[char, key], f"QuadCharacterAssignedLEQThanCharacter({char},{char_2}," f"{key})", ) self.model.addCons( quicksum( - self.quad_char_key_assigns[char_2, char, key_2, key] - for key_2 in self.keys + self.quad_char_pos_assigns[char_2, char, key_2, key] + for key_2 in self.poss if key_2 != key ) - <= self.char_key_assigns[char, key], + <= self.char_pos_assigns[char, key], f"QuadCharacterAssignedLEQThanSecondCharacter({char_2}," f"{char},{key})", ) for (char, char_2, key, key_2) in self.quad_char_key_assigns_keys: self.model.addCons( - self.char_key_assigns[char, key] + self.char_key_assigns[char_2, key_2] - <= 1 + self.quad_char_key_assigns[char, char_2, key, key_2], + self.char_pos_assigns[char, key] + self.char_pos_assigns[char_2, key_2] + <= 1 + self.quad_char_pos_assigns[char, char_2, key, key_2], f"IntegrableQuadAssign({char},{key_2},{key})", ) - for key in self.keys: + for key in self.poss: constr[key] = self.model.addCons( - quicksum(self.char_key_assigns[char, key] for char in self.chars) == 1, - f"AllLocationAssignedOnce({key})", + quicksum(self.char_pos_assigns[char, key] for char in self.chars.monos) + == 1, + f"AllPositionsAssignedOnce({key})", ) self.model.setObjective( quicksum( costs * assigns for (costs, assigns) in zip( - self.char_key_costs.values(), self.char_key_assigns.values() + self.char_pos_costs.values(), self.char_pos_assigns.values() ) ) + quicksum( costs * assigns for (costs, assigns) in zip( - self.quad_char_key_costs.values(), - self.quad_char_key_assigns.values(), + self.quad_char_pos_costs.values(), + self.quad_char_pos_assigns.values(), ) ), "minimize", @@ -159,10 +160,10 @@ class KeyboardOptimization: for (char, key) in self.char_key_assigns_keys: print( f"({char}, {key}): " - f"{self.model.getVal(self.char_key_assigns[char, key])}, " - f"cost: {self.char_key_costs[char, key]}" + f"{self.model.getVal(self.char_pos_assigns[char, key])}, " + f"cost: {self.char_pos_costs[char, key]}" ) - if self.model.getVal(self.char_key_assigns[char, key]) == 1: + if self.model.getVal(self.char_pos_assigns[char, key]) == 1: solution_assignments.append((char, key)) assert "('u', 'left_pinky_home')" in str(solution_assignments) @@ -180,16 +181,16 @@ class KeyboardOptimization: print(f"{str(solution_assignments)}") @property - def char_key_assigns_keys(self) -> Iterable[CharKeyPair]: + def char_key_assigns_keys(self) -> Iterable[CharPosPair]: """An iterator for the pairs of (special) characters and corresponding keys""" - return product(self.chars, self.keys) + return product(self.chars.monos, self.poss) @property - def quad_char_key_assigns_keys(self) -> Iterable[CharKeyQuadruple]: + def quad_char_key_assigns_keys(self) -> Iterable[CharPosQuadruple]: """An iterator for quadruples of character pairs and corresponding key pairs""" flattened_tuple_of_quads = chain.from_iterable( chain.from_iterable( - product(permutations(self.chars, 2), permutations(self.keys, 2)) + product(permutations(self.chars.monos, 2), permutations(self.poss, 2)) ) ) iter_of_quads = (iter(flattened_tuple_of_quads),) * 4 @@ -198,4 +199,4 @@ class KeyboardOptimization: @property def key_pairs(self) -> Iterable: """An iterator for all pairs of keys that are possible""" - return permutations(self.keys, 2) + return permutations(self.poss, 2) diff --git a/src/ilp_keyboard_layout_optimization/optimize.py b/src/ilp_keyboard_layout_optimization/optimize.py index 2e7f7e4e3067cc8bbfc5d19fc99fceaa141319ad..714ef970718e28ac1819d8eb5df1f7d7dd506219 100644 --- a/src/ilp_keyboard_layout_optimization/optimize.py +++ b/src/ilp_keyboard_layout_optimization/optimize.py @@ -12,8 +12,9 @@ $ python -m ilp_keyboard_layout_optimization.optimize We might add command line parameters at a later time. For now please edit the main function at the very bottom of this file to change inputs. """ +from ilp_keyboard_layout_optimization.data_aquisition.chars import Chars from ilp_keyboard_layout_optimization.ilp import KeyboardOptimization -from ilp_keyboard_layout_optimization.types import LinCosts, QuadCosts +from ilp_keyboard_layout_optimization.type_aliases import LinCosts, QuadCosts def prepare_costs( @@ -33,48 +34,48 @@ def prepare_costs( """ _linear_costs = {} _quad_costs = {} - for (char, loc) in optimization_problem.char_key_assigns_keys: + for (char, pos) in optimization_problem.char_key_assigns_keys: if ( - (char == "u" and loc == "left_pinky_home") - or (char == "n" and loc == "right_index_home") - or (char == "r" and loc == "right_middle_home") - or (char == "t" and loc == "right_ring_home") - or (char == "d" and loc == "right_pinky_home") + (char == "u" and pos == "left_pinky_home") + or (char == "n" and pos == "right_index_home") + or (char == "r" and pos == "right_middle_home") + or (char == "t" and pos == "right_ring_home") + or (char == "d" and pos == "right_pinky_home") ): - _linear_costs[char, loc] = 0.0 + _linear_costs[char, pos] = 0.0 continue - _linear_costs[char, loc] = 1.0 + _linear_costs[char, pos] = 1.0 - for (char, char_2, loc, loc_2) in optimization_problem.quad_char_key_assigns_keys: + for (char, char_2, pos, pos_2) in optimization_problem.quad_char_key_assigns_keys: if ( ( char == "u" - and loc == "left_pinky_home" + and pos == "left_pinky_home" and char_2 == "i" - and loc_2 == "left_middle_home" + and pos_2 == "left_middle_home" ) or ( char == "i" - and loc == "left_middle_home" + and pos == "left_middle_home" and char_2 == "a" - and loc_2 == "left_index_home" + and pos_2 == "left_index_home" ) or ( char == "a" - and loc == "left_index_home" + and pos == "left_index_home" and char_2 == "e" - and loc_2 == "left_ring_home" + and pos_2 == "left_ring_home" ) ): - _quad_costs[char, char_2, loc, loc_2] = 0.0 + _quad_costs[char, char_2, pos, pos_2] = 0.0 continue - _quad_costs[char, char_2, loc, loc_2] = 1.0 + _quad_costs[char, char_2, pos, pos_2] = 1.0 return _linear_costs, _quad_costs if __name__ == "__main__": - test_chars = ("a", "e", "i", "u", "n", "r", "t", "d") - test_locs = ( + test_chars = Chars(("a", "e", "i", "u", "n", "r", "t", "d")) + test_poss = ( "left_pinky_home", "left_ring_home", "left_middle_home", @@ -84,7 +85,7 @@ if __name__ == "__main__": "right_ring_home", "right_pinky_home", ) - optimization_model = KeyboardOptimization(test_chars, test_locs) + optimization_model = KeyboardOptimization(test_chars, test_poss) linear_costs, quad_costs = prepare_costs(optimization_model) optimization_model.set_up_model(linear_costs, quad_costs) optimization_model.solve() diff --git a/src/ilp_keyboard_layout_optimization/receive_data.py b/src/ilp_keyboard_layout_optimization/receive_data.py index 3c19218ad6975ee9bd00e22c0a9de4f5ce02f2ea..4841175717d9e174051beb09f0de27725aa70200 100644 --- a/src/ilp_keyboard_layout_optimization/receive_data.py +++ b/src/ilp_keyboard_layout_optimization/receive_data.py @@ -1,6 +1,6 @@ """This module contains a class representing (special) character counts""" -__all__ = ["CharacterCounts"] +__all__ = ["CharProbs"] import csv from math import comb @@ -8,11 +8,11 @@ from os.path import abspath, basename from typing import Optional from urllib.request import urlopen -from .types import CharSet, CharTuple +from src.ilp_keyboard_layout_optimization.type_aliases import CharSet, CharTuple -class CharacterCounts: - """Instances represent all relevant (special) character counts +class CharProbs: + """Instances represent all relevant (special) character probabilities Parameters ---------- @@ -120,11 +120,10 @@ class CharacterCounts: row["bigram"][0] in self.chars and row["bigram"][1] in self.chars ): bi_probs[row["bigram"]] = absolute_count - total_sum = sum(bi_probs.values()) - assert total_sum > 0 - normalizer = 100 / total_sum + absolute_sum = sum(bi_probs.values()) for character, count in bi_probs.items(): - bi_probs[character] = count / normalizer + bi_probs[character] = count / absolute_sum + assert round(sum(bi_probs.values()), 8) == 1 assert len(bi_probs) == reader.line_num or len(bi_probs) <= comb( reader.line_num, 2 ) @@ -142,10 +141,10 @@ class CharacterCounts: if __name__ == "__main__": - CharacterCounts( - ("a", "b", "c"), + CharProbs( + None, "http://www.ids-mannheim.de/fileadmin/kl/derewo/" "DeReChar-v-uni-204-a-c-2018-02-28-1.0.csv", "http://practicalcryptography.com/media/cryptanalysis/files/" - "icelandic_bigrams.txt", + "german_bigrams.txt", ) diff --git a/src/ilp_keyboard_layout_optimization/type_aliases.py b/src/ilp_keyboard_layout_optimization/type_aliases.py new file mode 100644 index 0000000000000000000000000000000000000000..1eb2b5db5aa09d43afb9abe2a04d66e3809fdae6 --- /dev/null +++ b/src/ilp_keyboard_layout_optimization/type_aliases.py @@ -0,0 +1,44 @@ +"""This module contains type aliases for type hints and thus more convenient coding""" + +__all__ = [ + "Bigram", + "Char", + "CharPosPair", + "CharPosQuadruple", + "CharSet", + "CharTuple", + "LinCosts", + "LinVars", + "Pos", + "PosPair", + "PosTuple", + "QuadCosts", + "QuadVars", +] + +Char = str +"""A (special) character""" +Pos = str +"""A position""" +CharPosPair = tuple[Char, Pos] +"""A pair of a (special) character and a position""" +Bigram = str +"""A length-two string of (special) characters""" +PosPair = tuple[Pos, Pos] +"""A tuple of two positions""" +CharTuple = tuple[Char, ...] +"""A tuple of several (special) characters""" +CharSet = set[Char] +"""A set of several (special) characters""" +PosTuple = tuple[Pos, ...] +"""A tuple of several positions""" +LinCosts = dict[CharPosPair, float] +"""A dictionary assigning costs to (special) character bigrams""" +CharPosQuadruple = tuple[Char, Char, Pos, Pos] +"""A four-tuple: two (special) characters and their respective positions""" +QuadCosts = dict[CharPosQuadruple, float] +"""A dictionary assigning costs to (special) character, position quadruples""" +LinVars = dict[CharPosPair, bool] +"""A dictionary of binary decisions of assigning (special) characters to positions""" +QuadVars = dict[CharPosQuadruple, bool] +"""A dictionary of binary vars assigning two (special) characters to two positions""" diff --git a/src/ilp_keyboard_layout_optimization/types.py b/src/ilp_keyboard_layout_optimization/types.py deleted file mode 100644 index 0727d4695682d621634decbdbc5807b15adf2b84..0000000000000000000000000000000000000000 --- a/src/ilp_keyboard_layout_optimization/types.py +++ /dev/null @@ -1,40 +0,0 @@ -"""This module contains custom types for type hints and thus more convenient coding""" - -__all__ = [ - "CharKeyPair", - "CharKeyQuadruple", - "CharTuple", - "CharSet", - "KeyTuple", - "LinCosts", - "LinVars", - "QuadCosts", - "QuadVars", -] - -Char = str -"""A (special) character""" -Key = str -"""A key""" -CharKeyPair = tuple[Char, Key] -"""A pair of a (special) character and a key""" -Bigram = tuple[Char, Char] -"""A tuple of two (special) characters""" -KeyPair = tuple[Key, Key] -"""A tuple of two keys""" -CharTuple = tuple[Char, ...] -"""A tuple of several (special) characters""" -CharSet = set[Char] -"""A set of several (special) characters""" -KeyTuple = tuple[Key, ...] -"""A tuple of several keys""" -LinCosts = dict[CharKeyPair, float] -"""A dictionary assigning costs to (special) character bigrams""" -CharKeyQuadruple = tuple[Char, Key, Char, Key] -"""A four-tuple: (special) character, key, another (special) character, another key""" -QuadCosts = dict[CharKeyQuadruple, float] -"""A dictionary assigning costs to (special) character, key quadruples""" -LinVars = dict[CharKeyPair, bool] -"""A dictionary of binary decisions of assigning (special) characters to keys""" -QuadVars = dict[CharKeyQuadruple, bool] -"""A dictionary of binary decisions of assigning two (special) characters to two keys""" diff --git a/test/test_chars.py b/test/test_chars.py new file mode 100644 index 0000000000000000000000000000000000000000..210849769b3bc54f5700460e358cd0a0f9f367eb --- /dev/null +++ b/test/test_chars.py @@ -0,0 +1,148 @@ +from typing import Iterable, List + +from hypothesis import given, settings, strategies as hst + +from src.ilp_keyboard_layout_optimization.data_aquisition.chars import ( + Chars, +) +from src.ilp_keyboard_layout_optimization.type_aliases import CharTuple + + +def test_chars_init(): + assert Chars() + + +def test_chars_chars_type(): + assert isinstance(Chars().chars, str) + + +def test_chars_monos_type(): + assert isinstance(Chars().monos, CharTuple.__origin__) + + +@given(hst.lists(hst.characters(), min_size=1)) +@settings(deadline=None) +def test_chars_input_tuple(char_tuple): + assert Chars(tuple(char_tuple)).chars == "".join(char_tuple) + + +@given(hst.text(min_size=1)) +def test_chars_input_str(char_string): + assert Chars(char_string).chars == char_string + + +@given(hst.text(min_size=1)) +def test_chars_input_tuple_equals_input_str(char_string): + char_tuple = Chars._str2char_tuple(char_string) + assert Chars(char_string).chars == Chars(char_tuple).chars + + +@given(hst.text(min_size=1)) +def test_chars_input_str(char_string): + assert Chars(char_string).chars == char_string + + +def test_chars_default(): + all_test_chars = list(Chars().chars) + basic_latin = _get_unicode_chars(range(0x0021, 0x007F)) + latin_1_supp = _get_unicode_chars( + (0x00C4, 0x00D6, 0x00DC, 0x00E4, 0x00F6, 0x00FC, 0x00DF) + ) + general_punc = _get_unicode_chars((0x2013, 0x2026)) + extended_alphabet = basic_latin + latin_1_supp + general_punc + for char in extended_alphabet: + assert char in all_test_chars + all_test_chars.remove(char) + assert not all_test_chars + + +def _get_unicode_chars(code_point_range: Iterable) -> List[str]: + return [chr(char) for char in code_point_range] + + +def test_chars_monograms(): + assert Chars().monos + + +def test_chars_monograms_multiple_times(): + test_chars = Chars() + first_time_monos = test_chars.monos + second_time_monos = test_chars.monos + assert first_time_monos == second_time_monos + + +@given(hst.lists(hst.characters(), min_size=3)) +def test_chars_monograms_after_resetting(char_tuple): + first_test_chars = Chars(char_tuple) + assert first_test_chars.monos == tuple(char_tuple) + first_test_chars.chars = char_tuple[1:-1] + assert first_test_chars.monos == tuple(char_tuple[1:-1]) + + +def test_chars_bigrams(): + assert Chars().bis + + +def test_chars_bigrams_length(): + for bigram in Chars().bis: + assert len(bigram) == 2 + + +def test_bigrams_default(): + all_test_bigrams = list(Chars("1234").bis) + actual_bigrams = ( + "11", + "12", + "21", + "13", + "31", + "14", + "41", + "22", + "23", + "32", + "24", + "42", + "33", + "34", + "44", + "43", + ) + for bigram in actual_bigrams: + assert bigram in all_test_bigrams + all_test_bigrams.remove(bigram) + assert not all_test_bigrams + + +def test_chars_bigrams_multiple_times(): + test_chars = Chars() + first_time_bis = test_chars.bis + second_time_bis = test_chars.bis + assert first_time_bis == second_time_bis + + +def test_chars_bigrams_after_resetting(): + test_chars = Chars("12") + first_bigram_list = list(test_chars.bis) + first_actual_bigrams = ( + "11", + "12", + "21", + "22", + ) + for bigram in first_actual_bigrams: + assert bigram in first_bigram_list + first_bigram_list.remove(bigram) + assert not first_bigram_list + test_chars.chars = "23" + second_bigram_list = list(test_chars.bis) + second_actual_bigrams = ( + "22", + "23", + "32", + "33", + ) + for bigram in second_actual_bigrams: + assert bigram in second_bigram_list + second_bigram_list.remove(bigram) + assert not second_bigram_list diff --git a/test/test_initialization.py b/test/test_initialization.py index 1d06626c4832998d411465477c8605ea7e267f0e..3e502a2e6018b9f7beb913774c2e398c83b95d68 100644 --- a/test/test_initialization.py +++ b/test/test_initialization.py @@ -12,7 +12,7 @@ def three_chars() -> CharTuple: @pytest.fixture(scope="session") -def three_locs() -> CharTuple: +def three_poss() -> CharTuple: return "left_pinky_home", "left_ring_home", "right_index_home" @@ -23,36 +23,36 @@ def test_keyboard_optimization_init_throw_errors(init_params): KeyboardOptimization(init_params) -def test_keyboard_optimization_init(three_chars, three_locs): - KeyboardOptimization(three_chars, three_locs) +def test_keyboard_optimization_init(three_chars, three_poss): + KeyboardOptimization(three_chars, three_poss) -def test_keyboard_optimization_char_loc_assigns_keys(three_chars, three_locs): +def test_keyboard_optimization_char_pos_assigns_keys(three_chars, three_poss): linear_keys = tuple( - KeyboardOptimization(three_chars, three_locs).char_key_assigns_keys + KeyboardOptimization(three_chars, three_poss).char_key_assigns_keys ) - assert len(linear_keys) == len(three_chars) * len(three_locs) + assert len(linear_keys) == len(three_chars) * len(three_poss) for char in three_chars: - for loc in three_locs: - assert (char, loc) in linear_keys + for pos in three_poss: + assert (char, pos) in linear_keys -def test_keyboard_optimization_quad_char_loc_assigns_keys(three_chars, three_locs): +def test_keyboard_optimization_quad_char_pos_assigns_keys(three_chars, three_poss): quadratic_keys = tuple( - KeyboardOptimization(three_chars, three_locs).quad_char_key_assigns_keys + KeyboardOptimization(three_chars, three_poss).quad_char_key_assigns_keys ) assert len(quadratic_keys) == len(three_chars) * (len(three_chars) - 1) * len( - three_locs - ) * (len(three_locs) - 1) + three_poss + ) * (len(three_poss) - 1) for (char, char_2) in permutations(three_chars, 2): - for (loc, loc_2) in permutations(three_locs, 2): - assert (char, char_2, loc, loc_2) in quadratic_keys + for (pos, pos_2) in permutations(three_poss, 2): + assert (char, char_2, pos, pos_2) in quadratic_keys -def test_keyboard_optimization_quad_locs(three_chars, three_locs): - quad_locs = tuple(KeyboardOptimization(three_chars, three_locs).key_pairs) - assert len(quad_locs) == len(three_locs) * (len(three_locs) - 1) - for loc in three_locs: - for loc_2 in three_locs: - if loc != loc_2: - assert (loc, loc_2) in quad_locs +def test_keyboard_optimization_quad_poss(three_chars, three_poss): + quad_poss = tuple(KeyboardOptimization(three_chars, three_poss).key_pairs) + assert len(quad_poss) == len(three_poss) * (len(three_poss) - 1) + for pos in three_poss: + for pos_2 in three_poss: + if pos != pos_2: + assert (pos, pos_2) in quad_poss diff --git a/test/test_receive_data.py b/test/test_receive_data.py index 42393c7e08e10433152be8c3273fecac20452d73..2e7e193c443424b28b6fbf880828e0d2fb8b3502 100644 --- a/test/test_receive_data.py +++ b/test/test_receive_data.py @@ -3,17 +3,17 @@ from urllib.request import urlopen import pytest -from ilp_keyboard_layout_optimization.receive_data import CharacterCounts +from src.ilp_keyboard_layout_optimization.receive_data import CharProbs @pytest.fixture def characters_count(): - return CharacterCounts() + return CharProbs() @pytest.fixture() def characters_count_custom(): - return CharacterCounts( + return CharProbs( ("A", "B", "C"), "http://www.ids-mannheim.de/fileadmin/kl/derewo/" "DeReChar-v-uni-204-a-c-2018-02-28-1.0.csv", diff --git a/test/test_types.py b/test/test_types.py index 60ba63db6e311a9a738455f28d1b1a3304f369fb..a1601f66d6c8d8e915243cfb5044db6eecafd3be 100644 --- a/test/test_types.py +++ b/test/test_types.py @@ -1,13 +1,13 @@ from ilp_keyboard_layout_optimization.costs import FreqTuple -from ilp_keyboard_layout_optimization.types import ( +from ilp_keyboard_layout_optimization.type_aliases import ( Bigram, Char, - CharKeyPair, - CharKeyQuadruple, + CharPosPair, + CharPosQuadruple, CharTuple, - Key, - KeyPair, - KeyTuple, + Pos, + PosPair, + PosTuple, LinCosts, LinVars, QuadCosts, @@ -25,44 +25,44 @@ def test_char(): def test_key(): - assert Key == str + assert Pos == str def test_char_key_pair(): - assert CharKeyPair == tuple[Char, Key] + assert CharPosPair == tuple[Char, Pos] def test_bigram(): - assert Bigram == tuple[Char, Char] + assert Bigram == str -def test_loc_pair(): - assert KeyPair == tuple[Key, Key] +def test_pos_pair(): + assert PosPair == tuple[Pos, Pos] def test_lin_costs(): - assert LinCosts == dict[CharKeyPair, float] + assert LinCosts == dict[CharPosPair, float] def test_char_tuple(): assert CharTuple == tuple[Char, ...] -def test_loc_tuple(): - assert KeyTuple == tuple[Key, ...] +def test_pos_tuple(): + assert PosTuple == tuple[Pos, ...] def test_lin_vars(): - assert LinVars == dict[CharKeyPair, bool] + assert LinVars == dict[CharPosPair, bool] -def test_quad_loc_quadruple(): - assert CharKeyQuadruple == tuple[Char, Key, Char, Key] +def test_quad_pos_quadruple(): + assert CharPosQuadruple == tuple[Char, Pos, Char, Pos] def test_quad_costs(): - assert QuadCosts == dict[CharKeyQuadruple, float] + assert QuadCosts == dict[CharPosQuadruple, float] def test_quad_vars(): - assert QuadVars == dict[CharKeyQuadruple, bool] + assert QuadVars == dict[CharPosQuadruple, bool]