xref: /OK3568_Linux_fs/yocto/poky/meta/recipes-devtools/python/python3-hypothesis/test_rle.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun# This file is part of Hypothesis, which may be found at
2*4882a593Smuzhiyun# https://github.com/HypothesisWorks/hypothesis/
3*4882a593Smuzhiyun#
4*4882a593Smuzhiyun# Most of this work is copyright (C) 2013-2021 David R. MacIver
5*4882a593Smuzhiyun# (david@drmaciver.com), but it contains contributions by others. See
6*4882a593Smuzhiyun# CONTRIBUTING.rst for a full list of people who may hold copyright, and
7*4882a593Smuzhiyun# consult the git log if you need to determine who owns an individual
8*4882a593Smuzhiyun# contribution.
9*4882a593Smuzhiyun#
10*4882a593Smuzhiyun# This Source Code Form is subject to the terms of the Mozilla Public License,
11*4882a593Smuzhiyun# v. 2.0. If a copy of the MPL was not distributed with this file, You can
12*4882a593Smuzhiyun# obtain one at https://mozilla.org/MPL/2.0/.
13*4882a593Smuzhiyun#
14*4882a593Smuzhiyun# END HEADER
15*4882a593Smuzhiyun#
16*4882a593Smuzhiyun# SPDX-License-Identifier: MPL-2.0
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun"""This example demonstrates testing a run length encoding scheme. That is, we
19*4882a593Smuzhiyuntake a sequence and represent it by a shorter sequence where each 'run' of
20*4882a593Smuzhiyunconsecutive equal elements is represented as a single element plus a count. So
21*4882a593Smuzhiyune.g.
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun[1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]]
24*4882a593Smuzhiyun
25*4882a593SmuzhiyunThis demonstrates the useful decode(encode(x)) == x invariant that is often
26*4882a593Smuzhiyuna fruitful source of testing with Hypothesis.
27*4882a593Smuzhiyun
28*4882a593SmuzhiyunIt also has an example of testing invariants in response to changes in the
29*4882a593Smuzhiyununderlying data.
30*4882a593Smuzhiyun"""
31*4882a593Smuzhiyun
32*4882a593Smuzhiyunfrom hypothesis import assume, given, strategies as st
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun
35*4882a593Smuzhiyundef run_length_encode(seq):
36*4882a593Smuzhiyun    """Encode a sequence as a new run-length encoded sequence."""
37*4882a593Smuzhiyun    if not seq:
38*4882a593Smuzhiyun        return []
39*4882a593Smuzhiyun    # By starting off the count at zero we simplify the iteration logic
40*4882a593Smuzhiyun    # slightly.
41*4882a593Smuzhiyun    result = [[seq[0], 0]]
42*4882a593Smuzhiyun    for s in seq:
43*4882a593Smuzhiyun        if (
44*4882a593Smuzhiyun            # If you uncomment this line this branch will be skipped and we'll
45*4882a593Smuzhiyun            # always append a new run of length 1. Note which tests fail.
46*4882a593Smuzhiyun            # False and
47*4882a593Smuzhiyun            s
48*4882a593Smuzhiyun            == result[-1][0]
49*4882a593Smuzhiyun            # Try uncommenting this line and see what problems occur:
50*4882a593Smuzhiyun            # and result[-1][-1] < 2
51*4882a593Smuzhiyun        ):
52*4882a593Smuzhiyun            result[-1][1] += 1
53*4882a593Smuzhiyun        else:
54*4882a593Smuzhiyun            result.append([s, 1])
55*4882a593Smuzhiyun    return result
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun
58*4882a593Smuzhiyundef run_length_decode(seq):
59*4882a593Smuzhiyun    """Take a previously encoded sequence and reconstruct the original from
60*4882a593Smuzhiyun    it."""
61*4882a593Smuzhiyun    result = []
62*4882a593Smuzhiyun    for s, i in seq:
63*4882a593Smuzhiyun        for _ in range(i):
64*4882a593Smuzhiyun            result.append(s)
65*4882a593Smuzhiyun    return result
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun# We use lists of a type that should have a relatively high duplication rate,
69*4882a593Smuzhiyun# otherwise we'd almost never get any runs.
70*4882a593SmuzhiyunLists = st.lists(st.integers(0, 10))
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun@given(Lists)
74*4882a593Smuzhiyundef test_decodes_to_starting_sequence(ls):
75*4882a593Smuzhiyun    """If we encode a sequence and then decode the result, we should get the
76*4882a593Smuzhiyun    original sequence back.
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun    Otherwise we've done something very wrong.
79*4882a593Smuzhiyun    """
80*4882a593Smuzhiyun    assert run_length_decode(run_length_encode(ls)) == ls
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun@given(Lists, st.data())
84*4882a593Smuzhiyundef test_duplicating_an_element_does_not_increase_length(ls, data):
85*4882a593Smuzhiyun    """The previous test could be passed by simply returning the input sequence
86*4882a593Smuzhiyun    so we need something that tests the compression property of our encoding.
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun    In this test we deliberately introduce or extend a run and assert
89*4882a593Smuzhiyun    that this does not increase the length of our encoding, because they
90*4882a593Smuzhiyun    should be part of the same run in the final result.
91*4882a593Smuzhiyun    """
92*4882a593Smuzhiyun    # We use assume to get a valid index into the list. We could also have used
93*4882a593Smuzhiyun    # e.g. flatmap, but this is relatively straightforward and will tend to
94*4882a593Smuzhiyun    # perform better.
95*4882a593Smuzhiyun    assume(ls)
96*4882a593Smuzhiyun    i = data.draw(st.integers(0, len(ls) - 1))
97*4882a593Smuzhiyun    ls2 = list(ls)
98*4882a593Smuzhiyun    # duplicating the value at i right next to it guarantees they are part of
99*4882a593Smuzhiyun    # the same run in the resulting compression.
100*4882a593Smuzhiyun    ls2.insert(i, ls2[i])
101*4882a593Smuzhiyun    assert len(run_length_encode(ls2)) == len(run_length_encode(ls))
102