1*4882a593Smuzhiyun# This file is part of Hypothesis, which may be found at 2*4882a593Smuzhiyun# https://github.com/HypothesisWorks/hypothesis/ 3*4882a593Smuzhiyun# 4*4882a593Smuzhiyun# Most of this work is copyright (C) 2013-2021 David R. MacIver 5*4882a593Smuzhiyun# (david@drmaciver.com), but it contains contributions by others. See 6*4882a593Smuzhiyun# CONTRIBUTING.rst for a full list of people who may hold copyright, and 7*4882a593Smuzhiyun# consult the git log if you need to determine who owns an individual 8*4882a593Smuzhiyun# contribution. 9*4882a593Smuzhiyun# 10*4882a593Smuzhiyun# This Source Code Form is subject to the terms of the Mozilla Public License, 11*4882a593Smuzhiyun# v. 2.0. If a copy of the MPL was not distributed with this file, You can 12*4882a593Smuzhiyun# obtain one at https://mozilla.org/MPL/2.0/. 13*4882a593Smuzhiyun# 14*4882a593Smuzhiyun# END HEADER 15*4882a593Smuzhiyun# 16*4882a593Smuzhiyun# SPDX-License-Identifier: MPL-2.0 17*4882a593Smuzhiyun 18*4882a593Smuzhiyun"""This example demonstrates testing a run length encoding scheme. That is, we 19*4882a593Smuzhiyuntake a sequence and represent it by a shorter sequence where each 'run' of 20*4882a593Smuzhiyunconsecutive equal elements is represented as a single element plus a count. So 21*4882a593Smuzhiyune.g. 22*4882a593Smuzhiyun 23*4882a593Smuzhiyun[1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]] 24*4882a593Smuzhiyun 25*4882a593SmuzhiyunThis demonstrates the useful decode(encode(x)) == x invariant that is often 26*4882a593Smuzhiyuna fruitful source of testing with Hypothesis. 27*4882a593Smuzhiyun 28*4882a593SmuzhiyunIt also has an example of testing invariants in response to changes in the 29*4882a593Smuzhiyununderlying data. 30*4882a593Smuzhiyun""" 31*4882a593Smuzhiyun 32*4882a593Smuzhiyunfrom hypothesis import assume, given, strategies as st 33*4882a593Smuzhiyun 34*4882a593Smuzhiyun 35*4882a593Smuzhiyundef run_length_encode(seq): 36*4882a593Smuzhiyun """Encode a sequence as a new run-length encoded sequence.""" 37*4882a593Smuzhiyun if not seq: 38*4882a593Smuzhiyun return [] 39*4882a593Smuzhiyun # By starting off the count at zero we simplify the iteration logic 40*4882a593Smuzhiyun # slightly. 41*4882a593Smuzhiyun result = [[seq[0], 0]] 42*4882a593Smuzhiyun for s in seq: 43*4882a593Smuzhiyun if ( 44*4882a593Smuzhiyun # If you uncomment this line this branch will be skipped and we'll 45*4882a593Smuzhiyun # always append a new run of length 1. Note which tests fail. 46*4882a593Smuzhiyun # False and 47*4882a593Smuzhiyun s 48*4882a593Smuzhiyun == result[-1][0] 49*4882a593Smuzhiyun # Try uncommenting this line and see what problems occur: 50*4882a593Smuzhiyun # and result[-1][-1] < 2 51*4882a593Smuzhiyun ): 52*4882a593Smuzhiyun result[-1][1] += 1 53*4882a593Smuzhiyun else: 54*4882a593Smuzhiyun result.append([s, 1]) 55*4882a593Smuzhiyun return result 56*4882a593Smuzhiyun 57*4882a593Smuzhiyun 58*4882a593Smuzhiyundef run_length_decode(seq): 59*4882a593Smuzhiyun """Take a previously encoded sequence and reconstruct the original from 60*4882a593Smuzhiyun it.""" 61*4882a593Smuzhiyun result = [] 62*4882a593Smuzhiyun for s, i in seq: 63*4882a593Smuzhiyun for _ in range(i): 64*4882a593Smuzhiyun result.append(s) 65*4882a593Smuzhiyun return result 66*4882a593Smuzhiyun 67*4882a593Smuzhiyun 68*4882a593Smuzhiyun# We use lists of a type that should have a relatively high duplication rate, 69*4882a593Smuzhiyun# otherwise we'd almost never get any runs. 70*4882a593SmuzhiyunLists = st.lists(st.integers(0, 10)) 71*4882a593Smuzhiyun 72*4882a593Smuzhiyun 73*4882a593Smuzhiyun@given(Lists) 74*4882a593Smuzhiyundef test_decodes_to_starting_sequence(ls): 75*4882a593Smuzhiyun """If we encode a sequence and then decode the result, we should get the 76*4882a593Smuzhiyun original sequence back. 77*4882a593Smuzhiyun 78*4882a593Smuzhiyun Otherwise we've done something very wrong. 79*4882a593Smuzhiyun """ 80*4882a593Smuzhiyun assert run_length_decode(run_length_encode(ls)) == ls 81*4882a593Smuzhiyun 82*4882a593Smuzhiyun 83*4882a593Smuzhiyun@given(Lists, st.data()) 84*4882a593Smuzhiyundef test_duplicating_an_element_does_not_increase_length(ls, data): 85*4882a593Smuzhiyun """The previous test could be passed by simply returning the input sequence 86*4882a593Smuzhiyun so we need something that tests the compression property of our encoding. 87*4882a593Smuzhiyun 88*4882a593Smuzhiyun In this test we deliberately introduce or extend a run and assert 89*4882a593Smuzhiyun that this does not increase the length of our encoding, because they 90*4882a593Smuzhiyun should be part of the same run in the final result. 91*4882a593Smuzhiyun """ 92*4882a593Smuzhiyun # We use assume to get a valid index into the list. We could also have used 93*4882a593Smuzhiyun # e.g. flatmap, but this is relatively straightforward and will tend to 94*4882a593Smuzhiyun # perform better. 95*4882a593Smuzhiyun assume(ls) 96*4882a593Smuzhiyun i = data.draw(st.integers(0, len(ls) - 1)) 97*4882a593Smuzhiyun ls2 = list(ls) 98*4882a593Smuzhiyun # duplicating the value at i right next to it guarantees they are part of 99*4882a593Smuzhiyun # the same run in the resulting compression. 100*4882a593Smuzhiyun ls2.insert(i, ls2[i]) 101*4882a593Smuzhiyun assert len(run_length_encode(ls2)) == len(run_length_encode(ls)) 102