xref: /OK3568_Linux_fs/yocto/poky/bitbake/lib/hashserv/tests.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#! /usr/bin/env python3
2#
3# Copyright (C) 2018-2019 Garmin Ltd.
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7
8from . import create_server, create_client
9import hashlib
10import logging
11import multiprocessing
12import os
13import sys
14import tempfile
15import threading
16import unittest
17import socket
18import time
19import signal
20
def server_prefunc(server, idx):
    """Run inside the forked server process before it starts serving.

    Redirects logging, stdout and stderr into per-server log files so the
    output of multiple concurrently running test servers can be inspected
    individually after a test run.
    """
    log_file = 'bbhashserv-%d.log' % idx
    logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode='w',
                        format='%(levelname)s %(filename)s:%(lineno)d %(message)s')
    server.logger.debug("Running server %d" % idx)
    # Send anything printed by the server process to its own log file
    sys.stdout = open('bbhashserv-stdout-%d.log' % idx, 'w')
    sys.stderr = sys.stdout
27
class HashEquivalenceTestSetup(object):
    """Mixin supplying server/client fixtures for hash equivalence tests.

    Concrete test classes combine this with HashEquivalenceCommonTests and
    unittest.TestCase, and provide get_server_addr() for their transport.
    """

    # Hash method name shared by every test case
    METHOD = 'TestMethod'

    # Monotonic counter so each started server gets a unique index
    server_index = 0

    def start_server(self, dbpath=None, upstream=None, read_only=False, prefunc=server_prefunc):
        """Start a server process plus a connected client.

        Both are registered with addCleanup (client closes before the
        server is terminated, since cleanups run LIFO). Returns the pair
        (client, server).
        """
        self.server_index += 1
        idx = self.server_index
        if dbpath is None:
            dbpath = os.path.join(self.temp_dir.name, "db%d.sqlite" % idx)

        def stop_server(srv):
            # Nothing to do if the process already exited on its own
            if srv.process.exitcode is None:
                srv.process.terminate()
                srv.process.join()

        server = create_server(self.get_server_addr(idx),
                               dbpath,
                               upstream=upstream,
                               read_only=read_only)
        server.dbpath = dbpath

        server.serve_as_process(prefunc=prefunc, args=(idx,))
        self.addCleanup(stop_server, server)

        client = create_client(server.address)
        self.addCleanup(client.close)

        return (client, server)

    def setUp(self):
        if sys.version_info < (3, 5, 0):
            self.skipTest('Python 3.5 or later required')

        self.temp_dir = tempfile.TemporaryDirectory(prefix='bb-hashserv')
        self.addCleanup(self.temp_dir.cleanup)

        (self.client, self.server) = self.start_server()

    def assertClientGetHash(self, client, taskhash, unihash):
        """Assert that *client* resolves *taskhash* to exactly *unihash*."""
        self.assertEqual(client.get_unihash(self.METHOD, taskhash), unihash)
74
75
class HashEquivalenceCommonTests(object):
    """Test cases shared by all server transports (unix socket, TCP).

    Relies on the HashEquivalenceTestSetup mixin for self.client,
    self.server, start_server() and assertClientGetHash().
    """

    def test_create_hash(self):
        """Simple test that hashes can be created."""
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, taskhash, None)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_create_equivalent(self):
        """A second reported task with the same outhash gets the same unihash."""
        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Report a different task with the same outhash. The returned unihash
        # should match the first task
        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash, unihash2)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_duplicate_taskhash(self):
        """Duplicate reports of the same taskhash with different outhash &
        unihash always return the unihash from the first reported taskhash."""
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash3 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
        unihash3 = '9217a7d6398518e5dc002ed58f2cbbbc78696603'
        self.client.report_unihash(taskhash, self.METHOD, outhash3, unihash3)

        self.assertClientGetHash(self.client, taskhash, unihash)

    def test_huge_message(self):
        """Report and read back a siginfo payload larger than one chunk to
        exercise the chunked message path."""
        taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        self.assertClientGetHash(self.client, taskhash, None)

        # Payload deliberately larger than the client's chunking threshold
        siginfo = "0" * (self.client.max_chunk * 4)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash, {
            'outhash_siginfo': siginfo
        })
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        result_unihash = self.client.get_taskhash(self.METHOD, taskhash, True)
        self.assertEqual(result_unihash['taskhash'], taskhash)
        self.assertEqual(result_unihash['unihash'], unihash)
        self.assertEqual(result_unihash['method'], self.METHOD)

        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
        self.assertEqual(result_outhash['taskhash'], taskhash)
        self.assertEqual(result_outhash['method'], self.METHOD)
        self.assertEqual(result_outhash['unihash'], unihash)
        self.assertEqual(result_outhash['outhash'], outhash)
        self.assertEqual(result_outhash['outhash_siginfo'], siginfo)

    def test_stress(self):
        """Hammer the server with 100 threads x 1000 queries each."""
        def query_server(failures):
            # Fix: use the module-level create_client() factory; the name
            # "Client" is not imported here and raised NameError before.
            client = create_client(self.server.address)
            try:
                for i in range(1000):
                    taskhash = hashlib.sha256()
                    taskhash.update(str(i).encode('utf-8'))
                    taskhash = taskhash.hexdigest()
                    result = client.get_unihash(self.METHOD, taskhash)
                    if result != taskhash:
                        failures.append("taskhash mismatch: %s != %s" % (result, taskhash))
            finally:
                client.close()

        # Report hashes (taskhash == outhash == unihash for simplicity)
        for i in range(1000):
            taskhash = hashlib.sha256()
            taskhash.update(str(i).encode('utf-8'))
            taskhash = taskhash.hexdigest()
            self.client.report_unihash(taskhash, self.METHOD, taskhash, taskhash)

        failures = []
        threads = [threading.Thread(target=query_server, args=(failures,)) for _ in range(100)]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.assertFalse(failures)

    def test_upstream_server(self):
        # Tests upstream server support. This is done by creating two servers
        # that share a database file. The downstream server has it upstream
        # set to the test server, whereas the side server doesn't. This allows
        # verification that the hash requests are being proxied to the upstream
        # server by verifying that they appear on the downstream client, but not
        # the side client. It also verifies that the results are pulled into
        # the downstream database by checking that the downstream and side servers
        # match after the downstream is done waiting for all backfill tasks
        (down_client, down_server) = self.start_server(upstream=self.server.address)
        (side_client, side_server) = self.start_server(dbpath=down_server.dbpath)

        def check_hash(taskhash, unihash, old_sidehash):
            nonlocal down_client
            nonlocal side_client

            # check upstream server
            self.assertClientGetHash(self.client, taskhash, unihash)

            # Hash should *not* be present on the side server
            self.assertClientGetHash(side_client, taskhash, old_sidehash)

            # Hash should be present on the downstream server, since it
            # will defer to the upstream server. This will trigger
            # the backfill in the downstream server
            self.assertClientGetHash(down_client, taskhash, unihash)

            # After waiting for the downstream client to finish backfilling the
            # task from the upstream server, it should appear in the side server
            # since the database is populated
            down_client.backfill_wait()
            self.assertClientGetHash(side_client, taskhash, unihash)

        # Basic report
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        check_hash(taskhash, unihash, None)

        # Duplicated taskhash with multiple output hashes and unihashes.
        # All servers should agree with the originally reported hash
        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        check_hash(taskhash, unihash, unihash)

        # Report an equivalent task. The sideload will originally report
        # no unihash until backfilled
        taskhash3 = "044c2ec8aaf480685a00ff6ff49e6162e6ad34e1"
        unihash3 = "def64766090d28f627e816454ed46894bb3aab36"
        self.client.report_unihash(taskhash3, self.METHOD, outhash, unihash3)

        check_hash(taskhash3, unihash, None)

        # Test that reporting a unihash in the downstream client isn't
        # propagating to the upstream server
        taskhash4 = "e3da00593d6a7fb435c7e2114976c59c5fd6d561"
        outhash4 = "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a"
        unihash4 = "3b5d3d83f07f259e9086fcb422c855286e18a57d"
        down_client.report_unihash(taskhash4, self.METHOD, outhash4, unihash4)
        down_client.backfill_wait()

        self.assertClientGetHash(down_client, taskhash4, unihash4)
        self.assertClientGetHash(side_client, taskhash4, unihash4)
        self.assertClientGetHash(self.client, taskhash4, None)

        # Test that reporting a unihash in the downstream is able to find a
        # match which was previously reported to the upstream server
        taskhash5 = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash5 = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash5 = 'f46d3fbb439bd9b921095da657a4de906510d2cd'
        result = self.client.report_unihash(taskhash5, self.METHOD, outhash5, unihash5)

        taskhash6 = '35788efcb8dfb0a02659d81cf2bfd695fb30fafa'
        unihash6 = 'f46d3fbb439bd9b921095da657a4de906510d2ce'
        result = down_client.report_unihash(taskhash6, self.METHOD, outhash5, unihash6)
        self.assertEqual(result['unihash'], unihash5, 'Server failed to copy unihash from upstream')

        # Tests read-through from the upstream server via get_taskhash with
        # the full record requested
        taskhash7 = '9d81d76242cc7cfaf7bf74b94b9cd2e29324ed74'
        outhash7 = '8470d56547eea6236d7c81a644ce74670ca0bbda998e13c629ef6bb3f0d60b69'
        unihash7 = '05d2a63c81e32f0a36542ca677e8ad852365c538'
        self.client.report_unihash(taskhash7, self.METHOD, outhash7, unihash7)

        result = down_client.get_taskhash(self.METHOD, taskhash7, True)
        self.assertEqual(result['unihash'], unihash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        taskhash8 = '86978a4c8c71b9b487330b0152aade10c1ee58aa'
        outhash8 = 'ca8c128e9d9e4a28ef24d0508aa20b5cf880604eacd8f65c0e366f7e0cc5fbcf'
        unihash8 = 'd8bcf25369d40590ad7d08c84d538982f2023e01'
        self.client.report_unihash(taskhash8, self.METHOD, outhash8, unihash8)

        result = down_client.get_outhash(self.METHOD, outhash8, taskhash8)
        self.assertEqual(result['unihash'], unihash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        taskhash9 = 'ae6339531895ddf5b67e663e6a374ad8ec71d81c'
        outhash9 = 'afc78172c81880ae10a1fec994b5b4ee33d196a001a1b66212a15ebe573e00b5'
        unihash9 = '6662e699d6e3d894b24408ff9a4031ef9b038ee8'
        self.client.report_unihash(taskhash9, self.METHOD, outhash9, unihash9)

        result = down_client.get_taskhash(self.METHOD, taskhash9, False)
        self.assertEqual(result['unihash'], unihash9, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash9, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['method'], self.METHOD)

    def test_ro_server(self):
        """A read-only server shares the database but rejects writes."""
        (ro_client, ro_server) = self.start_server(dbpath=self.server.dbpath, read_only=True)

        # Report a hash via the read-write server
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Check the hash via the read-only server
        self.assertClientGetHash(ro_client, taskhash, unihash)

        # Ensure that reporting via the read-only server fails
        taskhash2 = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash2 = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash2 = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        with self.assertRaises(ConnectionError):
            ro_client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)

        # Ensure that the database was not modified
        self.assertClientGetHash(self.client, taskhash2, None)


    def test_slow_server_start(self):
        # Ensures that the server will exit correctly even if it gets a SIGTERM
        # before entering the main loop

        event = multiprocessing.Event()

        def prefunc(server, idx):
            nonlocal event
            server_prefunc(server, idx)
            # Block the server process before it enters its main loop so the
            # SIGTERM below is delivered during startup
            event.wait()

        def do_nothing(signum, frame):
            pass

        # Ignore SIGTERM in the test process itself; restore the old handler
        # on cleanup
        old_signal = signal.signal(signal.SIGTERM, do_nothing)
        self.addCleanup(signal.signal, signal.SIGTERM, old_signal)

        _, server = self.start_server(prefunc=prefunc)
        server.process.terminate()
        time.sleep(30)
        event.set()
        server.process.join(300)
        self.assertIsNotNone(server.process.exitcode, "Server did not exit in a timely manner!")

    def test_diverging_report_race(self):
        """A reported task will correctly pick up an updated unihash."""

        # This is a baseline report added to the database to ensure that there
        # is something to match against as equivalent
        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)

        # Add a report that is equivalent to Task 1. It should ignore the
        # provided unihash and report the unihash from task 1
        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
        unihash2 = taskhash2
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
        self.assertEqual(result['unihash'], unihash1)

        # Add another report for Task 2, but with a different outhash (e.g. the
        # task is non-deterministic). It should still be marked with the Task 1
        # unihash because it has the Task 2 taskhash, which is equivalent to
        # Task 1
        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
        self.assertEqual(result['unihash'], unihash1)


    def test_diverging_report_reverse_race(self):
        # Same idea as the previous test, but Tasks 2 and 3 are reported in
        # the opposite order

        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)

        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
        unihash2 = taskhash2

        # Report Task 3 first. Since there is nothing else in the database it
        # will use the client provided unihash
        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
        self.assertEqual(result['unihash'], unihash2)

        # Report Task 2. This is equivalent to Task 1 but there is already a mapping for
        # taskhash2 so it will report unihash2
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
        self.assertEqual(result['unihash'], unihash2)

        # The originally reported unihash for Task 3 should be unchanged even if it
        # shares a taskhash with Task 2
        self.assertClientGetHash(self.client, taskhash2, unihash2)
403
class TestHashEquivalenceUnixServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common tests against a server on a unix domain socket."""

    def get_server_addr(self, server_idx):
        # Place the socket inside the per-test temporary directory
        sock_path = os.path.join(self.temp_dir.name, 'sock%d' % server_idx)
        return "unix://" + sock_path
407
408
class TestHashEquivalenceUnixServerLongPath(HashEquivalenceTestSetup, unittest.TestCase):
    """Exercise a unix socket whose path approaches the platform limit."""

    # Deep directory tree used to produce a very long socket path
    DEEP_DIRECTORY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ccccccccccccccccccccccccccccccccccccccccccc"

    def get_server_addr(self, server_idx):
        # Create the deep directory on demand, then put the socket in it
        deep_path = os.path.join(self.temp_dir.name, self.DEEP_DIRECTORY)
        os.makedirs(deep_path, exist_ok=True)
        return "unix://" + os.path.join(deep_path, 'sock%d' % server_idx)


    def test_long_sock_path(self):
        """Simple test that hashes can be created."""
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, taskhash, None)

        report = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(report['unihash'], unihash, 'Server returned bad unihash')
426
427
class TestHashEquivalenceTCPServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Run the common tests against a server on a TCP socket."""

    def get_server_addr(self, server_idx):
        # Some hosts cause asyncio module to misbehave, when IPv6 is not enabled.
        # If IPv6 is enabled, it should be safe to use localhost directly, in general
        # case it is more reliable to resolve the IP address explicitly.
        host = socket.gethostbyname("localhost")
        return host + ":0"
434