#! /usr/bin/env python3
#
# Copyright (C) 2018-2019 Garmin Ltd.
#
# SPDX-License-Identifier: GPL-2.0-only
#

from . import create_server, create_client
import hashlib
import logging
import multiprocessing
import os
import sys
import tempfile
import threading
import unittest
import socket
import time
import signal

def server_prefunc(server, idx):
    logging.basicConfig(level=logging.DEBUG, filename='bbhashserv-%d.log' % idx, filemode='w',
                        format='%(levelname)s %(filename)s:%(lineno)d %(message)s')
    server.logger.debug("Running server %d" % idx)
    sys.stdout = open('bbhashserv-stdout-%d.log' % idx, 'w')
    sys.stderr = sys.stdout

class HashEquivalenceTestSetup(object):
    METHOD = 'TestMethod'

    server_index = 0

    def start_server(self, dbpath=None, upstream=None, read_only=False, prefunc=server_prefunc):
        self.server_index += 1
        if dbpath is None:
            dbpath = os.path.join(self.temp_dir.name, "db%d.sqlite" % self.server_index)

        def cleanup_server(server):
            if server.process.exitcode is not None:
                return

            server.process.terminate()
            server.process.join()

        server = create_server(self.get_server_addr(self.server_index),
                               dbpath,
                               upstream=upstream,
                               read_only=read_only)
        server.dbpath = dbpath

        server.serve_as_process(prefunc=prefunc, args=(self.server_index,))
        self.addCleanup(cleanup_server, server)

        def cleanup_client(client):
            client.close()

        client = create_client(server.address)
        self.addCleanup(cleanup_client, client)

        return (client, server)

    def setUp(self):
        if sys.version_info < (3, 5, 0):
            self.skipTest('Python 3.5 or later required')

        self.temp_dir = tempfile.TemporaryDirectory(prefix='bb-hashserv')
        self.addCleanup(self.temp_dir.cleanup)

        (self.client, self.server) = self.start_server()

    def assertClientGetHash(self, client, taskhash, unihash):
        result = client.get_unihash(self.METHOD, taskhash)
        self.assertEqual(result, unihash)
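
# HashEquivalenceTestSetup above and HashEquivalenceCommonTests below are mixins
# rather than TestCase subclasses: the setup class provides the server/client
# fixtures, the common class provides the shared test methods, and the concrete
# TestCase classes at the bottom of this file combine them and supply
# get_server_addr() for their transport (Unix domain socket or TCP).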

class HashEquivalenceCommonTests(object):
    def test_create_hash(self):
        # Simple test that hashes can be created
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, taskhash, None)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_create_equivalent(self):
        # Tests that a second reported task with the same outhash will be
        # assigned the same unihash
        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Report a different task with the same outhash. The returned unihash
        # should match the first task
        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash, unihash2)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_duplicate_taskhash(self):
        # Tests that duplicate reports of the same taskhash with different
        # outhash & unihash always return the unihash from the first reported
        # taskhash
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash3 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
        unihash3 = '9217a7d6398518e5dc002ed58f2cbbbc78696603'
        self.client.report_unihash(taskhash, self.METHOD, outhash3, unihash3)

        self.assertClientGetHash(self.client, taskhash, unihash)

    def test_huge_message(self):
        # Tests that a message larger than the client's maximum chunk size can
        # be reported and read back
        taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        self.assertClientGetHash(self.client, taskhash, None)

        siginfo = "0" * (self.client.max_chunk * 4)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash, {
            'outhash_siginfo': siginfo
        })
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        result_unihash = self.client.get_taskhash(self.METHOD, taskhash, True)
        self.assertEqual(result_unihash['taskhash'], taskhash)
        self.assertEqual(result_unihash['unihash'], unihash)
        self.assertEqual(result_unihash['method'], self.METHOD)

        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
        self.assertEqual(result_outhash['taskhash'], taskhash)
        self.assertEqual(result_outhash['method'], self.METHOD)
        self.assertEqual(result_outhash['unihash'], unihash)
        self.assertEqual(result_outhash['outhash'], outhash)
        self.assertEqual(result_outhash['outhash_siginfo'], siginfo)

    def test_stress(self):
        def query_server(failures):
            # Each query thread uses its own client connection
            client = create_client(self.server.address)
            try:
                for i in range(1000):
                    taskhash = hashlib.sha256()
                    taskhash.update(str(i).encode('utf-8'))
                    taskhash = taskhash.hexdigest()
                    result = client.get_unihash(self.METHOD, taskhash)
                    if result != taskhash:
                        failures.append("taskhash mismatch: %s != %s" % (result, taskhash))
            finally:
                client.close()

        # Report hashes
        for i in range(1000):
            taskhash = hashlib.sha256()
            taskhash.update(str(i).encode('utf-8'))
            taskhash = taskhash.hexdigest()
            self.client.report_unihash(taskhash, self.METHOD, taskhash, taskhash)

        failures = []
        threads = [threading.Thread(target=query_server, args=(failures,)) for t in range(100)]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.assertFalse(failures)

    def test_upstream_server(self):
        # Tests upstream server support. This is done by creating two servers
        # that share a database file. The downstream server has its upstream
        # set to the test server, whereas the side server doesn't. This allows
        # verification that hash requests are proxied to the upstream server,
        # because they appear on the downstream client but not on the side
        # client. It also verifies that the results are pulled into the
        # downstream database by checking that the downstream and side servers
        # match after the downstream is done waiting for all backfill tasks
        (down_client, down_server) = self.start_server(upstream=self.server.address)
        (side_client, side_server) = self.start_server(dbpath=down_server.dbpath)
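        # Resulting topology (for reference):
        #
        #   self.client -> self.server   (upstream; its own database)
        #   down_client -> down_server   (upstream=self.server; shares its database with side_server)
        #   side_client -> side_server   (no upstream; same database file as down_server)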

        def check_hash(taskhash, unihash, old_sidehash):
            nonlocal down_client
            nonlocal side_client

            # check upstream server
            self.assertClientGetHash(self.client, taskhash, unihash)

            # Hash should *not* be present on the side server
            self.assertClientGetHash(side_client, taskhash, old_sidehash)

            # Hash should be present on the downstream server, since it
            # will defer to the upstream server. This will trigger
            # the backfill in the downstream server
            self.assertClientGetHash(down_client, taskhash, unihash)

            # After waiting for the downstream client to finish backfilling the
            # task from the upstream server, it should appear in the side server
            # since the database is populated
            down_client.backfill_wait()
            self.assertClientGetHash(side_client, taskhash, unihash)

        # Basic report
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        check_hash(taskhash, unihash, None)

        # Duplicated taskhash with multiple output hashes and unihashes.
        # All servers should agree with the originally reported hash
        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        check_hash(taskhash, unihash, unihash)
        # Report an equivalent task. The side server will originally report
        # no unihash until it is backfilled
        taskhash3 = "044c2ec8aaf480685a00ff6ff49e6162e6ad34e1"
        unihash3 = "def64766090d28f627e816454ed46894bb3aab36"
        self.client.report_unihash(taskhash3, self.METHOD, outhash, unihash3)

        check_hash(taskhash3, unihash, None)

        # Test that reporting a unihash in the downstream client isn't
        # propagated to the upstream server
        taskhash4 = "e3da00593d6a7fb435c7e2114976c59c5fd6d561"
        outhash4 = "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a"
        unihash4 = "3b5d3d83f07f259e9086fcb422c855286e18a57d"
        down_client.report_unihash(taskhash4, self.METHOD, outhash4, unihash4)
        down_client.backfill_wait()

        self.assertClientGetHash(down_client, taskhash4, unihash4)
        self.assertClientGetHash(side_client, taskhash4, unihash4)
        self.assertClientGetHash(self.client, taskhash4, None)

        # Test that a report to the downstream server is able to find a
        # match which was previously reported to the upstream server
        taskhash5 = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash5 = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash5 = 'f46d3fbb439bd9b921095da657a4de906510d2cd'
        result = self.client.report_unihash(taskhash5, self.METHOD, outhash5, unihash5)

        taskhash6 = '35788efcb8dfb0a02659d81cf2bfd695fb30fafa'
        unihash6 = 'f46d3fbb439bd9b921095da657a4de906510d2ce'
        result = down_client.report_unihash(taskhash6, self.METHOD, outhash5, unihash6)
        self.assertEqual(result['unihash'], unihash5, 'Server failed to copy unihash from upstream')

        # Tests that get_taskhash and get_outhash requests on the downstream
        # server read through to the upstream server
        taskhash7 = '9d81d76242cc7cfaf7bf74b94b9cd2e29324ed74'
        outhash7 = '8470d56547eea6236d7c81a644ce74670ca0bbda998e13c629ef6bb3f0d60b69'
        unihash7 = '05d2a63c81e32f0a36542ca677e8ad852365c538'
        self.client.report_unihash(taskhash7, self.METHOD, outhash7, unihash7)

        result = down_client.get_taskhash(self.METHOD, taskhash7, True)
        self.assertEqual(result['unihash'], unihash7, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash7, 'Server failed to copy outhash from upstream')
        self.assertEqual(result['taskhash'], taskhash7, 'Server failed to copy taskhash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        taskhash8 = '86978a4c8c71b9b487330b0152aade10c1ee58aa'
        outhash8 = 'ca8c128e9d9e4a28ef24d0508aa20b5cf880604eacd8f65c0e366f7e0cc5fbcf'
        unihash8 = 'd8bcf25369d40590ad7d08c84d538982f2023e01'
        self.client.report_unihash(taskhash8, self.METHOD, outhash8, unihash8)

        result = down_client.get_outhash(self.METHOD, outhash8, taskhash8)
        self.assertEqual(result['unihash'], unihash8, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['outhash'], outhash8, 'Server failed to copy outhash from upstream')
        self.assertEqual(result['taskhash'], taskhash8, 'Server failed to copy taskhash from upstream')
        self.assertEqual(result['method'], self.METHOD)

        taskhash9 = 'ae6339531895ddf5b67e663e6a374ad8ec71d81c'
        outhash9 = 'afc78172c81880ae10a1fec994b5b4ee33d196a001a1b66212a15ebe573e00b5'
        unihash9 = '6662e699d6e3d894b24408ff9a4031ef9b038ee8'
        self.client.report_unihash(taskhash9, self.METHOD, outhash9, unihash9)

        result = down_client.get_taskhash(self.METHOD, taskhash9, False)
        self.assertEqual(result['unihash'], unihash9, 'Server failed to copy unihash from upstream')
        self.assertEqual(result['taskhash'], taskhash9, 'Server failed to copy taskhash from upstream')
        self.assertEqual(result['method'], self.METHOD)

    def test_ro_server(self):
        (ro_client, ro_server) = self.start_server(dbpath=self.server.dbpath, read_only=True)

        # Report a hash via the read-write server
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Check the hash via the read-only server
        self.assertClientGetHash(ro_client, taskhash, unihash)

        # Ensure that reporting via the read-only server fails
        taskhash2 = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash2 = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash2 = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        with self.assertRaises(ConnectionError):
            ro_client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)

        # Ensure that the database was not modified
        self.assertClientGetHash(self.client, taskhash2, None)


    def test_slow_server_start(self):
        # Ensures that the server will exit correctly even if it gets a SIGTERM
        # before entering the main loop

        event = multiprocessing.Event()

        def prefunc(server, idx):
            nonlocal event
            server_prefunc(server, idx)
            event.wait()

        def do_nothing(signum, frame):
            pass

        old_signal = signal.signal(signal.SIGTERM, do_nothing)
        self.addCleanup(signal.signal, signal.SIGTERM, old_signal)

        _, server = self.start_server(prefunc=prefunc)
        server.process.terminate()
        time.sleep(30)
        event.set()
        server.process.join(300)
        self.assertIsNotNone(server.process.exitcode, "Server did not exit in a timely manner!")

    def test_diverging_report_race(self):
        # Tests that a reported task will correctly pick up an updated unihash

        # This is a baseline report added to the database to ensure that there
        # is something to match against as equivalent
        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)

        # Add a report that is equivalent to Task 1. It should ignore the
        # provided unihash and report the unihash from task 1
        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
        unihash2 = taskhash2
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
        self.assertEqual(result['unihash'], unihash1)
        # Add another report for Task 2, but with a different outhash (e.g. the
        # task is non-deterministic). It should still be marked with the Task 1
        # unihash because it has the Task 2 taskhash, which is equivalent to
        # Task 1
        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
        self.assertEqual(result['unihash'], unihash1)


    def test_diverging_report_reverse_race(self):
        # Same idea as the previous test, but Tasks 2 and 3 are reported in
        # the opposite order

        outhash1 = 'afd11c366050bcd75ad763e898e4430e2a60659b26f83fbb22201a60672019fa'
        taskhash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        unihash1 = '3bde230c743fc45ab61a065d7a1815fbfa01c4740e4c895af2eb8dc0f684a4ab'
        result = self.client.report_unihash(taskhash1, self.METHOD, outhash1, unihash1)

        taskhash2 = '6259ae8263bd94d454c086f501c37e64c4e83cae806902ca95b4ab513546b273'
        unihash2 = taskhash2

        # Report Task 3 first. Since there is nothing else in the database it
        # will use the client provided unihash
        outhash3 = 'd2187ee3a8966db10b34fe0e863482288d9a6185cb8ef58a6c1c6ace87a2f24c'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
        self.assertEqual(result['unihash'], unihash2)

        # Report Task 2. This is equivalent to Task 1 but there is already a mapping for
        # taskhash2 so it will report unihash2
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
        self.assertEqual(result['unihash'], unihash2)

        # The originally reported unihash for Task 3 should be unchanged even if it
        # shares a taskhash with Task 2
        self.assertClientGetHash(self.client, taskhash2, unihash2)


class TestHashEquivalenceUnixServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    def get_server_addr(self, server_idx):
        return "unix://" + os.path.join(self.temp_dir.name, 'sock%d' % server_idx)


class TestHashEquivalenceUnixServerLongPath(HashEquivalenceTestSetup, unittest.TestCase):
    DEEP_DIRECTORY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ccccccccccccccccccccccccccccccccccccccccccc"
    def get_server_addr(self, server_idx):
        os.makedirs(os.path.join(self.temp_dir.name, self.DEEP_DIRECTORY), exist_ok=True)
        return "unix://" + os.path.join(self.temp_dir.name, self.DEEP_DIRECTORY, 'sock%d' % server_idx)


    def test_long_sock_path(self):
        # Simple test that hashes can be created when the server socket lives
        # at a very long path
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, taskhash, None)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')


class TestHashEquivalenceTCPServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    def get_server_addr(self, server_idx):
        # Some hosts cause the asyncio module to misbehave when IPv6 is not
        # enabled. If IPv6 is enabled it should be safe to use "localhost"
        # directly, but in the general case it is more reliable to resolve the
        # IP address explicitly. Port 0 lets the OS pick a free port.
        return socket.gethostbyname("localhost") + ":0"