#!/usr/bin/env python2

import os
import sys

try:
    import simplejson
except ImportError:
    # Fall back to the standard library; only dumps(indent=, sort_keys=)
    # is used below, which both modules provide.
    import json as simplejson


# Marker that precedes a serialized payload on a log line.
_SERIALIZE_TOKEN = " SERIALIZE: "

# Marker that precedes the block height on a log line.
_SNAPSHOT_TOKEN = "Snapshotting block "


def _print_err( msg ):
    """
    Write @msg followed by a newline to stderr.
    """
    sys.stderr.write("%s\n" % (msg,))


def parse_serialize_payload( line ):
    """
    Given a SERIALIZE payload, parse the payload.

    The payload is a one-character opcode, a ':' separator, and then a
    comma-separated sequence of '$key=$len:$value' records, where $len is
    the number of characters in $value (so a value may itself contain
    ',', '=' or ':').

    Return a dict that maps each field name to its value,
    where the dict will have a special '__order__' key
    that maps to the order in which the fields occur.

    Raises IndexError if the payload is too short to hold the opcode and
    separator, AssertionError if the separator is missing, and ValueError
    if a record is malformed (missing '=' or ':', non-integer or negative
    length).  Diagnostics are written to stderr before a ValueError from a
    missing delimiter or non-integer length is re-raised.
    """
    ret = {}
    order = []

    # line[0] is the opcode (not returned); line[1] should be a separator
    assert line[1] == ':', "Missing separator for opcode"
    cur = 2

    while cur < len(line):
        # find next '='
        try:
            eq_pos = line.index('=', cur)
        except ValueError:
            _print_err("cur=%s, line_buf=%s" % (cur, line[cur:]))
            raise

        key = line[cur:eq_pos]
        i_key = eq_pos - cur

        # find the ':' that terminates the length
        try:
            colon_pos = line.index(':', eq_pos + 1)
        except ValueError:
            _print_err("cur=%s, i_key=%s, key=%s, line_buf=%s" % (cur, i_key, key, line[eq_pos+1:]))
            raise

        len_str = line[eq_pos+1:colon_pos]
        i_len = colon_pos - (eq_pos + 1)

        try:
            value_len = int(len_str)
        except ValueError:
            _print_err("cur=%s, i_key=%s, key=%s, i_len=%s, len_str=%s, line_buf=%s" % (cur, i_key, key, i_len, len_str, line[eq_pos+1:]))
            raise

        if value_len < 0:
            # a negative length would move the cursor backwards
            raise ValueError("Negative value length %s for key %s" % (value_len, key))

        value_start = colon_pos + 1
        value = line[value_start:value_start+value_len]

        order.append(key)
        ret[key] = value

        # advance past the value and the ',' that follows it
        cur = value_start + value_len + 1

    ret['__order__'] = order
    return ret


def extract_serialize_payload( line ):
    """
    Given a line that contains the ' SERIALIZE: ' token, return everything
    that follows the first occurrence of that token.

    Raises AssertionError if the token is absent.
    """
    assert _SERIALIZE_TOKEN in line, "Missing 'SERIALIZE:' token"
    return line.split(_SERIALIZE_TOKEN, 1)[1]


def find_serialization_outputs( test_output_path ):
    """
    Find the SERIALIZE outputs from the given test log output.
    Parse them and return the parsed list.

    Add '__block_height__' to each item (the most recent height seen in a
    "Snapshotting block" line before the serialization was found).
    Add '__ignore_fields__' list to each item for fields we can ignore at
    that height.

    Reading stops at end of file or at the first line containing
    "Scenario checks passed; verifying history".
    """
    ret = []
    curr_height = 0
    ignore_fields = {}

    with open(test_output_path, "r") as f:
        for line in f:
            # if we find this line, then we're out of tests
            if "Scenario checks passed; verifying history" in line:
                break

            # block height?
            if _SNAPSHOT_TOKEN in line:
                rest = line.split(_SNAPSHOT_TOKEN, 1)[1]
                try:
                    curr_height = int(rest)
                except ValueError:
                    # only other possible string
                    assert "consensus hash of " in rest

            line = line.strip()

            if line.startswith("BLOCKSTACK_SERIALIZATION_CHECK_IGNORE"):
                # format: BLOCKSTACK_SERIALIZATION_CHECK_IGNORE field
                # NOTE: since this gets printed *before* the "Snapshotting block", we'll have to use curr_height+1
                parts = line.split()
                assert len(parts) == 2
                ignore_fields.setdefault(curr_height + 1, []).append(parts[1])

            if _SERIALIZE_TOKEN in line:
                payload = parse_serialize_payload( extract_serialize_payload( line ) )
                payload['__block_height__'] = curr_height
                # copy, so each payload owns its list
                payload['__ignore_fields__'] = list(ignore_fields.get(curr_height, []))
                ret.append( payload )

    return ret


def find_ignore_serialization_fields( test_output_path ):
    """
    Find every line that (after stripping) starts with
    "SERIALIZATION_CHECK_IGNORE" and return the list of the field names
    that follow it, in file order.

    Each such line must be exactly two space-separated parts; otherwise
    AssertionError is raised.
    """
    ret = []
    with open(test_output_path, "r") as f:
        for line in f:
            line = line.strip()
            if line.startswith("SERIALIZATION_CHECK_IGNORE"):
                parts = line.split(" ")
                assert len(parts) == 2
                ret.append(parts[1])

    return ret


def serialization_eq( height, serialization_1, serialization_2, ignore=None ):
    """
    Given two parsed serialization payloads, verify that they
    represent the same data (ignoring fields given in @ignore).

    Fields are compared pairwise by position in each payload's
    '__order__' list; a pair is skipped if either key is in @ignore.
    @height is only used in the error message.

    Return (True, None) on success
    Return (False, {'error': ...}) on failure
    """
    if ignore is None:
        ignore = []

    order_1 = serialization_1['__order__']
    order_2 = serialization_2['__order__']

    if len(order_1) != len(order_2):
        return False, {'error': "Order length mismatch (%s != %s)" % (len(order_1), len(order_2))}

    mismatches = []
    for key_1, key_2 in zip(order_1, order_2):
        if key_1 in ignore or key_2 in ignore:
            continue

        if serialization_1[key_1] != serialization_2[key_2]:
            mismatches.append( (key_1, key_2) )

    if not mismatches:
        return True, None

    err_parts = []
    for key_1, key_2 in mismatches:
        err_parts.append("Key mismatch at %s: %s (%s) != %s (%s)\n" % (height, key_1, serialization_1[key_1], key_2, serialization_2[key_2]))

    err_parts.append("First serialization:\n%s\n" % simplejson.dumps(serialization_1, indent=4, sort_keys=True))
    err_parts.append("Second serialization:\n%s\n" % simplejson.dumps(serialization_2, indent=4, sort_keys=True))
    return False, {'error': "".join(err_parts)}


def group_by_block_height( serializations ):
    """
    Given a list of parsed serializations, group them by __block_height__.
    Return a dict mapping each height to the list of serializations at
    that height, in their original relative order.
    """
    ret = {}
    for s in serializations:
        ret.setdefault(s['__block_height__'], []).append(s)

    return ret


def compare_serializations( test_output_old, test_output_new ):
    """
    Given the paths to two different test outputs, verify
    that their sequence of SERIALIZEs match (up to txid and the other
    fields in the built-in ignore list, plus any per-height ignore fields
    recorded in the new log).

    Within a block height, order does not matter: every serialization in
    the old log must pair up with a distinct serialization in the new log.

    Return True if they match; otherwise print diagnostics to stderr and
    return False.
    """
    serializations_old = find_serialization_outputs( test_output_old )
    serializations_new = find_serialization_outputs( test_output_new )
    serialization_ignore = ['txid','consensus_hash','name_consensus_hash','name_hash','vtxindex']

    rc = True

    if len(serializations_old) != len(serializations_new):
        _print_err(" Mismatched number of serializations (%s != %s)" % (len(serializations_old), len(serializations_new)))
        return False

    # group by block height (since bitcoind can re-order transactions)
    block_serializations_old = group_by_block_height( serializations_old )
    block_serializations_new = group_by_block_height( serializations_new )

    for height in sorted(block_serializations_old):
        if height not in block_serializations_new:
            _print_err(" Missing block height %s in second log" % height)
            return False

    for height in sorted(block_serializations_new):
        if height not in block_serializations_old:
            _print_err(" Missing block height %s in first log" % height)
            return False

    for height in sorted(block_serializations_old):
        s_old = block_serializations_old[height]
        s_new = block_serializations_new[height]

        if len(s_old) != len(s_new):
            _print_err(" Mismatched number of serializations at block %s (%s != %s)" % (height, len(s_old), len(s_new)))
            return False

        # candidates in the new log that have not been paired up yet
        unmatched_new = list(s_new)

        for s1 in s_old:
            # has to match one serialization in the second listing
            # order doesn't matter, since bitcoind reorders them anyway.
            # Match state is tracked per old serialization, so one
            # successful match cannot hide a later mismatch in this block.
            matched = False
            err = None

            for idx, s2 in enumerate(unmatched_new):
                # serializations to ignore (use the fields from the new log to ignore fields in the old log)
                ignore = serialization_ignore + s2['__ignore_fields__']

                res, err = serialization_eq( height, s1, s2, ignore=ignore )
                if res:
                    matched = True
                    del unmatched_new[idx]
                    break

            if not matched:
                # soldier on here so we can print all mismatches
                _print_err(" Mismatched serializations in block %s" % height)
                if err is not None:
                    _print_err(err['error'])

                rc = False

    return rc


def is_test_successful( test_output ):
    """
    Is this a successful test?
    True if some line, once stripped, starts with "SUCCESS ".
    """
    with open(test_output, "r") as f:
        return any(line.strip().startswith("SUCCESS ") for line in f)


def skip_check( test_output ):
    """
    Should we skip the serialization test?  i.e. is it expected to fail?
    True if some line, once stripped, is exactly
    "BLOCKSTACK_SERIALIZATION_CHANGE_BEHAVIOR".
    """
    with open(test_output, "r") as f:
        # the marker line flags a known breaking change
        return any(line.strip() == "BLOCKSTACK_SERIALIZATION_CHANGE_BEHAVIOR" for line in f)


def main( argv ):
    """
    Command-line entry point.  @argv is the full argument vector
    (program name, old test output path, new test output path).

    Return the process exit code:
    0 if the serializations match;
    1 on bad usage, a failed new test, or mismatched serializations;
    2 if the old test failed or the new test is a known breaking change.
    """
    try:
        test_output_old = argv[1]
        test_output_new = argv[2]
    except IndexError:
        _print_err("Usage: %s TEST_OUTPUT_OLD TEST_OUTPUT_NEW" % argv[0])
        return 1

    if not is_test_successful( test_output_old ):
        _print_err(" %s is a failed old test" % test_output_old)
        return 2

    if not is_test_successful( test_output_new ):
        _print_err(" %s is a failed new test" % test_output_new)
        return 1

    if skip_check( test_output_new ):
        _print_err(" %s is a breaking chnage" % test_output_new)
        return 2

    if compare_serializations( test_output_old, test_output_new ):
        return 0

    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))