diff --git a/bitcoin_tools/analysis/plots.py b/bitcoin_tools/analysis/plots.py index 71eb5df..2eae751 100644 --- a/bitcoin_tools/analysis/plots.py +++ b/bitcoin_tools/analysis/plots.py @@ -78,7 +78,7 @@ def plot_distribution(xs, ys, title, xlabel, ylabel, log_axis=None, save_fig=Fal plt.plot(xs, ys) # marker='o' else: for i in range(len(xs)): - plt.plot(xs[i], ys[i], ' ', linestyle='solid') # marker='o' + plt.plot(xs[i], ys[i], linestyle='solid') # marker='o' # Plot title and xy labels plt.title(title, {'color': 'k', 'fontsize': font_size}) @@ -102,6 +102,10 @@ def plot_distribution(xs, ys, title, xlabel, ylabel, log_axis=None, save_fig=Fal ymin, ymax = plt.ylim() plt.ylim(ymin, y_sup_lim) + tick_val = [0, 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000] + tick_lab = ['0', '100K', '200K', '300K', '400K', '500K', '600K', '700K', '800K'] + plt.xticks(tick_val, tick_lab) + # Output result if save_fig: plt.savefig(CFG.figs_path + save_fig + '.pdf', format='pdf', dpi=600) diff --git a/bitcoin_tools/analysis/status/README.md b/bitcoin_tools/analysis/status/README.md index 6ca5025..3fdb0a9 100644 --- a/bitcoin_tools/analysis/status/README.md +++ b/bitcoin_tools/analysis/status/README.md @@ -3,7 +3,7 @@ **STATUS** (**ST**atistical **A**nalysis **T**ool for **U**txo **S**et) is an open source tool that provides an easy way to access, decode and analyze data from the Bitcoin's `utxo set`. The accompanying working paper further explains its design, application, and presents results of a recently performed analysis: [https://eprint.iacr.org/2017/1095.pdf](https://eprint.iacr.org/2017/1095.pdf) -STATUS is coded in Python 2 and works for both the existing versions of Bitcoin Core's `utxo set`, that is, the first defined format (versions 0.8 - 0.14) and the recently defined one (version 0.15). 
+STATUS is developed and tested with Python 3.11 and works for both the existing versions of Bitcoin Core's `utxo set`, that is, the first defined format (versions 0.8 - 0.14) and the recently defined one (version 0.15). STATUS works, from now on, with 0.15 format. For 0.8-0.14 version refer to `ldb_0.14` branch. diff --git a/bitcoin_tools/analysis/status/data_dump.py b/bitcoin_tools/analysis/status/data_dump.py index 21276ca..0288e35 100644 --- a/bitcoin_tools/analysis/status/data_dump.py +++ b/bitcoin_tools/analysis/status/data_dump.py @@ -1,7 +1,7 @@ from bitcoin_tools import CFG from bitcoin_tools.analysis.status import FEE_STEP from bitcoin_tools.analysis.status.utils import check_multisig, get_min_input_size, roundup_rate, check_multisig_type, \ - get_serialized_size_fast, get_est_input_size, load_estimation_data, check_native_segwit + get_serialized_size_fast, get_est_input_size, load_estimation_data, check_native_segwit, check_native_taproot import ujson @@ -118,10 +118,14 @@ def utxo_dump(fin_name, fout_name, coin, count_p2sh=False, non_std_only=False): else: multisig = check_multisig_type(out["data"]) segwit = check_native_segwit(out["data"]) + taproot = check_native_taproot(out["data"]) + if multisig: non_std_type = multisig elif segwit[0]: non_std_type = segwit[1] + elif taproot[0]: + non_std_type = taproot[1] else: non_std_type = False diff --git a/bitcoin_tools/analysis/status/kill_at_heigh.py b/bitcoin_tools/analysis/status/kill_at_heigh.py index e847c27..2634f1f 100644 --- a/bitcoin_tools/analysis/status/kill_at_heigh.py +++ b/bitcoin_tools/analysis/status/kill_at_heigh.py @@ -45,10 +45,10 @@ def kill_line(line, kill_at_height, pid): try: height_str = re.search(' height=\d+ ', line) block_height = height_str.group()[8:-1] - print " Block height {} read".format(block_height) + print (" Block height {} read".format(block_height)) if block_height == kill_at_height: kill(pid, SIGTERM) - print "Process with pid {} KILLED!!!".format(pid) + print ("Process 
with pid {} KILLED!!!".format(pid)) exit() except AttributeError as err: @@ -79,7 +79,7 @@ def follow(thefile, kill_at_height, pid, start_at=START_AT_ORIGIN): if line and not kill_line(line, kill_at_height, pid): continue if not line: - print "Waiting {} for new data...".format(SLEEP_INT) + print ("Waiting {} for new data...".format(SLEEP_INT)) sleep(SLEEP_INT) continue @@ -107,15 +107,15 @@ def follow(thefile, kill_at_height, pid, start_at=START_AT_ORIGIN): start = START_AT_ORIGIN if not kill_at_height or not pid: - print "Usage: " - print " python kill_at_heigh.py -k block_heigh -p pid [-f file] [-o] [-e] " + print ("Usage: ") + print (" python kill_at_heigh.py -k block_heigh -p pid [-f file] [-o] [-e] ") exit() - print "Starting to monitor file {} at {}".format(log_filename, start) - print "I'm going to kill process with pid {} at height {}".format(pid, kill_at_height) + print ("Starting to monitor file {} at {}".format(log_filename, start)) + print ("I'm going to kill process with pid {} at height {}".format(pid, kill_at_height)) logfile = open(log_filename, "r") follow(logfile, kill_at_height, pid, start_at=start) except IOError as err: - print "Bitcoind log file {} not found".format(log_filename) + print ("Bitcoind log file {} not found".format(log_filename)) diff --git a/bitcoin_tools/analysis/status/plots.py b/bitcoin_tools/analysis/status/plots.py index e234f98..c4019f3 100644 --- a/bitcoin_tools/analysis/status/plots.py +++ b/bitcoin_tools/analysis/status/plots.py @@ -104,20 +104,20 @@ def overview_from_file(tx_fin_name, utxo_fin_name): samples = get_samples(['num_utxos', 'total_len', 'height'], fin_name=tx_fin_name) - print "\t Max height: ", str(max(samples['height'])) - print "\t Num. of tx: ", str(len(samples['num_utxos'])) - print "\t Num. of UTXOs: ", str(sum(samples['num_utxos'])) - print "\t Avg. num. of UTXOs per tx: ", str(np.mean(samples['num_utxos'])) - print "\t Std. num. 
of UTXOs per tx: ", str(np.std(samples['num_utxos'])) - print "\t Median num. of UTXOs per tx: ", str(np.median(samples['num_utxos'])) + print ("\t Max height: ", str(max(samples['height']))) + print ("\t Num. of tx: ", str(len(samples['num_utxos']))) + print ("\t Num. of UTXOs: ", str(sum(samples['num_utxos']))) + print ("\t Avg. num. of UTXOs per tx: ", str(np.mean(samples['num_utxos']))) + print ("\t Std. num. of UTXOs per tx: ", str(np.std(samples['num_utxos']))) + print ("\t Median num. of UTXOs per tx: ", str(np.median(samples['num_utxos']))) len_attribute = "total_len" - print "\t Size of the (serialized) UTXO set: ", str(np.sum(samples[len_attribute])) + print ("\t Size of the (serialized) UTXO set: ", str(np.sum(samples[len_attribute]))) samples = get_samples("register_len", fin_name=utxo_fin_name) len_attribute = "register_len" - print "\t Avg. size per register: ", str(np.mean(samples[len_attribute])) - print "\t Std. size per register: ", str(np.std(samples[len_attribute])) - print "\t Median size per register: ", str(np.median(samples[len_attribute])) + print ("\t Avg. size per register: ", str(np.mean(samples[len_attribute]))) + print ("\t Std. size per register: ", str(np.std(samples[len_attribute]))) + print ("\t Median size per register: ", str(np.median(samples[len_attribute]))) diff --git a/bitcoin_tools/analysis/status/run_analysis.py b/bitcoin_tools/analysis/status/run_analysis.py index 63d4957..e1651ef 100644 --- a/bitcoin_tools/analysis/status/run_analysis.py +++ b/bitcoin_tools/analysis/status/run_analysis.py @@ -6,6 +6,7 @@ from bitcoin_tools import CFG from getopt import getopt from sys import argv +import copy def set_out_names(count_p2sh, non_std_only): @@ -57,10 +58,10 @@ def non_std_outs_analysis(samples): # and 3-3, and put the rest into "Other". 
groups = [[u'multisig-1-3'], [u'multisig-1-2'], [u'multisig-1-1'], [u'multisig-3-3'], [u'multisig-2-2'], - [u'multisig-2-3'], ["P2WSH"], ["P2WPKH"], [False, u'multisig-OP_NOTIF-OP_NOTIF', + [u'multisig-2-3'], ["P2WSH"], ["P2WPKH"],["P2TR"], [False, u'multisig-OP_NOTIF-OP_NOTIF', u'multisig-<2153484f55544f555420544f2023424954434f494e2d41535345545320202020202020202' u'0202020202020202020202020202020202020202020202020202020>-1']] - labels = ['M. 1-3', 'M. 1-2', 'M. 1-1', 'M. 3-3', 'M. 2-2', 'M. 2-3', "P2WSH", "P2WPKH", 'Other'] + labels = ['M. 1-3', 'M. 1-2', 'M. 1-1', 'M. 3-3', 'M. 2-2', 'M. 2-3', "P2WSH", "P2WPKH","P2TR", 'Other'] out_name = "utxo_non_std_type" @@ -69,6 +70,26 @@ def non_std_outs_analysis(samples): "#A69229", "#B69229", "#F69229"], labels_out=True) +def all_outs_analysis(samples): + + for i in range(len(samples["out_type"])): + if samples["non_std_type"][i] == "std": + samples["non_std_type"][i] = samples["out_type"][i] + + samples_special = samples.pop("non_std_type") + + groups = [[0], [2, 3, 4, 5], [1], [u'multisig-1-3', u'multisig-1-2', u'multisig-1-1', u'multisig-3-3', + u'multisig-2-2', u'multisig-2-3'], ["P2WSH"], ["P2WPKH"], ["P2TR"]] + + labels = ['P2PKH', 'P2PK', 'P2SH', 'MULTISIG', "P2WSH", + "P2WPKH", "P2TR"] + + out_name = "utxo_all_type" + + plot_pie_chart_from_samples(samples=samples_special, save_fig=out_name, labels=labels, groups=groups, title="", + labels_out=True) + + def tx_based_analysis(tx_fin_name): """ Performs a transaction based analysis from a given input file (resulting from a transaction dump of the chainstate) @@ -127,7 +148,7 @@ def utxo_based_analysis(utxo_fin_name): log_axis = [False, 'x', [False, 'x'], [False, 'x'], [False, 'x'], [False, 'x']] x_attributes_pie = ['out_type', 'out_type'] - xlabels_pie = [['C-even', 'C-odd', 'U-even', 'U-odd'], ['P2PKH', 'P2PK', 'P2SH', 'Other']] + xlabels_pie = [['C-even', 'C-odd', 'U-even', 'U-odd'], ['P2PKH', 'P2PK', 'P2SH']] out_names_pie = ["utxo_pk_types", "utxo_types"] 
pie_groups = [[[2], [3], [4], [5]], [[0], [2, 3, 4, 5], [1]]] @@ -136,17 +157,19 @@ def utxo_based_analysis(utxo_fin_name): # Since the attributes for the pie chart are already included in the normal chart, we won't pass them to the # sampling function. samples = get_samples(x_attributes + [x_attribute_special], fin_name=utxo_fin_name) +    samples_all = copy.deepcopy(samples) samples_special = samples.pop(x_attribute_special) -    for attribute, label, log, out in zip(x_attributes, xlabels, log_axis, out_names): -        xs, ys = get_cdf(samples[attribute], normalize=True) -        plots_from_samples(xs=xs, ys=ys, xlabel=label, log_axis=log, save_fig=out, ylabel="Number of UTXOs") +    #for attribute, label, log, out in zip(x_attributes, xlabels, log_axis, out_names): +    #    xs, ys = get_cdf(samples[attribute], normalize=True) +    #    plots_from_samples(xs=xs, ys=ys, xlabel=label, log_axis=log, save_fig=out, ylabel="Number of UTXOs") -    for attribute, label, out, groups in (zip(x_attributes_pie, xlabels_pie, out_names_pie, pie_groups)): -        plot_pie_chart_from_samples(samples=samples[attribute], save_fig=out, labels=label, title="", groups=groups, -                                    colors=["#165873", "#428C5C", "#4EA64B", "#ADD96C"], labels_out=True) -    # Special case: non-standard -    non_std_outs_analysis(samples_special) +    #for attribute, label, out, groups in (zip(x_attributes_pie, xlabels_pie, out_names_pie, pie_groups)): +    #    plot_pie_chart_from_samples(samples=samples[attribute], save_fig=out, labels=label, title="", groups=groups, +    #                                colors=["#165873", "#428C5C", "#4EA64B", "#ADD96C"], labels_out=True) +    # Special case: non-standard +    #non_std_outs_analysis(samples_special) +    all_outs_analysis(samples_all) def dust_analysis(utxo_fin_name, f_dust, fltr=None): @@ -244,6 +267,7 @@ def utxo_based_analysis_with_filters(utxo_fin_name): lambda x: x["out_type"] in [2, 3, 4, 5], lambda x: x["non_std_type"] == "P2WPKH", lambda x: x["non_std_type"] == "P2WSH", +              lambda x: x["non_std_type"] == "P2TR", lambda x: x["non_std_type"] is
not False and "multisig" in x["non_std_type"], lambda x: x["non_std_type"] is False, lambda x: x["amount"] == 1, @@ -256,7 +280,7 @@ def utxo_based_analysis_with_filters(utxo_fin_name): lambda x: x["out_type"] == 1, lambda x: x["amount"] == 1] - legends = [['P2PKH', 'P2SH', 'P2PK', 'P2WPKH', 'P2WSH', 'Multisig', 'Other'], + legends = [['P2PKH', 'P2SH', 'P2PK', 'P2WPKH', 'P2WSH', 'P2TR' , 'Multisig', 'Other'], ['$=1$', '$1 < x \leq 10$', '$10 < x \leq 10^2$', '$10^2 < x \leq 10^4$', '$10^4 < x \leq 10^6$', '$10^6 < x \leq 10^8$', '$10^8 < x$'], ['P2SH'], ['Amount = 1']] comparative = [True, True, False, False] @@ -322,33 +346,33 @@ def run_experiment(coin, chainstate, count_p2sh, non_std_only): f_utxos, f_parsed_txs, f_parsed_utxos, f_dust = set_out_names(count_p2sh, non_std_only) # Parse all the data in the chainstate. - print "Parsing the chainstate." + print("Parsing the chainstate.") parse_ldb(f_utxos, fin_name=chainstate) # Parses transactions and utxos from the dumped data. - print "Adding meta-data for transactions and UTXOs." + print("Adding meta-data for transactions and UTXOs.") transaction_dump(f_utxos, f_parsed_txs) utxo_dump(f_utxos, f_parsed_utxos, coin, count_p2sh=count_p2sh, non_std_only=non_std_only) # Print basic stats from data - print "Running overview analysis." + print("Running overview analysis.") overview_from_file(f_parsed_txs, f_parsed_utxos) # Generate plots from tx data (from f_parsed_txs) - print "Running transaction based analysis." + print("Running transaction based analysis.") tx_based_analysis(f_parsed_txs) # Generate plots from utxo data (from f_parsed_utxos) - print "Running UTXO based analysis." + print ("Running UTXO based analysis.") utxo_based_analysis(f_parsed_utxos) # # Aggregates dust and generates plots. - print "Running dust analysis." + print ("Running dust analysis.") dust_analysis(f_parsed_utxos, f_dust) dust_analysis_all_fees(f_parsed_utxos) # Generate plots with filters - print "Running analysis with filters." 
+ print ("Running analysis with filters.") utxo_based_analysis_with_filters(f_parsed_utxos) tx_based_analysis_with_filters(f_parsed_txs) diff --git a/bitcoin_tools/analysis/status/run_comparative_analysis.py b/bitcoin_tools/analysis/status/run_comparative_analysis.py index fd2a10b..b2cc606 100644 --- a/bitcoin_tools/analysis/status/run_comparative_analysis.py +++ b/bitcoin_tools/analysis/status/run_comparative_analysis.py @@ -134,13 +134,13 @@ def run_experiment(f_dust, f_parsed_utxos, f_parsed_txs): :rtype: None """ - print "Running comparative data analysis." + print ("Running comparative data analysis.") # Comparative dust analysis between different snapshots fin_names = ['height-' + str(i) + 'K/' + f_parsed_utxos + '.json' for i in range(100, 550, 50)] dust_files = ['height-' + str(i) + 'K/' + f_dust + '.json' for i in range(100, 550, 50)] legend = [str(i) + 'K' for i in range(100, 550, 50)] - print "Comparing dust from different snapshots." + print ("Comparing dust from different snapshots.") # Get dust files from different dates to compare (Change / Add the ones you'll need) compare_dust(dust_files=dust_files, legend=legend) @@ -154,12 +154,12 @@ def run_experiment(f_dust, f_parsed_utxos, f_parsed_txs): # Comparative analysis between different snapshots # UTXO amount comparison - print "Comparing UTXO amount from different snapshots." + print ("Comparing UTXO amount from different snapshots.") compare_attribute(fin_names=fin_names, x_attribute='amount', xlabel='Amount (Satoshi)', legend=legend, out_name='cmp_utxo_amount') # UTXO size comparison - print "Comparing UTXO size from different snapshots." 
+ print ("Comparing UTXO size from different snapshots.") compare_attribute(fin_names=fin_names, x_attribute='register_len', xlabel='Size (bytes)', legend=legend, out_name='cmp_utxo_size') diff --git a/bitcoin_tools/analysis/status/utils.py b/bitcoin_tools/analysis/status/utils.py index e15c860..7714ad1 100644 --- a/bitcoin_tools/analysis/status/utils.py +++ b/bitcoin_tools/analysis/status/utils.py @@ -9,6 +9,7 @@ from bitcoin_tools.core.keys import get_uncompressed_pk + def txout_compress(n): """ Compresses the Satoshi amount of a UTXO to be stored in the LevelDB. Code is a port from the Bitcoin Core C++ source: @@ -51,7 +52,7 @@ def txout_decompress(x): return 0 x -= 1 e = x % 10 - x /= 10 + x = int(x/10) if e < 9: d = (x % 9) + 1 x /= 9 @@ -61,7 +62,7 @@ def txout_decompress(x): while e > 0: n *= 10 e -= 1 - return n + return int(n) def b128_encode(n): @@ -213,7 +214,6 @@ def decode_utxo(coin, outpoint): tx_id = outpoint[2:66] # Finally get the transaction index by decoding the remaining bytes as a b128 VARINT tx_index = b128_decode(outpoint[66:]) - # Once all the outpoint data has been parsed, we can proceed with the data encoded in the coin, that is, block # height, whether the transaction is coinbase or not, value, script type and script. 
# We start by decoding the first b128 VARINT of the provided data, that may contain 2*Height + coinbase @@ -305,15 +305,15 @@ def display_decoded_utxo(decoded_utxo): :rtype: None """ - print "isCoinbase: " + str(decoded_utxo['coinbase']) + print("isCoinbase: " + str(decoded_utxo['coinbase'])) out = decoded_utxo['out'] - print "vout[" + str(decoded_utxo['index']) + "]:" - print "\tSatoshi amount: " + str(out['amount']) - print "\tOutput code type: " + str(out['out_type']) - print "\tHash160 (Address): " + out['data'] + print("vout[" + str(decoded_utxo['index']) + "]:") + print("\tSatoshi amount: " + str(out['amount'])) + print("\tOutput code type: " + str(out['out_type'])) + print("\tHash160 (Address): " + out['data']) - print "Block height: " + str(decoded_utxo['height']) + print("Block height: " + str(decoded_utxo['height'])) def parse_ldb(fout_name, fin_name=CFG.chainstate_path, decode=True): @@ -337,19 +337,20 @@ def parse_ldb(fout_name, fin_name=CFG.chainstate_path, decode=True): db = plyvel.DB(fin_name, compression=None) # Change with path to chainstate # Load obfuscation key (if it exists) - o_key = db.get((unhexlify("0e00") + "obfuscate_key")) + o_key = db.get(bytes(unhexlify("0e00").decode("utf-8") + "obfuscate_key", 'UTF-8')) # If the key exists, the leading byte indicates the length of the key (8 byte by default). If there is no key, # 8-byte zeros are used (since the key will be XORed with the given values). if o_key is not None: - o_key = hexlify(o_key)[2:] + o_key = hexlify(o_key)[2:].decode() # For every UTXO (identified with a leading 'c'), the key (tx_id) and the value (encoded utxo) is displayed. # UTXOs are obfuscated using the obfuscation key (o_key), in order to get them non-obfuscated, a XOR between the # value and the key (concatenated until the length of the value is reached) if performed). 
for key, o_value in db.iterator(prefix=prefix): serialized_length = len(key) + len(o_value) - key = hexlify(key) + + key = hexlify(key).decode() if o_key is not None: utxo = deobfuscate_value(o_key, hexlify(o_value)) else: @@ -452,11 +453,12 @@ def aggregate_dust_np(fin_name, fout_name="dust.json", fltr=None): data_len_np[rate] += data["utxo_data_len"] # Same with estimated non-profitable outputs. - if MIN_FEE_PER_BYTE <= data['non_profitable_est'] <= MAX_FEE_PER_BYTE: - rate = data['non_profitable_est'] - npest[rate] += 1 - value_npest[rate] += data["amount"] - data_len_npest[rate] += data["utxo_data_len"] + if data['non_profitable_est'] is not None: + if MIN_FEE_PER_BYTE <= data['non_profitable_est'] <= MAX_FEE_PER_BYTE: + rate = data['non_profitable_est'] + npest[rate] += 1 + value_npest[rate] += data["amount"] + data_len_npest[rate] += data["utxo_data_len"] # And we increase the total counters for each read utxo. total_utxo = total_utxo + 1 @@ -573,6 +575,21 @@ def check_native_segwit(script): return False, None +def check_native_taproot(script): + """ + Checks whether a given output script is a native taproot type. + + :param script: The script to be checked. + :type script: str + :return: tuple, (True, taproot type) if the script is a native taproot, (False, None) otherwise + :rtype: tuple, first element boolean + """ + + if len(script) == 34*2 and script[:4] == "5120": #OP_PUSHNUM_1 OP_PUSHBYTES_32 + return True, "P2TR" + + return False, None + def get_min_input_size(out, height, count_p2sh=False, coin="bitcoin", compressed_pk_height=0): """ Computes the minimum size an input created by a given output type (parsed from the chainstate) will have. @@ -613,7 +630,7 @@ def get_min_input_size(out, height, count_p2sh=False, coin="bitcoin", compressed # Since we are looking for the minimum size, we will consider all signatures to be 71-byte long in order to define # a lower bound. 
- if out_type is 0: + if out_type == 0: # P2PKH if coin in ["bitcoin", "bitcoincash"]: # Bitcoin core starts using compressed pk in version (0.6.0, 30/03/12, around block height 173480) @@ -624,18 +641,18 @@ def get_min_input_size(out, height, count_p2sh=False, coin="bitcoin", compressed else: height_limit = compressed_pk_height if height_limit == 0: - print "Warning: You are calculating the minimum input size for a coin other than Bitcoin, " \ + print("Warning: You are calculating the minimum input size for a coin other than Bitcoin, " \ "Bitcoin Cash and Litecoin. By default the height ar which compressed public keys where first " \ - "used is not set, so 0 is used. Consider changing the compressed_pk_height " + "used is not set, so 0 is used. Consider changing the compressed_pk_height ") if height < height_limit: # uncompressed keys scriptSig = 138 # PUSH sig (1 byte) + sig (71 bytes) + PUSH pk (1 byte) + uncompressed pk (65 bytes) else: # compressed keys - scriptSig = 106 # PUSH sig (1 byte) + sig (71 bytes) + PUSH pk (1 byte) + compressed pk (33 bytes) + scriptSig = 106 # PUSH sig (1 byte) + sig (71 bytes) + PUSH pk (1 byte) + compressed pk (33 bytes) scriptSig_len = 1 - elif out_type is 1: + elif out_type == 1: # P2SH # P2SH inputs can have arbitrary length. Defining the length of the original script by just knowing the hash # is infeasible. Two approaches can be followed in this case. The first one consists on considering P2SH @@ -657,6 +674,7 @@ def get_min_input_size(out, height, count_p2sh=False, coin="bitcoin", compressed scriptSig_len = 1 else: segwit = check_native_segwit(script) + taproot = check_native_taproot(script) # P2MS if check_multisig(script): # Multisig can be 15-15 at most. 
@@ -666,6 +684,12 @@ def get_min_input_size(out, height, count_p2sh=False, coin="bitcoin", compressed elif segwit[0] and segwit[1] == "P2WPKH": scriptSig = 27 # PUSH sig (1 byte) + sig (71 bytes) + PUSH pk (1 byte) + pk (33 bytes) (106 / 4 = 27) scriptSig_len = 1 +    elif segwit[0] and segwit[1] == "P2WSH": +        scriptSig = 27 # NOTE(review): reuses the P2WPKH witness estimate (106 / 4 = 27); a P2WSH witness size depends on the witness script — confirm +        scriptSig_len = 1 +    elif taproot[0] and taproot[1] == "P2TR": +        scriptSig = 57.5 # NOTE(review): 57.5 does not match a key-path spend witness (~66 bytes / 4 = ~16.5 vbytes) — confirm intended estimate +        scriptSig_len = 1 else: # All other types (non-standard outs) are counted just as the fixed size + 1 byte of the scripSig_len scriptSig = 0 @@ -701,7 +725,7 @@ def load_estimation_data(coin): p2wsh_scriptsize = ujson.load(f) except IOError: -        print "Warning: No estimation data found. Non-profitable estimation charts will always show 0." +        print("Warning: No estimation data found. Non-profitable estimation charts will always show 0.") p2pkh_pksize, p2sh_scriptsize, nonstd_scriptsize, p2wsh_scriptsize, max_height = None, None, None, None, None return p2pkh_pksize, p2sh_scriptsize, nonstd_scriptsize, p2wsh_scriptsize, max_height @@ -717,6 +741,7 @@ def get_est_input_size(out, height, p2pkh_pksize, p2sh_scriptsize, nonstd_script :param out: Output to be analyzed. :type out: dict + :param height: Block height where the utxo was created. Used to set P2PKH min_size. :type height: int :param p2pkh_pksize: Estimation data for P2PKH outputs. @@ -757,9 +782,9 @@ def get_est_input_size(out, height, p2pkh_pksize, p2sh_scriptsize, nonstd_script # If we don't have updated estimation data, a warning will be displayed and the last estimation point will be used # for the rest of values. if height >= max_height: -        print "Warning: There is no estimation data for that height. The last available estimation will be used." +        print("Warning: There is no estimation data for that height. 
The last available estimation will be used.") - if out_type is 0: + if out_type == 0: # P2PKH if height >= max_height: p2pkh_est_data = p2pkh_pksize[str(max_height - 1)] @@ -767,7 +792,7 @@ def get_est_input_size(out, height, p2pkh_pksize, p2sh_scriptsize, nonstd_script p2pkh_est_data = p2pkh_pksize[str(height)] scriptSig = 74 + p2pkh_est_data # PUSH sig (1 byte) + sig (72 bytes) + PUSH pk (1 byte) + PK est scriptSig_len = 1 - elif out_type is 1: + elif out_type == 1: # P2SH scriptSig = p2sh_scriptsize scriptSig_len = int(ceil(scriptSig / float(256))) @@ -857,13 +882,15 @@ def deobfuscate_value(obfuscation_key, value): # Get the extended obfuscation key by concatenating the obfuscation key with itself until it is as large as the # value to be de-obfuscated. + if l_obf < l_value: - extended_key = (obfuscation_key * ((l_value / l_obf) + 1))[:l_value] + extended_key = (obfuscation_key * int((l_value / l_obf) + 1))[:l_value] else: extended_key = obfuscation_key[:l_value] r = format(int(value, 16) ^ int(extended_key, 16), 'x').zfill(l_value) + return r @@ -944,11 +971,11 @@ def get_serialized_size_fast(utxo): :rtype int """ - if utxo.get("out_type") is 0: + if utxo.get("out_type") == 0: # P2PKH: OP_DUP (1 byte) + OP_HASH160 (1 byte) + PUSH (1 byte) + HASH160 (20 bytes) + OP_EQUALVERIFY (1 byte) + # OP_CHECKSIG (1 byte) = 25 bytes out_size = 25 - elif utxo.get("out_type") is 1: + elif utxo.get("out_type") == 1: # P2SH: OP_HASH160 (1 byte) + PUSH (1 byte) + HAS160 (20 bytes) + OP_EQUAL (1 byte) = 23 bytes out_size = 23 elif utxo.get("out_type") in [2, 3]: diff --git a/bitcoin_tools/core/transaction.py b/bitcoin_tools/core/transaction.py index 2fcc3a4..2f6abea 100644 --- a/bitcoin_tools/core/transaction.py +++ b/bitcoin_tools/core/transaction.py @@ -230,7 +230,7 @@ def deserialize(cls, hex_tx): tx.version = int(change_endianness(parse_element(tx, 4)), 16) # INPUTS - tx.inputs = decode_verint(parse_varint(tx)) + tx.inputs = decode_varint(parse_varint(tx)) for i in 
range(tx.inputs): tx.prev_tx_id.append(change_endianness(parse_element(tx, 32))) @@ -516,29 +516,29 @@ def display(self): :rtype: None """ - print "version: " + str(self.version) + " (" + change_endianness(int2bytes(self.version, 4)) + ")" - print "number of inputs: " + str(self.inputs) + " (" + encode_varint(self.inputs) + ")" + print ("version: " + str(self.version) + " (" + change_endianness(int2bytes(self.version, 4)) + ")") + print ("number of inputs: " + str(self.inputs) + " (" + encode_varint(self.inputs) + ")") for i in range(self.inputs): - print "input " + str(i) - print "\t previous txid (little endian): " + self.prev_tx_id[i] + \ - " (" + change_endianness(self.prev_tx_id[i]) + ")" - print "\t previous tx output (little endian): " + str(self.prev_out_index[i]) + \ - " (" + change_endianness(int2bytes(self.prev_out_index[i], 4)) + ")" - print "\t input script (scriptSig) length: " + str(self.scriptSig_len[i]) \ - + " (" + encode_varint((self.scriptSig_len[i])) + ")" - print "\t input script (scriptSig): " + self.scriptSig[i].content - print "\t decoded scriptSig: " + Script.deserialize(self.scriptSig[i].content) + print ("input " + str(i)) + print ("\t previous txid (little endian): " + self.prev_tx_id[i] + \ + " (" + change_endianness(self.prev_tx_id[i]) + ")") + print ("\t previous tx output (little endian): " + str(self.prev_out_index[i]) + \ + " (" + change_endianness(int2bytes(self.prev_out_index[i], 4)) + ")") + print ("\t input script (scriptSig) length: " + str(self.scriptSig_len[i]) \ + + " (" + encode_varint((self.scriptSig_len[i])) + ")") + print ("\t input script (scriptSig): " + self.scriptSig[i].content) + print ("\t decoded scriptSig: " + Script.deserialize(self.scriptSig[i].content)) if self.scriptSig[i].type is "P2SH": - print "\t \t decoded redeemScript: " + InputScript.deserialize(self.scriptSig[i].get_element(-1)[1:-1]) - print "\t nSequence: " + str(self.nSequence[i]) + " (" + int2bytes(self.nSequence[i], 4) + ")" - print "number of 
outputs: " + str(self.outputs) + " (" + encode_varint(self.outputs) + ")" + print ("\t \t decoded redeemScript: " + InputScript.deserialize(self.scriptSig[i].get_element(-1)[1:-1])) + print ("\t nSequence: " + str(self.nSequence[i]) + " (" + int2bytes(self.nSequence[i], 4) + ")") + print ("number of outputs: " + str(self.outputs) + " (" + encode_varint(self.outputs) + ")") for i in range(self.outputs): - print "output " + str(i) - print "\t Satoshis to be spent (little endian): " + str(self.value[i]) + \ - " (" + change_endianness(int2bytes(self.value[i], 8)) + ")" - print "\t output script (scriptPubKey) length: " + str(self.scriptPubKey_len[i]) \ - + " (" + encode_varint(self.scriptPubKey_len[i]) + ")" - print "\t output script (scriptPubKey): " + self.scriptPubKey[i].content - print "\t decoded scriptPubKey: " + Script.deserialize(self.scriptPubKey[i].content) - - print "nLockTime: " + str(self.nLockTime) + " (" + int2bytes(self.nLockTime, 4) + ")" + print ("output " + str(i)) + print ("\t Satoshis to be spent (little endian): " + str(self.value[i]) + \ + " (" + change_endianness(int2bytes(self.value[i], 8)) + ")") + print ("\t output script (scriptPubKey) length: " + str(self.scriptPubKey_len[i]) \ + + " (" + encode_varint(self.scriptPubKey_len[i]) + ")") + print ("\t output script (scriptPubKey): " + self.scriptPubKey[i].content) + print ("\t decoded scriptPubKey: " + Script.deserialize(self.scriptPubKey[i].content)) + + print ("nLockTime: " + str(self.nLockTime) + " (" + int2bytes(self.nLockTime, 4) + ")") diff --git a/bitcoin_tools/sample_conf.py b/bitcoin_tools/sample_conf.py index 56d030e..0572e22 100644 --- a/bitcoin_tools/sample_conf.py +++ b/bitcoin_tools/sample_conf.py @@ -12,4 +12,4 @@ chainstate_path = home_dir + ".bitcoin/chainstate" # Path to the chainstate. data_path = bitcoin_tools_dir + "data/" # Data storage path (for IO). figs_path = bitcoin_tools_dir + "figs/" # Figure store dir, where images from analysis will be stored. 
-estimated_data_dir = bitcoin_tools_dir + 'estimation_data/' # Data for non-profitability with estimations +estimated_data_dir = bitcoin_tools_dir + 'estimation_data/' # Data for non-profitability with estimations \ No newline at end of file diff --git a/bitcoin_tools/utils.py b/bitcoin_tools/utils.py index ccc419e..1e98c99 100644 --- a/bitcoin_tools/utils.py +++ b/bitcoin_tools/utils.py @@ -1,4 +1,4 @@ -from urllib2 import urlopen, Request +from urllib.request import urlopen, Request from json import loads @@ -14,9 +14,9 @@ def change_endianness(x): # If there is an odd number of elements, we make it even by adding a 0 if (len(x) % 2) == 1: x += "0" - y = x.decode('hex') + y = bytes.fromhex(x) z = y[::-1] - return z.encode('hex') + return z.hex() def int2bytes(a, b): @@ -35,7 +35,7 @@ def int2bytes(a, b): raise Exception(str(a) + " is too big to be represented with " + str(b) + " bytes. Maximum value is " + str(m) + ".") - return ('%0' + str(2 * b) + 'x') % a + return ('%0' + str(2 * b) + 'x') % int(a) def parse_element(tx, size):