|
| 1 | +"""High-speed binary file upload to Notehub via the Notecard.""" |
| 2 | + |
| 3 | +import sys |
| 4 | +import time |
| 5 | + |
| 6 | +from notecard.cobs import cobs_encode |
| 7 | +from notecard.notecard import Notecard |
| 8 | + |
| 9 | +if sys.implementation.name == 'cpython': |
| 10 | + import hashlib |
| 11 | + |
| 12 | + def _md5_hash(data): |
| 13 | + """Create an MD5 digest of the given data.""" |
| 14 | + return hashlib.md5(data).hexdigest() |
| 15 | +else: |
| 16 | + from .md5 import digest as _md5_hash |
| 17 | + |
| 18 | +BINARY_STAGE_RETRIES = 50 |
| 19 | +WEB_POST_RETRIES = 20 |
| 20 | +WEB_POST_RETRY_DELAY_SECS = 15 |
| 21 | + |
| 22 | +try: |
| 23 | + _monotonic = time.monotonic |
| 24 | +except AttributeError: |
| 25 | + _monotonic = time.time |
| 26 | + |
| 27 | + |
def _stage_binary_chunk(card, chunk_data):
    """Stage a binary chunk into the Notecard's binary buffer.

    Performs card.binary.put + raw byte transmit + verification, with
    retries on failure. Verification issues a ``card.binary`` request and
    checks that it reports no error and that its ``length`` equals the
    chunk length.

    Args:
        card (Notecard): The Notecard object.
        chunk_data (bytes or bytearray): The raw chunk data to stage.

    Raises:
        Exception: If staging fails after all retries. When the last
            attempt fails inside the put/transmit step, that original
            exception is re-raised unchanged.
    """
    # COBS-encode with newline as the byte handed to cobs_encode, then
    # append a newline after the payload. The 'cobs' field carries the
    # encoded length *before* that trailing newline is added.
    encoded = cobs_encode(bytearray(chunk_data), ord('\n'))
    md5 = _md5_hash(chunk_data)
    req = {
        'req': 'card.binary.put',
        'cobs': len(encoded),
        'status': md5,
    }
    encoded.append(ord('\n'))
    expected_len = len(chunk_data)

    tries_left = BINARY_STAGE_RETRIES
    while tries_left > 0:
        # Track whether the lock was actually acquired so a failure inside
        # card.lock() never triggers an unlock of a lock we do not hold.
        locked = False
        try:
            card.lock()
            locked = True
            # The request and the raw payload are sent under one lock, so
            # the transaction itself must not try to lock again.
            rsp = card.Transaction(req, lock=False)
            if 'err' in rsp:
                raise Exception(rsp['err'])
            card.transmit(encoded, delay=False)
        except Exception:
            tries_left -= 1
            if tries_left == 0:
                raise
            continue
        finally:
            if locked:
                card.unlock()

        # Ask the Notecard what it now holds in the binary buffer.
        rsp = card.Transaction({'req': 'card.binary'})
        if 'err' in rsp:
            tries_left -= 1
            if tries_left == 0:
                raise Exception(
                    f'Failed to stage binary data: {rsp["err"]}')
            continue

        actual_len = rsp.get('length', 0)
        if actual_len != expected_len:
            tries_left -= 1
            if tries_left == 0:
                raise Exception(
                    f'Binary length mismatch: expected {expected_len}, '
                    f'got {actual_len}.')
            continue

        return

    # Only reachable when BINARY_STAGE_RETRIES is not positive.
    raise Exception('Failed to stage binary data after retries.')
| 87 | + |
| 88 | + |
def upload(card, data, route, target=None, label=None,
           content_type='application/octet-stream', max_chunk_size=0,
           progress_cb=None):
    """Upload binary data to a Notehub proxy route via the Notecard.

    The data is chunked to fit in the Notecard's binary buffer, staged
    via card.binary.put, and sent to Notehub via web.post with
    binary:true. A failed web.post is retried up to ``WEB_POST_RETRIES``
    times, waiting ``WEB_POST_RETRY_DELAY_SECS`` and re-staging the chunk
    before each retry.

    Args:
        card (Notecard): The Notecard object.
        data (bytes or bytearray): The binary data to upload.
        route (str): The Notehub proxy route alias.
        target (str, optional): URL path appended to the route (sent as
            ``name`` in the web.post request).
        label (str, optional): Filename label for the upload.
        content_type (str): MIME type. Default ``application/octet-stream``.
        max_chunk_size (int): Maximum chunk size in bytes. 0 means use the
            Notecard's maximum buffer capacity. Values larger than the
            reported capacity are clamped to it.
        progress_cb (callable, optional): Called after each chunk with a dict
            containing ``chunk``, ``total_chunks``, ``bytes_sent``,
            ``total_bytes``, ``percent_complete``, ``bytes_per_sec``
            (throughput of the chunk just sent), ``avg_bytes_per_sec``
            (throughput since the upload started) and ``eta_secs``.

    Returns:
        dict: Upload statistics with keys ``bytes_uploaded``, ``chunks``,
            ``duration_secs``, and ``bytes_per_sec``.

    Raises:
        ValueError: If ``route`` is empty or ``data`` is empty.
        Exception: If the upload fails.
    """
    if not route:
        raise ValueError('route must not be empty.')
    if not data:
        raise ValueError('data must not be empty.')

    # Send card.binary with reset:true; the response's 'max' field is used
    # as the buffer capacity. A '{bad-bin}' error is tolerated here.
    rsp = card.Transaction({'req': 'card.binary', 'reset': True})
    if 'err' in rsp and '{bad-bin}' not in rsp['err']:
        raise Exception(
            f'Error querying card.binary: {rsp["err"]}')

    buf_capacity = rsp.get('max', 0)
    if buf_capacity == 0:
        raise Exception(
            'Notecard binary buffer capacity is zero or not reported.')

    if max_chunk_size > 0:
        chunk_size = min(max_chunk_size, buf_capacity)
    else:
        chunk_size = buf_capacity

    total_len = len(data)
    # Ceiling division: a final partial chunk still counts as a chunk.
    total_chunks = (total_len + chunk_size - 1) // chunk_size
    upload_start = _monotonic()
    bytes_sent = 0

    for chunk_idx in range(total_chunks):
        # Timed per chunk so the "current" rate reflects this chunk alone
        # rather than the time since the whole upload began.
        chunk_start = _monotonic()

        offset = chunk_idx * chunk_size
        end = min(offset + chunk_size, total_len)
        chunk_data = data[offset:end]
        chunk_len = len(chunk_data)
        chunk_md5 = _md5_hash(chunk_data)

        _stage_binary_chunk(card, chunk_data)

        web_req = {
            'req': 'web.post',
            'route': route,
            'binary': True,
            'content': content_type,
            'offset': offset,
            'total': total_len,
            'status': chunk_md5,
        }
        if target:
            web_req['name'] = target
        if label:
            web_req['label'] = label

        web_tries = WEB_POST_RETRIES
        while web_tries > 0:
            rsp = card.Transaction(web_req)
            result_code = rsp.get('result', 0)
            if result_code >= 300 or 'err' in rsp:
                web_tries -= 1
                if web_tries == 0:
                    err_detail = rsp.get('err', f'HTTP {result_code}')
                    raise Exception(
                        f'web.post failed after retries: {err_detail}')
                time.sleep(WEB_POST_RETRY_DELAY_SECS)
                # Stage the chunk again before posting it again.
                _stage_binary_chunk(card, chunk_data)
                continue
            break

        bytes_sent += chunk_len

        if progress_cb:
            now = _monotonic()
            chunk_elapsed = now - chunk_start
            elapsed = now - upload_start
            current_bps = (
                chunk_len / chunk_elapsed if chunk_elapsed > 0 else 0)
            avg_bps = bytes_sent / elapsed if elapsed > 0 else 0
            remaining = total_len - bytes_sent
            eta = remaining / avg_bps if avg_bps > 0 else 0

            progress_cb({
                'chunk': chunk_idx + 1,
                'total_chunks': total_chunks,
                'bytes_sent': bytes_sent,
                'total_bytes': total_len,
                'percent_complete': (bytes_sent / total_len) * 100,
                'bytes_per_sec': current_bps,
                'avg_bytes_per_sec': avg_bps,
                'eta_secs': eta,
            })

    duration = _monotonic() - upload_start
    return {
        'bytes_uploaded': bytes_sent,
        'chunks': total_chunks,
        'duration_secs': duration,
        'bytes_per_sec': bytes_sent / duration if duration > 0 else 0,
    }
0 commit comments