From f2d18169e81969a2455af0866ccd73ecff99e47b Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Fri, 14 Jan 2022 14:33:14 -0800 Subject: [PATCH 01/44] Add github workflow "build.yml" --- .github/workflows/build.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..043dccc --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,21 @@ +name: Build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: sudo apt-get install -qq scons gcc-avr binutils-avr avr-libc + + - name: Build + run: scons From 7c3b47548a21c928ad6b5311ca25940942aa2e54 Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Fri, 14 Jan 2022 14:42:22 -0800 Subject: [PATCH 02/44] Update Build Status badge Updates build status badge to reflect status of new build.yml workflow. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e9bd6c7..8bbcfc6 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # nanoBoot -[![Build Status](https://travis-ci.org/volium/nanoBoot.svg?branch=master)](https://travis-ci.org/volium/nanoBoot) +[![Build](https://github.com/volium/nanoBoot/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/volium/nanoBoot/actions/workflows/build.yml) This repository contains the source code for the USB HID-based bootloader for ATmegaXXU4 family of devices. 
From 5a3a4129fe5cec128522883f00b7d85afba2ec16 Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Fri, 14 Jan 2022 14:45:43 -0800 Subject: [PATCH 03/44] Delete .travis.yml Deletes .travis.yml now that CI has been moved to github workflow (.github/workflows/build.yml) --- .travis.yml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index cf0e0b6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: c -before_install: - - sudo apt-get update -qq -install: - - sudo apt-get install -qq scons gcc-avr binutils-avr avr-libc -script: scons From 4dadff46071659556cc4b82d82183060e8ec8014 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:01:41 +0300 Subject: [PATCH 04/44] Save 2 bytes in the setup packet read loop Instead of using a separate register for the loop counter, compare the value in the XL register with the expected end address. This saves one instruction (2 bytes). Signed-off-by: Sergey Vlasov --- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index a2497e3..3231c82 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -620,11 +620,10 @@ USB_Endpoint_ISR: ; Shorter version clr XH ; Clear XH Register ldi XL, 18 ; Load XL Register with number 18 (this will be used to refer to r18) - ldi r16, 8 ; Load r16 with number 8 (the number of fields we need to read) load: ldd r0, Y+oUEDATX ; Load r0 with the value in the USB Endpoint Data Register (UEDATX) st X+, r0 ; Store the value of r0 to the location pointed by X (r18), post increment X (X now points to r19) - dec r16 ; Decrement r16 - brne load ; Jump back to 'load' if r16 is not zero + cpi XL, 18+8 ; Compare XL with the location past the last byte that we need to read + brne load ; Jump back to 'load' if there are still bytes to read ; Our response is based on data direction... 
sbrc reg_bmRequestType, 7 ; Skip the next instruction if bit 7 of bmRequestType is not set; for host to device (OUT) transaction, bit 7 is cleared From 0fee36073ec8f0c8c56ed72a02e2f7ef1869abb9 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:09:22 +0300 Subject: [PATCH 05/44] Swap send_hid_descriptor and send_hid_report_descriptor Reorder the descriptor handling code so that the part that falls through to `process_descriptor` is handling a single USB descriptor prefixed with its length. Does not save any bytes by itself, just prepares the code for subsequent changes. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 3231c82..16873d4 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -840,21 +840,21 @@ send_config_descriptor: ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) rjmp process_descriptor ; jump to process_descriptor -send_hid_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) - rjmp process_descriptor ; jump to process_descriptor - send_hid_report_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(hid_report_descriptor); Load ZL with the least significant 8 bits of hid_report_descriptor ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) + rjmp process_descriptor ; jump to process_descriptor ; If needed, include other descriptors here +send_hid_descriptor: + ; We only load the lower portion (lo8) of the address of the descriptor, + ; the higher portion is common for all descriptors + ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of 
hid_descriptor + ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) + process_descriptor: ; Acknowledge the SETUP packet From 97f2edb4885111e603e3ec24cb9a41ff5a91e1c4 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:29:20 +0300 Subject: [PATCH 06/44] Save 2 bytes in the descriptor sending code In two cases (`send_hid_descriptor` and `send_device_descriptor`) the returned data contains just a single USB descriptor, which contains its length in bytes in the first data byte. Replace two instructions to load the descriptor lengths with one instruction to read the length from the first data byte, saving 2 bytes. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 16873d4..83b087a 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -830,8 +830,7 @@ send_device_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor - ldi r16, 18 ; Load r16 with length of device_descriptor (18 bytes) - rjmp process_descriptor ; jump to process_descriptor + rjmp process_single_descriptor ; jump to process_single_descriptor send_config_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, @@ -853,7 +852,10 @@ send_hid_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) + +process_single_descriptor: + + lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes process_descriptor: From d033d5e13be4ac30d9e7b5c09aeb9bb2bcb9ad8c Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:41:13 
+0300 Subject: [PATCH 07/44] Move SET_CONFIGURATION in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 83b087a..19e359d 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -652,6 +652,13 @@ HANDLE_USB_STANDARD_DEVICE: rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST +SET_CONFIGURATION: + + rcall process_Host2Device ; This function affects r17 + + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + + HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) breq SET_HID_REPORT ; jump to SET_HID_REPORT @@ -677,13 +684,6 @@ SET_ADDRESS: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST -SET_CONFIGURATION: - - rcall process_Host2Device ; This function affects r17 - - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - - SET_HID_REPORT: ; Acknowledge the SETUP packet From 5ec17e8811b9f48922caa1896ac773ea69d2453f Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:56:18 +0300 Subject: [PATCH 08/44] Save 2 bytes by using fallthrough for SET_CONFIGURATION Restructure conditional jumps to use fallthrough for the `bRequest == 9` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 19e359d..d37f385 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -648,14 +648,13 @@ HANDLE_USB_STANDARD_DEVICE: cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) breq SET_ADDRESS ; jump to SET_ADDRESS cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - breq SET_CONFIGURATION ; jump to SET_CONFIGURATION - - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: rcall process_Host2Device ; This function affects r17 +UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST From 0cb39fab7d57b3ce0887e89d044b7ed742c43b09 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:00:55 +0300 Subject: [PATCH 09/44] Move HANDLE_USB_CLAS_INTERFACE in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index d37f385..dc1a807 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -642,6 +642,12 @@ HOST_TO_DEVICE: rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST +HANDLE_USB_CLAS_INTERFACE: + cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) + breq SET_HID_REPORT ; jump to SET_HID_REPORT + rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + + HANDLE_USB_STANDARD_DEVICE: ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest @@ -658,12 +664,6 @@ UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST -HANDLE_USB_CLAS_INTERFACE: - cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - breq SET_HID_REPORT ; jump to SET_HID_REPORT - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - - SET_ADDRESS: ; Set device address; for this we only need to copy the value in wValueL which contains the address From b76a56e0155d6724119434d7a98e313331877c42 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:05:44 +0300 Subject: [PATCH 10/44] Save 2 bytes by using fallthrough for HANDLE_USB_CLAS_INTERFACE Restructure conditional jumps to use fallthrough for the `HANDLE_USB_CLAS_INTERFACE` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index dc1a807..aa069f6 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -638,10 +638,8 @@ HOST_TO_DEVICE: andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) - breq HANDLE_USB_CLAS_INTERFACE ; jump to HANDLE_USB_CLAS_INTERFACE - - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) breq SET_HID_REPORT ; jump to SET_HID_REPORT From cfa7e72fc0e6468e2725aa91b1fa27c08f213749 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:13:07 +0300 Subject: [PATCH 11/44] Save 2 bytes by using fallthrough for GET_DESCRIPTOR Restructure conditional jumps to use fallthrough for the `GET_DESCRIPTOR` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index aa069f6..e924418 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -772,6 +772,7 @@ finish_hid_request: ; Clear Transmitter Ready Flag rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX +UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -789,11 +790,8 @@ DEVICE_TO_HOST: brne UNHANDLED_DEVICE_TO_HOST ; If bmRequestType is not 0x80, we know it's not a GET_DESCRIPTOR request, so jump to UNHANDLED_DEVICE_TO_HOST cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) - breq GET_DESCRIPTOR ; jump to GET_DESCRIPTOR - -UNHANDLED_DEVICE_TO_HOST: - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x80/0x81 or bRequest is not 0x06, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to GET_DESCRIPTOR if equal GET_DESCRIPTOR: ; Just get the descriptor address into From f279141b5ac5ca281f33fa3be77c4db4e8733b79 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:38:43 +0300 Subject: [PATCH 12/44] Save 2 bytes by reusing the SET_ADDRESS code for SET_CONFIGURATION `SET_CONFIGURATION` is basically a noop - the only thing that needs to be done is `process_Host2Device`. Instead of handling it in two instructions, reuse the `SET_ADDRESS` code for it, passing the current UDADDR value to it (so it effectively does nothing too), which can be done in a single instruction, thus saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index e924418..8953512 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -656,11 +656,10 @@ HANDLE_USB_STANDARD_DEVICE: ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: - rcall process_Host2Device ; This function affects r17 - -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, + ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. + ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address SET_ADDRESS: @@ -679,6 +678,7 @@ SET_ADDRESS: ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) +UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST SET_HID_REPORT: From bd1bd68e200485aa16445c9d22dd53bb205ee102 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Mon, 23 Aug 2021 12:02:01 +0300 Subject: [PATCH 13/44] Fix USB errors due to unconfigured interrupt in endpoint Although the subset of USB HID protocol that is actually used by the bootloader uses only the default control endpoint (0), the HID spec requires the device to have an Interrupt IN endpoint, and the host can poll that endpoint even when the HID report descriptor does not actually declare any input reports. Polling an unconfigured endpoint causes USB errors, which may prevent the bootloader from functioning properly. 
Apparently this was happening in Mac OS, making the bootloader unusable there (however, Windows and Linux did not expose the problem; on Linux it was possible to provoke these errors by opening the `/dev/hidrawN` device corresponding to the bootloader, but existing flashing tools do not use that method to access the bootloader device). Add the code to configure endpoint 1 as Interrupt IN, matching the USB descriptors; this is enough to make the USB controller generate NAK replies for that endpoint correctly, and the rest of bootloader code may continue using just endpoint 0 as before. The binary size increases by 12 bytes. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 8953512..1dab22b 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -553,29 +553,55 @@ USB_General_ISR: ; service_EORSTI: ; unused label ; ================================================================= -; = Configure Endpoint0 +; = Configure Endpoints ; ================================================================= - ; ASSUMPTION! - ; We only use Endpoint0, and the reset value of the USB Device Select Endpoint Number Register (UENUM) is Zero, - ; so we don't need to select it or do anything else + ; Even though the bootloader uses only endpoint 0, the HID spec requires any HID device to have an + ; Interrupt IN endpoint, and the host can decide to poll that endpoint even when the HID report + ; descriptor does not actually declare any input reports. Polling an unconfigured endpoint causes + ; USB errors, therefore endpoint 1 must be configured here too. 
- ; Enable Endpoint + ; Enable and configure endpoint 1 as Interrupt IN: + ; UENUM = 1; + ; UECONX |= (1 << EPEN); + ; UECFG0X = (1 << EPTYPE1) | (1 << EPTYPE0) | (1 << EPDIR); + ; UECFG1X = (1 << EPSIZE1) | (1 << EPSIZE0) | (1 << ALLOC); + + std Y+oUENUM, rONE ; Select Endpoint 1 + + ; Set Endpoint Enable Bit (EPEN), all other bits set to zero has no effect on UECONX + std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Endpoint 1 + + ldi r16, (_BV(EPTYPE1) | _BV(EPTYPE0) | _BV(EPDIR)) ; Load r16 with the value to configure Endpoint 1 + ; Endpoint Type Bits (EPTYPE1:0); 11 to set as Interrupt Endpoint + ; Endpoint Direction Bit (EPDIR); set to configure IN direction + + std Y+oUECFG0X, r16 ; Store r16 to the USB Endpoint Configuration0 Register (UECFG0X); + + ldi r16, (_BV(EPSIZE1) | _BV(EPSIZE0) | _BV(ALLOC)) ; Load r16 with the value to configure Endpoint 1 (and also 0 below) + ; Endpoint Size Bits (EPSIZE2:0); 011 to set to 64 bytes + ; Endpoint Bank Bits (EPBK1:0); 00 to set One bank + ; Endpoint Allocation Bit (ALLOC); set to allocate the endpoint memory + + std Y+oUECFG1X, r16 ; Store r16 to the USB Endpoint Configuration1 Register (UECFG1X); + + ; Enable and configure endpoint 0 as Control (this is done last, so that endpoint 0 will remain selected): + ; UENUM = 0; ; UECONX |= (1 << EPEN); ; UECFG0X = 0; - ; UECFG1X = 0x32; + ; UECFG1X = (1 << EPSIZE1) | (1 << EPSIZE0) | (1 << ALLOC); + + std Y+oUENUM, rZERO ; Select Endpoint0 + ; Set Endpoint Enable Bit (EPEN), all other bits set to zero has no effect on UECONX - std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Enpoint 0 + std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Endpoint 0 ; SIZE OPTIMIZATION: Not needed due to known reset value (Zero) ; std Y+oUECFG0X, rZERO ; Store rZERO to the USB Endpoint Configuration0 Register 
(UECFG0X); ; Endpoint Type Bits (EPTYPE1:0): 00 to set as Control Endpoint ; Endpoint Direction Bit (EPDIR): clear to configure OUT direction; needed for Control Endpoint - ldi r16, (_BV(EPSIZE1) | _BV(EPSIZE0) | _BV(ALLOC)) ; Load r16 with the value to configure Enpoint 0 - ; Endpoint Size Bits (EPSIZE2:0); 011 to set to 64 bytes - ; Endpoint Bank Bits (EPBK1:0); 00 to set One bank - ; Endpoint Allocation Bit (ALLOC); set to allocate the endpoint memory + ; SIZE OPTIMIZATION: r16 is already loaded with the required value while configuring endpoint 1 above std Y+oUECFG1X, r16 ; Store r16 to the USB Endpoint Configuration1 Register (UECFG1X); From 1e42ddd6f3ee01d3a74ea4902d7c2fb7069b9260 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 25 Aug 2021 17:03:37 +0300 Subject: [PATCH 14/44] Save 2 bytes in the flash write loop Instead of saving the initial address value in a separate register pair, and then reloading it after the flash write loop, keep the address in the Z register and subtract 128 from it after the data write loop - this should give the same result, because the loop counter is explicitly initialized by the bootloader code. However, because the `sbiw` instruction supports only the 0...63 range for its constant argument, another optimization trick is also used - instead of incrementing the whole 16-bit address, only the low byte is incremented; this should give the same result, because the block start address must be aligned to the flash page size (128 bytes), therefore any carry to the high byte should not happen within the page (it may happen just past the end of the page, but if neither instruction performs that carry, the final result will be the same). 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 1dab22b..586702a 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -716,15 +716,13 @@ SET_HID_REPORT: rcall wait_RXOUTI ; This function loads r17 with value of UEINTX load_page_address: - ; We store the page address in r15:r14 and not in r31:r30 because we need - ; to keep track of the page when we call write_page_to_flash - ldd r14, Y+oUEDATX ; Load r14 with LSB of page address - ldd r15, Y+oUEDATX ; Load r15 with MSB of page address + ldd r30, Y+oUEDATX ; Load r30 with LSB of page address + ldd r31, Y+oUEDATX ; Load r31 with MSB of page address check_page_address: ldi r26, 0xFF ; Load value 0xFF to r26 - cp r26, r14 ; Compare low byte of page address against 0xFF - cpc r26, r15 ; Compare high byte of page address against 0xFF + cp r26, r30 ; Compare low byte of page address against 0xFF + cpc r26, r31 ; Compare high byte of page address against 0xFF brne erase_page ; if r15:r14 != 0xFFFF jump to erase_page quit_bootloader: @@ -734,9 +732,6 @@ quit_bootloader: erase_page: - ; Set page address in Z-Register - movw r30, r14 ; Copy r15:r14 to r31:r30 (Z-Register) - ldi r17, (_BV(PGERS)|_BV(SPMEN)) ; load r17 with the value needed to erase the currently specified page rcall do_SPM ; execute page erase (this function requires r17 to be loaded first with the right value for SPMCSR) @@ -771,13 +766,15 @@ write_page_buffer: rcall do_SPM ; execute page buffer write (this function requires r17 to be loaded first with the right value for SPMCSR) increment_byte_address: - adiw r30, 2 ; Increment Z-Register (the current byte address) by 2 + subi r30, -2 ; Increment the current address by 2. + ; Only the low byte needs to be incremented, because the block start address must be page aligned, + ; therefore any carry to the high byte may happen only past the end of the block. 
dec r16 ; decrement r16 (number of words per page) brne check_endpoint_for_more_data ; loop while r16 is not equal to SPM_PAGESIZE (128) - ; Set page address in Z-Register - movw r30, r14 ; Copy r15:r14 (the original page address) back to r31:r30 (Z-Register) + ; Restore the page address in Z-Register + subi r30, SPM_PAGESIZE ; Move the address back to the start of page (again only the low byte needs to be changed). write_page_to_flash: ldi r17, (_BV(PGWRT)|_BV(SPMEN)) ; load r17 with the value needed to commit the current page buffer to the flash From 68b74cb71945a34a1a89de4e42ce955b7fe73943 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 25 Aug 2021 17:42:45 +0300 Subject: [PATCH 15/44] Add bootloader overwrite protection To protect the user from accidents (or even deliberate attempts to brick the device), compare the specified page address with the bootloader start address, and skip the flash write if the address could overlap with the bootloader. Handling the error case by jumping to `finish_hid_request` did not work (it returned an error, but then the bootloader stopped responding to any further USB requests); apparently it is important to consume the proper number of bytes from the USB FIFO. Because of that, the bad address case is handled by running the same loop that is used for the normal flash write case, but with all `spm` instructions disabled by setting a flag bit. Bit 7 of `reg_bRequest` is chosen for that role (that bit is guaranteed to be 0 when starting to handle a normal `SET_HID_REPORT` request without needing to add any instructions to clear it). The `START_APPLICATION` command handling is also changed to use the same code path to save space. Because of additional optimization of the address comparison and the `START_APPLICATION` handling code, the binary size is increased by just 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 586702a..a179a2c 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -720,15 +720,27 @@ load_page_address: ldd r31, Y+oUEDATX ; Load r31 with MSB of page address check_page_address: - ldi r26, 0xFF ; Load value 0xFF to r26 - cp r26, r30 ; Compare low byte of page address against 0xFF - cpc r26, r31 ; Compare high byte of page address against 0xFF - brne erase_page ; if r15:r14 != 0xFFFF jump to erase_page - -quit_bootloader: - ; we received the START_APPLICATION command, change value of BootLoaderActive flag - clt ; clear the BootLoaderActive flag (T flag in SREG) - rjmp finish_hid_request ; jump to finish_hid_request + ; Protect against overwriting the bootloader - allow flash write only if the specified address is + ; less than the bootloader start address. Only the high byte needs to be tested, because the + ; bootloader start is guaranteed to be on a 256 bytes boundary. + cpi r31, hi8(reset_vector) ; Compare high byte of page address against the high byte of the bootloader start addresss + brcs erase_page ; If the address is below the bootloader start, allow the flash write operation + + ; The address is definitely not correct for a flash write operation; however, simply jumping to + ; finish_hid_request would not just fail this SET_HID_REPORT request - apparently not reading the + ; OUT data properly results in the bootloader not responding to any subsequent USB requests too. + ; Instead of doing that, we run the normal flash write loop even if the address was bad, but set + ; the "disable flash write" bit, so that the actual flash write instructions will be skipped. + ; Bit 7 of reg_bRequest is used for that purpose - is is known to be 0 in the normal case. 
+ sbr reg_bRequest, _BV(7) ; Set the "disable flash write" bit + + ; If the address is out of the allowed range for flash write, it may be the special value for the + ; START_APPLICATION command (0xffff); check for that value in the shortest way possible. + adiw r30, 1 ; Increment the address to turn 0xffff into 0x0000 + brne erase_page ; If the address was out of range and not 0xffff, jump to the regular flash write code + ; (which would just consume the OUT data to make USB work properly). + clt ; Otherwise (the address was 0xffff) clear the BootLoaderActive flag (T flag in SREG), + ; then fallthrough to the regular flash write code too. erase_page: @@ -1063,7 +1075,12 @@ do_SPM: ; NOTE: This function assumes r17 already has the correct value for the SPMCSR register, depending on the ; desired SPM operation + ; NOTE: If bit 7 of reg_bRequest is set to 1, the actual SPM instruction will not be executed + ; (the wait loop will still run, but should just complete immediately). out _SFR_IO_ADDR(SPMCSR), r17 ; store value in r17 to the Store Program Memory Control and Status Register (SPMCSR) + sbrs reg_bRequest, 7 ; Skip the actual flash operation if the "disable flash write" bit is set. + ; This is apparently safe, because the SPM instruction must be executed within 4 cycles after setting SPMEN, + ; and the sbrs instruction takes just 1 cycle when not skipping. 
spm ; execute spm instruction based on the value loaded to SPMCSR wait_SPM: From f2f6ed6bc5ae47c8ce78c5976988d124b61499c9 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:38:37 +0900 Subject: [PATCH 16/44] LED support: MACRO for onboard LED Signed-off-by: Osamu Aoki --- nanoBoot.S | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index a179a2c..46dbf10 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -48,6 +48,35 @@ ; hfuse memory = 0xDF ; efuse memory = 0xC4 +; LED -- Configure this for the LED +; +; Teensy 2.0 compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN D6 HIGH +#define LED_BIT 6 +#define LED_CONF DDRD +#define LED_PORT PORTD + +; Leonardo/Nano compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN C7 HIGH +; #define LED_BIT 7 +; #define LED_CONF DDRC +; #define LED_PORT PORTC + +; Pro Micro compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN D5 LOW +; #define LED_BIT 5 +; #define LED_CONF DDRD +; #define LED_PORT PORTD + +; Pro Micro compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN B3 LOW +; #define LED_BIT 3 +; #define LED_CONF DDRB +; #define LED_PORT PORTB + +; Except for Pro Micro compatible board, initially LED is off. 
+; This code assumes Teensy 2.0 or Leonardo/Nano compatible board + ; SW assumptions: ; All Endpoints are being configured sequentially in ascending order, ; but, since we only use EP0, this is not that important From 8b3bcabcb98afd2363eece51795f6a7acf4c8bf8 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:38:52 +0900 Subject: [PATCH 17/44] LED support: Initialize LED port and OFF Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index 46dbf10..0eb5a24 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -355,6 +355,9 @@ run_application: ; We get here if the cause of th jmp 0 ; Simply jump to 0x0000 (application) IMPORTANT NOTE!! This CANNOT be an 'rjmp'!! run_bootloader: + sbi _SFR_IO_ADDR(LED_CONF), LED_BIT ; Set IO register as output for LED + ; No need to trun off LED initially (non-promicro) -- MCU port is initialized as 0 + ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED initially (promicro) set ; Initialize BootLoaderActive flag (T flag in SREG) From e25b4abac9ea9a9b934fa3163a7cb805c7a352f7 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 16:21:44 +0900 Subject: [PATCH 18/44] LED support: Turn on LED before sending descriptor Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index 0eb5a24..d9fe89c 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -931,6 +931,9 @@ verifyMaxDescriptorLength: send_descriptor: + sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (non-promicro) + ;cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (promicro) + ; Abort if RXSTPI is set ldd r17, Y+oUEINTX ; Load r17 with the value in the USB Endpoint Interrupt Register (UEINTX); sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared From 79dec07c565cf28721c4f3732f084d0fd9771b26 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:39:03 
+0900 Subject: [PATCH 19/44] LED support: Turn off LED before exiting bootloader size 512 bytes Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index d9fe89c..e85dace 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -553,6 +553,9 @@ exit_bootloader: ; = Watchdog Timer initialization ; ================================================================= + cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (non-promicro) + ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (promicro) + ; NOTE!! This part of the code assumes MCUSR has already been cleared ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) From f259046d99892f259d0e813fb148fd68c95e08b7 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Thu, 20 Jan 2022 00:48:05 +0900 Subject: [PATCH 20/44] Fix typo in comment Signed-off-by: Osamu Aoki --- nanoBoot.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanoBoot.S b/nanoBoot.S index e85dace..02991cf 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -346,7 +346,7 @@ main: mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely - rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded in r16 and r17 + rcall set_watchdog_timer ; Call the subroutine that sets the watchdog timer with the values loaded in r16 and r17 ; check_reset_flags: sbrs rMCUSR, EXTRF ; Skip the next instruction if EXTRF is set (if External Reset Flag, skip next instruction, go to run_bootloader) From 0df6c69f90cf4a106e2615b3ef26d1cf5d812639 Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Thu, 20 Jan 2022 14:20:30 -0800 Subject: [PATCH 21/44] Update comments, fix typos Updates some outdated comments and fixes typos after latest round of merges. 
--- nanoBoot.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 02991cf..fccf016 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -78,8 +78,10 @@ ; This code assumes Teensy 2.0 or Leonardo/Nano compatible board ; SW assumptions: -; All Endpoints are being configured sequentially in ascending order, -; but, since we only use EP0, this is not that important +; The bootloader only "needs" endpoint 0; however, the HID spec requires any HID device to have an +; Interrupt IN endpoint, and the host can decide to poll that endpoint even when the HID report +; descriptor does not actually declare any input reports. Because of this, endpoints 0 and 1 are +; configured (in reversed order). See commit bd1bd68e200485aa16445c9d22dd53bb205ee102 for details. ; Register Assignments: @@ -700,8 +702,8 @@ HOST_TO_DEVICE: andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal -HANDLE_USB_CLAS_INTERFACE: + ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal +HANDLE_USB_CLASS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) breq SET_HID_REPORT ; jump to SET_HID_REPORT rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST @@ -717,6 +719,7 @@ HANDLE_USB_STANDARD_DEVICE: ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: + ; Optimization by "sigprof" that saves 2 bytes ; Dirty trick: We don't need to do anything for 
SET_CONFIGURATION except process_Host2Device, ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. From e3cf9e10f32ed31cafa6929339c28af6607dd4ad Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Thu, 20 Jan 2022 12:11:45 -0800 Subject: [PATCH 22/44] Add opt-in support for "LED_ENABLED" feature Makes LED support "opt-in" by defining "LED_ENABLED". Feature takes 6 bytes when enabled. Adds macros to handle active high and active low differences. Compared to Osamu's original implementation, moves turning on the LED to "run_bootloader", instead of "send_descriptor"; the behavior is pretty much the same, but it saves two bytes for the active-low case, since we only call `cbi` once when the bootloader becomes active (after setting T flag in SREG). Removes old and unused LED_PIN declaration. --- nanoBoot.S | 107 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 32 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index fccf016..506353c 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -48,34 +48,76 @@ ; hfuse memory = 0xDF ; efuse memory = 0xC4 -; LED -- Configure this for the LED -; -; Teensy 2.0 compatible board (Adjust for other board) + +; ========================================================== +; LED SUPPORT START + +; Turn LED on while bootloader is active +; NOTE: This feature uses 6 bytes for active high and 8 bytes for active low + +; Uncomment the following line to enable LED feature +; #define LED_ENABLED + +; LED Configuration +; Uncomment or add a new LED configuration for your specific board + +; Adafruit's Atmega32u4 Breakout Board (Product ID: 296) - Now discontinued +; https://www.adafruit.com/product/296 +; -- LED is ON with ATmega32u4 PIN E6 HIGH +; #define LED_BIT 6 +; #define LED_CONF DDRE +; #define LED_PORT PORTE +; #define LED_ACTIVE_LEVEL 1 + +; Teensy 2.0 compatible board ; -- LED is ON with ATmega32u4 PIN D6 HIGH -#define LED_BIT 6 -#define LED_CONF DDRD -#define LED_PORT PORTD +; 
#define LED_BIT 6 +; #define LED_CONF DDRD +; #define LED_PORT PORTD +; #define LED_ACTIVE_LEVEL 1 -; Leonardo/Nano compatible board (Adjust for other board) +; Leonardo/Nano compatible board ; -- LED is ON with ATmega32u4 PIN C7 HIGH -; #define LED_BIT 7 -; #define LED_CONF DDRC -; #define LED_PORT PORTC +; #define LED_BIT 7 +; #define LED_CONF DDRC +; #define LED_PORT PORTC +; #define LED_ACTIVE_LEVEL 1 -; Pro Micro compatible board (Adjust for other board) +; Pro Micro compatible board ; -- LED is ON with ATmega32u4 PIN D5 LOW -; #define LED_BIT 5 -; #define LED_CONF DDRD -; #define LED_PORT PORTD +; #define LED_BIT 5 +; #define LED_CONF DDRD +; #define LED_PORT PORTD +; #define LED_ACTIVE_LEVEL 0 -; Pro Micro compatible board (Adjust for other board) +; Pro Micro compatible board ; -- LED is ON with ATmega32u4 PIN B3 LOW -; #define LED_BIT 3 -; #define LED_CONF DDRB -; #define LED_PORT PORTB +; #define LED_BIT 3 +; #define LED_CONF DDRB +; #define LED_PORT PORTB +; #define LED_ACTIVE_LEVEL 0 + +#if defined(LED_ENABLED) + #if !defined(LED_PORT) || !defined(LED_CONF) || !defined(LED_BIT) || !defined(LED_ACTIVE_LEVEL) + #error "If LED feature is enabled, the following need to be defined: LED_BIT, LED_CONF, LED_PORT, LED_ACTIVE_LEVEL" + #else + ; Set IO register as output for LED + #define ENABLE_LED_OUTPUT sbi _SFR_IO_ADDR(LED_CONF), LED_BIT + #if LED_ACTIVE_LEVEL == 1 + #define TURN_LED_ON sbi _SFR_IO_ADDR(LED_PORT), LED_BIT + #define TURN_LED_OFF cbi _SFR_IO_ADDR(LED_PORT), LED_BIT + #elif LED_ACTIVE_LEVEL == 0 + #define TURN_LED_ON cbi _SFR_IO_ADDR(LED_PORT), LED_BIT + #define TURN_LED_OFF sbi _SFR_IO_ADDR(LED_PORT), LED_BIT + #else + #error "LED_ACTIVE_LEVEL needs to be either 1 (active high) or 0 (active low)" + #endif + #endif +#endif + +; LED SUPPORT END +; ========================================================== -; Except for Pro Micro compatible board, initially LED is off. 
-; This code assumes Teensy 2.0 or Leonardo/Nano compatible board ; SW assumptions: ; The bootloader only "needs" endpoint 0; however, the HID spec requires any HID device to have an @@ -162,9 +204,6 @@ # define BOOT_ADDRESS 0 #endif -; For debugging purposes -.equ LED_PIN, 6 - .section .vectors ; We still want the reset vector to jump to "main" @@ -357,11 +396,16 @@ run_application: ; We get here if the cause of th jmp 0 ; Simply jump to 0x0000 (application) IMPORTANT NOTE!! This CANNOT be an 'rjmp'!! run_bootloader: - sbi _SFR_IO_ADDR(LED_CONF), LED_BIT ; Set IO register as output for LED - ; No need to trun off LED initially (non-promicro) -- MCU port is initialized as 0 - ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED initially (promicro) set ; Initialize BootLoaderActive flag (T flag in SREG) +#if defined(LED_ENABLED) + ; Set IO register as output for LED + ENABLE_LED_OUTPUT + ; Turn LED on while bootloader is active + TURN_LED_ON +#endif + + ; ================================================================= ; = Setup IRQ Vector Table @@ -551,13 +595,15 @@ exit_bootloader: ori r16, _BV(DETACH) ; Set the DETACH bit to enable the detachment std Y+oUDCON, r16 ; Store r16 to the USB Device Configuration Register (UDCON) +#if defined(LED_ENABLED) + ; Turn LED off before exiting + TURN_LED_OFF +#endif + ; ================================================================= ; = Watchdog Timer initialization ; ================================================================= - cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (non-promicro) - ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (promicro) - ; NOTE!! 
This part of the code assumes MCUSR has already been cleared ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) @@ -937,9 +983,6 @@ verifyMaxDescriptorLength: send_descriptor: - sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (non-promicro) - ;cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (promicro) - ; Abort if RXSTPI is set ldd r17, Y+oUEINTX ; Load r17 with the value in the USB Endpoint Interrupt Register (UEINTX); sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared From c82b526f89903cea073935f6d577eeaabfa049b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Torres Date: Mon, 24 Jan 2022 09:43:19 -0800 Subject: [PATCH 23/44] Address feedback from Osamu Moves "TURN_LED_ON" within "SET_CONFIGURATION" so LED is turned on ONLY when device has gone through enumeration. Adds check for "LED_ACTIVE_LEVEL" to turn LED off at the beginning of "run_bootloader" if LED is active low. This has a 2-byte penalty which makes nanoBoot go beyond 512 bytes on ProMicro-compatible boards. --- nanoBoot.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 506353c..d29097f 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -53,7 +53,8 @@ ; LED SUPPORT START ; Turn LED on while bootloader is active -; NOTE: This feature uses 6 bytes for active high and 8 bytes for active low +; NOTE: This feature uses 6 bytes for active high and 8 bytes for active low; +; see "Enable LED" in the "run_bootloader" section for details. 
; Uncomment the following line to enable LED feature ; #define LED_ENABLED @@ -398,11 +399,16 @@ run_application: ; We get here if the cause of th run_bootloader: set ; Initialize BootLoaderActive flag (T flag in SREG) +; Enable LED #if defined(LED_ENABLED) ; Set IO register as output for LED ENABLE_LED_OUTPUT - ; Turn LED on while bootloader is active - TURN_LED_ON + + ; If the LED is active low, we need to turn it off here (set LED_BIT) since the MCU IO port is + ; initialized as 0. This is a 2-byte penalty when using active low LED. + #if LED_ACTIVE_LEVEL == 0 + TURN_LED_OFF + #endif #endif @@ -765,6 +771,14 @@ HANDLE_USB_STANDARD_DEVICE: ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: +#if defined(LED_ENABLED) + ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) + ; TODO: If we ever have space, we could add a flag here to mark the fact that we have entered + ; this state, and turn the LED on at the end of the setup request. For now this is the best we + ; can do. + TURN_LED_ON +#endif + ; Optimization by "sigprof" that saves 2 bytes ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. From 7bee10b0dcd29074dbdeea9eee513bce7fad9de3 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:59:54 +0300 Subject: [PATCH 24/44] Save 2 bytes in the watchdog handling code `set_watchdog_timer` was called in two places, but the parameter value passed in r16 (the value used to unlock the watchdog configuration) was the same in both cases. Move the duplicated initialization of r16 into the `set_watchdog_timer` function itself, saving 2 bytes. Cherry-picked from testing branch of Sergey Vlasov c2b4ba5 ("Save 2 bytes in the watchdog handling code", 2021-08-20) Trivial merge conflict resolution of a comment string applied. This enables LED for promicro while keeping size within 512 bytes. 
Signed-off-by: Osamu Aoki --- nanoBoot.S | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index d29097f..13dc630 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -383,12 +383,9 @@ main: ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... ; Disable Watchdog Timer - ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) - mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely - rcall set_watchdog_timer ; Call the subroutine that sets the watchdog timer with the values loaded in r16 and r17 + rcall set_watchdog_timer ; Call the subroutine that sets the watchdog timer with the value loaded in r17 ; check_reset_flags: sbrs rMCUSR, EXTRF ; Skip the next instruction if EXTRF is set (if External Reset Flag, skip next instruction, go to run_bootloader) @@ -613,13 +610,10 @@ exit_bootloader: ; NOTE!! This part of the code assumes MCUSR has already been cleared ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) - ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) - ldi r17, _BV(WDE) | _BV(WDP2) ; Load r17 with the value needed to set the desired Watchdog Configuration (WDCE = 0, not set!) ; Write the WDE and Watchdog prescaler bits (WDP); System Reset Mode (WDE = 1) and ~250 ms timeout (WDP2 = 1) - rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded in r16 and r17 + rcall set_watchdog_timer ; Call the subroutine that sets the watchdog timer with the value loaded in r17 ; for (;;); final_loop: @@ -1077,8 +1071,11 @@ EP_ISR_END: set_watchdog_timer: - ; IMPORTANT!! 
This function assumes the correct values for the WDTCSR register - ; configuration are already loaded onto r16 and 17. + ; IMPORTANT!! This function assumes the correct value for the WDTCSR register + ; configuration is already loaded onto r17; it also modifies r16. + + ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration + ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) wdr ; Reset the Watchdog Timer From 8d9539dfe3d8036624a9c4dea13b9557b4b9f9eb Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 22:32:44 +0300 Subject: [PATCH 25/44] Save 4 bytes by simplifying wait_finish_transfer Instead of looping until one of two bits in UEINTX is set, and then retesting the value after the loop, just do `reti` inside the loop if `RXSTPI` is set. This saves 4 bytes by removing both a duplicate bit test instruction and an extra jump instruction. --- nanoBoot.S | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 13dc630..c02edc9 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -1020,20 +1020,10 @@ send_packet_done: ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: ldd r17, Y+oUEINTX ; Load r17 with the most current value in the USB Endpoint Interrupt Register (UEINTX); - sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host), go to acknowledge_rxouti - sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is not set; no need to abort, we haven't received another SETUP packet, we can keep looping - rjmp acknowledge_rxouti ; Jump if either RXOUTI or RXSTPI are set - rjmp wait_finish_transfer ; Loop back to finish_transfer until either Received OUT Data Interrupt Flag (RXOUTI) or Received SETUP Interrupt 
Flag (RXSTPI) is set - -acknowledge_rxouti: - - ; We could have gotten here if we got out of the previous loop (wait_finish_transfer) if either RXOUTI or RXSTPI asserted, since RXSTPI has the HIGHEST priority, - ; we check for it here first, to decide whether or not we need to abort - - ; Abort if RXSTPI is set - ; NOTE: R17 already has the most current value of UEINTX, no need to load it again sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared reti ; Return if RXSTPI is set, we need to prioritize SETUP packets + sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host) + rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX From 6d1a28bb4aae89fe512b620df3a4690fd7b68bef Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 23:05:23 +0300 Subject: [PATCH 26/44] Save 4 bytes by refactoring UEINTX handling subroutines The `wait_TXINI` and `wait_RXOUTI` subroutines were always called after a call to some other `clear_XXX` subroutine to clear a bit in `UEINTX`. Replace those two subroutines with `clear_bit_and_wait_TXINI` and `clear_bit_and_wait_RXOUTI`, which get the bit to be cleared as a parameter in `r17`, and then inline the remaining `clear_XXX` subroutines (a subroutine with just 2 instructions in its body actually takes more space than inline code if it is called less than 3 times). 
--- nanoBoot.S | 92 ++++++++++++++++-------------------------------------- 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index c02edc9..7dd072c 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -801,11 +801,9 @@ UNHANDLED_SETUP_REQUEST_1: SET_HID_REPORT: - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for command from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the SETUP packet and wait for command from the host + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX load_page_address: ldd r30, Y+oUEDATX ; Load r30 with LSB of page address @@ -853,11 +851,9 @@ check_endpoint_for_more_data: or r26, r26 brne fill_page_buffer ; if r26 is not zero, it means there's data in the endpoint which we can use to fill the page buffer, jump there - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for more data from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for more data from the host + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX fill_page_buffer: ; There's data at the endpoint buffer, start fill_page_buffer sequence @@ -890,14 +886,13 @@ reenable_rww_section: finish_hid_request: - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + rcall 
clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -982,7 +977,8 @@ process_single_descriptor: process_descriptor: ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) verifyMaxDescriptorLength: cp reg_wLengthL, r16 ; Compare the value in r24 (wLengthL) against the value in r16 (length of descriptor to send) @@ -1015,7 +1011,8 @@ transfer_descriptor: send_packet_done: ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: @@ -1026,7 +1023,8 @@ wait_finish_transfer: rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_SETUP_REQUEST: @@ -1039,7 +1037,8 @@ UNHANDLED_SETUP_REQUEST: ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall ; Acknowledge 
the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; STALL transaction @@ -1083,20 +1082,14 @@ process_Host2Device: ; NOTE: All the functions here affect r17 - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX - - ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX - - ; SIZE OPTIMIZATION: Fall through to wait_TXINI instead of rcall'ing it - ; Wait for TXINI (OK to transmit) - ; rcall wait_TXINI ; This function loads r17 with value of UEINTX - ; ret ; Return from call + ; Acknowledge the SETUP packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX + ; Clear Transmitter Ready Flag and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 +clear_bit_and_wait_TXINI: + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) wait_TXINI: ; NOTE: This function uses r17, we can use this fact to code other stuff @@ -1110,39 +1103,8 @@ wait_TXINI: ret ; Return from call -clear_RXSTPI: - - ; NOTE: This function affects r17 - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_TXINI: - - ; NOTE: This function affects r17 - - ; Clear Transmitter Ready Flag - ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 
- std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_RXOUTI: - - ; NOTE: This function affects r17 - - ; Acknowledge the OUT packet - ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 +clear_bit_and_wait_RXOUTI: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - wait_RXOUTI: ; NOTE: This function uses r17, we can use this fact to code other stuff From d462fed865fec45b00149183a467b95f86e90564 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 16:59:18 +0300 Subject: [PATCH 27/44] Save 2 bytes by refactoring the GET_DESCRIPTOR code Rewrite the part of the GET_DESCRIPTOR handling code that loads the address and length of the requested descriptor to use fewer jumps; the resulting code consumes 2 bytes less, even though it is actually more correct (no longer replies with some descriptor to requests for an unknown descriptor type). The new code also avoids hardcoding the high address byte, and no longer depends on the fact that all descriptors have the same high address byte (but it depends on the fact that all offsets between adjacent descriptors can fit into the `adiw` constant argument (0...63)). 
--- nanoBoot.S | 75 +++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 7dd072c..ca8f7bd 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -919,60 +919,31 @@ GET_DESCRIPTOR: ; Just get the descriptor address into ; [RAMPZ:]Z, and the length into r16 - ; We know ALL descriptors are at the beginning of the bootloader, in the reset_vector space, - ; and by inspection we can determine that they all share the same high byte of the address (0x7EXX) - ldi ZH, 0x7E ; Load ZH with the most significant 8 bits of the descriptors address (0x7E) - - ; High byte of wValue for GET_DESCRIPTOR transactions specifies Descriptor Type - ; NOTE! We are skipping the comparison for 0x01 (Device Descriptor), since that can't really - ; be excluded, we simply assume that's the default to save space here. See @SAVE_SPACE below. - cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; - breq send_config_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that - cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21; - breq send_hid_descriptor ; If high byte of wValue is 0x21 (HID Class HID Descriptor), jump to handle that - cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; - breq send_hid_report_descriptor ; If high byte of wValue is 0x22 (HID Class HID Report Descriptor), jump to handle that - - ; If needed, include other descriptors here - - ; @SAVE_SPACE: I was able to comment this out and things still work, but it's probably bad (saves 6 bytes) - ; The following 2 lines are also dropped since we are skipping "rjmp UNHANDLED_SETUP_REQUEST" (osamuaoki) - ; cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; - ; breq send_device_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that - ; NOTE: Originally, only this rjmp was skipped and things were still working, that's what - ; 
osamuaoki was able to use to optimize the check for (Device Descriptor), and simply fall through. - ; rjmp UNHANDLED_SETUP_REQUEST ; If the requested descriptor is not supported jump to UNHANDLED_SETUP_REQUEST - -send_device_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor - rjmp process_single_descriptor ; jump to process_single_descriptor - -send_config_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(config_descriptor) ; Load ZL with the least significant 8 bits of config_descriptor + ldi ZH, hi8(config_descriptor) ; Load the high address part of config_descriptor into ZH + ldi ZL, lo8(config_descriptor) ; Load the low address part of config_descriptor into ZL ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) - rjmp process_descriptor ; jump to process_descriptor - -send_hid_report_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_report_descriptor); Load ZL with the least significant 8 bits of hid_report_descriptor - ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) - rjmp process_descriptor ; jump to process_descriptor - - ; If needed, include other descriptors here - -send_hid_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - + cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; + breq process_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that + adiw r30, hid_descriptor - config_descriptor ; 
Change Z to point to hid_descriptor + cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21 (HID Class HID Descriptor) + ; The following code will also be reused for the device descriptor - both of these descriptors + ; contain the size in the first byte, and getting the size from there saves one instruction. This + ; trick cannot be applied to the Configuration Descriptor (which is actually a collection of + ; multiple descriptors) and the HID Report Descriptor (which has a completely different format). process_single_descriptor: - - lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes + lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes. + ; This instruction does not change any flags in SREG, therefore it can be placed + ; between the compare and the corresponding conditional jump. + breq process_descriptor ; If the last compare result was equal, jump to return the descriptor data. + adiw r30, device_descriptor - hid_descriptor ; Change Z to point to device_descriptor + cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; + breq process_single_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that; + ; reuse the code for hid_descriptor above. + adiw r30, hid_report_descriptor - device_descriptor ; Change Z to point to hid_report_descriptor + ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) + cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; + brne UNHANDLED_SETUP_REQUEST ; If high byte of wValue is NOT 0x22 (HID Class HID Report Descriptor), reject the setup request; + ; otherwise fallthrough to process_descriptor. process_descriptor: From 01570fc0f10a58229d8f21cb5514c6fef69b8661 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 12:52:23 +0900 Subject: [PATCH 28/44] Update bootloader size info. 
Signed-off-by: Osamu Aoki --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8bbcfc6..46ed5f5 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,14 @@ The name *nanoBoot* comes from the fact that the compiled source fits in the sma It's very likely that a few sections can be rewritten to make it even smaller, and the ultimate goal is to support EEPROM programming as well, although that would require changes to the host code. -The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115), and is exactly 506 bytes long. +The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). + +Binary size: +* 494 bytes (as is) +* 500 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 502 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) + +Here, LED supports require user to uncomment few lines in `nanoBoot.S`. ## HW assumptions: From 31987aec4f64862cd28b51a4d44a5daeafe2e92c Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 15:58:56 +0300 Subject: [PATCH 29/44] Save 4 bytes in the USB detach code The original LUFA code performed `UDCON |= (1 << DETACH);` to detach the USB device; however, the other bits of `UDCON` are known to be 0 at this time, therefore a simple write of a constant value could be performed here. In addition, the value of `(1 << DETACH)` is 1 on all AVR chips that could be potentially supported by this code, therefore even the instruction to load the constant value into a register could be omitted. 
Doing these changes removes 2 instructions, saving 4 bytes. --- nanoBoot.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index ca8f7bd..0db4fc5 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -594,9 +594,13 @@ main_loop: exit_bootloader: ; Detach device from USB Bus ; UDCON |= (1 << DETACH); - ldd r16, Y+oUDCON ; Load r16 with the value in the USB Device Configuration Register (UDCON) - ori r16, _BV(DETACH) ; Set the DETACH bit to enable the detachment - std Y+oUDCON, r16 ; Store r16 to the USB Device Configuration Register (UDCON) + ; SIZE OPTIMIZATION: All other UDCON bits except DETACH can be set to 0 at this time, and the value + ; of _BV(DETACH) is 0x01, therefore we can just store rONE into UDCON. + ; In theory this step could even be removed completely, because the watchdog reset should set the + ; DETACH bit anyway, but doing this here ensures that the host detects the USB device detach before + ; the application is started, which could avoid issues if the application does not add some delay + ; before enabling USB. + std Y+oUDCON, rONE ; Store _BV(DETACH) (== 0x01) to the USB Device Configuration Register (UDCON) #if defined(LED_ENABLED) ; Turn LED off before exiting From a7ce7aca7d32b3159bd485ae933816bfab620379 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:18:23 +0300 Subject: [PATCH 30/44] Save 2 bytes in the USBCON init code The initialization of USBCON does not need to read the current register value - just writing the reset value into that register is enough. This removes one instruction, saving 2 bytes. In theory this write could even be omitted completely, saving 4 more bytes, but this would make the code less robust if the application code attempting to enter the bootloader does not initialize some USB controller registers properly, therefore leaving that USBCON write there is safer. 
--- nanoBoot.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 0db4fc5..cc685b0 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -489,13 +489,12 @@ run_bootloader: ; USBCON &= ~(1 << VBUSTE); ; USBCON &= ~(1 << USBE); - ; IMPORTANT NOTE: To reduce code size, we are going to reseve r16 to handle all writes to the USB Controller Register (USBCON) - ; this way we don't have to keep loading the value to it (ldd) - ldd r16, Y+oUSBCON ; Load r16 with the value in the USB Configuration Register (USBCON) - - ; The right value of USBCON is already in r16, just clear VBUS Pad Enable Bit (OTGPADE), - ; VBUS Transition Interrupt Enable Bit (VBUSTE) and USB macro Enable Bit (USBE) - andi r16, ~(_BV(OTGPADE)|_BV(VBUSTE)|_BV(USBE)) + ; SIZE OPTIMIZATION: Instead of resetting just some specific bits, initialize the whole USBCON + ; register with its reset value (although even this could be omitted, this initialization is left + ; here in case the application tries to enter the bootloader in a slightly incorrect way). + ; As a further optimization, the USBCON register value is left in r16 for use in subsequent code + ; which modifies various bits of that register. + ldi r16, _BV(FRZCLK) ; Load r16 with the reset value for the USB Configuration Register (USBCON) std Y+oUSBCON, r16 ; Store r16 to the USB Configuration Register (USBCON) ; Enable USB Regulator (USB_REG_On) From d50e04e4a664fb4588e2072c07d884d265f582f0 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:37:46 +0300 Subject: [PATCH 31/44] Save 2 bytes in the HOST_TO_DEVICE parsing code The instruction which applied the mask of `(CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT)` to bmRequestType was not actually needed, because the value of this mask is 0x7F, and the only bit which is not covered by the mask (0x80) is already known to be 0, therefore the masking did not actually change anything. Removing this instruction saves 2 bytes. 
Signed-off-by: Sergey Vlasov Since following commits are skipped previously when cherry-picking. 97c8d96 ("TEMPORARY: EEPROM code which does not fit", 2021-08-25) e48801a ("WIP: Optimize some more code to make the EEPROM support fit", 2021-08-25) 78b804d ("WIP: Redo the EEPROM write implementation", 2021-08-29) e7e94f3 ("Save 2 bytes by reusing the TXINI clearing mask", 2021-08-29) Adjusted to keep branch to thunk code. Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index cc685b0..1154dc4 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -748,8 +748,7 @@ HOST_TO_DEVICE: cpi reg_bmRequestType, 0x00 ; Compare r18 (bmRequestType) with value 0x00 (OUT Type Resquest, USB Standard Request, Recipient is the device) breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE - andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) - cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) + cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: From 9f14f883168840499861097d0e537bb1c28bffb6 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:48:27 +0300 Subject: [PATCH 32/44] Save 2 bytes in the DEVICE_TO_HOST parsing code The DEVICE_TO_HOST handling code needs to handle only the GET_DESCRIPTOR requests, which may come 
with two possible bmRequestType values: - 0x80 (IN direction, USB Standard Request, Recipient is the device) if a descriptor which applies to the device as a whole is requested (this code is used for the device and configuration descriptors); - 0x81 (IN direction, USB Standard Request, Recipient is the interface) if a descriptor specific to a particular interface is requested (this code is used for the HID class and HID report descriptors). Because these codes are numerically sequential, and the direction bit has already been tested (therefore it is known that bmRequestType >= 0x80), it is enough to make a single comparison with 0x82 to determine whether bmRequestType has one of the above values. Removing the bit masking operation which is not needed after that change saves 2 bytes. --- nanoBoot.S | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 1154dc4..b6aec91 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -906,12 +906,14 @@ DEVICE_TO_HOST: ; If we get here, we know bit 7 of bmRequestType is set, meaning it is a DEVICE_TO_HOST (IN) request, ; now we need to filter out any unhandled requests - cbr reg_bmRequestType, 0x01 ; We mask reg_bmRequestType with value 0x01, bit 0 of bmRequestType is set if the recipient of the request is the interface, - ; and we need to handle that case since the host will query the interface to retrieve the hid_descriptor, obviously we also - ; need to handle the recipient being the device (bit 0 = 0) since all other descriptors are targeted to it - - cpi reg_bmRequestType, 0x80 ; Compare r18 (bmRequestType) with value 0x80 (IN Type Resquest, USB Standard Request, Recipient is the device/interface) - brne UNHANDLED_DEVICE_TO_HOST ; If bmRequestType is not 0x80, we know it's not a GET_DESCRIPTOR request, so jump to UNHANDLED_DEVICE_TO_HOST + ; SIZE OPTIMIZATION: The only bmRequestType values that we care about are: + ; - 0x80 - IN Type Request, USB Standard Request, Recipient is
the device + ; - 0x81 - IN Type Request, USB Standard Request, Recipient is the interface + ; At this step it is known that bmRequestType >= 0x80, therefore checking for bmRequestType < 0x82 + ; is enough to detect whether bmRequestType has one of the above values. + + cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) + brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal From fe8419d9dd28c1eee39a423c0b64a37da6b6aac0 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 14:59:22 +0900 Subject: [PATCH 33/44] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 46ed5f5..6ec8ad3 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). Binary size: -* 494 bytes (as is) -* 500 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 502 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 484 bytes (as is) +* 490 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 492 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. 
From ecaa078ba465fa54d91c2891bf13108b37082ebb Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Thu, 23 Apr 2020 00:09:55 +0900 Subject: [PATCH 34/44] Save 6 bytes by using Y+ for extended IO Access code to data in the extended IO address range can be made compact by using Y+. Both WDT and USB registers are in the extended IO address range. Fortunately, calls to these 2 types of addresses do not overlap. * initial code calls WDT * main code calls USB * exit code calls WDT This patch enables the use of Y+ for WDT in addition to USB. * YH is common to USB and WDT and 0 (very first). * YL is different. Its initialization is moved to each subroutine to avoid code duplication. * inside of the set_watchdog_timer subroutine * USB Initialization section Signed-off-by: Osamu Aoki --- nanoBoot.S | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index b6aec91..c503520 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -197,6 +197,14 @@ #define oUEBCHX (UEBCHX - USB_BASE) #define oUEINT (UEINT - USB_BASE) ; This register has the bits to identify which endpoint triggered an interrupt +; +; To facilitate coding, we will also use the Y register to point to the first Extended IO register; +; We can then use LDD / STD (Y+oU....)
to address non-USB extended IO registers (EIO_BASE + relative offset) +; (These are used only in start-up and exit routines when USB is not active) +; +#define EIO_BASE WDTCSR +#define oWDTCSR (WDTCSR - EIO_BASE) +#define oCLKPR (CLKPR - EIO_BASE) #include @@ -381,6 +389,11 @@ main: in rMCUSR, _SFR_IO_ADDR(MCUSR) ; Load MCU Status Register to rMCUSR out _SFR_IO_ADDR(MCUSR), rZERO ; Load MCU Status Register with rZERO (clear reset flags, particularly clear WDRF in MCUSR), necessary before disabling the Watchdog + ; Use Y+ for different purpose with YH=R29 to 0 for addressing extended io for any 64 bytes of YL specified section + ; * WDT initialization routine: YL=lo8(EIO_BASE) --- (wdt_init) -- start and end of bootloader + ; * USB communication routine: YL=lo8(USB_BASE) --- (usb_init) -- main part of bootloader + clr YH ; 0 = hi8(USB_BASE) = hi8(EIO_BASE) = 0 common initialization + ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... ; Disable Watchdog Timer mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely @@ -458,8 +471,9 @@ run_bootloader: ; code to work as expected. ldi r17, _BV(CLKPCE) ; Load r17 with the value needed to "unlock" the prescaler of the Clock; Clock Prescaler Change Enable bit (CLKPCE) set to one, all other bits set to zero. 
- sts CLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) - sts CLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) + ; still YH=0, YL=lo8(EIO_BASE) initial routine + std Y+oCLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) + std Y+oCLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) ; ================================================================= ; = Basic device setup is NOW COMPLETE!! @@ -470,8 +484,9 @@ run_bootloader: ; = Configure Y register to point to USB_BASE (UHWCON register) ; ================================================================= - ldi YL, lo8(USB_BASE) ; Load YL with the least significant 8 bits of USB_BASE - ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE + ldi YL, lo8(USB_BASE) ; Load YL with the least significant 8 bits of USB_BASE (usb_init) + ; still YH=0 + ; ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE ; ================================================================= ; = From LUFA simplified - USB_Init:_start @@ -1038,14 +1053,17 @@ set_watchdog_timer: ; IMPORTANT!! This function assumes the correct value for the WDTCSR register ; configuration is already loaded onto r17; it also modifies r16. 
- ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + ; always set YH to hi(EIO_BASE) before calling + ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (wdt_init) wdr ; Reset the Watchdog Timer + ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration + ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + std Y+oWDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) + ; Load the desired configuration to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) + std Y+oWDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) ret From 42d4f5a7d1d4ec7d7db17d6ac26123ec5a7d1cf2 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 16:06:01 +0900 Subject: [PATCH 35/44] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6ec8ad3..af70c7e 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). 
Binary size: -* 484 bytes (as is) -* 490 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 492 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 478 bytes (as is) +* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 486 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. From 436b91e6f0bf3cb64241cf5b48aef5e5f378074c Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 17:17:06 +0300 Subject: [PATCH 36/44] Move SET_HID_REPORT in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. Cherry picked from Sergey's repo LED code properly moved. Signed-off-by: Osamu Aoki --- nanoBoot.S | 88 ++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index c503520..5da9dd1 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -771,51 +771,6 @@ HANDLE_USB_CLASS_INTERFACE: breq SET_HID_REPORT ; jump to SET_HID_REPORT rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - -HANDLE_USB_STANDARD_DEVICE: - - ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest - cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) - breq SET_ADDRESS ; jump to SET_ADDRESS - cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal -SET_CONFIGURATION: - -#if defined(LED_ENABLED) - ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) - ; TODO: If we ever have space, we could add a flag here to mark 
the fact that we have entered - ; this state, and turn the LED on at the end of the setup request. For now this is the best we - ; can do. - TURN_LED_ON -#endif - - ; Optimization by "sigprof" that saves 2 bytes - ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, - ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. - - ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address - -SET_ADDRESS: - - ; Set device address; for this we only need to copy the value in wValueL which contains the address - ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS - ; request is only executed once during enumeration, and because allowed address values are 1 through - ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). We can also simply set the ADDEN - ; bit and store the value again in UDADDR to enable the USB Device Address. 
- - std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) - - rcall process_Host2Device ; This function affects r17 - - ; EnableDeviceAddress - ; UDADDR |= (1 << ADDEN) - ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address - std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) - -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - SET_HID_REPORT: ; Acknowledge the SETUP packet and wait for command from the host @@ -915,6 +870,49 @@ UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST +HANDLE_USB_STANDARD_DEVICE: + + ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest + cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) + breq SET_ADDRESS ; jump to SET_ADDRESS + cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to SET_CONFIGURATION if equal +SET_CONFIGURATION: +#if defined(LED_ENABLED) + ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) + ; TODO: If we ever have space, we could add a flag here to mark the fact that we have entered + ; this state, and turn the LED on at the end of the setup request. For now this is the best we + ; can do. + TURN_LED_ON +#endif + + ; Optimization by "sigprof" that saves 2 bytes + ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, + ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. 
+ + ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address + +SET_ADDRESS: + + ; Set device address; for this we only need to copy the value in wValueL which contains the address + ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS + ; request is only executed once during enumeration, and because allowed address values are 1 through + ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). We can also simply set the ADDEN + ; bit and store the value again in UDADDR to enable the USB Device Address. + + std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) + + rcall process_Host2Device ; This function affects r17 + + ; EnableDeviceAddress + ; UDADDR |= (1 << ADDEN) + ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address + std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) + +UNHANDLED_SETUP_REQUEST_1: + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + ; IN transactions DEVICE_TO_HOST: From 87517698a9ee40c6d57842262308203f403e6762 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 18:48:28 +0900 Subject: [PATCH 37/44] Save 2 bytes by using fallthrough for SET_HID_REPORT Restructure conditional jumps to use fallthrough for the `SET_HID_REPORT` case; this removes one jump instruction, saving 2 bytes. 
Cherry picked from Sergey Vlasov : a358e89 ("Save 2 bytes by using fallthrough for SET_HID_REPORT", 2021-08-24) Comment by Osamu (conflict resolution) UNHANDLED_SETUP_REQUEST_1 was used since br command PC(word) offset is 6 bit offset address 00 00007ede first call ending up at UNHANDLED_SETUP_REQUEST This requires thunk to reach with br** command 4A 00 00007f28 old UNHANDLED_DEVICE_TO_HOST entry ending up at UNHANDLED_SETUP_REQUEST 5E 14 00 00007f3c UNHANDLED_SETUP_REQUEST_1 entry ending up at UNHANDLED_SETUP_REQUEST !B8 6E 5A 00007f96 UNHANDLED_SETUP_REQUEST entry br** commands 6 bit offset PC offset (64) +/- 2^7 byte offset +7E, -80 (126) rjmp command 11 bit offset PC offset (2K) +/- 2^12 byte offset +FFE, -1000 (4094) Signed-off-by: Osamu Aoki --- nanoBoot.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 5da9dd1..60409d9 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -768,8 +768,8 @@ HOST_TO_DEVICE: ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - breq SET_HID_REPORT ; jump to SET_HID_REPORT - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + ; fallthrough to SET_HID_REPORT SET_HID_REPORT: From ed793b41bd23936b7a7807ae82f1a107130749d1 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 18:48:38 +0900 Subject: [PATCH 38/44] Drop UNHANDLED_DEVICE_TO_HOST thunk entry Due to reordering of code, UNHANDLED_SETUP_REQUEST is within scope of direct jump from br** command now. 
00007f3e : ; - 0x80 - IN Type Request, USB Standard Request, Recipient is the device ; - 0x81 - IN Type Request, USB Standard Request, Recipient is the interface ; At this step it is known that bmRequestType >= 0x80, therefore checking for bmRequestType < 0x82 ; is enough to detect whether bmRequestType has one of the above values. cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) 7f3e: 22 38 cpi r18, 0x82 ; 130 brcc UNHANDLED_SETUP_REQUEST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) 7f40: 50 f5 brcc .+84 ; 0x7f96 cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) 7f42: 36 30 cpi r19, 0x06 ; 6 brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST through f not equal 7f44: 41 f5 brne .+80 ; 0x7f96 Signed-off-by: Osamu Aoki --- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 60409d9..045b5c2 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -866,7 +866,6 @@ finish_hid_request: ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) -UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -926,10 +925,10 @@ DEVICE_TO_HOST: ; is enough to detect whether bmRequestType has one of the above values. 
cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) - brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) + brcc UNHANDLED_SETUP_REQUEST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) - brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal ; fallthrough to GET_DESCRIPTOR if equal GET_DESCRIPTOR: From e98d8fa489ca023f16177608cefb309a2bf92ead Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 19:06:10 +0900 Subject: [PATCH 39/44] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index af70c7e..eb7be07 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). Binary size: -* 478 bytes (as is) -* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 486 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 476 bytes (as is) +* 482 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. 
From 0ae76c2066d9003713faf6edb9c5596adcdb8ba1 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 21:10:59 +0900 Subject: [PATCH 40/44] Save 2 bytes with using clear_UEINTX_bit_and_reti Signed-off-by: Osamu Aoki --- nanoBoot.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 045b5c2..29b725a 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -864,10 +864,7 @@ finish_hid_request: ; Clear Transmitter Ready Flag ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - + rjmp clear_UEINTX_bit_and_reti ; Store r17 to the USB Endpoint Interrupt Register (UEINTX), then return from interrupt HANDLE_USB_STANDARD_DEVICE: @@ -1011,6 +1008,8 @@ wait_finish_transfer: ; Acknowledge the OUT packet ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + +clear_UEINTX_bit_and_reti: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_SETUP_REQUEST: From 96d54539bfc723523e5bc8eabe5987eaec163c04 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 22:28:24 +0900 Subject: [PATCH 41/44] Save 6 bytes by refactoring UNHANDLED_SETUP_REQUEST usage Remove the code which tried to check whether the SETUP packet has been handled - now UNHANDLED_SETUP_REQUEST can be used only if the SETUP packet needs to be acknowledged and replied with STALL, and in other cases the code should just do `reti` directly. In addition, UNHANDLED_SETUP_REQUEST is placed in the middle of the code, so that it would be reachable by conditional branches directly. Signed-off-by: Sergey Vlasov This is based on 6bc21dd ("Save 6 bytes by refactoring UNHANDLED_SETUP_REQUEST usage", 2021-09-04) UNHANDLED_SETUP_REQUEST is moved up so we don't need to branch through a thunk like UNHANDLED_SETUP_REQUEST_1.
So drop UNHANDLED_SETUP_REQUEST_1 usage. Signed-off-by: Osamu Aoki --- nanoBoot.S | 56 +++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 29b725a..3822014 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -764,11 +764,11 @@ HOST_TO_DEVICE: breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + brne UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST ; fallthrough to SET_HID_REPORT SET_HID_REPORT: @@ -872,8 +872,27 @@ HANDLE_USB_STANDARD_DEVICE: cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) breq SET_ADDRESS ; jump to SET_ADDRESS cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal + breq SET_CONFIGURATION ; jump to SET_CONFIGURATION + +UNHANDLED_SETUP_REQUEST: + + ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall + + ; Acknowledge the 
SETUP packet + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) + + ; STALL transaction + + ; // Endpoint_StallTransaction(); + ; UECONX |= (1 << STALLRQ); + ; Size optimization: We know that the only other bit that should be set in UECONX is EPEN, therefore + ; reading the current register value is not needed. + ldi r16, _BV(STALLRQ) | _BV(EPEN) ; Set the STALL Request Handshake Bit (STALLRQ) and EPEN in r16 + std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) + + reti ; Return from interrupt + SET_CONFIGURATION: #if defined(LED_ENABLED) ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) @@ -906,7 +925,6 @@ SET_ADDRESS: ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) -UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST ; IN transactions @@ -1011,33 +1029,7 @@ wait_finish_transfer: clear_UEINTX_bit_and_reti: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - -UNHANDLED_SETUP_REQUEST: - - ; if (Endpoint_IsSETUPReceived()) - ; (UEINTX & (1 << RXSTPI)) - ldd r16, Y+oUEINTX ; Load r16 with the value in the USB Endpoint Interrupt Register (UEINTX); - sbrs r16, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is set; received SETUP packet? 
- reti ; Return if RXSTPI is not set, SETUP packet already handled - - ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ; STALL transaction - - ; // Endpoint_StallTransaction(); - ; UECONX |= (1 << STALLRQ); - ldd r16, Y+oUECONX ; Load r16 with the value in the USB Endpoint Configuration Register (UECONX) - ori r16, _BV(STALLRQ) ; Set the STALL Request Handshake Bit (STALLRQ) in r16 - std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) - - -EP_ISR_END: - - reti ; Return from interrupt + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST ; ================================================================= From fc9f2e109b20d67539dd883658accf43ab9f4cd9 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 22:51:43 +0900 Subject: [PATCH 42/44] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index eb7be07..93e0a35 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). 
Binary size: -* 476 bytes (as is) -* 482 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 468 bytes (as is) +* 474 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 476 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. From 6927f96c624ccd7060eff39d6b85e6033f87dd35 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 19:35:53 +0900 Subject: [PATCH 43/44] LED enabled for Teensy compatible Signed-off-by: Osamu Aoki --- nanoBoot.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 3822014..83da694 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -57,7 +57,7 @@ ; see "Enable LED" in the "run_bootloader" section for details. ; Uncomment the following line to enable LED feature -; #define LED_ENABLED +#define LED_ENABLED ; LED Configuration ; Uncomment or add a new LED configuration for your specific board @@ -72,10 +72,10 @@ ; Teensy 2.0 compatible board ; -- LED is ON with ATmega32u4 PIN D6 HIGH -; #define LED_BIT 6 -; #define LED_CONF DDRD -; #define LED_PORT PORTD -; #define LED_ACTIVE_LEVEL 1 +#define LED_BIT 6 +#define LED_CONF DDRD +#define LED_PORT PORTD +#define LED_ACTIVE_LEVEL 1 ; Leonardo/Nano compatible board ; -- LED is ON with ATmega32u4 PIN C7 HIGH From 7b9f2965ca1a9bbf8a9e0550095db41804726051 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 22:59:00 +0900 Subject: [PATCH 44/44] Update README for my fork Signed-off-by: Osamu Aoki --- README.md | 96 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 93e0a35..d87ccf9 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,39 @@ -# nanoBoot +# nanoBoot (w/LED) 
-[![Build](https://github.com/volium/nanoBoot/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/volium/nanoBoot/actions/workflows/build.yml) +## HID bootloader with LED support & overwrite protection -This repository contains the source code for the USB HID-based bootloader for ATmegaXXU4 family of devices. + -The name *nanoBoot* comes from the fact that the compiled source fits in the smallest available boot size on the ATMegaXXu4 devices, 256 words or 512 bytes. The code is based on Dean Camera's [LUFA](https://github.com/abcminiuser/lufa) USB implementation, but it is **EXTREMELY** streamlined, size-optimized and targeted for the [ATmega16U4](http://www.atmel.com/devices/atmega16u4.aspx) and [ATmega32u4](http://www.atmel.com/devices/atmega32u4.aspx) devices; I had to make quite a few hardware assumptions, mostly to the fuse settings related to clock configuration for things to be as compact as possible, but the code still allows for some flexibility. +This repository [nanoBoot w/LED](https://github.com/osamuaoki/nanoBoot) contains the source code for the USB HID-based bootloader for ATmega32U4 family of devices with **LED support and overwrite protection**. -It's very likely that a few sections can be rewritten to make it even smaller, and the ultimate goal is to support EEPROM programming as well, although that would require changes to the host code. +The name **nanoBoot** comes from the fact that the compiled source fits in the smallest available boot size on the ATMega32u4 devices, 256 words or **512 bytes**. The code is based on Dean Camera's [LUFA](https://github.com/abcminiuser/lufa) USB implementation, but it is **EXTREMELY** streamlined, size-optimized and targeted for the [ATmega16U4](http://www.atmel.com/devices/atmega16u4.aspx) and [ATmega32u4](http://www.atmel.com/devices/atmega32u4.aspx) devices. 
-The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). +The initial and major portion of the manual assembly code optimization was performed by [volium](https://github.com/volium) and published as the original [volium/nanoBoot](https://github.com/volium/nanoBoot). + +Some tweaks were made by osamu to allow an arbitrary setting of the CKDIV8 fuse, and they were merged upstream. + +Many further manual size optimizations and a program size check feature were added by [sigprof](https://github.com/sigprof) and published as [sigprof/nanoBoot](https://github.com/sigprof/nanoBoot). + +Osamu gathered all the useful code into a linear commit history, with his LED support added, as the **led** branch here at [osamuaoki/nanoBoot](https://github.com/osamuaoki/nanoBoot). Binary size: -* 468 bytes (as is) -* 474 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 476 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 468 bytes (proposed to upstream as default) + * no LED +* 474 bytes (opt-in for upstream but default in my branch) + * enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 476 bytes (opt-in) + * enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) + +The current version (2021-12-08) will be tested manually with the compiled `hid_bootloader_cli.c` from [LUFA](https://github.com/abcminiuser/lufa) on Debian GNU/Linux 12 (bookworm/testing). -Here, LED supports require user to uncomment few lines in `nanoBoot.S`.
+Required packages on a Debian GNU/Linux system: `gcc-avr`, `avr-libc`, `binutils-avr`, `libusb-dev`, `build-essential`, `git` ## HW assumptions: * CLK is 16 MHz Crystal and fuses are setup correctly to support it: * Select Clock Source (CKSEL3:CKSEL0) fuses are set to Extenal Crystal, CKSEL=1111 SUT=11 - * Divide clock by 8 fuse (CKDIV8) can be set to either 0 or 1 + * Divide clock by 8 fuse (CKDIV8) can be any value. + * 16 MHz operation needs a 5 V VCC supply for the MCU * Bootloader starts on reset; Hardware Boot Enable fuse is configured, HWBE=0 * Boot Flash Size is set correctly to 256 words (512 bytes), StartAddress=0x3F00, BOOTSZ=11 * Device signature = 0x1E9587 @@ -31,10 +43,64 @@ Here, LED supports require user to uncomment few lines in `nanoBoot.S`. * hfuse memory = 0xD6 (EESAVE=0, BOOTRST=0) * efuse memory = 0xC7 (=0xF7, No BOD) -* Alternatively, BOD can be used to ease CKSEL-SUT setting requirements to - allow teensy-like FUSE settings: +* Alternatively, BOD can be used to ease CKSEL-SUT setting requirements to + allow a Teensy-like fuse setting: * lfuse memory = 0x5F (CKDIV8=0, 16CK + 0ms) * hfuse memory = 0xDF (EESAVE=1, BOOTRST=1) - * efuse memory = 0xF4 (BOD=2.4V) + * efuse memory = 0xC4 (=0xF4, BOD=2.4V) + +* LED on pin D6 for Teensy 2.0 (configurable via `#define` for any board) + +## Usage + +Please install this bootloader `nanoBoot.hex` using an ISP-connected programmer (e.g. AVRISP mkII). + +``` +$ sudo avrdude -v -p atmega32u4 -c avrisp2 -Pusb -e -U flash:w:nanoBoot.hex \ + -U lfuse:w:0x5f:m -U hfuse:w:0xdf:m -U efuse:w:0xc4:m +``` + +You can start this bootloader by connecting the board to the PC with a USB cable and pressing the RESET button. It is a good idea to monitor the PC's USB connection. + +``` + $ watch lsusb +``` + +If this bootloader is started, you should see "Atmel". + +Please note, this bootloader now turns on the LED just before sending the device ID. Thus, monitoring USB is now optional.
+ +(If the LED doesn't turn on for any reason even after a 10-second wait, press the RESET button again.) + +Then program the MCU with, e.g., a `LED.hex` firmware as: + +``` + $ sudo hid_bootloader_cli -mmcu=atmega32u4 -v LED.hex +``` +Please note, this bootloader turns off the LED upon finishing programming. + +(Pressing the RESET button during active bootloader execution seems to halt the bootloader. This seems to be the reason you need to press the RESET button again.) + +For your convenience, a pre-compiled HEX file and associated scripts are provided under the `precompile` directory. + +## Configuration + +Only the first configuration choice is tested with a Teensy 2.0 compatible board. + +In `Makefile`: + +* `F_CPU = 16000000` or `F_CPU = 8000000` +* `BOOT_START_OFFSET = 0x7E00` or any other valid value for the MCU + +In `nanoBoot.S`: + +* Adjust `#define LED_BIT`, `#define LED_CONF`, `#define LED_PORT`, and `#define LED_ACTIVE_LEVEL` for each board. The default is the Teensy 2.0 setting. + +## Documentation + +"The documentation is part of the source code itself, and even though some people may find it extremely verbose, I think that's better than lack of documentation; after all, assembly can be hard to read sometimes... ohhh yes, in case that was not expected, this is all written in pure GAS (GNU Assembly), compiled using the [Atmel AVR 8-bit Toolchain](http://www.atmel.com/tools/atmelavrtoolchainforwindows.aspx)." (per volium) + +"The elegant programming techniques presented by volium with detailed comments were very enlightening for me to get started. It's delightful for me to read. Don't miss it!" (per osamu) -The documentation is part of the source code itself, and even though some people may find it extremely verbose, I think that's better than lack of documentation; after all, assembly can be hard to read sometimes...
ohhh yes, in case that was not expected, this is all written in pure GAS (GNU Assembly), compiled using the [Atmel AVR 8-bit Toolchain](http://www.atmel.com/tools/atmelavrtoolchainforwindows.aspx). + * [AVR Instruction Set Manual](http://ww1.microchip.com/downloads/en/devicedoc/atmel-0856-avr-instruction-set-manual.pdf) + * [ATmega16U4, ATmega32U4 - Complete Datasheet](http://ww1.microchip.com/downloads/en/devicedoc/atmel-7766-8-bit-avr-atmega16u4-32u4_datasheet.pdf)