From 24e176d96c161f78649e864fc5e3283b48846cf3 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 22:32:44 +0300 Subject: [PATCH 01/27] Save 4 bytes by simplifying wait_finish_transfer Instead of looping until one of two bits in UEINTX is set, and then retesting the value after the loop, just do `reti` inside the loop if `RXSTPI` is set. This saves 4 bytes by removing both a duplicate bit test instruction and an extra jump instruction. --- nanoBoot.S | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 13dc630..c02edc9 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -1020,20 +1020,10 @@ send_packet_done: ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: ldd r17, Y+oUEINTX ; Load r17 with the most current value in the USB Endpoint Interrupt Register (UEINTX); - sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host), go to acknowledge_rxouti - sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is not set; no need to abort, we haven't received another SETUP packet, we can keep looping - rjmp acknowledge_rxouti ; Jump if either RXOUTI or RXSTPI are set - rjmp wait_finish_transfer ; Loop back to finish_transfer until either Received OUT Data Interrupt Flag (RXOUTI) or Received SETUP Interrupt Flag (RXSTPI) is set - -acknowledge_rxouti: - - ; We could have gotten here if we got out of the previous loop (wait_finish_transfer) if either RXOUTI or RXSTPI asserted, since RXSTPI has the HIGHEST priority, - ; we check for it here first, to decide whether or not we need to abort - - ; Abort if RXSTPI is set - ; NOTE: R17 already has the most current value of UEINTX, no need to load it again sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared reti ; Return 
if RXSTPI is set, we need to prioritize SETUP packets + sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host) + rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX From 6416c0d226ea5719545d76f9da772a881893e6fd Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 23:05:23 +0300 Subject: [PATCH 02/27] Save 4 bytes by refactoring UEINTX handling subroutines The `wait_TXINI` and `wait_RXOUTI` subroutines were always called after a call to some other `clear_XXX` subroutine to clear a bit in `UEINTX`. Replace those two subroutines with `clear_bit_and_wait_TXINI` and `clear_bit_and_wait_RXOUTI`, which get the bit to be cleared as a parameter in `r17`, and then inline the remaining `clear_XXX` subroutines (a subroutine with just 2 instructions in its body actually takes more space than inline code if it is called less than 3 times). 
--- nanoBoot.S | 92 ++++++++++++++++-------------------------------------- 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index c02edc9..7dd072c 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -801,11 +801,9 @@ UNHANDLED_SETUP_REQUEST_1: SET_HID_REPORT: - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for command from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the SETUP packet and wait for command from the host + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX load_page_address: ldd r30, Y+oUEDATX ; Load r30 with LSB of page address @@ -853,11 +851,9 @@ check_endpoint_for_more_data: or r26, r26 brne fill_page_buffer ; if r26 is not zero, it means there's data in the endpoint which we can use to fill the page buffer, jump there - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for more data from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for more data from the host + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX fill_page_buffer: ; There's data at the endpoint buffer, start fill_page_buffer sequence @@ -890,14 +886,13 @@ reenable_rww_section: finish_hid_request: - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + rcall 
clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -982,7 +977,8 @@ process_single_descriptor: process_descriptor: ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) verifyMaxDescriptorLength: cp reg_wLengthL, r16 ; Compare the value in r24 (wLengthL) against the value in r16 (length of descriptor to send) @@ -1015,7 +1011,8 @@ transfer_descriptor: send_packet_done: ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: @@ -1026,7 +1023,8 @@ wait_finish_transfer: rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_SETUP_REQUEST: @@ -1039,7 +1037,8 @@ UNHANDLED_SETUP_REQUEST: ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall ; Acknowledge 
the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; STALL transaction @@ -1083,20 +1082,14 @@ process_Host2Device: ; NOTE: All the functions here affect r17 - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX - - ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX - - ; SIZE OPTIMIZATION: Fall through to wait_TXINI instead of rcall'ing it - ; Wait for TXINI (OK to transmit) - ; rcall wait_TXINI ; This function loads r17 with value of UEINTX - ; ret ; Return from call + ; Acknowledge the SETUP packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX + ; Clear Transmitter Ready Flag and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 +clear_bit_and_wait_TXINI: + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) wait_TXINI: ; NOTE: This function uses r17, we can use this fact to code other stuff @@ -1110,39 +1103,8 @@ wait_TXINI: ret ; Return from call -clear_RXSTPI: - - ; NOTE: This function affects r17 - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_TXINI: - - ; NOTE: This function affects r17 - - ; Clear Transmitter Ready Flag - ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 
- std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_RXOUTI: - - ; NOTE: This function affects r17 - - ; Acknowledge the OUT packet - ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 +clear_bit_and_wait_RXOUTI: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - wait_RXOUTI: ; NOTE: This function uses r17, we can use this fact to code other stuff From 29403b8aec07e4666bba688cff30bacb462f209c Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 16:59:18 +0300 Subject: [PATCH 03/27] Save 2 bytes by refactoring the GET_DESCRIPTOR code Rewrite the part of the GET_DESCRIPTOR handling code that loads the address and length of the requested descriptor to use less jumps; the resulting code consumes 2 bytes less, even though it is actually more correct (no longer replies with some descriptor to requests for an unknown descriptor type). The new code also avoids hardcoding the high address byte, and no longer depends on the fact that all descriptors have the same high address byte (but it depends on the fact that all offsets between adjacent descriptors can fit into the `adiw` constant argument (0...63)). 
--- nanoBoot.S | 75 +++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 7dd072c..ca8f7bd 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -919,60 +919,31 @@ GET_DESCRIPTOR: ; Just get the descriptor address into ; [RAMPZ:]Z, and the length into r16 - ; We know ALL descriptors are at the beginning of the bootloader, in the reset_vector space, - ; and by inspection we can determine that they all share the same high byte of the address (0x7EXX) - ldi ZH, 0x7E ; Load ZH with the most significant 8 bits of the descriptors address (0x7E) - - ; High byte of wValue for GET_DESCRIPTOR transactions specifies Descriptor Type - ; NOTE! We are skipping the comparison for 0x01 (Device Descriptor), since that can't really - ; be excluded, we simply assume that's the default to save space here. See @SAVE_SPACE below. - cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; - breq send_config_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that - cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21; - breq send_hid_descriptor ; If high byte of wValue is 0x21 (HID Class HID Descriptor), jump to handle that - cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; - breq send_hid_report_descriptor ; If high byte of wValue is 0x22 (HID Class HID Report Descriptor), jump to handle that - - ; If needed, include other descriptors here - - ; @SAVE_SPACE: I was able to comment this out and things still work, but it's probably bad (saves 6 bytes) - ; The following 2 lines are also dropped since we are skipping "rjmp UNHANDLED_SETUP_REQUEST" (osamuaoki) - ; cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; - ; breq send_device_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that - ; NOTE: Originally, only this rjmp was skipped and things were still working, that's what - ; 
osamuaoki was able to use to optimize the check for (Device Descriptor), and simply fall through. - ; rjmp UNHANDLED_SETUP_REQUEST ; If the requested descriptor is not supported jump to UNHANDLED_SETUP_REQUEST - -send_device_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor - rjmp process_single_descriptor ; jump to process_single_descriptor - -send_config_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(config_descriptor) ; Load ZL with the least significant 8 bits of config_descriptor + ldi ZH, hi8(config_descriptor) ; Load the high address part of config_descriptor into ZH + ldi ZL, lo8(config_descriptor) ; Load the low address part of config_descriptor into ZL ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) - rjmp process_descriptor ; jump to process_descriptor - -send_hid_report_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_report_descriptor); Load ZL with the least significant 8 bits of hid_report_descriptor - ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) - rjmp process_descriptor ; jump to process_descriptor - - ; If needed, include other descriptors here - -send_hid_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - + cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; + breq process_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that + adiw r30, hid_descriptor - config_descriptor ; 
Change Z to point to hid_descriptor + cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21 (HID Class HID Descriptor) + ; The following code will also be reused for the device descriptor - both of these descriptors + ; contain the size in the first byte, and getting the size from there saves one instruction. This + ; trick cannot be applied to the Configuration Descriptor (which is actually a collection of + ; multiple descriptors) and the HID Report Descriptor (which has a completely different format). process_single_descriptor: - - lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes + lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes. + ; This instruction does not change any flags in SREG, therefore it can be placed + ; between the compare and the corresponding conditional jump. + breq process_descriptor ; If the last compare result was equal, jump to return the descriptor data. + adiw r30, device_descriptor - hid_descriptor ; Change Z to point to device_descriptor + cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; + breq process_single_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that; + ; reuse the code for hid_descriptor above. + adiw r30, hid_report_descriptor - device_descriptor ; Change Z to point to hid_report_descriptor + ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) + cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; + brne UNHANDLED_SETUP_REQUEST ; If high byte of wValue is NOT 0x22 (HID Class HID Report Descriptor), reject the setup request; + ; otherwise fallthrough to process_descriptor. process_descriptor: From c1e6e6863a574ddc9bb6cc9eb72af612bbcc0523 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 12:52:23 +0900 Subject: [PATCH 04/27] Update bootloader size info. 
Signed-off-by: Osamu Aoki --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c728d1..4be715f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,14 @@ The name *nanoBoot* comes from the fact that the compiled source fits in the sma It's very likely that a few sections can be rewritten to make it even smaller, and the ultimate goal is to support EEPROM programming as well, although that would require changes to the host code. -The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115), and is exactly 506 bytes long. +The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). + +Binary size: +* 494 bytes (as is) +* 500 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 502 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) + +Here, LED supports require user to uncomment few lines in `nanoBoot.S`. ## HW assumptions: From c636884e1e57630f7d8ea963884df6f81c62c2ff Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 15:58:56 +0300 Subject: [PATCH 05/27] Save 4 bytes in the USB detach code The original LUFA code performed `UDCON |= (1 << DETACH);` to detach the USB device; however, the other bits of `UDCON` are known to be 0 at this time, therefore a simple write of a constant value could be performed here. In addition, the value of `(1 << DETACH)` is 1 on all AVR chips that could be potentially supported by this code, therefore even the instruction to load the constant value into a register could be omitted. 
Doing these changes removes 2 instructions, saving 4 bytes. --- nanoBoot.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index ca8f7bd..0db4fc5 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -594,9 +594,13 @@ main_loop: exit_bootloader: ; Detach device from USB Bus ; UDCON |= (1 << DETACH); - ldd r16, Y+oUDCON ; Load r16 with the value in the USB Device Configuration Register (UDCON) - ori r16, _BV(DETACH) ; Set the DETACH bit to enable the detachment - std Y+oUDCON, r16 ; Store r16 to the USB Device Configuration Register (UDCON) + ; SIZE OPTIMIZATION: All other UDCON bits except DETACH can be set to 0 at this time, and the value + ; of _BV(DETACH) is 0x01, therefore we can just store rONE into UDCON. + ; In theory this step could even be removed completely, because the watchdog reset should set the + ; DETACH bit anyway, but doing this here ensures that the host detects the USB device detach before + ; the application is started, which could avoid issues if the application does not add some delay + ; before enabling USB. + std Y+oUDCON, rONE ; Store _BV(DETACH) (== 0x01) to the USB Device Configuration Register (UDCON) #if defined(LED_ENABLED) ; Turn LED off before exiting From 224d46c7a079465b6819a89dd3e566b9e0c070de Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:18:23 +0300 Subject: [PATCH 06/27] Save 2 bytes in the USBCON init code The initialization of USBCON does not need to read the current register value - just writing the reset value into that register is enough. This removes one instruction, saving 2 bytes. In theory this write could even be omitted completely, saving 4 more bytes, but this would make the code less robust if the application code attempting to enter the bootloader does not initialize some USB controller registers properly, therefore leaving that USBCON write there is safer. 
--- nanoBoot.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 0db4fc5..cc685b0 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -489,13 +489,12 @@ run_bootloader: ; USBCON &= ~(1 << VBUSTE); ; USBCON &= ~(1 << USBE); - ; IMPORTANT NOTE: To reduce code size, we are going to reseve r16 to handle all writes to the USB Controller Register (USBCON) - ; this way we don't have to keep loading the value to it (ldd) - ldd r16, Y+oUSBCON ; Load r16 with the value in the USB Configuration Register (USBCON) - - ; The right value of USBCON is already in r16, just clear VBUS Pad Enable Bit (OTGPADE), - ; VBUS Transition Interrupt Enable Bit (VBUSTE) and USB macro Enable Bit (USBE) - andi r16, ~(_BV(OTGPADE)|_BV(VBUSTE)|_BV(USBE)) + ; SIZE OPTIMIZATION: Instead of resetting just some specific bits, initialize the whole USBCON + ; register with its reset value (although even this could be omitted, this initialization is left + ; here in case the application tries to enter the bootloader in a slightly incorrect way). + ; As a further optimization, the USBCON register value is left in r16 for use in subsequent code + ; which modifies various bits of that register. + ldi r16, _BV(FRZCLK) ; Load r16 with the reset value for the USB Configuration Register (USBCON) std Y+oUSBCON, r16 ; Store r16 to the USB Configuration Register (USBCON) ; Enable USB Regulator (USB_REG_On) From a29cb77a5ddc1a5d6b900aa49cc47c078d727920 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:37:46 +0300 Subject: [PATCH 07/27] Save 2 bytes in the HOST_TO_DEVICE parsing code The instruction which applied the mask of `(CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT)` to bmRequestType was not actually needed, because the value of this mask is 0x7F, and the only bit which is not covered by the mask (0x80) is already known to be 0, therefore the masking did not actually change anything. Removing this instruction saves 2 bytes. 
Signed-off-by: Sergey Vlasov Because the following commits were skipped earlier when cherry-picking: 97c8d96 ("TEMPORARY: EEPROM code which does not fit", 2021-08-25) e48801a ("WIP: Optimize some more code to make the EEPROM support fit", 2021-08-25) 78b804d ("WIP: Redo the EEPROM write implementation", 2021-08-29) e7e94f3 ("Save 2 bytes by reusing the TXINI clearing mask", 2021-08-29) this patch was adjusted to keep the branch to the thunk code. Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index cc685b0..1154dc4 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -748,8 +748,7 @@ HOST_TO_DEVICE: cpi reg_bmRequestType, 0x00 ; Compare r18 (bmRequestType) with value 0x00 (OUT Type Resquest, USB Standard Request, Recipient is the device) breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE - andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) - cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) + cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: From 3aa9547fc6ef8e760f44985cba1e68e029e768cb Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:48:27 +0300 Subject: [PATCH 08/27] Save 2 bytes in the DEVICE_TO_HOST parsing code The DEVICE_TO_HOST handling code needs to handle only the GET_DESCRIPTOR requests, which may come
with two possible bmRequestType values: - 0x80 (IN direction, USB Standard Request, Recipient is the device) if a descriptor which applies to the device as a whole is requested (this code is used for the device and configuration descriptors); - 0x81 (IN direction, USB Standard Request, Recipient is the interface) if a descriptor specific to a particular interface is requested (this code is used for the HID class and HID report descriptors). Because these codes are numerically sequential, and the direction bit has already been tested (therefore it is known that bmRequestType >= 0x80), it is enough to make a single comparison with 0x82 to determine whether bmRequestType has one of the above values. Removing the bit masking operation which is not needed after that change saves 2 bytes. --- nanoBoot.S | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 1154dc4..b6aec91 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -906,12 +906,14 @@ DEVICE_TO_HOST: ; If we get here, we know bit 7 of bmRequestType is set, meaning it is a DEVICE_TO_HOST (IN) request, ; now we need to filter out any unhandled requests - cbr reg_bmRequestType, 0x01 ; We mask reg_bmRequestType with value 0x01, bit 0 of bmRequestType is set if the recipient of the request is the interface, - ; and we need to handle that case since the host will query the interface to retrieve the hid_descriptor, obviously we also - ; need to handle the recipient being the device (bit 0 = 0) since all other descriptors are targeted to it - - cpi reg_bmRequestType, 0x80 ; Compare r18 (bmRequestType) with value 0x80 (IN Type Resquest, USB Standard Request, Recipient is the device/interface) - brne UNHANDLED_DEVICE_TO_HOST ; If bmRequestType is not 0x80, we know it's not a GET_DESCRIPTOR request, so jump to UNHANDLED_DEVICE_TO_HOST + ; SIZE OPTIMIZATION: The only bmRequestType values that we care about are: + ; - 0x80 - IN Type Request, USB Standard Request, Recipient is
the device + ; - 0x81 - IN Type Request, USB Standard Request, Recipient is the interface + ; At this step it is known that bmRequestType >= 0x80, therefore checking for bmRequestType < 0x82 + ; is enough to detect whether bmRequestType has one of the above values. + + cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) + brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal From 61a60ed2f08e0ec29f474a098e5cbac1fd9d26d6 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 14:59:22 +0900 Subject: [PATCH 09/27] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4be715f..193e87b 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). Binary size: -* 494 bytes (as is) -* 500 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 502 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 484 bytes (as is) +* 490 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 492 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. 
From f242a41159fd67734e06ded8c3e16f5efd10606d Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Thu, 23 Apr 2020 00:09:55 +0900 Subject: [PATCH 10/27] Save 6 bytes by using Y+ for extended IO Access code to data in the extended IO address range can be made compact by using Y+. Both the WDT and USB registers are in the extended IO address range. Fortunately, accesses to these 2 types of addresses do not overlap. * initial code calls WDT * main code calls USB * exit code calls WDT This patch enables the use of Y+ for WDT in addition to USB. * YH is common to USB and WDT and is 0 (cleared once at the very start). * YL is different. Its initialization is moved to each subroutine to avoid code duplication. * inside of the set_watchdog_timer subroutine * USB Initialization section Signed-off-by: Osamu Aoki --- nanoBoot.S | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index b6aec91..c503520 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -197,6 +197,14 @@ #define oUEBCHX (UEBCHX - USB_BASE) #define oUEINT (UEINT - USB_BASE) ; This register has the bits to identify which endpoint triggered an interrupt +; +; To facilitate coding, we will also use the Y register to point to the first Extended IO register; +; We can then use LDD / STD (Y+oU....)
to address non-USB extended IO registers (EIO_BASE + relative offset) +; (These are used only in start-up and exit routines when USB is not active) +; +#define EIO_BASE WDTCSR +#define oWDTCSR (WDTCSR - EIO_BASE) +#define oCLKPR (CLKPR - EIO_BASE) #include @@ -381,6 +389,11 @@ main: in rMCUSR, _SFR_IO_ADDR(MCUSR) ; Load MCU Status Register to rMCUSR out _SFR_IO_ADDR(MCUSR), rZERO ; Load MCU Status Register with rZERO (clear reset flags, particularly clear WDRF in MCUSR), necessary before disabling the Watchdog + ; Use Y+ for different purpose with YH=R29 to 0 for addressing extended io for any 64 bytes of YL specified section + ; * WDT initialization routine: YL=lo8(EIO_BASE) --- (wdt_init) -- start and end of bootloader + ; * USB communication routine: YL=lo8(USB_BASE) --- (usb_init) -- main part of bootloader + clr YH ; 0 = hi8(USB_BASE) = hi8(EIO_BASE) = 0 common initialization + ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... ; Disable Watchdog Timer mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely @@ -458,8 +471,9 @@ run_bootloader: ; code to work as expected. ldi r17, _BV(CLKPCE) ; Load r17 with the value needed to "unlock" the prescaler of the Clock; Clock Prescaler Change Enable bit (CLKPCE) set to one, all other bits set to zero. 
- sts CLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) - sts CLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) + ; still YH=0, YL=lo8(EIO_BASE) initial routine + std Y+oCLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) + std Y+oCLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) ; ================================================================= ; = Basic device setup is NOW COMPLETE!! @@ -470,8 +484,9 @@ run_bootloader: ; = Configure Y register to point to USB_BASE (UHWCON register) ; ================================================================= - ldi YL, lo8(USB_BASE) ; Load YL with the least significant 8 bits of USB_BASE - ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE + ldi YL, lo8(USB_BASE) ; Load YL with the least significant 8 bits of USB_BASE (usb_init) + ; still YH=0 + ; ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE ; ================================================================= ; = From LUFA simplified - USB_Init:_start @@ -1038,14 +1053,17 @@ set_watchdog_timer: ; IMPORTANT!! This function assumes the correct value for the WDTCSR register ; configuration is already loaded onto r17; it also modifies r16. 
- ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + ; always set YH to hi(EIO_BASE) before calling + ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (wdt_init) wdr ; Reset the Watchdog Timer + ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration + ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + std Y+oWDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) + ; Load the desired configuration to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) + std Y+oWDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) ret From 1576141250c79bd46e1647665b4d2bd475a9be32 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 16:06:01 +0900 Subject: [PATCH 11/27] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 193e87b..40d2434 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). 
Binary size: -* 484 bytes (as is) -* 490 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 492 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 478 bytes (as is) +* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 486 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. From 37ff063788b405641336c0ac3d85016d39ad10aa Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 17:17:06 +0300 Subject: [PATCH 12/27] Move SET_HID_REPORT in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. Cherry picked from Sergey's repo LED code properly moved. Signed-off-by: Osamu Aoki --- nanoBoot.S | 88 ++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index c503520..5da9dd1 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -771,51 +771,6 @@ HANDLE_USB_CLASS_INTERFACE: breq SET_HID_REPORT ; jump to SET_HID_REPORT rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - -HANDLE_USB_STANDARD_DEVICE: - - ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest - cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) - breq SET_ADDRESS ; jump to SET_ADDRESS - cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal -SET_CONFIGURATION: - -#if defined(LED_ENABLED) - ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) - ; TODO: If we ever have space, we could add a flag here to mark 
the fact that we have entered - ; this state, and turn the LED on at the end of the setup request. For now this is the best we - ; can do. - TURN_LED_ON -#endif - - ; Optimization by "sigprof" that saves 2 bytes - ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, - ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. - - ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address - -SET_ADDRESS: - - ; Set device address; for this we only need to copy the value in wValueL which contains the address - ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS - ; request is only executed once during enumeration, and because allowed address values are 1 through - ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). We can also simply set the ADDEN - ; bit and store the value again in UDADDR to enable the USB Device Address. 
- - std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) - - rcall process_Host2Device ; This function affects r17 - - ; EnableDeviceAddress - ; UDADDR |= (1 << ADDEN) - ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address - std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) - -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - SET_HID_REPORT: ; Acknowledge the SETUP packet and wait for command from the host @@ -915,6 +870,49 @@ UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST +HANDLE_USB_STANDARD_DEVICE: + + ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest + cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) + breq SET_ADDRESS ; jump to SET_ADDRESS + cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to SET_CONFIGURATION if equal +SET_CONFIGURATION: +#if defined(LED_ENABLED) + ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) + ; TODO: If we ever have space, we could add a flag here to mark the fact that we have entered + ; this state, and turn the LED on at the end of the setup request. For now this is the best we + ; can do. + TURN_LED_ON +#endif + + ; Optimization by "sigprof" that saves 2 bytes + ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, + ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. 
+ + ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address + +SET_ADDRESS: + + ; Set device address; for this we only need to copy the value in wValueL which contains the address + ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS + ; request is only executed once during enumeration, and because allowed address values are 1 through + ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). We can also simply set the ADDEN + ; bit and store the value again in UDADDR to enable the USB Device Address. + + std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) + + rcall process_Host2Device ; This function affects r17 + + ; EnableDeviceAddress + ; UDADDR |= (1 << ADDEN) + ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address + std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) + +UNHANDLED_SETUP_REQUEST_1: + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + ; IN transactions DEVICE_TO_HOST: From ec4040316dc9e525519bc57de93906318bb9f039 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 18:48:28 +0900 Subject: [PATCH 13/27] Save 2 bytes by using fallthrough for SET_HID_REPORT Restructure conditional jumps to use fallthrough for the `SET_HID_REPORT` case; this removes one jump instruction, saving 2 bytes. 
Cherry picked from Sergey Vlasov : a358e89 ("Save 2 bytes by using fallthrough for SET_HID_REPORT", 2021-08-24) Comment by Osamu (conflict resolution) UNHANDLED_SETUP_REQUEST_1 was used since the br** command PC (word) offset is only 7 bits, signed (-64..+63 words) offset address 00 00007ede first call ending up at UNHANDLED_SETUP_REQUEST This requires a thunk to be reached with a br** command 4A 00 00007f28 old UNHANDLED_DEVICE_TO_HOST entry ending up at UNHANDLED_SETUP_REQUEST 5E 14 00 00007f3c UNHANDLED_SETUP_REQUEST_1 entry ending up at UNHANDLED_SETUP_REQUEST !B8 6E 5A 00007f96 UNHANDLED_SETUP_REQUEST entry br** commands: 7-bit signed PC word offset (-64..+63 words), i.e. byte offset +7E, -80 (+126, -128) rjmp command: 12-bit signed PC word offset (-2048..+2047 words), i.e. byte offset +FFE, -1000 (+4094, -4096) Signed-off-by: Osamu Aoki --- nanoBoot.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 5da9dd1..60409d9 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -768,8 +768,8 @@ HOST_TO_DEVICE: ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - breq SET_HID_REPORT ; jump to SET_HID_REPORT - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + ; fallthrough to SET_HID_REPORT SET_HID_REPORT: From 3315be511d0bc0a43c4e206a3f726436fe934dc7 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 18:48:38 +0900 Subject: [PATCH 14/27] Drop UNHANDLED_DEVICE_TO_HOST thunk entry Due to reordering of code, UNHANDLED_SETUP_REQUEST is now within range of a direct jump from a br** command.
00007f3e : ; - 0x80 - IN Type Request, USB Standard Request, Recipient is the device ; - 0x81 - IN Type Request, USB Standard Request, Recipient is the interface ; At this step it is known that bmRequestType >= 0x80, therefore checking for bmRequestType < 0x82 ; is enough to detect whether bmRequestType has one of the above values. cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) 7f3e: 22 38 cpi r18, 0x82 ; 130 brcc UNHANDLED_SETUP_REQUEST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) 7f40: 50 f5 brcc .+84 ; 0x7f96 cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) 7f42: 36 30 cpi r19, 0x06 ; 6 brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal 7f44: 41 f5 brne .+80 ; 0x7f96 Signed-off-by: Osamu Aoki --- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 60409d9..045b5c2 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -866,7 +866,6 @@ finish_hid_request: ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) -UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -926,10 +925,10 @@ DEVICE_TO_HOST: ; is enough to detect whether bmRequestType has one of the above values.
cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) - brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) + brcc UNHANDLED_SETUP_REQUEST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) - brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal ; fallthrough to GET_DESCRIPTOR if equal GET_DESCRIPTOR: From f8e9b4169c14e64f2aa01bbb72e521081261b8ab Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 19:06:10 +0900 Subject: [PATCH 15/27] Update binary size Signed-off-by: Osamu Aoki --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 40d2434..584dc69 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ It's very likely that a few sections can be rewritten to make it even smaller, a The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). Binary size: -* 478 bytes (as is) -* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 486 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) +* 476 bytes (as is) +* 482 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) +* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) Here, LED supports require user to uncomment few lines in `nanoBoot.S`. 
From 9c2d620dfbae2c6e74ffcf72d0e91aaf94df35f0 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sat, 5 Feb 2022 21:10:59 +0900 Subject: [PATCH 16/27] Save 2 bytes with using clear_UEINTX_bit_and_reti Signed-off-by: Osamu Aoki --- nanoBoot.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 045b5c2..29b725a 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -864,10 +864,7 @@ finish_hid_request: ; Clear Transmitter Ready Flag ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - + rjmp clear_UEINTX_bit_and_reti ; Store r17 to the USB Endpoint Interrupt Register (UEINTX), then return from interrupt HANDLE_USB_STANDARD_DEVICE: @@ -1011,6 +1008,8 @@ wait_finish_transfer: ; Acknowledge the OUT packet ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + +clear_UEINTX_bit_and_reti: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_SETUP_REQUEST: From 55026aad66917d66713730f5d5b4eb552bf6253c Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 14:33:31 +0900 Subject: [PATCH 17/27] Move up UNHANDLED_SETUP_REQUEST to the center The br** commands have a relatively narrow jump range. Since UNHANDLED_SETUP_REQUEST is a frequent entry point, moving it up to keep it at the center of the code makes sense. This makes it possible to drop the thunk.
Signed-off-by: Osamu Aoki --- nanoBoot.S | 58 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 29b725a..5a73da3 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -872,8 +872,35 @@ HANDLE_USB_STANDARD_DEVICE: cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) breq SET_ADDRESS ; jump to SET_ADDRESS cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal + breq SET_CONFIGURATION ; jump to SET_CONFIGURATION if equal + ; fallthrough to UNHANDLED_SETUP_REQUEST if not equal +UNHANDLED_SETUP_REQUEST: + + ; if (Endpoint_IsSETUPReceived()) + ; (UEINTX & (1 << RXSTPI)) + ldd r16, Y+oUEINTX ; Load r16 with the value in the USB Endpoint Interrupt Register (UEINTX); + sbrs r16, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is set; received SETUP packet? 
+ reti ; Return if RXSTPI is not set, SETUP packet already handled + + ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall + + ; Acknowledge the SETUP packet + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) + + ; STALL transaction + + ; // Endpoint_StallTransaction(); + ; UECONX |= (1 << STALLRQ); + ldd r16, Y+oUECONX ; Load r16 with the value in the USB Endpoint Configuration Register (UECONX) + ori r16, _BV(STALLRQ) ; Set the STALL Request Handshake Bit (STALLRQ) in r16 + std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) + + +EP_ISR_END: + + reti ; Return from interrupt + SET_CONFIGURATION: #if defined(LED_ENABLED) ; Turn LED on towards the end of enumeration (SET_CONFIGURATION is done after SET_ADDRESS) @@ -1011,33 +1038,8 @@ wait_finish_transfer: clear_UEINTX_bit_and_reti: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) + rjmp UNHANDLED_SETUP_REQUEST -UNHANDLED_SETUP_REQUEST: - - ; if (Endpoint_IsSETUPReceived()) - ; (UEINTX & (1 << RXSTPI)) - ldd r16, Y+oUEINTX ; Load r16 with the value in the USB Endpoint Interrupt Register (UEINTX); - sbrs r16, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is set; received SETUP packet? 
- reti ; Return if RXSTPI is not set, SETUP packet already handled - - ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ; STALL transaction - - ; // Endpoint_StallTransaction(); - ; UECONX |= (1 << STALLRQ); - ldd r16, Y+oUECONX ; Load r16 with the value in the USB Endpoint Configuration Register (UECONX) - ori r16, _BV(STALLRQ) ; Set the STALL Request Handshake Bit (STALLRQ) in r16 - std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) - - -EP_ISR_END: - - reti ; Return from interrupt ; ================================================================= From 1ccfbdcf4631f1a3f44ef3bb18e08ef13f62aba1 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 14:37:05 +0900 Subject: [PATCH 18/27] UNHANDLED_SETUP_REQUEST is accessible No more need to call through its thunk Signed-off-by: Osamu Aoki --- nanoBoot.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 5a73da3..a0181c2 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -764,11 +764,11 @@ HOST_TO_DEVICE: breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal ; fallthrough to HANDLE_USB_CLASS_INTERFACE if equal HANDLE_USB_CLASS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 
(HID_REQ_SetReport) - brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + brne UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST ; fallthrough to SET_HID_REPORT SET_HID_REPORT: @@ -932,8 +932,6 @@ SET_ADDRESS: ; UDADDR |= (1 << ADDEN) ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) - -UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST ; IN transactions From 78bbe0266edb1aeee5f711011bd2a69a55199dd2 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 07:25:05 +0900 Subject: [PATCH 19/27] Save 2 bytes by using R29=YH as rZERO Signed-off-by: Osamu Aoki --- nanoBoot.S | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index a0181c2..7584e3a 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -130,10 +130,19 @@ ; Register Assignments: ; R0 = temp ; R1 = temp (traditionally, R1 stores constant 0, but we use it as temp because SPM uses R1:R0) -; R2 = 0 (common constant, use instead of traditional R1) +; R29 = 0 (common constant, use instead of traditional R1 since YH is always 0 within bootloader) +; R2 unused ; R3 = 1 (common constant, number is used constantly in the code) ; R4 = copy of MCUSR (MCU Status Register) -; Y = USB_BASE +; R4 ... R15 unused +; R16 local variable +; R17 local variable +; R18 ... 
R25 local array variable used by USB +; R26, R27 X index register +; R28, R29 index register +; R28 = YL = lo8(USB_BASE) or lo8(EIO_BASE) +; R29 = YH = 0 = rZERO +; R30, R31 Z index register ; Global Flags: ; T Flag (SREG) = BootLoaderActive @@ -141,7 +150,7 @@ ; Global Defines: -#define rZERO r2 +#define rZERO r29 #define rONE r3 #define rMCUSR r4 @@ -220,7 +229,7 @@ reset_vector: cli ; Possibly unnecessary, maybe do something else? - clr rZERO ; Initialize rZERO (R2 register = zero) + clr rZERO ; Initialize rZERO (YH = R29 register = zero) rjmp main ; Jump to main ; We are "hiding" the USB descriptors in the Interrupt Vector Table @@ -392,7 +401,7 @@ main: ; Use Y+ for different purpose with YH=R29 to 0 for addressing extended io for any 64 bytes of YL specified section ; * WDT initialization routine: YL=lo8(EIO_BASE) --- (wdt_init) -- start and end of bootloader ; * USB communication routine: YL=lo8(USB_BASE) --- (usb_init) -- main part of bootloader - clr YH ; 0 = hi8(USB_BASE) = hi8(EIO_BASE) = 0 common initialization + ; YH=rZERO = 0 : good for hi8(USB_BASE) = hi8(EIO_BASE) = 0 ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... ; Disable Watchdog Timer From b767f7a5f4cee651cb1fd3197a01d0f4227d5a6a Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 09:53:29 +0900 Subject: [PATCH 20/27] Save 2 bytes with no LED turn-off There is no need to turn off LED Write program message gives the completion confirmation. Signed-off-by: Osamu Aoki --- nanoBoot.S | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 7584e3a..ee74128 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -625,11 +625,6 @@ exit_bootloader: ; before enabling USB. 
std Y+oUDCON, rONE ; Store _BV(DETACH) (== 0x01) to the USB Device Configuration Register (UDCON) -#if defined(LED_ENABLED) - ; Turn LED off before exiting - TURN_LED_OFF -#endif - ; ================================================================= ; = Watchdog Timer initialization ; ================================================================= From 4b4def365b39a04db214adf353bdbc02d0548898 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 21:54:42 +0300 Subject: [PATCH 21/27] String descriptor implementation Borrowed from 99c4dbf ("WIP: String descriptor implementation", 2021-09-04) I skipped size optimization in USB signal handling since it was problematic. If no LEDs are used, this is good enough. Signed-off-by: Osamu Aoki --- nanoBoot.S | 90 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index ee74128..364c957 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -228,13 +228,13 @@ .global reset_vector reset_vector: - cli ; Possibly unnecessary, maybe do something else? +; cli ; Dropped cli for size by Sergey + ; ... but is this safe for soft-reset, Osamu? clr rZERO ; Initialize rZERO (YH = R29 register = zero) rjmp main ; Jump to main ; We are "hiding" the USB descriptors in the Interrupt Vector Table -; NOTE: The 3 instructions above take 6 bytes total, eating into half (2 bytes) -; of the "External Interrupt Request" vector. +; NOTE: The 2 instructions above take 4 bytes total ; .long 0 /* External Interrupt Request 0 */ ; .long 0 /* External Interrupt Request 1 */ @@ -246,16 +246,20 @@ reset_vector: ; .long 0 /* Reserved */ ; .long 0 /* Pin Change Interrupt Request 0 */ -; We have a total 8.5 LWORDS (8.5*4=34 bytes; see NOTE above) to hide part of +; We have a total 9 LWORDS (9*4=36 bytes; see NOTE above) to hide part of ; the USB descriptors.
We could just hide the Device Descriptor there, but we ; would waste 18 bytes since the config_descriptor would have to be moved after ; the USB-related ISRs (USB General Interrupt Request and the USB Endpoint ; Interrupt Request); instead, we hide the WHOLE Configuration Descriptor, -; including the config_descriptor, interface_descriptor, hid_descriptor and +; including the length prefix, config_descriptor, interface_descriptor, hid_descriptor and ; endpoint_descriptor which is: -; 9 + 9 + 9 + 7 = 34 bytes, and just leave the device_descriptor and +; 1 + 9 + 9 + 9 + 7 = 35 bytes, and just leave the device_descriptor and ; hid_report_descriptor (18 + 21 = 39 bytes) defined after the USB-related ISRs. +; 1 byte +config_descriptor_prefix: + .byte config_descriptor_end - config_descriptor + 0x80 + ; 9 bytes config_descriptor: // configuration descriptor, USB spec 9.6.3, page 264-266, Table 9-10 @@ -300,6 +304,8 @@ endpoint_descriptor: .word 64 // wMaxPacketSize -- Maximum packet size supported in bytes (64) .byte 5 // bInterval -- Polling interval (milliseconds) +config_descriptor_end: + ; USB-related ISRs are here!!! 
.org reset_vector + 0x28 @@ -354,10 +360,14 @@ device_descriptor: .word 0x2067 // idProduct -- Product ID (HID Class Bootloader PID, from LUFA) .word 0x0001 // bcdDevice -- Device Release Number .byte 0 // iManufacturer -- Index of Manufacturer String Descriptor - .byte 0 // iProduct -- Index of Product String Descriptor + .byte 1 // iProduct -- Index of Product String Descriptor .byte 0 // iSerialNumber -- Index of Serial Number String Descriptor .byte 1 // bNumConfigurations -- Number of Possible Configuration +; 1 byte +hid_report_descriptor_prefix: + .byte hid_report_descriptor_end - hid_report_descriptor + 0x80 + ; 21 bytes hid_report_descriptor: .byte 0x06, 0xDC, 0xFF // Usage Page (Vendor Defined 0xDCFF) @@ -370,7 +380,28 @@ hid_report_descriptor: .byte 0x96, 0x82, 0x00 // Report Count (130) -> SPM_PAGESIZE (128 bytes) + 2 .byte 0x91, 0x02 // Output (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) .byte 0xC0 // End Collection - +hid_report_descriptor_end: + +; 4 bytes +string_descriptor_0: + .byte 4 // bLength -- Size of the Descriptor in Bytes (4 bytes) + .byte 3 // bDescriptorType -- String Descriptor (0x03) + .word 0x0409 // wLANGID -- Language code supported by the device (en-US) + +string_descriptor_1: + .byte string_descriptor_1_end - string_descriptor_1 // bLength -- Size of the Descriptor in Bytes (4 bytes) + .byte 3 // bDescriptorType -- String Descriptor (0x03) + .word 'n,'a,'n,'o,'B,'o,'o,'t +string_descriptor_1_end: + +descriptor_table: + .byte 0x01, lo8(device_descriptor) + .byte 0x02, lo8(config_descriptor_prefix) + .byte 0x21, lo8(hid_descriptor) + .byte 0x22, lo8(hid_report_descriptor_prefix) + .byte 0xFE, lo8(string_descriptor_0) + .byte 0xFF, lo8(string_descriptor_1) +descriptor_table_end: ; ================================================================= ; == Entry point! 
@@ -961,31 +992,24 @@ GET_DESCRIPTOR: ; Just get the descriptor address into ; [RAMPZ:]Z, and the length into r16 - ldi ZH, hi8(config_descriptor) ; Load the high address part of config_descriptor into ZH - ldi ZL, lo8(config_descriptor) ; Load the low address part of config_descriptor into ZL - ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) - cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; - breq process_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that - adiw r30, hid_descriptor - config_descriptor ; Change Z to point to hid_descriptor - cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21 (HID Class HID Descriptor) - ; The following code will also be reused for the device descriptor - both of these descriptors - ; contain the size in the first byte, and getting the size from there saves one instruction. This - ; trick cannot be applied to the Configuration Descriptor (which is actually a collection of - ; multiple descriptors) and the HID Report Descriptor (which has a completely different format). -process_single_descriptor: - lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes. - ; This instruction does not change any flags in SREG, therefore it can be placed - ; between the compare and the corresponding conditional jump. - breq process_descriptor ; If the last compare result was equal, jump to return the descriptor data. - adiw r30, device_descriptor - hid_descriptor ; Change Z to point to device_descriptor - cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; - breq process_single_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that; - ; reuse the code for hid_descriptor above. 
- adiw r30, hid_report_descriptor - device_descriptor ; Change Z to point to hid_report_descriptor - ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) - cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; - brne UNHANDLED_SETUP_REQUEST ; If high byte of wValue is NOT 0x22 (HID Class HID Report Descriptor), reject the setup request; - ; otherwise fallthrough to process_descriptor. + subi reg_wValueL, 2 ; Shift the string descriptor index into the 0xFE...0xFF range (index 0 -> 0xFE, index 1 -> 0xFF) + cpi reg_wValueH, 0x03 ; Check whether the request is for a string descriptor (0x03) + breq find_descriptor ; If a string descriptor is requested, the search key is now prepared in reg_wValueL + mov reg_wValueL, reg_wValueH ; If any other descriptor is requested, use its code as the search key +find_descriptor: + ldi ZH, hi8(descriptor_table) ; Load the high address part of descriptor_table into ZH (this part is assumed to be common for all descriptor data) + ldi ZL, lo8(descriptor_table) ; Load the low address part of descriptor_table into ZL +find_descriptor_loop: + lpm r0, Z+ ; Load the search key from the descriptor table + lpm r1, Z+ ; Load the low part of the descriptor address from the descriptor table + cp r0, reg_wValueL ; Compare the search key for the current table item with the requested search key + brlo find_descriptor_loop ; If the key for the current table item is lower than the requested key, continue the search (table keys are in ascending order and end with 0xFF, so the loop always terminates) + brne UNHANDLED_SETUP_REQUEST ; Otherwise, if the keys are not equal, the current key is already higher than the requested one and the table has no matching entry, so reject the request with STALL + mov ZL, r1 ; Load the low part of the descriptor address into ZL (ZH is assumed to be the same) + lpm r16, Z ; Load the descriptor length from the first byte of the descriptor + sbrc r16, 7 ; Skip the next instruction if bit 7 of the length is not set (the length is actually a part of the descriptor) + inc ZL ; Otherwise the length is actually a prefix, and the descriptor data starts from the
next byte, so adjust the address + cbr r16, 0x80 ; Clear bit 7 of the length which may have been used as a prefix flag process_descriptor: From a5906c7a477aa7372050c8c403d38a4fc96ee02d Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 10:11:39 +0900 Subject: [PATCH 22/27] Use 2 bytes to re-activate cli Although we can further reduce size by skipping cli and some clr commands if we only care about power-on reset or real hardware reset. But if bootloader is called by long jump, this may not be safe thing to do. So reactivating this. Signed-off-by: Osamu Aoki --- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 364c957..c7445f0 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -228,9 +228,7 @@ .global reset_vector reset_vector: -; cli ; Dropped cli for size by Sergey - ; ... but is this safe for soft-reset, Osamu? - clr rZERO ; Initialize rZERO (YH = R29 register = zero) + cli ; Disable interrupts in case the bootloader is entered by a jump from the application (soft reset) rather than by a hardware reset
rjmp main ; Jump to main ; We are "hiding" the USB descriptors in the Interrupt Vector Table @@ -421,6 +419,7 @@ main: ; ================================================================= ; = Initialize constants ; ================================================================= + clr rZERO ; Initialize rZERO (YH = R29 register = zero) ; Set R3=rONE clr rONE ; Initialize rONE as 0 (application may have set this before RESET) inc rONE ; Initialize rONE (R3 register = one) From bc7e388e0a44b6017b17b1526693760e51fa7f22 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 17:21:55 +0900 Subject: [PATCH 23/27] Save 4 bytes with shorter product name Signed-off-by: Osamu Aoki --- nanoBoot.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanoBoot.S b/nanoBoot.S index c7445f0..ae5f591 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -389,7 +389,7 @@ string_descriptor_0: string_descriptor_1: .byte string_descriptor_1_end - string_descriptor_1 // bLength -- Size of the Descriptor in Bytes (4 bytes) .byte 3 // bDescriptorType -- String Descriptor (0x03) - .word 'n,'a,'n,'o,'B,'o,'o,'t + .word 'n,'a,'n,'o,'B,'t // Each character eats 2 bytes (save 4 bytes) string_descriptor_1_end: descriptor_table: From 43e09c71dbf28a2180663b0cdbe4ee12549b6584 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 15:54:14 +0900 Subject: [PATCH 24/27] Enable to select opt-in LED features from Makefile This makes it possible to compile with opt-in features without touching the source code. Signed-off-by: Osamu Aoki --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 594ecf4..1f081bd 100644 --- a/Makefile +++ b/Makefile @@ -197,6 +197,8 @@ CPPFLAGS += $(patsubst %,-I%,$(EXTRAINCDIRS)) #CPPFLAGS += $(CSTANDARD) +LEDDEFS = + #---------------- Assembler Options ---------------- # -Wa,...: tell GCC to pass this to the assembler. 
# -adhlns: create listing @@ -206,8 +208,7 @@ CPPFLAGS += $(patsubst %,-I%,$(EXTRAINCDIRS)) # files -- see avr-libc docs [FIXME: not yet described there] # -listing-cont-lines: Sets the maximum number of continuation lines of hex # dump that will be displayed for a given single line of source input. -ASFLAGS = $(ADEFS) -Wa,-adhlns=$(<:%.S=$(OBJDIR)/%.lst),-gstabs,--listing-cont-lines=100 - +ASFLAGS = $(ADEFS) -Wa,-adhlns=$(<:%.S=$(OBJDIR)/%.lst),-gstabs,--listing-cont-lines=100 $(LEDDEFS) #---------------- Library Options ---------------- # Minimalistic printf version From d143417dcb87c89d8f954c0aaba942e28a5c9cb8 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 15:57:59 +0900 Subject: [PATCH 25/27] Drop Windows-only cosmetic and annoying code /NUL certainly breaks on non-Windows system and should never be here NUL is ugly since it leaves NUL on other platform I like /dev/null better, but just in case, I am taking safer option. This should be the least invasive change to reduce noise on non-Windows platform without breaking Windows environment. Signed-off-by: Osamu Aoki --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1f081bd..38a6c79 100644 --- a/Makefile +++ b/Makefile @@ -637,11 +637,11 @@ clean_list : # Create object files directory -$(shell mkdir $(OBJDIR) 2>/NUL) +$(shell mkdir -p $(OBJDIR)) # Include the dependency files. --include $(shell mkdir .dep 2>NUL) $(wildcard .dep/*) +-include $(shell mkdir -p .dep) $(wildcard .dep/*) # Listing of phony targets. 
From 58603833b024561d16bcc69f37859e265fa819ea Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 15:54:58 +0900 Subject: [PATCH 26/27] Helper scripts for build Signed-off-by: Osamu Aoki --- mk-all | 9 +++++++++ mk-generic | 12 ++++++++++++ mk-leonardo | 25 +++++++++++++++++++++++++ mk-promicro | 24 ++++++++++++++++++++++++ mk-teensy | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100755 mk-all create mode 100755 mk-generic create mode 100755 mk-leonardo create mode 100755 mk-promicro create mode 100755 mk-teensy diff --git a/mk-all b/mk-all new file mode 100755 index 0000000..e16cdf0 --- /dev/null +++ b/mk-all @@ -0,0 +1,9 @@ +#!/bin/sh -ex +# vim:se sw=2 ts=2 sts=2 et ai: +# Script to build all nanoBoot variants; changes to its own directory first, so it can be started from anywhere +cd "$(dirname "$0")" +./mk-generic +./mk-teensy +./mk-leonardo +./mk-promicro + diff --git a/mk-generic b/mk-generic new file mode 100755 index 0000000..3f3cab0 --- /dev/null +++ b/mk-generic @@ -0,0 +1,12 @@ +#!/bin/sh -ex +# vim:se sw=2 ts=2 sts=2 et ai: +# Script to build nanoBoot + +#Generic board w/o LED support +DEFS="LEDDEFS=" + +make clean +make "$DEFS" +mv -f nanoBoot.hex nanoBoot-generic.hex + + diff --git a/mk-leonardo b/mk-leonardo new file mode 100755 index 0000000..d980c09 --- /dev/null +++ b/mk-leonardo @@ -0,0 +1,25 @@ +#!/bin/sh -ex +# vim:se sw=2 ts=2 sts=2 et ai: +# Script to build nanoBoot + +# Leonardo/Nano compatible board +# -- LED is ON with ATmega32u4 PIN C7 HIGH +# #define LED_BIT 7 +# #define LED_CONF DDRC +# #define LED_PORT PORTC +# #define LED_ACTIVE_LEVEL 1 +DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=7 -DLED_CONF=DDRC -DLED_PORT=PORTC -DLED_ACTIVE_LEVEL=1" + +# Pro Micro compatible board +# -- LED is ON with ATmega32u4 PIN B3 LOW +# #define LED_BIT 3 +# #define LED_CONF DDRB +# #define LED_PORT PORTB +# #define LED_ACTIVE_LEVEL 0 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=3 -DLED_CONF=DDRB -DLED_PORT=PORTB -DLED_ACTIVE_LEVEL=0" + +make clean +make "$DEFS" +mv -f nanoBoot.hex
nanoBoot-leonardo.hex + + diff --git a/mk-promicro b/mk-promicro new file mode 100755 index 0000000..20e0eb0 --- /dev/null +++ b/mk-promicro @@ -0,0 +1,24 @@ +#!/bin/sh -ex +# vim:se sw=2 ts=2 sts=2 et ai: +# Script to build nanoBoot + +# Pro Micro compatible board +# -- LED is ON with ATmega32u4 PIN D5 LOW +# #define LED_BIT 5 +# #define LED_CONF DDRD +# #define LED_PORT PORTD +# #define LED_ACTIVE_LEVEL 0 +DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=5 -DLED_CONF=DDRD -DLED_PORT=PORTD -DLED_ACTIVE_LEVEL=0" + +# Pro Micro compatible board +# -- LED is ON with ATmega32u4 PIN B3 LOW +# #define LED_BIT 3 +# #define LED_CONF DDRB +# #define LED_PORT PORTB +# #define LED_ACTIVE_LEVEL 0 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=3 -DLED_CONF=DDRB -DLED_PORT=PORTB -DLED_ACTIVE_LEVEL=0" +make clean +make "$DEFS" +mv -f nanoBoot.hex nanoBoot-promicro.hex + + diff --git a/mk-teensy b/mk-teensy new file mode 100755 index 0000000..e32775e --- /dev/null +++ b/mk-teensy @@ -0,0 +1,50 @@ +#!/bin/sh -ex +# vim:se sw=2 ts=2 sts=2 et ai: +# Script to build nanoBoot + +# Adafruit's Atmega32u4 Breakout Board (Product ID: 296) - Now discontinued +# https://www.adafruit.com/product/296 +# -- LED is ON with ATmega32u4 PIN E6 HIGH +# #define LED_BIT 6 +# #define LED_CONF DDRE +# #define LED_PORT PORTE +# #define LED_ACTIVE_LEVEL 1 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=6 -DLED_CONF=DDRE -DLED_PORT=PORTE -DLED_ACTIVE_LEVEL=1" + +# Teensy 2.0 compatible board +# -- LED is ON with ATmega32u4 PIN D6 HIGH +#define LED_BIT 6 +#define LED_CONF DDRD +#define LED_PORT PORTD +#define LED_ACTIVE_LEVEL 1 +DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=6 -DLED_CONF=DDRD -DLED_PORT=PORTD -DLED_ACTIVE_LEVEL=1" + +# Leonardo/Nano compatible board +# -- LED is ON with ATmega32u4 PIN C7 HIGH +# #define LED_BIT 7 +# #define LED_CONF DDRC +# #define LED_PORT PORTC +# #define LED_ACTIVE_LEVEL 1 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=7 -DLED_CONF=DDRC -DLED_PORT=PORTC -DLED_ACTIVE_LEVEL=1" + +# Pro Micro compatible board +# 
-- LED is ON with ATmega32u4 PIN D5 LOW +# #define LED_BIT 5 +# #define LED_CONF DDRD +# #define LED_PORT PORTD +# #define LED_ACTIVE_LEVEL 0 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=5 -DLED_CONF=DDRD -DLED_PORT=PORTD -DLED_ACTIVE_LEVEL=0" + +# Pro Micro compatible board +# -- LED is ON with ATmega32u4 PIN B3 LOW +# #define LED_BIT 3 +# #define LED_CONF DDRB +# #define LED_PORT PORTB +# #define LED_ACTIVE_LEVEL 0 +#DEFS="LEDDEFS=-DLED_ENABLED -DLED_BIT=3 -DLED_CONF=DDRB -DLED_PORT=PORTB -DLED_ACTIVE_LEVEL=0" + +make clean +make "$DEFS" +mv -f nanoBoot.hex nanoBoot-teensy.hex + + From a75c9e33d03be1b47b737d877294845aafb2b045 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Sun, 6 Feb 2022 18:08:43 +0900 Subject: [PATCH 27/27] Update binary size and mention helper scripts Signed-off-by: Osamu Aoki --- README.md | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 584dc69..79abf28 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,23 @@ The name *nanoBoot* comes from the fact that the compiled source fits in the sma It's very likely that a few sections can be rewritten to make it even smaller, and the ultimate goal is to support EEPROM programming as well, although that would require changes to the host code. -The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115). +The current version (2022-02-06) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115) or even newer one. 
Binary size: -* 476 bytes (as is) -* 482 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type) -* 484 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type) - -Here, LED supports require user to uncomment few lines in `nanoBoot.S`. +* 506 bytes (nanoBoot-generic.hex: No LED support) +* 510 bytes (enable LED support with "LED_ACTIVE_LEVEL 1" (Leonardo, Nano, Teensy 2.0-type)) +* 512 bytes (enable LED support with "LED_ACTIVE_LEVEL 0" (Promicro-type)) + +Upon connecting to the host via USB, the device reports the following information (as shown in the Linux kernel log): +``` +usb 4-2: new full-speed USB device number ** using xhci_hcd +usb 4-2: New USB device found, idVendor=03eb, idProduct=2067, bcdDevice= 0.01 +usb 4-2: New USB device strings: Mfr=0, Product=1, SerialNumber=0 +usb 4-2: Product: nanoBt +hid-generic 000*:03EB:2067.0026: hiddev*,hidraw*: USB HID v1.11 Device [nanoBt] on usb-0000:0*:00.*-2/input0 +``` + +Helper scripts to build the firmware with LED support are provided as `mk-*`. ## HW assumptions: