From 78b72ed3a6f9a55851e60d93de6ba761d35d9999 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 22 Apr 2020 22:49:11 +0900 Subject: [PATCH 01/35] refactor set_watchdog_timer to 504 bytes Move ldi r16 to inside of set_watchdog_timer Signed-off-by: Osamu Aoki --- nanoBoot.S | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index a2497e3..b895cfc 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -310,14 +310,9 @@ main: in rMCUSR, _SFR_IO_ADDR(MCUSR) ; Load MCU Status Register to rMCUSR out _SFR_IO_ADDR(MCUSR), rZERO ; Load MCU Status Register with rZERO (clear reset flags, particularly clear WDRF in MCUSR), necessary before disabling the Watchdog - ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... - ; Disable Watchdog Timer - ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) - mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely - rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded in r16 and r17 + rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded r17 (r16 used inside set_watchdog_timer) ; check_reset_flags: sbrs rMCUSR, EXTRF ; Skip the next instruction if EXTRF is set (if External Reset Flag, skip next instruction, go to run_bootloader) @@ -524,9 +519,6 @@ exit_bootloader: ; NOTE!! 
This part of the code assumes MCUSR has already been cleared ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) - ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration - ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) - ldi r17, _BV(WDE) | _BV(WDP2) ; Load r17 with the value needed to set the desired Watchdog Configuration (WDCE = 0, not set!) ; Write the WDE and Watchdog prescaler bits (WDP); System Reset Mode (WDE = 1) and ~250 ms timeout (WDP2 = 1) @@ -949,10 +941,15 @@ EP_ISR_END: set_watchdog_timer: ; IMPORTANT!! This function assumes the correct values for the WDTCSR register - ; configuration are already loaded onto r16 and 17. + ; configuration are already loaded onto r17. wdr ; Reset the Watchdog Timer + ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... + ; Disable Watchdog Timer + ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration + ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + ; Load the desired configuration to the Watchdog Timer Control Register (WDTCSR) sts WDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) sts WDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) From d7dc7926c9a9bdeaacbdb7cc5524f6fe7eef6122 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 22 Apr 2020 23:57:04 +0900 Subject: [PATCH 02/35] Add LED support, size 510 bytes Signed-off-by: Osamu Aoki --- nanoBoot.S | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index b895cfc..aa48592 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -48,6 +48,28 @@ ; hfuse memory = 0xDF ; efuse memory = 0xC4 +; LED -- Configure this for the LED +; +; LED is on D6 for Teensy 2.0 compatible board (Adjust for other board) 
+#define LED_BIT 6 +#define LED_CONF DDRD +#define LED_PORT PORTD + +; LED is on C7 for Leonardo/Nano compatible board (Adjust for other board) +; #define LED_BIT 7 +; #define LED_CONF DDRC +; #define LED_PORT PORTC + +; LED (TX) is on D5 for Pro Micro compatible board (Adjust for other board) +; #define LED_BIT 5 +; #define LED_CONF DDRD +; #define LED_PORT PORTD + +; LED (RX) is on B3 for Pro Micro compatible board (Adjust for other board) +; #define LED_BIT 3 +; #define LED_CONF DDRB +; #define LED_PORT PORTB + ; SW assumptions: ; All Endpoints are being configured sequentially in ascending order, ; but, since we only use EP0, this is not that important @@ -321,6 +343,7 @@ run_application: ; We get here if the cause of th jmp 0 ; Simply jump to 0x0000 (application) IMPORTANT NOTE!! This CANNOT be an 'rjmp'!! run_bootloader: + sbi _SFR_IO_ADDR(LED_CONF), LED_BIT ; Set IO register as output for LED set ; Initialize BootLoaderActive flag (T flag in SREG) @@ -516,6 +539,7 @@ exit_bootloader: ; = Watchdog Timer initialization ; ================================================================= + cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting ; NOTE!! 
This part of the code assumes MCUSR has already been cleared ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) @@ -820,6 +844,7 @@ GET_DESCRIPTOR: ; rjmp UNHANDLED_SETUP_REQUEST ; If the requested descriptor is not supported jump to UNHANDLED_SETUP_REQUEST send_device_descriptor: + sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Turn on LED just before sending device ID ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor From c6c09b66d37adfec973f2d257d0d15d8c46a7f3a Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Thu, 23 Apr 2020 00:09:55 +0900 Subject: [PATCH 03/35] Use Y+ for extended IO, size 506 bytes Signed-off-by: Osamu Aoki --- nanoBoot.S | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index aa48592..b211b47 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -145,6 +145,14 @@ #define oUEBCHX (UEBCHX - USB_BASE) #define oUEINT (UEINT - USB_BASE) ; This register has the bits to identify which endpoint triggered an interrupt +; +; To facilitate coding, we will also use the Y register to point to the first Extended IO register; +; We can then use LDD / STD (Y+oU....) to address non-USB extended IO registers (EIO_BASE + relative offset) +; (These are used only in start-up and exit routines when USB is not active) +; +#define EIO_BASE WDTCSR +#define oWDTCSR (WDTCSR - EIO_BASE) +#define oCLKPR (CLKPR - EIO_BASE) #include @@ -332,6 +340,12 @@ main: in rMCUSR, _SFR_IO_ADDR(MCUSR) ; Load MCU Status Register to rMCUSR out _SFR_IO_ADDR(MCUSR), rZERO ; Load MCU Status Register with rZERO (clear reset flags, particularly clear WDRF in MCUSR), necessary before disabling the Watchdog + ; Set YH=R29 to 0 for addressing extended io for any 64 bytes of YL specified section. 
+ ; YL=lo8(EIO_BASE) initial routine + ; YL=lo8(USB_BASE) main routine + ; YL=lo8(EIO_BASE) exit routine + clr YH ; 0 = hi8(USB_BASE) + ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (initial) mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded r17 (r16 used inside set_watchdog_timer) @@ -395,8 +409,9 @@ run_bootloader: ; code to work as expected. ldi r17, _BV(CLKPCE) ; Load r17 with the value needed to "unlock" the prescaler of the Clock; Clock Prescaler Change Enable bit (CLKPCE) set to one, all other bits set to zero. - sts CLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) - sts CLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) + ; still YH=0, YL=lo8(EIO_BASE) initial routine + std Y+oCLKPR, r17 ; Store r17 to the Clock Prescaler Register (CLKPR) + std Y+oCLKPR, rZERO ; Store rZERO to the Clock Prescaler Register (CLKPR), setting CLKPS3, CLKPS2, CLKPS1 and CLKPS0 to zero (Clock Division Factor = 1; System Clock is 16 MHz) ; ================================================================= ; = Basic device setup is NOW COMPLETE!! @@ -408,7 +423,8 @@ run_bootloader: ; ================================================================= ldi YL, lo8(USB_BASE) ; Load YL with the least significant 8 bits of USB_BASE - ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE + ; still YH=0 + ; ldi YH, hi8(USB_BASE) ; Load YH with the most significant 8 bits of USB_BASE ; ================================================================= ; = From LUFA simplified - USB_Init:_start @@ -542,6 +558,7 @@ exit_bootloader: cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting ; NOTE!! 
This part of the code assumes MCUSR has already been cleared + ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (exit) ; Enable WDT, ~250 ms timeout (force a timeout to reset the AVR) ldi r17, _BV(WDE) | _BV(WDP2) ; Load r17 with the value needed to set the desired Watchdog Configuration (WDCE = 0, not set!) ; Write the WDE and Watchdog prescaler bits (WDP); System Reset Mode (WDE = 1) and ~250 ms timeout (WDP2 = 1) @@ -968,16 +985,16 @@ set_watchdog_timer: ; IMPORTANT!! This function assumes the correct values for the WDTCSR register ; configuration are already loaded onto r17. + ; always set YH to hi(EIO_BASE) before calling + wdr ; Reset the Watchdog Timer - ; We MUST disable the Watchdog Timer first, otherwise it will remain enabled and will keep resetting the system, so... - ; Disable Watchdog Timer ldi r16, _BV(WDCE) | _BV(WDE) ; Load r16 with the value needed to "unlock" the Watchdog Timer Configuration ; Write a logic one to the Watchdog Change Enable bit (WDCE) and Watchdog System Reset Enable (WDE) + std Y+oWDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) ; Load the desired configuration to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r16 ; Store r16 to the Watchdog Timer Control Register (WDTCSR) - sts WDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) + std Y+oWDTCSR, r17 ; Store r17 to the Watchdog Timer Control Register (WDTCSR) ret From 039e30f7aaf9013f8c6a2e70f96336125fbdef32 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:01:41 +0300 Subject: [PATCH 04/35] Save 2 bytes in the setup packet read loop Instead of using a separate register for the loop counter, compare the value in the XL register with the expected end address. This saves one instruction (2 bytes). 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index b211b47..766963b 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -653,11 +653,10 @@ USB_Endpoint_ISR: ; Shorter version clr XH ; Clear XH Register ldi XL, 18 ; Load XL Register with number 18 (this will be used to refer to r18) - ldi r16, 8 ; Load r16 with number 8 (the number of fields we need to read) load: ldd r0, Y+oUEDATX ; Load r0 with the value in the USB Endpoint Data Register (UEDATX) st X+, r0 ; Store the value of r0 to the location pointed by X (r18), post increment X (X now points to r19) - dec r16 ; Decrement r16 - brne load ; Jump back to 'load' if r16 is not zero + cpi XL, 18+8 ; Compare XL with the location past the last byte that we need to read + brne load ; Jump back to 'load' if there are still bytes to read ; Our response is based on data direction... sbrc reg_bmRequestType, 7 ; Skip the next instruction if bit 7 of bmRequestType is not set; for host to device (OUT) transaction, bit 7 is cleared From d1d66c06663a20b07d2417ad518a86910ac2b6d2 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:09:22 +0300 Subject: [PATCH 05/35] Swap send_hid_descriptor and send_hid_report_descriptor Reorder the descriptor handling code so that the part that falls through to `process_descriptor` is handling a single USB descriptor prefixed with its length. Does not save any bytes by itself, just prepares the code for subsequent changes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 766963b..a3b8ff7 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -874,21 +874,21 @@ send_config_descriptor: ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) rjmp process_descriptor ; jump to process_descriptor -send_hid_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) - rjmp process_descriptor ; jump to process_descriptor - send_hid_report_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(hid_report_descriptor); Load ZL with the least significant 8 bits of hid_report_descriptor ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) + rjmp process_descriptor ; jump to process_descriptor ; If needed, include other descriptors here +send_hid_descriptor: + ; We only load the lower portion (lo8) of the address of the descriptor, + ; the higher portion is common for all descriptors + ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor + ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) + process_descriptor: ; Acknowledge the SETUP packet From bdd133790d6f0b010dae5afd9bef8b04fa411392 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:29:20 +0300 Subject: [PATCH 06/35] Save 2 bytes in the descriptor sending code In two cases (`send_hid_descriptor` and `send_device_descriptor`) the returned data contains just a single USB descriptor, which contains its length in bytes in the first data byte. 
Replace two instructions to load the descriptor lengths with one instruction to read the length from the first data byte, saving 2 bytes. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index a3b8ff7..cdeca18 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -864,8 +864,7 @@ send_device_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor - ldi r16, 18 ; Load r16 with length of device_descriptor (18 bytes) - rjmp process_descriptor ; jump to process_descriptor + rjmp process_single_descriptor ; jump to process_single_descriptor send_config_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, @@ -887,7 +886,10 @@ send_hid_descriptor: ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - ldi r16, 9 ; Load r16 with length of hid_descriptor (9 bytes) + +process_single_descriptor: + + lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes process_descriptor: From 670c09253ce8a4ccd02fcbbc02859c593c76ce95 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:41:13 +0300 Subject: [PATCH 07/35] Move SET_CONFIGURATION in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index cdeca18..244e357 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -685,6 +685,13 @@ HANDLE_USB_STANDARD_DEVICE: rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST +SET_CONFIGURATION: + + rcall process_Host2Device ; This function affects r17 + + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + + HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) breq SET_HID_REPORT ; jump to SET_HID_REPORT @@ -710,13 +717,6 @@ SET_ADDRESS: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST -SET_CONFIGURATION: - - rcall process_Host2Device ; This function affects r17 - - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - - SET_HID_REPORT: ; Acknowledge the SETUP packet From 70ac55df637714acb8796329616605580ba0300d Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 20:56:18 +0300 Subject: [PATCH 08/35] Save 2 bytes by using fallthrough for SET_CONFIGURATION Restructure conditional jumps to use fallthrough for the `bRequest == 9` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 244e357..b3f1b25 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -681,14 +681,13 @@ HANDLE_USB_STANDARD_DEVICE: cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) breq SET_ADDRESS ; jump to SET_ADDRESS cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - breq SET_CONFIGURATION ; jump to SET_CONFIGURATION - - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: rcall process_Host2Device ; This function affects r17 +UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST From 5167ef7639b5823b8c3479880869c2a881f84758 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:00:55 +0300 Subject: [PATCH 09/35] Move HANDLE_USB_CLAS_INTERFACE in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index b3f1b25..4576739 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -675,6 +675,12 @@ HOST_TO_DEVICE: rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST +HANDLE_USB_CLAS_INTERFACE: + cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) + breq SET_HID_REPORT ; jump to SET_HID_REPORT + rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + + HANDLE_USB_STANDARD_DEVICE: ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest @@ -691,12 +697,6 @@ UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST -HANDLE_USB_CLAS_INTERFACE: - cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - breq SET_HID_REPORT ; jump to SET_HID_REPORT - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - - SET_ADDRESS: ; Set device address; for this we only need to copy the value in wValueL which contains the address From 11e4101a86cbd53d7eca432eaf823952383832a0 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:05:44 +0300 Subject: [PATCH 10/35] Save 2 bytes by using fallthrough for HANDLE_USB_CLAS_INTERFACE Restructure conditional jumps to use fallthrough for the `HANDLE_USB_CLAS_INTERFACE` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 4576739..a975e93 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -671,10 +671,8 @@ HOST_TO_DEVICE: andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) - breq HANDLE_USB_CLAS_INTERFACE ; jump to HANDLE_USB_CLAS_INTERFACE - - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) breq SET_HID_REPORT ; jump to SET_HID_REPORT From 7bae4e1cbcebbc4a2c4022f1e5fa493e235a7e0e Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:13:07 +0300 Subject: [PATCH 11/35] Save 2 bytes by using fallthrough for GET_DESCRIPTOR Restructure conditional jumps to use fallthrough for the `GET_DESCRIPTOR` case; this removes one jump instruction, saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index a975e93..ae53799 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -805,6 +805,7 @@ finish_hid_request: ; Clear Transmitter Ready Flag rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX +UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -822,11 +823,8 @@ DEVICE_TO_HOST: brne UNHANDLED_DEVICE_TO_HOST ; If bmRequestType is not 0x80, we know it's not a GET_DESCRIPTOR request, so jump to UNHANDLED_DEVICE_TO_HOST cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) - breq GET_DESCRIPTOR ; jump to GET_DESCRIPTOR - -UNHANDLED_DEVICE_TO_HOST: - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x80/0x81 or bRequest is not 0x06, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to GET_DESCRIPTOR if equal GET_DESCRIPTOR: ; Just get the descriptor address into From 1d9f425435983007bea82c0b1988b390169de1f8 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 21:38:43 +0300 Subject: [PATCH 12/35] Save 2 bytes by reusing the SET_ADDRESS code for SET_CONFIGURATION `SET_CONFIGURATION` is basically a noop - the only thing that needs to be done is `process_Host2Device`. Instead of handling it in two instructions, reuse the `SET_ADDRESS` code for it, passing the current UDADDR value to it (so it effectively does nothing too), which can be done in a single instruction, thus saving 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index ae53799..0a6dbba 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -689,11 +689,10 @@ HANDLE_USB_STANDARD_DEVICE: ; fallthrough to SET_CONFIGURATION if equal SET_CONFIGURATION: - rcall process_Host2Device ; This function affects r17 - -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, + ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. + ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address SET_ADDRESS: @@ -712,6 +711,7 @@ SET_ADDRESS: ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) +UNHANDLED_SETUP_REQUEST_1: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST SET_HID_REPORT: From 4fb02925e38ed725d0ee563cc870d1a8a4b2aeff Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Mon, 23 Aug 2021 12:02:01 +0300 Subject: [PATCH 13/35] Fix USB errors due to unconfigured interrupt in endpoint Although the subset of USB HID protocol that is actually used by the bootloader uses only the default control endpoint (0), the HID spec requires the device to have an interrupt in endpoint, and the host can poll that endpoint even when the HID report descriptor does not actually declare any input reports. Polling an unconfigured endpoint causes USB errors, which may prevent the bootloader from functioning properly. 
Apparently this was happening in Mac OS, making the bootloader unusable there (however, Windows and Linux did not expose the problem; on Linux it was possible to provoke these errors by opening the `/dev/hidrawN` device corresponding to the bootloader, but existing flashing tools do not use that method to access the bootloader device). Add the code to configure endpoint 1 as Interrupt IN, matching the USB descriptors; this is enough to make the USB controller generate NAK replies for that endpoint correctly, and the rest of bootloader code may continue using just endpoint 0 as before. The binary size increases by 12 bytes. Signed-off-by: Sergey Vlasov --- nanoBoot.S | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 0a6dbba..07fbadf 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -586,29 +586,55 @@ USB_General_ISR: ; service_EORSTI: ; unused label ; ================================================================= -; = Configure Endpoint0 +; = Configure Endpoints ; ================================================================= - ; ASSUMPTION! - ; We only use Endpoint0, and the reset value of the USB Device Select Endpoint Number Register (UENUM) is Zero, - ; so we don't need to select it or do anything else + ; Even though the bootloader uses only endpoint 0, the HID spec requires any HID device to have an + ; Interrupt IN endpoint, and the host can decide to poll that endpoint even when the HID report + ; descriptor does not actually declare any input reports. Polling an unconfigured endpoint causes + ; USB errors, therefore endpoint 1 must be configured here too. 
- ; Enable Endpoint + ; Enable and configure endpoint 1 as Interrupt IN: + ; UENUM = 1; + ; UECONX |= (1 << EPEN); + ; UECFG0X = (1 << EPTYPE1) | (1 << EPTYPE0) | (1 << EPDIR); + ; UECFG1X = (1 << EPSIZE1) | (1 << EPSIZE0) | (1 << ALLOC); + + std Y+oUENUM, rONE ; Select Endpoint 1 + + ; Set Endpoint Enable Bit (EPEN), all other bits set to zero has no effect on UECONX + std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Endpoint 1 + + ldi r16, (_BV(EPTYPE1) | _BV(EPTYPE0) | _BV(EPDIR)) ; Load r16 with the value to configure Endpoint 1 + ; Endpoint Type Bits (EPTYPE1:0); 11 to set as Interrupt Endpoint + ; Endpoint Direction Bit (EPDIR); set to configure IN direction + + std Y+oUECFG0X, r16 ; Store r16 to the USB Endpoint Configuration0 Register (UECFG0X); + + ldi r16, (_BV(EPSIZE1) | _BV(EPSIZE0) | _BV(ALLOC)) ; Load r16 with the value to configure Endpoint 1 (and also 0 below) + ; Endpoint Size Bits (EPSIZE2:0); 011 to set to 64 bytes + ; Endpoint Bank Bits (EPBK1:0); 00 to set One bank + ; Endpoint Allocation Bit (ALLOC); set to allocate the endpoint memory + + std Y+oUECFG1X, r16 ; Store r16 to the USB Endpoint Configuration1 Register (UECFG1X); + + ; Enable and configure endpoint 0 as Control (this is done last, so that endpoint 0 will remain selected): + ; UENUM = 0; ; UECONX |= (1 << EPEN); ; UECFG0X = 0; - ; UECFG1X = 0x32; + ; UECFG1X = (1 << EPSIZE1) | (1 << EPSIZE0) | (1 << ALLOC); + + std Y+oUENUM, rZERO ; Select Endpoint0 + ; Set Endpoint Enable Bit (EPEN), all other bits set to zero has no effect on UECONX - std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Enpoint 0 + std Y+oUECONX, rONE ; Store the USB Endpoint Configuration Register (UECONX) with the value needed to enable Endpoint 0 ; SIZE OPTIMIZATION: Not needed due to known reset value (Zero) ; std Y+oUECFG0X, rZERO ; Store rZERO to the USB Endpoint Configuration0 Register 
(UECFG0X); ; Endpoint Type Bits (EPTYPE1:0): 00 to set as Control Endpoint ; Endpoint Direction Bit (EPDIR): clear to configure OUT direction; needed for Control Endpoint - ldi r16, (_BV(EPSIZE1) | _BV(EPSIZE0) | _BV(ALLOC)) ; Load r16 with the value to configure Enpoint 0 - ; Endpoint Size Bits (EPSIZE2:0); 011 to set to 64 bytes - ; Endpoint Bank Bits (EPBK1:0); 00 to set One bank - ; Endpoint Allocation Bit (ALLOC); set to allocate the endpoint memory + ; SIZE OPTIMIZATION: r16 is already loaded with the required value while configuring endpoint 1 above std Y+oUECFG1X, r16 ; Store r16 to the USB Endpoint Configuration1 Register (UECFG1X); From 0ed8c5ad19c87881e0231fbac9c0ceb1658c25cd Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 19:46:34 +0300 Subject: [PATCH 14/35] Makefile: Use /dev/null instead of NUL, size 506 bytes Replace Windows-specific `NUL` with `/dev/null` to fix building the code in Linux (the Windows build environment requires MSYS anyway, therefore `/dev/null` should work there too). Signed-off-by: Sergey Vlasov I have cherry-picked Sergey's commits up to 79f1be5 ("Makefile: Use /dev/null instead of NUL", 2021-08-20) onto my led branch as a linear history, excluding 64ae729 ("Remove nonstandard avr-size options", 2021-08-20). With the excluded commit applied, I confirmed by experiment that the regular 'size' command from the latest GCC toolchain also produces the desired result for nanoBoot. However, since we generate $(TARGET).elf using a forked avr-gcc, I chose to keep using 'avr-size'; its nonstandard options are therefore retained by excluding that commit. Signed-off-by: Osamu Aoki --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 594ecf4..d27aec6 100644 --- a/Makefile +++ b/Makefile @@ -636,11 +636,11 @@ clean_list : # Create object files directory -$(shell mkdir $(OBJDIR) 2>/NUL) +$(shell mkdir $(OBJDIR) 2>/dev/null) # Include the dependency files. 
--include $(shell mkdir .dep 2>NUL) $(wildcard .dep/*) +-include $(shell mkdir .dep 2>/dev/null) $(wildcard .dep/*) # Listing of phony targets. From 7341819396c5db33bedc17eb0cd711b55a628a4b Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 22:32:44 +0300 Subject: [PATCH 15/35] Save 4 bytes by simplifying wait_finish_transfer Instead of looping until one of two bits in UEINTX is set, and then retesting the value after the loop, just do `reti` inside the loop if `RXSTPI` is set. This saves 4 bytes by removing both a duplicate bit test instruction and an extra jump instruction. --- nanoBoot.S | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 07fbadf..329ddff 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -953,20 +953,10 @@ send_packet_done: ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: ldd r17, Y+oUEINTX ; Load r17 with the most current value in the USB Endpoint Interrupt Register (UEINTX); - sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host), go to acknowledge_rxouti - sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is not set; no need to abort, we haven't received another SETUP packet, we can keep looping - rjmp acknowledge_rxouti ; Jump if either RXOUTI or RXSTPI are set - rjmp wait_finish_transfer ; Loop back to finish_transfer until either Received OUT Data Interrupt Flag (RXOUTI) or Received SETUP Interrupt Flag (RXSTPI) is set - -acknowledge_rxouti: - - ; We could have gotten here if we got out of the previous loop (wait_finish_transfer) if either RXOUTI or RXSTPI asserted, since RXSTPI has the HIGHEST priority, - ; we check for it here first, to decide whether or not we need to abort - - ; Abort if RXSTPI is set - ; NOTE: R17 already has the most current value of UEINTX, 
no need to load it again sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared reti ; Return if RXSTPI is set, we need to prioritize SETUP packets + sbrs r17, RXOUTI ; Skip the next instruction if the Received OUT Data Interrupt Flag (RXOUTI) is set (there's already an OUT packet from the host) + rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX From 0c235b37c4a5f14ebbbf2813de75eb35cbc04439 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Fri, 20 Aug 2021 23:05:23 +0300 Subject: [PATCH 16/35] Save 4 bytes by refactoring UEINTX handling subroutines The `wait_TXINI` and `wait_RXOUTI` subroutines were always called after a call to some other `clear_XXX` subroutine to clear a bit in `UEINTX`. Replace those two subroutines with `clear_bit_and_wait_TXINI` and `clear_bit_and_wait_RXOUTI`, which get the bit to be cleared as a parameter in `r17`, and then inline the remaining `clear_XXX` subroutines (a subroutine with just 2 instructions in its body actually takes more space than inline code if it is called less than 3 times). 
--- nanoBoot.S | 92 ++++++++++++++++-------------------------------------- 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 329ddff..41d11df 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -742,11 +742,9 @@ UNHANDLED_SETUP_REQUEST_1: SET_HID_REPORT: - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for command from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the SETUP packet and wait for command from the host + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX load_page_address: ; We store the page address in r15:r14 and not in r31:r30 because we need @@ -787,11 +785,9 @@ check_endpoint_for_more_data: or r26, r26 brne fill_page_buffer ; if r26 is not zero, it means there's data in the endpoint which we can use to fill the page buffer, jump there - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for more data from the host - rcall wait_RXOUTI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for more data from the host + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX fill_page_buffer: ; There's data at the endpoint buffer, start fill_page_buffer sequence @@ -822,14 +818,13 @@ reenable_rww_section: finish_hid_request: - ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX + ; Acknowledge the OUT packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) 
in r17 + rcall clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST @@ -915,7 +910,8 @@ process_single_descriptor: process_descriptor: ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) verifyMaxDescriptorLength: cp reg_wLengthL, r16 ; Compare the value in r24 (wLengthL) against the value in r16 (length of descriptor to send) @@ -948,7 +944,8 @@ transfer_descriptor: send_packet_done: ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; Wait for the host to send an OUT packet (RXOUTI to assert), but abort if a SETUP packet is received wait_finish_transfer: @@ -959,7 +956,8 @@ wait_finish_transfer: rjmp wait_finish_transfer ; Loop back to finish_transfer if none of RXSTPI or RXOUTI flags are set ; Acknowledge the OUT packet - rcall clear_RXOUTI ; This function uses r17 to clear the RXOUTI bit in UEINTX + ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) UNHANDLED_SETUP_REQUEST: @@ -972,7 +970,8 @@ UNHANDLED_SETUP_REQUEST: ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall ; 
Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) ; STALL transaction @@ -1018,20 +1017,14 @@ process_Host2Device: ; NOTE: All the functions here affect r17 - ; Acknowledge the SETUP packet - rcall clear_RXSTPI ; This function uses r17 to clear the RXSTPI bit in UEINTX - - ; Wait for TXINI (OK to transmit) - rcall wait_TXINI ; This function loads r17 with value of UEINTX - - ; Clear Transmitter Ready Flag - rcall clear_TXINI ; This function uses r17 to clear the TXINI bit in UEINTX - - ; SIZE OPTIMIZATION: Fall through to wait_TXINI instead of rcall'ing it - ; Wait for TXINI (OK to transmit) - ; rcall wait_TXINI ; This function loads r17 with value of UEINTX - ; ret ; Return from call + ; Acknowledge the SETUP packet and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + rcall clear_bit_and_wait_TXINI ; This function loads r17 with value of UEINTX + ; Clear Transmitter Ready Flag and wait for TXINI (OK to transmit) + ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 +clear_bit_and_wait_TXINI: + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) wait_TXINI: ; NOTE: This function uses r17, we can use this fact to code other stuff @@ -1045,39 +1038,8 @@ wait_TXINI: ret ; Return from call -clear_RXSTPI: - - ; NOTE: This function affects r17 - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_TXINI: - - ; NOTE: This function affects r17 - - ; Clear Transmitter Ready Flag - ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag 
(TXINI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - -clear_RXOUTI: - - ; NOTE: This function affects r17 - - ; Acknowledge the OUT packet - ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 +clear_bit_and_wait_RXOUTI: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ret ; Return from call - - wait_RXOUTI: ; NOTE: This function uses r17, we can use this fact to code other stuff From 990ad5cbff06020afe598c9c2f82ef853c6e1fb3 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 10:18:43 +0900 Subject: [PATCH 17/35] Revert "Add LED support ...", size 492 bytes This reverts commit d7dc7926c9a9bdeaacbdb7cc5524f6fe7eef6122. which interferes with cherry picking ed07e05 ("Save 2 bytes by refactoring the GET_DESCRIPTOR code", 2021-08-24) The reverted commit will be applied later. Signed-off-by: Osamu Aoki --- nanoBoot.S | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 41d11df..127dfda 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -48,28 +48,6 @@ ; hfuse memory = 0xDF ; efuse memory = 0xC4 -; LED -- Configure this for the LED -; -; LED is on D6 for Teensy 2.0 compatible board (Adjust for other board) -#define LED_BIT 6 -#define LED_CONF DDRD -#define LED_PORT PORTD - -; LED is on C7 for Leonardo/Nano compatible board (Adjust for other board) -; #define LED_BIT 7 -; #define LED_CONF DDRC -; #define LED_PORT PORTC - -; LED (TX) is on D5 for Pro Micro compatible board (Adjust for other board) -; #define LED_BIT 5 -; #define LED_CONF DDRD -; #define LED_PORT PORTD - -; LED (RX) is on B3 for Pro Micro compatible board (Adjust for other board) -; #define LED_BIT 3 -; #define LED_CONF DDRB -; #define LED_PORT PORTB - ; SW assumptions: ; All Endpoints are being configured sequentially in ascending order, ; but, since we only use EP0, this is not that important @@ -357,7 +335,6 
@@ run_application: ; We get here if the cause of th jmp 0 ; Simply jump to 0x0000 (application) IMPORTANT NOTE!! This CANNOT be an 'rjmp'!! run_bootloader: - sbi _SFR_IO_ADDR(LED_CONF), LED_BIT ; Set IO register as output for LED set ; Initialize BootLoaderActive flag (T flag in SREG) @@ -555,7 +532,6 @@ exit_bootloader: ; = Watchdog Timer initialization ; ================================================================= - cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting ; NOTE!! This part of the code assumes MCUSR has already been cleared ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (exit) @@ -876,7 +852,6 @@ GET_DESCRIPTOR: ; rjmp UNHANDLED_SETUP_REQUEST ; If the requested descriptor is not supported jump to UNHANDLED_SETUP_REQUEST send_device_descriptor: - sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Turn on LED just before sending device ID ; We only load the lower portion (lo8) of the address of the descriptor, ; the higher portion is common for all descriptors ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor From aef1b8b598ea6b50daedbd598fc857118a3470e1 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 16:59:18 +0300 Subject: [PATCH 18/35] Save 2 bytes by refactoring the GET_DESCRIPTOR code Rewrite the part of the GET_DESCRIPTOR handling code that loads the address and length of the requested descriptor to use less jumps; the resulting code consumes 2 bytes less, even though it is actually more correct (no longer replies with some descriptor to requests for an unknown descriptor type). The new code also avoids hardcoding the high address byte, and no longer depends on the fact that all descriptors have the same high address byte (but it depends on the fact that all offsets between adjacent descriptors can fit into the `adiw` constant argument (0...63)). 
--- nanoBoot.S | 75 +++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 127dfda..30b2466 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -827,60 +827,31 @@ GET_DESCRIPTOR: ; Just get the descriptor address into ; [RAMPZ:]Z, and the length into r16 - ; We know ALL descriptors are at the beginning of the bootloader, in the reset_vector space, - ; and by inspection we can determine that they all share the same high byte of the address (0x7EXX) - ldi ZH, 0x7E ; Load ZH with the most significant 8 bits of the descriptors address (0x7E) - - ; High byte of wValue for GET_DESCRIPTOR transactions specifies Descriptor Type - ; NOTE! We are skipping the comparison for 0x01 (Device Descriptor), since that can't really - ; be excluded, we simply assume that's the default to save space here. See @SAVE_SPACE below. - cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; - breq send_config_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that - cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21; - breq send_hid_descriptor ; If high byte of wValue is 0x21 (HID Class HID Descriptor), jump to handle that - cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; - breq send_hid_report_descriptor ; If high byte of wValue is 0x22 (HID Class HID Report Descriptor), jump to handle that - - ; If needed, include other descriptors here - - ; @SAVE_SPACE: I was able to comment this out and things still work, but it's probably bad (saves 6 bytes) - ; The following 2 lines are also dropped since we are skipping "rjmp UNHANDLED_SETUP_REQUEST" (osamuaoki) - ; cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; - ; breq send_device_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that - ; NOTE: Originally, only this rjmp was skipped and things were still working, that's what - ; 
osamuaoki was able to use to optimize the check for (Device Descriptor), and simply fall through. - ; rjmp UNHANDLED_SETUP_REQUEST ; If the requested descriptor is not supported jump to UNHANDLED_SETUP_REQUEST - -send_device_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(device_descriptor) ; Load ZL with the least significant 8 bits of device_descriptor - rjmp process_single_descriptor ; jump to process_single_descriptor - -send_config_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(config_descriptor) ; Load ZL with the least significant 8 bits of config_descriptor + ldi ZH, hi8(config_descriptor) ; Load the high address part of config_descriptor into ZH + ldi ZL, lo8(config_descriptor) ; Load the low address part of config_descriptor into ZL ldi r16, 34 ; Load r16 with length of config_descriptor (34 bytes) - rjmp process_descriptor ; jump to process_descriptor - -send_hid_report_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_report_descriptor); Load ZL with the least significant 8 bits of hid_report_descriptor - ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) - rjmp process_descriptor ; jump to process_descriptor - - ; If needed, include other descriptors here - -send_hid_descriptor: - ; We only load the lower portion (lo8) of the address of the descriptor, - ; the higher portion is common for all descriptors - ldi ZL, lo8(hid_descriptor) ; Load ZL with the least significant 8 bits of hid_descriptor - + cpi reg_wValueH, 0x02 ; Compare high byte of wValue with value 2; + breq process_descriptor ; If high byte of wValue is 0x02 (Configuration Descriptor), jump to handle that + adiw r30, hid_descriptor - config_descriptor ; 
Change Z to point to hid_descriptor + cpi reg_wValueH, 0x21 ; Compare high byte of wValue with value 0x21 (HID Class HID Descriptor) + ; The following code will also be reused for the device descriptor - both of these descriptors + ; contain the size in the first byte, and getting the size from there saves one instruction. This + ; trick cannot be applied to the Configuration Descriptor (which is actually a collection of + ; multiple descriptors) and the HID Report Descriptor (which has a completely different format). process_single_descriptor: - - lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes + lpm r16, Z ; Load r16 with the first byte of descriptor, which contains its length in bytes. + ; This instruction does not change any flags in SREG, therefore it can be placed + ; between the compare and the corresponding conditional jump. + breq process_descriptor ; If the last compare result was equal, jump to return the descriptor data. + adiw r30, device_descriptor - hid_descriptor ; Change Z to point to device_descriptor + cpi reg_wValueH, 0x01 ; Compare high byte of wValue with value 1; + breq process_single_descriptor ; If high byte of wValue is 0x01 (Device Descriptor), jump to handle that; + ; reuse the code for hid_descriptor above. + adiw r30, hid_report_descriptor - device_descriptor ; Change Z to point to hid_report_descriptor + ldi r16, 21 ; Load r16 with length of hid_report_descriptor (21 bytes) + cpi reg_wValueH, 0x22 ; Compare high byte of wValue with value 0x22; + brne UNHANDLED_SETUP_REQUEST ; If high byte of wValue is NOT 0x22 (HID Class HID Report Descriptor), reject the setup request; + ; otherwise fallthrough to process_descriptor. 
process_descriptor: From dc905606e13d5676e181a65a982f2a56af165f4e Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 17:17:06 +0300 Subject: [PATCH 19/35] Move SET_HID_REPORT in preparation for fallthrough No size or behavior changes, just a preparation to save some bytes with fallthrough. --- nanoBoot.S | 70 +++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 30b2466..b3ab5ae 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -681,41 +681,6 @@ HANDLE_USB_CLAS_INTERFACE: rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST -HANDLE_USB_STANDARD_DEVICE: - - ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest - cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) - breq SET_ADDRESS ; jump to SET_ADDRESS - cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal -SET_CONFIGURATION: - - ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, - ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. - - ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address - -SET_ADDRESS: - - ; Set device address; for this we only need to copy the value in wValueL which contains the address - ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS - ; request is only executed once during enumeration, and because allowed address values are 1 through - ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). 
We can also simply set the ADDEN - ; bit and store the value again in UDADDR to enable the USB Device Address. - - std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) - - rcall process_Host2Device ; This function affects r17 - - ; EnableDeviceAddress - ; UDADDR |= (1 << ADDEN) - ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address - std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) - -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST - SET_HID_REPORT: ; Acknowledge the SETUP packet and wait for command from the host @@ -806,6 +771,41 @@ UNHANDLED_DEVICE_TO_HOST: rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST +HANDLE_USB_STANDARD_DEVICE: + + ; Once we know we support the OUT transaction, we need to filter it based on the value in bRequest + cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) + breq SET_ADDRESS ; jump to SET_ADDRESS + cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) + brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + ; fallthrough to SET_CONFIGURATION if equal +SET_CONFIGURATION: + + ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, + ; so we reuse the SET_ADDRESS code by making it reload the same value to UDADDR. 
+ + ldd reg_wValueL, Y+oUDADDR ; load the existing UDADDR value where the SET_ADDRESS code would expect the new address + +SET_ADDRESS: + + ; Set device address; for this we only need to copy the value in wValueL which contains the address + ; for the device set by the host to the USB Device Address Register (UDADDR); since the SET_ADDRESS + ; request is only executed once during enumeration, and because allowed address values are 1 through + ; 127 (7 LSBs), we don't need to care about the ADDEN bit (bit 7). We can also simply set the ADDEN + ; bit and store the value again in UDADDR to enable the USB Device Address. + + std Y+oUDADDR, reg_wValueL ; Store wValueL to the USB Device Address Register (UDADDR) + + rcall process_Host2Device ; This function affects r17 + + ; EnableDeviceAddress + ; UDADDR |= (1 << ADDEN) + ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address + std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) + +UNHANDLED_SETUP_REQUEST_1: + rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + ; IN transactions DEVICE_TO_HOST: From 9a222303767ae2685376a7b9dfb7ff4b087af6eb Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 24 Aug 2021 17:19:49 +0300 Subject: [PATCH 20/35] Save 2 bytes by using fallthrough for SET_HID_REPORT Restructure conditional jumps to use fallthrough for the `SET_HID_REPORT` case; this removes one jump instruction, saving 2 bytes. 
--- nanoBoot.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index b3ab5ae..f27f515 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -677,9 +677,8 @@ HOST_TO_DEVICE: ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - breq SET_HID_REPORT ; jump to SET_HID_REPORT - rjmp UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST - + brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + ; fallthrough to SET_HID_REPORT SET_HID_REPORT: From b6a4be03abdccbe205ef0467ef0811c1e7f06a3b Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 15:58:56 +0300 Subject: [PATCH 21/35] Save 4 bytes in the USB detach code The original LUFA code performed `UDCON |= (1 << DETACH);` to detach the USB device; however, the other bits of `UDCON` are known to be 0 at this time, therefore a simple write of a constant value could be performed here. In addition, the value of `(1 << DETACH)` is 1 on all AVR chips that could be potentially supported by this code, therefore even the instruction to load the constant value into a register could be omitted. Doing these changes removes 2 instructions, saving 4 bytes. 
--- nanoBoot.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index f27f515..1d76768 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -524,9 +524,13 @@ main_loop: exit_bootloader: ; Detach device from USB Bus ; UDCON |= (1 << DETACH); - ldd r16, Y+oUDCON ; Load r16 with the value in the USB Device Configuration Register (UDCON) - ori r16, _BV(DETACH) ; Set the DETACH bit to enable the detachment - std Y+oUDCON, r16 ; Store r16 to the USB Device Configuration Register (UDCON) + ; SIZE OPTIMIZATION: All other UDCON bits except DETACH can be set to 0 at this time, and the value + ; of _BV(DETACH) is 0x01, therefore we can just store rONE into UDCON. + ; In theory this step could even be removed completely, because the watchdog reset should set the + ; DETACH bit anyway, but doing this here ensures that the host detects the USB device detach before + ; the application is started, which could avoid issues if the application does not add some delay + ; before enabling USB. + std Y+oUDCON, rONE ; Store _BV(DETACH) (== 0x01) to the USB Device Configuration Register (UDCON) ; ================================================================= ; = Watchdog Timer initialization From 01764e0812bc32c0d4d4f12c0179137c1674e459 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:18:23 +0300 Subject: [PATCH 22/35] Save 2 bytes in the USBCON init code The initialization of USBCON does not need to read the current register value - just writing the reset value into that register is enough. This removes one instruction, saving 2 bytes. In theory this write could even be omitted completely, saving 4 more bytes, but this would make the code less robust if the application code attempting to enter the bootloader does not initialize some USB controller registers properly, therefore leaving that USBCON write there is safer. 
--- nanoBoot.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 1d76768..79c71ea 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -419,13 +419,12 @@ run_bootloader: ; USBCON &= ~(1 << VBUSTE); ; USBCON &= ~(1 << USBE); - ; IMPORTANT NOTE: To reduce code size, we are going to reseve r16 to handle all writes to the USB Controller Register (USBCON) - ; this way we don't have to keep loading the value to it (ldd) - ldd r16, Y+oUSBCON ; Load r16 with the value in the USB Configuration Register (USBCON) - - ; The right value of USBCON is already in r16, just clear VBUS Pad Enable Bit (OTGPADE), - ; VBUS Transition Interrupt Enable Bit (VBUSTE) and USB macro Enable Bit (USBE) - andi r16, ~(_BV(OTGPADE)|_BV(VBUSTE)|_BV(USBE)) + ; SIZE OPTIMIZATION: Instead of resetting just some specific bits, initialize the whole USBCON + ; register with its reset value (although even this could be omitted, this initialization is left + ; here in case the application tries to enter the bootloader in a slightly incorrect way). + ; As a further optimization, the USBCON register value is left in r16 for use in subsequent code + ; which modifies various bits of that register. + ldi r16, _BV(FRZCLK) ; Load r16 with the reset value for the USB Configuration Register (USBCON) std Y+oUSBCON, r16 ; Store r16 to the USB Configuration Register (USBCON) ; Enable USB Regulator (USB_REG_On) From 052adef81c8c268ae024b774464eee349ab2a945 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:37:46 +0300 Subject: [PATCH 23/35] Save 2 bytes in the HOST_TO_DEVICE parsing code The instruction which applied the mask of `(CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT)` to bmRequestType was not actually needed, because the value of this mask is 0x7F, and the only bit which is not covered by the mask (0x80) is already known to be 0, therefore the masking did not actually change anything. Removing this instruction saves 2 bytes. 
Signed-off-by: Sergey Vlasov The following commits were skipped earlier when cherry-picking: 97c8d96 ("TEMPORARY: EEPROM code which does not fit", 2021-08-25) e48801a ("WIP: Optimize some more code to make the EEPROM support fit", 2021-08-25) 78b804d ("WIP: Redo the EEPROM write implementation", 2021-08-29) e7e94f3 ("Save 2 bytes by reusing the TXINI clearing mask", 2021-08-29) This patch was therefore adjusted to keep the branch to the thunk code. Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 79c71ea..e5621f9 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -674,8 +674,7 @@ HOST_TO_DEVICE: cpi reg_bmRequestType, 0x00 ; Compare r18 (bmRequestType) with value 0x00 (OUT Type Resquest, USB Standard Request, Recipient is the device) breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE - andi reg_bmRequestType, (0x60 | 0x1F) ; Mask reg_bmRequestType with the bits that define request type and recipient (CONTROL_REQTYPE_TYPE | CONTROL_REQTYPE_RECIPIENT) - cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare the masked value in r16 with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) + cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal HANDLE_USB_CLAS_INTERFACE: From 43969aecf11226925c5ca9bb1aca91ba7fd4f79f Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 17:48:27 +0300 Subject: [PATCH 24/35] Save 2 bytes in the DEVICE_TO_HOST parsing code The DEVICE_TO_HOST handling code needs to handle only the GET_DESCRIPTOR requests, which may come
with two possible bmRequestType values: - 0x80 (IN direction, USB Standard Request, Recipient is the device) if a descriptor which applies to the device as a whole is requested (this code is used for the device and configuration descriptors); - 0x81 (IN direction, USB Standard Request, Recipient is the interface) if a descriptor specific to a particular interface is requested (this code is used for the HID class and HID report descriptors). Because these codes are numerically sequential, and the direction bit has already been tested (therefore it is known that bmRequestType >= 0x80), it is enough to make a single comparison with 0x82 to determine whether bmRequestType has one of the above values. Removing the bit masking operation which is not needed after that change saves 2 bytes. --- nanoBoot.S | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index e5621f9..306bd47 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -813,12 +813,14 @@ DEVICE_TO_HOST: ; If we get here, we know bit 7 of bmRequestType is set, meaning it is a DEVICE_TO_HOST (IN) request, ; now we need to filter out any unhandled requests - cbr reg_bmRequestType, 0x01 ; We mask reg_bmRequestType with value 0x01, bit 0 of bmRequestType is set if the recipient of the request is the interface, - ; and we need to handle that case since the host will query the interface to retrieve the hid_descriptor, obviously we also - ; need to handle the recipient being the device (bit 0 = 0) since all other descriptors are targeted to it - - cpi reg_bmRequestType, 0x80 ; Compare r18 (bmRequestType) with value 0x80 (IN Type Resquest, USB Standard Request, Recipient is the device/interface) - brne UNHANDLED_DEVICE_TO_HOST ; If bmRequestType is not 0x80, we know it's not a GET_DESCRIPTOR request, so jump to UNHANDLED_DEVICE_TO_HOST + ; SIZE OPTIMIZATION: The only bmRequestType values that we care about are: + ; - 0x80 - IN Type Request, USB Standard Request, Recipient is
the device + ; - 0x81 - IN Type Request, USB Standard Request, Recipient is the interface + ; At this step it is known that bmRequestType >= 0x80, therefore checking for bmRequestType < 0x82 + ; is enough to detect whether bmRequestType has one of the above values. + + cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) + brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal From ff754abe9c213d155a8babfdcf8f46cb5d49b4c8 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Sat, 4 Sep 2021 22:51:37 +0300 Subject: [PATCH 25/35] Save 6 bytes by refactoring UNHANDLED_SETUP_REQUEST usage, size 470 bytes Remove the code which tried to check whether the SETUP packet has been handled - now UNHANDLED_SETUP_REQUEST can be used only if the SETUP packet needs to be acknowledged and replied with STALL, and in other cases the code should just do `reti` directly. In addition, UNHANDLED_SETUP_REQUEST is placed in the middle of the code, so that it would be reachable by conditional branches directly.
Signed-off-by: Sergey Vlasov This is based on 6bc21dd ("Save 6 bytes by refactoring UNHANDLED_SETUP_REQUEST usage", 2021-09-04) Adjusted for thunk entry choice differences, both of them go to UNHANDLED_SETUP_REQUEST: * UNHANDLED_SETUP_REQUEST_1 * UNHANDLED_DEVICE_TO_HOST Signed-off-by: Osamu Aoki --- nanoBoot.S | 66 +++++++++++++++++++++++------------------------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 306bd47..7abcf37 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -675,11 +675,11 @@ HOST_TO_DEVICE: breq HANDLE_USB_STANDARD_DEVICE ; If bmRequestType is 0x00, we know it's either a SET_ADDRESS or SET_CONFIGURATION request, so jump to HANDLE_USB_STANDARD_DEVICE cpi reg_bmRequestType, ((1 << 5) | (1 << 0)) ; Compare bmRequestType with the value that defines the request type and recipient we care about HID_SET_REPORT (REQTYPE_CLASS | REQREC_INTERFACE) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal ; fallthrough to HANDLE_USB_CLAS_INTERFACE if equal HANDLE_USB_CLAS_INTERFACE: cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x05 (HID_REQ_SetReport) - brne UNHANDLED_SETUP_REQUEST_1 ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST + brne UNHANDLED_SETUP_REQUEST ; If reg_bmRequestType is not 0x00 or bRequest is not 0x05 or 0x09, we don't handle those cases, so jump to UNHANDLED_SETUP_REQUEST ; fallthrough to SET_HID_REPORT SET_HID_REPORT: @@ -766,10 +766,7 @@ finish_hid_request: ; Clear Transmitter Ready Flag ldi r17, ~(_BV(TXINI)) ; Clear the Transmitter Ready Interrupt Flag (TXINI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - -UNHANDLED_DEVICE_TO_HOST: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + rjmp clear_UEINTX_bit_and_reti 
; Store r17 to the USB Endpoint Interrupt Register (UEINTX), then return from interrupt HANDLE_USB_STANDARD_DEVICE: @@ -778,8 +775,27 @@ HANDLE_USB_STANDARD_DEVICE: cpi reg_bRequest, 0x05 ; Compare bRequest with value 0x05 (REQ_SetAddress) breq SET_ADDRESS ; jump to SET_ADDRESS cpi reg_bRequest, 0x09 ; Compare bRequest with value 0x09 (REQ_SetConfiguration) - brne UNHANDLED_SETUP_REQUEST_1 ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal - ; fallthrough to SET_CONFIGURATION if equal + breq SET_CONFIGURATION ; jump to SET_CONFIGURATION + +UNHANDLED_SETUP_REQUEST: + + ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall + + ; Acknowledge the SETUP packet + ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 + std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) + + ; STALL transaction + + ; // Endpoint_StallTransaction(); + ; UECONX |= (1 << STALLRQ); + ; Size optimization: We know that the only other bit that should be set in UECONX is EPEN, therefore + ; reading the current register value is not needed. 
+ ldi r16, _BV(STALLRQ) | _BV(EPEN) ; Set the STALL Request Handshake Bit (STALLRQ) and EPEN in r16 + std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) + + reti ; Return from interrupt + SET_CONFIGURATION: ; Dirty trick: We don't need to do anything for SET_CONFIGURATION except process_Host2Device, @@ -804,8 +820,7 @@ SET_ADDRESS: ori reg_wValueL, _BV(ADDEN) ; In order to save space, we simply OR the address value already in reg_wValueL (r20) with the ADDEN bit to enable the USB Address std Y+oUDADDR, reg_wValueL ; Store reg_wValueL to the USB Device Address Register (UDADDR) -UNHANDLED_SETUP_REQUEST_1: - rjmp UNHANDLED_SETUP_REQUEST ; Go to UNHANDLED_SETUP_REQUEST + reti ; Return from interrupt ; IN transactions DEVICE_TO_HOST: @@ -820,10 +835,10 @@ DEVICE_TO_HOST: ; is enough to detect whether bmRequestType has one of the above values. cpi reg_bmRequestType, 0x82 ; Check whether bmRequestType is less than 0x82 (then it must be either 0x80 or 0x81) - brcc UNHANDLED_DEVICE_TO_HOST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) + brcc UNHANDLED_SETUP_REQUEST ; If bmRequestType >= 0x82, this request type is not handled here (it's not a GET_DESCRIPTOR request) cpi reg_bRequest, 0x06 ; Compare bRequest with value 0x06 (REQ_GetDescriptor) - brne UNHANDLED_DEVICE_TO_HOST ; jump to UNHANDLED_SETUP_REQUEST through a thunk if not equal + brne UNHANDLED_SETUP_REQUEST ; jump to UNHANDLED_SETUP_REQUEST if not equal ; fallthrough to GET_DESCRIPTOR if equal GET_DESCRIPTOR: @@ -906,33 +921,8 @@ wait_finish_transfer: ; Acknowledge the OUT packet ldi r17, ~(_BV(RXOUTI)) ; Clear the Received OUT Data Interrupt Flag (RXOUTI) in r17 +clear_UEINTX_bit_and_reti: std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - -UNHANDLED_SETUP_REQUEST: - - ; if (Endpoint_IsSETUPReceived()) - ; (UEINTX & (1 << RXSTPI)) - ldd r16, Y+oUEINTX ; Load r16 with the value in the USB Endpoint 
Interrupt Register (UEINTX); - sbrs r16, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is set; received SETUP packet? - reti ; Return if RXSTPI is not set, SETUP packet already handled - - ; If we reach this part, the SETUP packet has not been handled, so we need to acknowledge it and request a stall - - ; Acknowledge the SETUP packet - ldi r17, ~(_BV(RXSTPI)) ; Clear the Received SETUP Interrupt Flag (RXSTPI) in r17 - std Y+oUEINTX, r17 ; Store r17 to the USB Endpoint Interrupt Register (UEINTX) - - ; STALL transaction - - ; // Endpoint_StallTransaction(); - ; UECONX |= (1 << STALLRQ); - ldd r16, Y+oUECONX ; Load r16 with the value in the USB Endpoint Configuration Register (UECONX) - ori r16, _BV(STALLRQ) ; Set the STALL Request Handshake Bit (STALLRQ) in r16 - std Y+oUECONX, r16 ; Store r16 to the USB Endpoint Configuration Register (UECONX) - - -EP_ISR_END: - reti ; Return from interrupt From 02a9bf38018609f514a5d0a7bf6bc101f122bc71 Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 25 Aug 2021 17:03:37 +0300 Subject: [PATCH 26/35] Save 2 bytes in the flash write loop Instead of saving the initial address value in a separate register pair, and then reloading it after the flash write loop, keep the address in the Z register and subtract 128 from it after the data write loop - this should give the same result, because the loop counter is explicitly initialized by the bootloader code. However, because the `sbiw` instruction supports only the 0...63 range for its constant argument, another optimization trick is also used - instead of incrementing the whole 16-bit address, only the low byte is incremented; this should give the same result, because the block start address must be aligned to the flash page size (128 bytes), therefore any carry to the high byte should not happen within the page (it may happen just past the end of the page, but as long as neither instruction performs that carry, the final result will be the same).
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index 7abcf37..e24c24e 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -689,15 +689,13 @@ SET_HID_REPORT: rcall clear_bit_and_wait_RXOUTI ; This function loads r17 with value of UEINTX load_page_address: - ; We store the page address in r15:r14 and not in r31:r30 because we need - ; to keep track of the page when we call write_page_to_flash - ldd r14, Y+oUEDATX ; Load r14 with LSB of page address - ldd r15, Y+oUEDATX ; Load r15 with MSB of page address + ldd r30, Y+oUEDATX ; Load r30 with LSB of page address + ldd r31, Y+oUEDATX ; Load r31 with MSB of page address check_page_address: ldi r26, 0xFF ; Load value 0xFF to r26 - cp r26, r14 ; Compare low byte of page address against 0xFF - cpc r26, r15 ; Compare high byte of page address against 0xFF + cp r26, r30 ; Compare low byte of page address against 0xFF + cpc r26, r31 ; Compare high byte of page address against 0xFF brne erase_page ; if r15:r14 != 0xFFFF jump to erase_page quit_bootloader: @@ -707,9 +705,6 @@ quit_bootloader: erase_page: - ; Set page address in Z-Register - movw r30, r14 ; Copy r15:r14 to r31:r30 (Z-Register) - ldi r17, (_BV(PGERS)|_BV(SPMEN)) ; load r17 with the value needed to erase the currently specified page rcall do_SPM ; execute page erase (this function requires r17 to be loaded first with the right value for SPMCSR) @@ -742,13 +737,15 @@ write_page_buffer: rcall do_SPM ; execute page buffer write (this function requires r17 to be loaded first with the right value for SPMCSR) increment_byte_address: - adiw r30, 2 ; Increment Z-Register (the current byte address) by 2 + subi r30, -2 ; Increment the current address by 2. + ; Only the low byte needs to be incremented, because the block start address must be page aligned, + ; therefore any carry to the high byte may happen only past the end of the block. 
dec r16 ; decrement r16 (number of words per page) brne check_endpoint_for_more_data ; loop while r16 is not equal to SPM_PAGESIZE (128) - ; Set page address in Z-Register - movw r30, r14 ; Copy r15:r14 (the original page address) back to r31:r30 (Z-Register) + ; Restore the page address in Z-Register + subi r30, SPM_PAGESIZE ; Move the address back to the start of page (again only the low byte needs to be changed). write_page_to_flash: ldi r17, (_BV(PGWRT)|_BV(SPMEN)) ; load r17 with the value needed to commit the current page buffer to the flash From 00c1f8bbb5763c8a77ec4da199d2417e72edab7a Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Wed, 25 Aug 2021 17:42:45 +0300 Subject: [PATCH 27/35] Add bootloader overwrite protection To protect the user from accidents (or even deliberate attempts to brick the device), compare the specified page address with the bootloader start address, and skip the flash write if the address could overlap with the bootloader. Handling the error case by jumping to `finish_hid_request` did not work (it returned an error, but then the bootloader stopped responding to any further USB requests); apparently it is important to consume the proper number of bytes from the USB FIFO. Because of that, the bad address case is handled by running the same loop that is used for the normal flash write case, but with all `spm` instructions disabled by setting a flag bit. Bit 7 of `reg_bRequest` is chosen for that role (that bit is guaranteed to be 0 when starting to handle a normal `SET_HID_REPORT` request without needing to add any instructions to clear it). The `START_APPLICATION` command handling is also changed to use the same code path to save space. Because of additional optimization of the address comparison and the `START_APPLICATION` handling code, the binary size is increased by just 2 bytes. 
Signed-off-by: Sergey Vlasov --- nanoBoot.S | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/nanoBoot.S b/nanoBoot.S index e24c24e..550f0c3 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -693,15 +693,27 @@ load_page_address: ldd r31, Y+oUEDATX ; Load r31 with MSB of page address check_page_address: - ldi r26, 0xFF ; Load value 0xFF to r26 - cp r26, r30 ; Compare low byte of page address against 0xFF - cpc r26, r31 ; Compare high byte of page address against 0xFF - brne erase_page ; if r15:r14 != 0xFFFF jump to erase_page - -quit_bootloader: - ; we received the START_APPLICATION command, change value of BootLoaderActive flag - clt ; clear the BootLoaderActive flag (T flag in SREG) - rjmp finish_hid_request ; jump to finish_hid_request + ; Protect against overwriting the bootloader - allow flash write only if the specified address is + ; less than the bootloader start address. Only the high byte needs to be tested, because the + ; bootloader start is guaranteed to be on a 256 bytes boundary. + cpi r31, hi8(reset_vector) ; Compare high byte of page address against the high byte of the bootloader start addresss + brcs erase_page ; If the address is below the bootloader start, allow the flash write operation + + ; The address is definitely not correct for a flash write operation; however, simply jumping to + ; finish_hid_request would not just fail this SET_HID_REPORT request - apparently not reading the + ; OUT data properly results in the bootloader not responding to any subsequent USB requests too. + ; Instead of doing that, we run the normal flash write loop even if the address was bad, but set + ; the "disable flash write" bit, so that the actual flash write instructions will be skipped. + ; Bit 7 of reg_bRequest is used for that purpose - is is known to be 0 in the normal case. 
+ sbr reg_bRequest, _BV(7) ; Set the "disable flash write" bit + + ; If the address is out of the allowed range for flash write, it may be the special value for the + ; START_APPLICATION command (0xffff); check for that value in the shortest way possible. + adiw r30, 1 ; Increment the address to turn 0xffff into 0x0000 + brne erase_page ; If the address was out of range and not 0xffff, jump to the regular flash write code + ; (which would just consume the OUT data to make USB work properly). + clt ; Otherwise (the address was 0xffff) clear the BootLoaderActive flag (T flag in SREG), + ; then fallthrough to the regular flash write code too. erase_page: @@ -993,7 +1005,12 @@ do_SPM: ; NOTE: This function assumes r17 already has the correct value for the SPMCSR register, depending on the ; desired SPM operation + ; NOTE: If bit 7 of reg_bRequest is set to 1, the actual SPM instruction will not be executed + ; (the wait loop will still run, but should just complete immediately). out _SFR_IO_ADDR(SPMCSR), r17 ; store value in r17 to the Store Program Memory Control and Status Register (SPMCSR) + sbrs reg_bRequest, 7 ; Skip the actual flash operation if the "disable flash write" bit is set. + ; This is apparently safe, because the SPM instruction must be executed within 4 cycles after setting SPMEN, + ; and the sbrs instruction takes just 1 cycle when not skipping. 
spm ; execute spm instruction based on the value loaded to SPMCSR wait_SPM: From 2d4bd1edce51f035fee201fb09ab043ba1641b37 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:38:37 +0900 Subject: [PATCH 28/35] LED support: MACRO for onboard LED Signed-off-by: Osamu Aoki --- nanoBoot.S | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index 550f0c3..d6e561e 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -48,6 +48,35 @@ ; hfuse memory = 0xDF ; efuse memory = 0xC4 +; LED -- Configure this for the LED +; +; Teensy 2.0 compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN D6 HIGH +#define LED_BIT 6 +#define LED_CONF DDRD +#define LED_PORT PORTD + +; Leonardo/Nano compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN C7 HIGH +; #define LED_BIT 7 +; #define LED_CONF DDRC +; #define LED_PORT PORTC + +; Pro Micro compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN D5 LOW +; #define LED_BIT 5 +; #define LED_CONF DDRD +; #define LED_PORT PORTD + +; Pro Micro compatible board (Adjust for other board) +; -- LED is ON with ATmega32u4 PIN B3 LOW +; #define LED_BIT 3 +; #define LED_CONF DDRB +; #define LED_PORT PORTB + +; Except for Pro Micro compatible board, initially LED is off. 
+; This code assumes Teensy 2.0 or Leonardo/Nano compatible board + ; SW assumptions: ; All Endpoints are being configured sequentially in ascending order, ; but, since we only use EP0, this is not that important From fcdcc1aa9fbdbd66058c51206f383aeb5d8639ed Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:38:52 +0900 Subject: [PATCH 29/35] LED support: Initialize LED port and OFF Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index d6e561e..27ba530 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -364,6 +364,9 @@ run_application: ; We get here if the cause of th jmp 0 ; Simply jump to 0x0000 (application) IMPORTANT NOTE!! This CANNOT be an 'rjmp'!! run_bootloader: + sbi _SFR_IO_ADDR(LED_CONF), LED_BIT ; Set IO register as output for LED + ; No need to trun off LED initially (non-promicro) -- MCU port is initialized as 0 + ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED initially (promicro) set ; Initialize BootLoaderActive flag (T flag in SREG) From f7aab3eeb03548858316ef70ededab1735447517 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 16:21:44 +0900 Subject: [PATCH 30/35] LED support: Turn on LED before sending descriptor Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index 27ba530..d00d615 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -925,6 +925,9 @@ verifyMaxDescriptorLength: send_descriptor: + sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (non-promicro) + ;cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun on LED before exiting (promicro) + ; Abort if RXSTPI is set ldd r17, Y+oUEINTX ; Load r17 with the value in the USB Endpoint Interrupt Register (UEINTX); sbrc r17, RXSTPI ; Skip the next instruction if the Received SETUP Interrupt Flag (RXSTPI) is cleared From 3958209dba7af7eaf87851e35c85358e427ed1fb Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 15:39:03 
+0900 Subject: [PATCH 31/35] LED support: Turn off LED before exiting bootloader, size 476 bytes Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index d00d615..5fc42f6 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -567,6 +567,9 @@ exit_bootloader: ; = Watchdog Timer initialization ; ================================================================= + cbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (non-promicro) + ;sbi _SFR_IO_ADDR(LED_PORT), LED_BIT ; Trun off LED before exiting (promicro) + ; NOTE!! This part of the code assumes MCUSR has already been cleared ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (exit) From 71257574cd4d7fdc5595a7fd02a3e97a92efe104 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 14:30:46 +0900 Subject: [PATCH 32/35] Fix a typo in the comment for set_watchdog_timer Equivalent of: b6cb76e ("Fix a typo in the comment for set_watchdog_timer", 2021-09-05) Signed-off-by: Osamu Aoki --- nanoBoot.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanoBoot.S b/nanoBoot.S index 5fc42f6..7d71d25 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -355,7 +355,7 @@ main: ldi YL, lo8(EIO_BASE) ; Load YL with EIO_BASE (initial) mov r17, rZERO ; Load r17 with zero to disable the Watchdog Timer completely - rcall set_watchdog_timer ; Call the subroutine that sets the wathdog timer with the values loaded r17 (r16 used inside set_watchdog_timer) + rcall set_watchdog_timer ; Call the subroutine that sets the watchdog timer with the values loaded r17 (r16 used inside set_watchdog_timer) ; check_reset_flags: sbrs rMCUSR, EXTRF ; Skip the next instruction if EXTRF is set (if External Reset Flag, skip next instruction, go to run_bootloader) From b44de337be451711e4b44e5e9077b6fc4f2f6a01 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Wed, 8 Dec 2021 18:14:36 +0900 Subject: [PATCH 33/35] Add comment on rONE Signed-off-by: Osamu Aoki --- nanoBoot.S | 3 +++ 
1 file changed, 3 insertions(+) diff --git a/nanoBoot.S b/nanoBoot.S index 7d71d25..77934e1 100644 --- a/nanoBoot.S +++ b/nanoBoot.S @@ -340,6 +340,9 @@ main: ; = Initialize constants ; ================================================================= ; Set R3=rONE + ; There are 2 RESET cases: hard reset and soft reset. + ; * For hard reset (power on or reset button pressed), there is no need to initialize rONE as 0 + ; * For soft reset from application, there is no guarantee clr rONE ; Initialize rONE as 0 (application may have set this before RESET) inc rONE ; Initialize rONE (R3 register = one) From 1f9f8edc92949325e12866e52c895b584e130c21 Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Tue, 31 Mar 2020 17:18:40 +0900 Subject: [PATCH 34/35] convert Makefile to UTF-8 Signed-off-by: Osamu Aoki --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d27aec6..f9c7f2b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Hey Emacs, this is a -*- makefile -*- #---------------------------------------------------------------------------- -# WinAVR Makefile Template written by Eric B. Weddington, Jörg Wunsch, et al. +# WinAVR Makefile Template written by Eric B. Weddington, Jörg Wunsch, et al. 
# # Released to the Public Domain # From f069340b092c7f4368880c1790db95fad5a1271c Mon Sep 17 00:00:00 2001 From: Osamu Aoki Date: Fri, 10 Apr 2020 11:23:58 +0900 Subject: [PATCH 35/35] Update README.md for osamu's site Signed-off-by: Osamu Aoki --- README.md | 93 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e9bd6c7..35ddad0 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,35 @@ -# nanoBoot +# nanoBoot (w/LED) -[![Build Status](https://travis-ci.org/volium/nanoBoot.svg?branch=master)](https://travis-ci.org/volium/nanoBoot) +## HID bootloader with LED support & overwrite protection -This repository contains the source code for the USB HID-based bootloader for ATmegaXXU4 family of devices. + -The name *nanoBoot* comes from the fact that the compiled source fits in the smallest available boot size on the ATMegaXXu4 devices, 256 words or 512 bytes. The code is based on Dean Camera's [LUFA](https://github.com/abcminiuser/lufa) USB implementation, but it is **EXTREMELY** streamlined, size-optimized and targeted for the [ATmega16U4](http://www.atmel.com/devices/atmega16u4.aspx) and [ATmega32u4](http://www.atmel.com/devices/atmega32u4.aspx) devices; I had to make quite a few hardware assumptions, mostly to the fuse settings related to clock configuration for things to be as compact as possible, but the code still allows for some flexibility. +This repository [nanoBoot w/LED](https://github.com/osamuaoki/nanoBoot) contains the source code for the USB HID-based bootloader for ATmega32U4 family of devices with **LED support and overwrite protection**. -It's very likely that a few sections can be rewritten to make it even smaller, and the ultimate goal is to support EEPROM programming as well, although that would require changes to the host code. 
+The name **nanoBoot** comes from the fact that the compiled source fits in the smallest available boot size on the ATMega32u4 devices, 256 words or **512 bytes**. The code is based on Dean Camera's [LUFA](https://github.com/abcminiuser/lufa) USB implementation, but it is **EXTREMELY** streamlined, size-optimized and targeted for the [ATmega16U4](http://www.atmel.com/devices/atmega16u4.aspx) and [ATmega32u4](http://www.atmel.com/devices/atmega32u4.aspx) devices. -The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit/d0ea26bb01e764340dc8ad7b473ad98cefdb52eb)) is supported as-is in the 'hid_bootloader_loader.py' script that ships with [LUFA-151115](https://github.com/abcminiuser/lufa/releases/tag/LUFA-151115), and is exactly 506 bytes long. +Initial and major portion of manual assembly code optimization efforts were performed by [volium](https://github.com/volium) and published as the original [volium/nanoBoot](https://github.com/volium/nanoBoot). + +Some tweaks were performed by osamu to allow arbitrary setting for CKDIV8 fuse and it was merged to the upstream. + +There were a lot of manual size optimization and program size check feature addition by [sigprof](https://github.com/sigprof) and published as [sigprof/nanoBoot](https://github.com/sigprof/nanoBoot) + +Osamu gathered all useful code and made a linear history commits with his LED support added as **led** branch here at [osamuaoki/nanoBoot](https://github.com/osamuaoki/nanoBoot). + +There are some hardware and usage assumptions to the fuse settings which keep this bootloader as compact as possible. For the best result: + + * Application should clear r3 register before calling soft reset (0x7F00) to load a new program by this nanoBoot bootloader. + +The current version (2021-12-08) will tested manually with the compiled `hid_bootloader_cli.c` from [LUFA](https://github.com/abcminiuser/lufa) on Debian GNU/Linux 12 (bookworm/testing). 
+ +Required packages on Debian GNU/Linux system: `gcc-avr`, `avr-libc`, `binutils-avr`, `libusb-dev`, `build-essential`, `git` ## HW assumptions: * CLK is 16 MHz Crystal and fuses are setup correctly to support it: * Select Clock Source (CKSEL3:CKSEL0) fuses are set to Extenal Crystal, CKSEL=1111 SUT=11 - * Divide clock by 8 fuse (CKDIV8) can be set to either 0 or 1 + * Divide clock by 8 fuse (CKDIV8) can be any value. + * 16 MHz operation needs 5V VCC for MCU * Bootloader starts on reset; Hardware Boot Enable fuse is configured, HWBE=0 * Boot Flash Size is set correctly to 256 words (512 bytes), StartAddress=0x3F00, BOOTSZ=11 * Device signature = 0x1E9587 @@ -24,10 +39,66 @@ The current version (commit #[d0ea26b](https://github.com/volium/nanoBoot/commit * hfuse memory = 0xD6 (EESAVE=0, BOOTRST=0) * efuse memory = 0xC7 (=0xF7, No BOD) -* Alternatively, BOD can be used to ease CKSEL-SUT setting requirements to - allow teensy-like FUSE settings: +* Alternatively BOD can be used to ease CKSEL-SUT setting requirements to + allow teensy like FUSE setting * lfuse memory = 0x5F (CKDIV8=0, 16CK + 0ms) * hfuse memory = 0xDF (EESAVE=1, BOOTRST=1) - * efuse memory = 0xF4 (BOD=2.4V) + * efuse memory = 0xC4 (=0xF4, BOD=2.4V) + +* LED on D6 port for Teensy 2.0 (Configurable in #define for any board) + +## Usage + +Please install this bootloader `nanoBoot.hex` using the ISP connected programmer (e.g. AVRISP mkII). + +``` +$ sudo avrdude -v -p atmega32u4 -c avrisp2 -Pusb -e -U flash:w:nanoBoot.hex \ + -U lfuse:w:0x5f:m -U hfuse:w:0xdf:m -U efuse:w:0xc4:m +``` + +You can start this bootloader by connecting the board to the PC with USB cable and pressing the RESET button. It is good idea to monitor the PC's USB connection. + +``` + $ watch lsusb +``` + +If this bootloader is started, you should see "Atmel". + +Please note, now this bootloader turns on LED just before sending device ID. Thus monitoring of USB is now optional. 
+ +(If LED doesn't turn on even after 10 second wait for any reason, press the RESET button again.) + +Then program MCU with, e.g., a `LED.hex` firmware as: + +``` + $ sudo hid_bootloader_cli -mmcu=atmega32u4 -v LED.hex +``` +Please note, this bootloader turns off LED upon finish programming. + +(Pressing the RESET button during active bootloader execution seems to halt the bootloader. This seems to be the reason you need to press the RESET button again.) + +For your convenience, pre-compiled HEX file and associated scripts are provided under the `precompile` directory. + +## Configuration + +Only the first configuration choice is tested with a Teensy 2.0 compatible board. + +In `Makefile`: + +* `F_CPU = 16000000` or `F_CPU = 8000000` +* `BOOT_START_OFFSET = 0x7E00` or any valid ones for MCU + +In `nanoBoot.S`: + +* Adjust `#define LED_BIT`, `#define LED_CONF`, and `#define LED_PORT` for each board. Default is Teensy 2.0 setting. + +Code for LED ON/OFF needs to be adjusted for board such as Pro Micro on which IO pin is connected to LED-cathode side and LED-anode side is connected to Vcc(5V/3V) side. + +## Documentation + +"The documentation is part of the source code itself, and even though some people may find it extremely verbose, I think that's better than lack of documentation; after all, assembly can be hard to read sometimes... ohhh yes, in case that was not expected, this is all written in pure GAS (GNU Assembly), compiled using the [Atmel AVR 8-bit Toolchain](http://www.atmel.com/tools/atmelavrtoolchainforwindows.aspx)." (per volium) + +"The elegant programming techniques presented by volium with detailed comments were very enlightening for me to get started. It's delightful for me to read. Don't miss it!" (per osamu) -The documentation is part of the source code itself, and even though some people may find it extremely verbose, I think that's better than lack of documentation; after all, assembly can be hard to read sometimes... 
ohhh yes, in case that was not expected, this is all written in pure GAS (GNU Assembly), compiled using the [Atmel AVR 8-bit Toolchain](http://www.atmel.com/tools/atmelavrtoolchainforwindows.aspx). + * [AVR Instruction Set Manual](http://ww1.microchip.com/downloads/en/devicedoc/atmel-0856-avr-instruction-set-manual.pdf) + * [ATmega16U4, ATmega32U4 - Complete Datasheet](http://ww1.microchip.com/downloads/en/devicedoc/atmel-7766-8-bit-avr-atmega16u4-32u4_datasheet.pdf)