@@ -576,6 +576,10 @@ class StaticAttentionIOManager {
576
576
}
577
577
}
578
578
579
+ size_t input_pos () const { // Const accessor: returns a copy of the input_pos_ member (the value the prefill/decode log lines print as the current position).
580
+ return input_pos_;
581
+ }
582
+
579
583
/* *
580
584
* Prefill helper. Run multiple inferences as needed depending on the length
581
585
* of the prompt and method's input length. Returns the position in the output
@@ -586,6 +590,7 @@ class StaticAttentionIOManager {
586
590
executorch::runtime::Span<TokenT> tokens,
587
591
executorch::runtime::Span<TokenT> input_buffer,
588
592
executorch::runtime::Method& method) {
593
+ ET_LOG (Info, " Prefilling at position %zu" , input_pos_);
589
594
size_t input_len = input_buffer.size ();
590
595
auto & masks = get_mask (input_buffer.size ());
591
596
for (auto & pair : masks) {
@@ -621,6 +626,7 @@ class StaticAttentionIOManager {
621
626
executorch::runtime::Method& method,
622
627
std::function<TokenT(executorch::runtime::Method&)>& sample,
623
628
std::function<bool(TokenT)>& token_callback) {
629
+ ET_LOG (Info, " Decoding at position %zu" , input_pos_);
624
630
set_input (method, 0 , input_buffer.data ());
625
631
auto & masks = get_mask (input_buffer.size ());
626
632
for (auto & pair : masks) {
@@ -661,6 +667,10 @@ class StaticAttentionIOManager {
661
667
size_t window_size,
662
668
size_t n_verifications,
663
669
std::unordered_map<TokenT, SuffixCache<TokenT>> suffix_caches) {
670
+ ET_LOG (
671
+ Info,
672
+ " Decoding with lookahead and verification at position %zu" ,
673
+ input_pos_);
664
674
set_input (method, 0 , input_buffer.data ());
665
675
size_t input_len = input_buffer.size ();
666
676
0 commit comments