Skip to content

Commit ffb47e3

Browse files
authored
Input position accessor for static attention IO manager
Differential Revision: D80649327 Pull Request resolved: #13561
1 parent c78b0fd commit ffb47e3

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,10 @@ class StaticAttentionIOManager {
576576
}
577577
}
578578

579+
/**
 * Read-only accessor for the manager's current input position. Returns the
 * value of input_pos_ by value; being a const member, it does not modify the
 * manager.
 */
size_t input_pos() const {
580+
return input_pos_;
581+
}
582+
579583
/**
580584
* Prefill helper. Run multiple inferences as needed depending on the length
581585
* of the prompt and method's input length. Returns the position in the output
@@ -586,6 +590,7 @@ class StaticAttentionIOManager {
586590
executorch::runtime::Span<TokenT> tokens,
587591
executorch::runtime::Span<TokenT> input_buffer,
588592
executorch::runtime::Method& method) {
593+
ET_LOG(Info, "Prefilling at position %zu", input_pos_);
589594
size_t input_len = input_buffer.size();
590595
auto& masks = get_mask(input_buffer.size());
591596
for (auto& pair : masks) {
@@ -621,6 +626,7 @@ class StaticAttentionIOManager {
621626
executorch::runtime::Method& method,
622627
std::function<TokenT(executorch::runtime::Method&)>& sample,
623628
std::function<bool(TokenT)>& token_callback) {
629+
ET_LOG(Info, "Decoding at position %zu", input_pos_);
624630
set_input(method, 0, input_buffer.data());
625631
auto& masks = get_mask(input_buffer.size());
626632
for (auto& pair : masks) {
@@ -661,6 +667,10 @@ class StaticAttentionIOManager {
661667
size_t window_size,
662668
size_t n_verifications,
663669
std::unordered_map<TokenT, SuffixCache<TokenT>> suffix_caches) {
670+
ET_LOG(
671+
Info,
672+
"Decoding with lookahead and verification at position %zu",
673+
input_pos_);
664674
set_input(method, 0, input_buffer.data());
665675
size_t input_len = input_buffer.size();
666676

0 commit comments

Comments
 (0)