diff --git a/dataset/extensibility_requests/default_setup.json b/dataset/extensibility_requests/default_setup.json
new file mode 100644
index 000000000..8bec58201
--- /dev/null
+++ b/dataset/extensibility_requests/default_setup.json
@@ -0,0 +1,8 @@
+{
+ "repo": "microsoftInternal/NAV",
+ "base_commit": "06e095c485e474431177e2170c0e22182bfdcc28",
+ "environment_setup_version": "27.0",
+ "project_paths": [
+ "App\\Layers"
+ ]
+}
diff --git a/dataset/extensibility_requests/extensibility_dataset.yaml b/dataset/extensibility_requests/extensibility_dataset.yaml
new file mode 100644
index 000000000..f603cbfe0
--- /dev/null
+++ b/dataset/extensibility_requests/extensibility_dataset.yaml
@@ -0,0 +1,1224 @@
+entries:
+ - instance_id: issue-29447
+ input:
+ title: "[Event Request] Codeunit 5880 \"Phys. Invt. Order-Finish\""
+ description: |
+ ### Why do you need this change?
+
+ Hi,
+ i need an event in the Codeunit 5880 "Phys. Invt. Order-Finish" in the procedure "CreateOrderTrackingBufferLines" to modify the TempInvtOrderTrackingBuffer record.
+
+ ### Describe the request
+
+ Add a new event in the procedure "CreateOrderTrackingBufferLines", before modifying the record TempInvtOrderTrackingBuffer.
+
+ procedure CreateOrderTrackingBufferLines(DocNo: Code[20]; LineNo: Integer)
+ var
+ ExpInvtOrderTracking: Record "Exp. Invt. Order Tracking";
+ ItemTrackingSetup: Record "Item Tracking Setup";
+ begin
+ PhysInvtRecordLine.Reset();
+ PhysInvtRecordLine.SetCurrentKey("Order No.", "Order Line No.");
+ PhysInvtRecordLine.SetRange("Order No.", DocNo);
+ PhysInvtRecordLine.SetRange("Order Line No.", LineNo);
+ PhysInvtRecordLine.SetFilter("Quantity (Base)", '<>%1', 0);
+ if PhysInvtRecordLine.Find('-') then
+ repeat
+ ItemTrackingSetup."Serial No." := PhysInvtRecordLine."Serial No.";
+ ItemTrackingSetup."Lot No." := PhysInvtRecordLine."Lot No.";
+ ItemTrackingSetup."Package No." := PhysInvtRecordLine."Package No.";
+ UpdateBufferRecordedQty(ItemTrackingSetup, PhysInvtRecordLine."Quantity (Base)", LineNo);
+ OnCreateOrderTrackingBufferLinesFromPhysInvtRecordLine(TempInvtOrderTrackingBuffer, PhysInvtRecordLine);
+ until PhysInvtRecordLine.Next() = 0;
+
+ ExpInvtOrderTracking.Reset();
+ ExpInvtOrderTracking.SetRange("Order No", DocNo);
+ ExpInvtOrderTracking.SetRange("Order Line No.", LineNo);
+ if ExpInvtOrderTracking.Find('-') then
+ repeat
+ ItemTrackingSetup."Serial No." := ExpInvtOrderTracking."Serial No.";
+ ItemTrackingSetup."Lot No." := ExpInvtOrderTracking."Lot No.";
+ ItemTrackingSetup."Package No." := ExpInvtOrderTracking."Package No.";
+ UpdateBufferExpectedQty(ItemTrackingSetup, ExpInvtOrderTracking."Quantity (Base)", LineNo);
+ OnCreateOrderTrackingBufferLinesFromExpInvtOrderTracking(TempInvtOrderTrackingBuffer, ExpInvtOrderTracking);
+ until ExpInvtOrderTracking.Next() = 0;
+
+ TempInvtOrderTrackingBuffer.Reset();
+ if TempInvtOrderTrackingBuffer.Find('-') then
+ repeat
+ TempInvtOrderTrackingBuffer."Qty. To Transfer" :=
+ TempInvtOrderTrackingBuffer."Qty. Recorded (Base)" - TempInvtOrderTrackingBuffer."Qty. Expected (Base)";
+ TempInvtOrderTrackingBuffer."Outstanding Quantity" := TempInvtOrderTrackingBuffer."Qty. To Transfer";
+ TempInvtOrderTrackingBuffer.Open := TempInvtOrderTrackingBuffer."Outstanding Quantity" <> 0;
+
+ // Start Event
+
+ OnCreateOrderTrackingBufferLinesOnBeforeModifyTempInvtOrderTracking(TempInvtOrderTrackingBuffer, DocNo, LineNo);
+
+ // End Event
+
+ TempInvtOrderTrackingBuffer.Modify();
+ until TempInvtOrderTrackingBuffer.Next() = 0;
+ end;
+
+ **New Event**
+ ```
+ [IntegrationEvent(false, false)]
+ local procedure OnCreateOrderTrackingBufferLinesOnBeforeModifyTempInvtOrderTracking(var TempInvtOrderTracking: Record "Invt. Order Tracking" temporary; DocNo: Code[20]; LineNo: Integer)
+ begin
+ end;
+ ```
+
+ comments: ""
+
+ expected:
+ labels: "SCM, event-request"
+ comments:
+ - author: Agent
+ body: |
+ ✅ Analysis complete - approved for implementation
+
+ **Implementation:**
+
+ Add event publisher in procedure `CreateOrderTrackingBufferLines` before `TempInvtOrderTrackingBuffer.Modify()`:
+
+ ```al
+ TempInvtOrderTrackingBuffer.Reset();
+ if TempInvtOrderTrackingBuffer.Find('-') then
+ repeat
+ TempInvtOrderTrackingBuffer."Qty. To Transfer" :=
+ TempInvtOrderTrackingBuffer."Qty. Recorded (Base)" - TempInvtOrderTrackingBuffer."Qty. Expected (Base)";
+ TempInvtOrderTrackingBuffer."Outstanding Quantity" := TempInvtOrderTrackingBuffer."Qty. To Transfer";
+ TempInvtOrderTrackingBuffer.Open := TempInvtOrderTrackingBuffer."Outstanding Quantity" <> 0;
+ OnCreateOrderTrackingBufferLinesOnBeforeModifyTempInvtOrderTracking(TempInvtOrderTrackingBuffer, DocNo, LineNo);
+ TempInvtOrderTrackingBuffer.Modify();
+ until TempInvtOrderTrackingBuffer.Next() = 0;
+ ```
+
+ Add event publisher procedure at the end of the codeunit (before the closing brace):
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnCreateOrderTrackingBufferLinesOnBeforeModifyTempInvtOrderTracking(var TempInvtOrderTracking: Record "Invt. Order Tracking" temporary; DocNo: Code[20]; LineNo: Integer)
+ begin
+ end;
+ ```
+
+ - instance_id: issue-29442
+ input:
+ title: "[W1][Page][176][Standard Purchase Code Subform] Make procedure UpdateTypeText() not local"
+ description: |
+ ### Why do you need this change?
+
+ Procedure UpdateTypeText() is in local scope and it updates field variable TypeAsText
+
+ ### Describe the request
+
+ Remove "local" scope from procedure UpdateTypeText() in page 176 "Standard Purchase Code Subform";
+
+ comments: ""
+
+ expected:
+ labels: "SCM, request-for-external"
+ comments:
+ - author: Agent
+ body: |
+ ✅ Analysis complete - approved for implementation
+
+ **Implementation:**
+
+ Remove `local` scope from procedure `UpdateTypeText()` in Page 176 "Standard Purchase Code Subform":
+
+ ```al
+ procedure UpdateTypeText()
+ var
+ RecRef: RecordRef;
+ begin
+ RecRef.GetTable(Rec);
+ TypeAsText := TempOptionLookupBuffer.FormatOption(RecRef.Field(Rec.FieldNo(Type)));
+ end;
+ ```
+
+ - instance_id: issue-29519
+ input:
+ title: "[Change Request] table 130 \"Incoming Document\" - field(18; Status; Option)"
+ description: |
+ ### Why do you need this change?
+
+ We have to extend the Enum for additional Document Status like Archived etc.
+
+ ### Describe the request
+
+ Hello, can you please change the field 18 "Status" in table 130 "Incoming Document" from option to an Enum?
+
+ ```
+ table 130 "Incoming Document";
+ {
+ .
+ .
+ .
+ field(18; Status; Enum "Incoming Document Status")
+ {
+ Caption = 'Status';
+ Editable = false;
+ }
+ ```
+
+ ```
+ enum 59002 "Incoming Document Status";
+ {
+ Extensible = true;
+
+ value(0; New)
+ {
+ Caption = 'New';
+ }
+ value(1; Released)
+ {
+ Caption = 'Released';
+ }
+ value(2; Rejected)
+ {
+ Caption = 'Rejected';
+ }
+ value(3; Posted)
+ {
+ Caption = 'Posted';
+ }
+ value(4; Created)
+ {
+ Caption = 'Created';
+ }
+ value(5; Failed)
+ {
+ Caption = 'Failed';
+ }
+ value(6; "Pending Approval")
+ {
+ Caption = 'Pending Approval';
+ }
+ }
+ ```
+
+ comments: ""
+
+ expected:
+ labels: "Integration, enum-request"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ Convert field 18 "Status" in table 130 "Incoming Document" from Option field to Enum "Incoming Document Status" (enum 59002). This enables partners to extend the status values (e.g., add "Archived") without modifying base code.
+
+ ```al
+ enum 59002 "Incoming Document Status"
+ {
+ Extensible = true;
+
+ value(0; New)
+ {
+ Caption = 'New';
+ }
+ value(1; Released)
+ {
+ Caption = 'Released';
+ }
+ value(2; Rejected)
+ {
+ Caption = 'Rejected';
+ }
+ value(3; Posted)
+ {
+ Caption = 'Posted';
+ }
+ value(4; Created)
+ {
+ Caption = 'Created';
+ }
+ value(5; Failed)
+ {
+ Caption = 'Failed';
+ }
+ value(6; "Pending Approval")
+ {
+ Caption = 'Pending Approval';
+ }
+ }
+ ```
+
+ **Modified field in table 130:**
+
+ ```al
+ field(18; Status; Enum "Incoming Document Status")
+ {
+ Caption = 'Status';
+ Editable = false;
+ }
+ ```
+
+ ---
+
+ **File:** `IncomingDocument.Table.al` (modified) and `IncomingDocumentStatus.Enum.al` (new)
+
+ - instance_id: issue-29557
+ base_commit: "706e179dd811ac4c9ef3a474b53d64fa852ad420"
+ input:
+ title: "Report 6 \"Trial Balance\" - add area(Content) to request page"
+ description: |
+ ### Why do you need this change?
+
+ In 4PS Construct we want to extend request page of report 6 "Trial Balance" with new field and it is not possible to do it.
+
+ ### Describe the request
+
+ Dear ALAppExtensions team,
+
+ On behalf of 4PS I would like to request adding Content area to request page of report 6 "Trial Balance".
+ Currently it not possible to add a new field to request page using report extension due to error "The control 'Content' is not found in the target 'RequestOptionsPage'".
+
+ ````
+ requestpage
+ {
+ SaveValues = true;
+ AboutTitle = 'About Trial Balance';
+ AboutText = 'View a snapshot of your chart of accounts with a balance at date and net change in the specified period.';
+
+ layout
+ {
+ area(Content) { } //new
+ }
+
+ actions
+ {
+ }
+ }
+ ````
+
+ comments: ""
+
+ expected:
+ labels: ""
+ state: "closed"
+ comments:
+ - author: Agent
+ body: |
+ This request cannot be implemented.
+
+ **Reason:** The target code (Report 6 "Trial Balance") is marked as obsolete and cannot be modified. The report has `ObsoleteState = Pending`, `ObsoleteTag = '28.0'`, and is wrapped in `#if not CLEAN28` preprocessor directive. The `ObsoleteReason` states: "This report has been replaced by the report Trial Balance (Excel). This report will be removed in a future release."
+
+ - instance_id: issue-29576
+ input:
+ title: "[Event Requests] - Table 83 \"Item Journal Line\""
+ description: |
+ ### Why do you need this change?
+
+ 1. Please add an event publisher before the if "Qty. (Phys. Inventory)" >= "Qty. (Calculated") block in the OnValidate trigger of _Qty. (Phys. Inventory)_.
+ The event should include an IsHandled flag so extensions can skip the standard Validate("Entry Type") logic and assign the field value directly without triggering validation.
+ 2. Please add a new event publisher before the Validate("Setup Time") and Validate("Run Time") calls in the _Concurrent Capacity_ field – OnValidate trigger. The publisher should include an IsHandled parameter so extensions can override this logic and assign the values without executing the standard validation triggers.
+
+ ### Describe the request
+
+ 1. OnValidate trigger field 54 "Qty. (Phys. Inventory)"
+ ``````al
+ field(54; "Qty. (Phys. Inventory)"; Decimal)
+ {
+ ...
+ trigger OnValidate()
+ var
+ IsHandled : Boolean; //New local variable
+ begin
+ ...
+
+ PhysInvtEntered := true;
+ Quantity := 0;
+ //Code change - Start
+ OnBeforeCheckQuantity(Rec, xRec, IsHandled);
+ If not IsHandled then
+ if "Qty. (Phys. Inventory)" >= "Qty. (Calculated)" then begin
+ Validate("Entry Type", "Entry Type"::"Positive Adjmt.");
+ Validate(Quantity, "Qty. (Phys. Inventory)" - "Qty. (Calculated)");
+ end else begin
+ Validate("Entry Type", "Entry Type"::"Negative Adjmt.");
+ Validate(Quantity, "Qty. (Calculated)" - "Qty. (Phys. Inventory)");
+ end;
+ //Code change - End
+ PhysInvtEntered := false;
+ end;
+
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeCheckQuantity(var ItemJournalLine: Record "Item Journal Line"; xItemJournalLine: Record "Item Journal Line"; var IsHandled: Boolean)
+ begin
+ end;
+ ``````
+ 2. OnValidate trigger field 5849 "Concurrent Capacity"
+ ``````al
+ field(5849; "Concurrent Capacity"; Decimal)
+ {
+ ...
+ trigger OnValidate()
+ var
+ WorkCenter: Record "Work Center";
+ ShopCalendarMgt: Codeunit "Shop Calendar Management";
+ TotalTime: Integer;
+ IsHandled : Boolean; //New local variable
+ begin
+ ...
+ WorkCenter.Get("Work Center No.");
+ OnBeforeTimeValidation(Rec, xRec, IsHandled);
+ if not IsHandled then begin
+ Validate("Setup Time", 0);
+ Validate(
+ "Run Time",
+ Round(
+ TotalTime / ShopCalendarMgt.TimeFactor("Cap. Unit of Measure Code") *
+ "Concurrent Capacity", WorkCenter."Calendar Rounding Precision"));
+ end;
+ end;
+ }
+
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeTimeValidation(var ItemJournalLine: Record "Item Journal Line"; xItemJournalLine: Record "Item Journal Line"; var IsHandled: Boolean)
+ begin
+ end;
+ ``````
+
+ comments: ""
+
+ expected:
+ labels: "missing-info"
+ comments:
+ - author: Agent
+ body: |
+ Hi @exnihiloo,
+
+ Thanks for submitting this extensibility request. To help us process it efficiently, please update the issue to include the required information.
+
+ 📚 [Extensibility Guidelines](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/devenv-contribute-extensibility)
+
+ **Missing Information:**
+
+ - **Alternatives Evaluated**: Which existing events or patterns were tried and why are they insufficient?
+ - **Justification for IsHandled**: Why is a standard event (without IsHandled) or alternative approach not feasible? What specific business logic must be skipped/replaced and why is bypass essential?
+ - **Performance Considerations**: How frequently will these events be triggered? What is the expected performance impact?
+ - **Data Sensitivity Review**: Are there any sensitive data exposure concerns? Please confirm data security implications.
+ - **Multi-Extension Interaction**: What are the risks if multiple extensions subscribe to these events with IsHandled? How would conflicts be handled?
+
+ Please update the issue with the missing details. We'll automatically re-analyze once updated.
+
+ - instance_id: issue-29465
+ base_commit: "706e179dd811ac4c9ef3a474b53d64fa852ad420"
+ input:
+ title: "[extensibility request] - Purch Post + Print - Event OnAfterConfirmPost"
+ description: |
+ ### Why do you need this change?
+
+ Could PurchaseHeader be set as reference?
+
+ Var PurchaseHeader:Record "Purchase Header"
+
+ ### Describe the request
+
+ I need this because i want to modify purchase header.
+
+ ```
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(var PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ We have already looked at existing events in the purchase posting flow but none of them expose the PurchaseHeader at the right point after the confirm dialog.
+ The IsHandled flag is needed so we can fully replace the default post-confirmation logic when our extension requires different behavior.
+ This event is triggered only when a user explicitly confirms posting, so the performance impact is negligible.
+ The PurchaseHeader record does not expose sensitive data beyond what is already visible in the purchase posting UI.
+ If multiple extensions subscribe with IsHandled, the last one to set it to true wins, which is acceptable given the narrow use case.
+
+ comments: ""
+
+ expected:
+ labels: ""
+ state: "closed"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Good news! This is already implemented in the codebase.**
+
+ The requested change already exists. The `PurchaseHeader` parameter in the `OnAfterConfirmPost` event **already has the `var` modifier**:
+
+ **Existing implementation:**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(var PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ **File:** `PurchPostPrint.Codeunit.al`
+
+ You can use this existing event in your extension to modify the Purchase Header after the post confirmation. Closing this issue as the request is already satisfied.
+
+ - instance_id: issue-29459-step1
+ input:
+ title: "[Event Request] Codeunit 5880 \"Phys. Invt. Order-Finish\""
+ description: |
+ ### Why do you need this change?
+
+ Hi,
+ i need an event in the Codeunit 5880 "Phys. Invt. Order-Finish" in the procedure "Code"() to modify the PhysInvtOrderLine record before creating the reservation entries.
+
+ ### Describe the request
+
+ Add a new event in the procedure "Code"(), before the procedure "CreateReservationEntries".
+
+ ```
+ procedure "Code"()
+ var
+ ItemTrackingSetup: Record "Item Tracking Setup";
+ IsHandled: Boolean;
+ begin
+ ...
+ if PhysInvtOrderLine."Use Item Tracking" and
+ not IsBinMandatoryNoWhseTracking(Item, PhysInvtOrderLine."Location Code")
+ then begin
+ PhysInvtOrderLine."Pos. Qty. (Base)" := 0;
+ PhysInvtOrderLine."Neg. Qty. (Base)" := 0;
+ TempInvtOrderTrackingBuffer.Reset();
+ TempInvtOrderTrackingBuffer.DeleteAll();
+ CreateOrderTrackingBufferLines(PhysInvtOrderLine."Document No.", PhysInvtOrderLine."Line No.");
+
+ // Start Event
+ OnCodeOnBeforeCreateReservationEntriesAfterCreateOrderTrackingBufferLines(PhysInvtOrderLine);
+ // End Event
+
+ CreateReservationEntries(PhysInvtOrderLine."Document No.", PhysInvtOrderLine."Line No.", true, 0);
+ end else
+ ...
+ end;
+ ```
+
+ Add the event publisher at the end of the codeunit:
+
+ ```
+ [IntegrationEvent(false, false)]
+ local procedure OnCodeOnBeforeCreateReservationEntriesAfterCreateOrderTrackingBufferLines(var PhysInvtOrderLine: Record "Phys. Invt. Order Line")
+ begin
+ end;
+ ```
+
+ Internal work item: [AB#614919](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/614919)
+
+ comments: ""
+
+ expected:
+ labels: "missing-info"
+ comments:
+ - author: Agent
+ body: |
+ ⚠️ **Clarification needed**
+
+ ---
+
+ We noticed there is an existing event `OnBeforePhysInvtOrderLineModify` that fires before `PhysInvtOrderLine.Modify()` and provides access to the same `PhysInvtOrderLine` record.
+
+ **Question:** Could you use the existing `OnBeforePhysInvtOrderLineModify` event for your scenario?
+
+ If not, please explain:
+ 1. What specific modifications do you need to make to `PhysInvtOrderLine`?
+ 2. Why must these modifications happen **before** `CreateReservationEntries` rather than before `Modify()`?
+ 3. How does the data flow to `CreateReservationEntries` affect your extension's behavior?
+
+ This helps us avoid adding duplicate events when an existing one could serve the same purpose.
+
+ ---
+
+ **File:** `PhysInvtOrderFinish.Codeunit.al`
+
+ - instance_id: issue-29459-step2
+ input:
+ title: "[Event Request] Codeunit 5880 \"Phys. Invt. Order-Finish\""
+ description: |
+ ### Why do you need this change?
+
+ Hi,
+ i need an event in the Codeunit 5880 "Phys. Invt. Order-Finish" in the procedure "Code"() to modify the PhysInvtOrderLine record before creating the reservation entries.
+
+ ### Describe the request
+
+ Add a new event in the procedure "Code"(), before the procedure "CreateReservationEntries".
+
+ ```
+ procedure "Code"()
+ var
+ ItemTrackingSetup: Record "Item Tracking Setup";
+ IsHandled: Boolean;
+ begin
+ ...
+ if PhysInvtOrderLine."Use Item Tracking" and
+ not IsBinMandatoryNoWhseTracking(Item, PhysInvtOrderLine."Location Code")
+ then begin
+ PhysInvtOrderLine."Pos. Qty. (Base)" := 0;
+ PhysInvtOrderLine."Neg. Qty. (Base)" := 0;
+ TempInvtOrderTrackingBuffer.Reset();
+ TempInvtOrderTrackingBuffer.DeleteAll();
+ CreateOrderTrackingBufferLines(PhysInvtOrderLine."Document No.", PhysInvtOrderLine."Line No.");
+
+ // Start Event
+ OnCodeOnBeforeCreateReservationEntriesAfterCreateOrderTrackingBufferLines(PhysInvtOrderLine);
+ // End Event
+
+ CreateReservationEntries(PhysInvtOrderLine."Document No.", PhysInvtOrderLine."Line No.", true, 0);
+ end else
+ ...
+ end;
+ ```
+
+ Add the event publisher at the end of the codeunit:
+
+ ```
+ [IntegrationEvent(false, false)]
+ local procedure OnCodeOnBeforeCreateReservationEntriesAfterCreateOrderTrackingBufferLines(var PhysInvtOrderLine: Record "Phys. Invt. Order Line")
+ begin
+ end;
+ ```
+
+ Internal work item: [AB#614919](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/614919)
+ labels: "missing-info"
+ comments: |
+ **Comment 1 (AleksandricMarko):**
+ ⚠️ **Clarification needed**
+
+ ---
+
+ We noticed there is an existing event `OnBeforePhysInvtOrderLineModify` that fires before `PhysInvtOrderLine.Modify()` and provides access to the same `PhysInvtOrderLine` record.
+
+ **Question:** Could you use the existing `OnBeforePhysInvtOrderLineModify` event for your scenario?
+
+ If not, please explain:
+ 1. What specific modifications do you need to make to `PhysInvtOrderLine`?
+ 2. Why must these modifications happen **before** `CreateReservationEntries` rather than before `Modify()`?
+ 3. How does the data flow to `CreateReservationEntries` affect your extension's behavior?
+
+ This helps us avoid adding duplicate events when an existing one could serve the same purpose.
+
+ ---
+
+ **File:** `PhysInvtOrderFinish.Codeunit.al`
+
+ ---
+
+ **Comment 2 (Mariangela-DeMaio-EOS):**
+ Hi,
+ thanks for the suggestion, I think I can use the event "OnBeforePhysInvtOrderLineModify".
+
+ expected:
+ labels: ""
+ state: "closed"
+ comments:
+ - author: Agent
+ body: |
+ Thanks for confirming!
+
+ Since the existing `OnBeforePhysInvtOrderLineModify` event works for your scenario, no new event is needed. This event provides `var PhysInvtOrderLine` allowing you to modify the record before it's saved.
+
+ **Existing event you can use:**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforePhysInvtOrderLineModify(var PhysInvtOrderLine: Record "Phys. Invt. Order Line")
+ begin
+ end;
+ ```
+
+ **File:** `PhysInvtOrderFinish.Codeunit.al`
+
+ Closing this issue as your need is already satisfied by the existing event.
+
+ - instance_id: issue-29455
+ input:
+ title: "[Extend publisher] Codeunit 99000771 BOM Matrix Management - OnBeforeBuildMatrix"
+ description: |
+ ### Why do you need this change?
+
+ The problem occurs with the prod. bom version comparison when a production bom has another sub bom in the production bom lines. If this sub bom has multiple versions then it is possible hat the comparison shows not the components of the active certified version but of a newer one which is currently under development. In the case part of the "BuildMatrix" procedure is the following GetVersion call
+
+ However, this does not filter for the current version or even consider the status, but only the start date, which can result in the active version not being returned.
+
+ ### Describe the request
+
+ It would be desirable if either the onbeforebuildmatrix event were adjusted and the VersionCode parameter were declared as a var parameter so that, if necessary, the active version could be set manually in this event.
+
+ or if the GetVersion function were to return the current version directly, as on the "Production BOM" page using the exixting GetBOMVersion procedure of codeunit VersionManagement VersionManagement.GetBOMVersion(Rec."No.", WorkDate(), true);
+
+ Internal work item: [AB#614921](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/614921)
+
+ comments: ""
+
+ expected:
+ labels: "SCM, event-request"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ Extend the existing `OnBeforeBuildMatrix` event publisher in Codeunit 99000771 "BOM Matrix Management" by changing the `VersionCode` parameter to a `var` parameter, allowing subscribers to override the version code.
+
+ **Current code:**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeBuildMatrix(ProductionBOMNo: Code[20]; VersionCode: Code[20]; Level: Integer; Quantity: Decimal; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ **Proposed change:**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeBuildMatrix(ProductionBOMNo: Code[20]; var VersionCode: Code[20]; Level: Integer; Quantity: Decimal; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ ---
+
+ **File:** `BOMMatrixManagement.Codeunit.al`
+
+ - instance_id: issue-29454
+ input:
+ title: "[Request for External] table 7001 \"Price List Line\" - CheckAmountType"
+ description: |
+ ### Why do you need this change?
+
+ Can you remove local from provedure CheckAmountType in table 7001 "Price List Line"?
+
+ ### Describe the request
+
+ I need this to avoid making duplicate code, as I need to check the same scenario.
+ Internal work item: [AB#614920](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/614920)
+
+ comments: ""
+
+ expected:
+ labels: "SCM, request-for-external"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ Change the `CheckAmountType` procedure from `local` to public (remove the `local` keyword) in table 7001 "Price List Line".
+
+ **Current code:**
+
+ ```al
+ local procedure CheckAmountType(FldCaption: Text; AmountType: Enum "Price Amount Type")
+ begin
+ if "Amount Type" = AmountType then
+ Error(FieldNotAllowedForAmountTypeErr, FldCaption, FieldCaption("Amount Type"), Format("Amount Type"));
+ end;
+ ```
+
+ **Proposed change:**
+
+ ```al
+ procedure CheckAmountType(FldCaption: Text; AmountType: Enum "Price Amount Type")
+ begin
+ if "Amount Type" = AmountType then
+ Error(FieldNotAllowedForAmountTypeErr, FldCaption, FieldCaption("Amount Type"), Format("Amount Type"));
+ end;
+ ```
+
+ ---
+
+ **File:** `PriceListLine.Table.al`
+
+ - instance_id: issue-29453-step1
+ input:
+ title: "[Event Request] page 7001 \"Price List Lines\" - OnAfterUpdateColumnVisibility"
+ description: |
+ ### Why do you need this change?
+
+ Can you change IncludeSender to true?
+
+ ```
+ //[IntegrationEvent(false, false)]
+ [IntegrationEvent(true, false)]
+ local procedure OnAfterUpdateColumnVisibility(PriceListHeader: Record "Price List Header"; var SourceTypeVisible: Boolean; var JobSourceTypeVisible: Boolean)
+ begin
+ end;
+ ```
+
+ ### Describe the request
+
+ ..
+ Internal work item: [AB#615080](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/615080)
+
+ comments: ""
+
+ expected:
+ labels: "missing-info"
+ comments:
+ - author: Agent
+ body: |
+ Thank you for your request. To process this event modification, please provide the following missing information:
+
+ **1. Complete Description:**
+ The "Describe the request" section is incomplete. Please provide:
+ - What specific problem you're trying to solve
+ - What information from the page you need to access
+ - Why the current event parameters are insufficient
+ - How you plan to use the page sender reference
+
+ **2. Detailed Justification for IncludeSender=true:**
+ Changing IncludeSender to true is a critical modification that requires careful justification. Please explain:
+ - **Why subscribers MUST have access to the page instance** - what specific page properties, methods, or state do you need to access?
+ - **Why the current event parameters (PriceListHeader, SourceTypeVisible, JobSourceTypeVisible) are insufficient** - what can't you accomplish with these?
+ - **What alternatives you evaluated** - why can't you achieve your goal by adding parameters to the event instead?
+ - **Specific use case** - describe the exact scenario where your extension needs page instance access
+
+ Please update the issue with this information so we can evaluate the request properly.
+
+ - instance_id: issue-29453-step2
+ input:
+ title: "[Event Request] page 7001 \"Price List Lines\" - OnAfterUpdateColumnVisibility"
+ description: |
+ ### Why do you need this change?
+
+ Can you change IncludeSender to true, so i can get information from the page and be able to set visibility for my own fields?
+
+ ```
+ //[IntegrationEvent(false, false)]
+ [IntegrationEvent(true, false)]
+ local procedure OnAfterUpdateColumnVisibility(PriceListHeader: Record "Price List Header"; var SourceTypeVisible: Boolean; var JobSourceTypeVisible: Boolean)
+ begin
+ end;
+ ```
+
+ ### Describe the request
+
+ ..
+ Internal work item: [AB#615080](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/615080)
+ labels: "missing-info"
+
+ comments: |
+ **Comment 1 (AleksandricMarko):**
+ Thank you for your request. To process this event modification, please provide the following missing information:
+
+ **1. Complete Description:**
+ The "Describe the request" section is incomplete. Please provide:
+ - What specific problem you're trying to solve
+ - What information from the page you need to access
+ - Why the current event parameters are insufficient
+ - How you plan to use the page sender reference
+
+ **2. Detailed Justification for IncludeSender=true:**
+ Changing IncludeSender to true is a critical modification that requires careful justification. Please explain:
+ - **Why subscribers MUST have access to the page instance** - what specific page properties, methods, or state do you need to access?
+ - **Why the current event parameters (PriceListHeader, SourceTypeVisible, JobSourceTypeVisible) are insufficient** - what can't you accomplish with these?
+ - **What alternatives you evaluated** - why can't you achieve your goal by adding parameters to the event instead?
+ - **Specific use case** - describe the exact scenario where your extension needs page instance access
+
+ Please update the issue with this information so we can evaluate the request properly.
+
+ ---
+
+ **Comment 2 (auzhhv):**
+ Hi Alexander,
+
+ **1. Complete Description:**
+ We have added custom fields that should behave simular to other fields in the same page regarding visibility. These fields have custom criterias for visibility, but the points where our visibility variables should be updated is the same as the existing fields. The event is called OnAfterUpdateColumnVisibility, but it's not possible to update visibility for other fields because of access. As we cannot add custom variables to events, we need access to find and change other events on pages. We will add procedures in the a page extension to access our visibility variables. Ex. SetField1Visibility(NewVisibility) where Field1 is the name of our field.
+
+ **2. Detailed Justification for IncludeSender=true:**
+ We must have access to the page to be able to set the our own visibility variables.
+ It makes no sense to put it other places as the name of the event applies that its the correct place to change visibility. And I have not found other places it makes sense to do the same. And this is probably the reason why this is the place base application has the same code.
+
+ expected:
+ labels: "SCM, event-request"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ Change `IncludeSender` from `false` to `true` on the existing `OnAfterUpdateColumnVisibility` event in Page 7001 "Price List Lines" to allow subscribers to access the page instance.
+
+ **Current code (around line 490):**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterUpdateColumnVisibility(PriceListHeader: Record "Price List Header"; var SourceTypeVisible: Boolean; var JobSourceTypeVisible: Boolean)
+ begin
+ end;
+ ```
+
+ **Proposed change:**
+
+ ```al
+ [IntegrationEvent(true, false)]
+ local procedure OnAfterUpdateColumnVisibility(PriceListHeader: Record "Price List Header"; var SourceTypeVisible: Boolean; var JobSourceTypeVisible: Boolean)
+ begin
+ end;
+ ```
+
+ ---
+
+ **File:** `PriceListLines.Page.al`
+
+ - instance_id: issue-29443
+ input:
+ title: "[W1][Page][171][Standard Sales Code Subform] Make procedure UpdateTypeText() not local"
+ description: |
+ ### Why do you need this change?
+
+ Procedure UpdateTypeText() is in local scope and it updates field variable TypeAsText
+
+ ### Describe the request
+
+ Remove "local" scope from procedure UpdateTypeText() in page 171 "Standard Sales Code Subform"
+ Internal work item: [AB#614045](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/614045)
+
+ comments: ""
+
+ expected:
+ labels: "SCM, request-for-external"
+ comments:
+ - author: Agent
+ body: |
+ ✅ Analysis complete - approved for implementation
+
+ **Implementation:**
+
+ Remove `local` scope from procedure `UpdateTypeText()` in Page 171 "Standard Sales Code Subform":
+
+ ```al
+ procedure UpdateTypeText()
+ var
+ RecRef: RecordRef;
+ begin
+ RecRef.GetTable(Rec);
+ TypeAsText := TempOptionLookupBuffer.FormatOption(RecRef.Field(Rec.FieldNo(Type)));
+ end;
+ ```
+
+ - instance_id: issue-29441
+ input:
+ title: "[W1][Page][289][Recurring Job Jnl.] Move variables to protected variables"
+ description: |
+ ### Why do you need this change?
+
+ Variables used as field values need to be accessable so they could be updated after changes from new fields
+
+ ### Describe the request
+
+ Please move variables JobDescription and AccName to protected variables
+
+ comments: ""
+
+ expected:
+ labels: "SCM, request-for-external"
+ comments:
+ - author: Agent
+ body: |
+ ✅ Analysis complete - approved for implementation
+
+ **Implementation:**
+
+ Move `JobDescription` and `AccName` variables from `var` to `protected var` section in Page 289 "Recurring Job Jnl.":
+
+ ```al
+ var
+ JobJnlManagement: Codeunit JobJnlManagement;
+ ReportPrint: Codeunit "Test Report-Print";
+ JobJnlReconcile: Page "Job Journal Reconcile";
+ CurrentJnlBatchName: Code[10];
+
+ protected var
+ JobDescription: Text[100];
+ AccName: Text[100];
+ ShortcutDimCode: array[8] of Code[20];
+ ```
+
+ - instance_id: issue-29755
+ input:
+ title: "[Event Request] codeunit 7322 \"Create Inventory Pick/Movement\" - procedure CreateInvtMvntWithoutSource"
+ description: |
+ ### Why do you need this change?
+
+ Currently there is no way to circumvent the "There is nothing to handle" error.
+
+ ### Alternatives Evaluated:
+ There are no alternatives for this issue. All events are either too early or too late.
+
+ ### Justification for IsHandled:
+ We need to be able to exit the procedure without the error.
+
+ ### Performance Considerations:
+ Called once per procedure execution, only if there is an error. Negligible impact.
+
+ ### Data Sensitivity Review:
+ No significantly sensitive data would be exposed through this event.
+
+ ### Multi-extension interaction:
+ There is no risk if multiple extensions are subscribed.
+
+ ### Describe the request
+
+ Can you add an event when raising the error "There is nothing to handle"
+
+ ```AL
+ procedure CreateInvtMvntWithoutSource(var InternalMovementHeader: Record "Internal Movement Header")
+ var
+ InternalMovementLine: Record "Internal Movement Line";
+ NewWarehouseActivityLine: Record "Warehouse Activity Line";
+ RemQtyToPickBase: Decimal;
+ //>>> New
+ IsHandled: Boolean;
+ //<<< New
+ begin
+ if not HideDialog then
+ if not Confirm(CreateInvtMvmtQst, false) then
+ exit;
+
+ ...
+
+ FindNextLineNo();
+
+ repeat
+ NewWarehouseActivityLine.Init();
+ ...
+ CreatePickOrMoveLine(NewWarehouseActivityLine, RemQtyToPickBase, RemQtyToPickBase, false);
+ until InternalMovementLine.Next() = 0;
+
+ if NextLineNo = 10000 then begin
+ //>>> New
+ OnCreateInvtMvntWithoutSourceOnBeforeNothingToHandleError(IsHandled);
+ if IsHandled then
+ exit;
+ //<<< New
+ Error(NothingToHandleMsg);
+ end;
+
+ MoveWhseComments(InternalMovementHeader."No.", CurrWarehouseActivityHeader."No.");
+
+ if DeleteHandledInternalMovementLines(InternalMovementHeader."No.") then begin
+ InternalMovementHeader.Delete(true);
+ if not HideDialog then
+ Message(ActivityCreatedMsg, CurrWarehouseActivityHeader.Type, CurrWarehouseActivityHeader."No.");
+ end else
+ if not HideDialog then
+ Message(TrackingNotFullyAppliedMsg, CurrWarehouseActivityHeader.Type, CurrWarehouseActivityHeader."No.");
+
+ OnAfterCreateInvtMvntWithoutSource(CurrWarehouseActivityHeader, InternalMovementHeader);
+ end;
+
+ //>>> New
+ [IntegrationEvent(false, false)]
+ local procedure OnCreateInvtMvntWithoutSourceOnBeforeNothingToHandleError(var IsHandled: Boolean)
+ begin
+ end;
+ //<<< New
+ ```
+ Internal work item: [AB#623063](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/623063)
+
+ comments: ""
+
+ expected:
+ labels: "SCM, event-request"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ Add a new integration event in the `CreateInvtMvntWithoutSource` procedure to allow bypassing the "nothing to handle" error.
+
+ **Location in `CreateInvtMvntWithoutSource` procedure (around line 1598):**
+
+ ```al
+ if NextLineNo = 10000 then begin
+ IsHandled := false;
+ OnCreateInvtMvntWithoutSourceOnBeforeNothingToHandleError(IsHandled);
+ if not IsHandled then
+ Error(NothingToHandleMsg);
+ end;
+ ```
+
+ **Event Publisher (add at the end of the codeunit with other events):**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnCreateInvtMvntWithoutSourceOnBeforeNothingToHandleError(var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ ---
+
+ **File:** `CreateInventoryPickMovement.Codeunit.al`
+
+ - instance_id: issue-29767
+ input:
+ title: "[Request for Extrenal] Report 8051 \"Contract Deferrals Release\""
+ description: |
+ ### Why do you need this change?
+
+ In order to document the Contract Deferrals release for customer and vendor with additional data in G/L Entries, we require certain events.
+
+ ### Describe the request
+
+ We need an integration event at the procedure
+
+ local procedure InsertTempGenJournalLine(DocumentNo: Code[20]; ContractNo: Code[20]; DeferralEntryNo: Integer; DimSetID: Integer; AccountNo: Code[20]; BalAccountNo: Code[20]; PostingAmount: Decimal; GenBusPostingGroup: Code[20]; GenProdPostingGroup: Code[20]; Partner: Enum "Service Partner")
+ begin
+ LineNo += 1;
+ TempGenJournalLine."Account No." := AccountNo;
+ TempGenJournalLine."Bal. Account No." := BalAccountNo;
+ TempGenJournalLine."Document No." := DocumentNo;
+ TempGenJournalLine."Subscription Contract No." := ContractNo;
+ TempGenJournalLine."Gen. Bus. Posting Group" := GenBusPostingGroup;
+ TempGenJournalLine."Gen. Prod. Posting Group" := GenProdPostingGroup;
+ TempGenJournalLine."Dimension Set ID" := DimSetID;
+ TempGenJournalLine.Amount := PostingAmount;
+ TempGenJournalLine."Line No." := LineNo;
+ TempGenJournalLine."Deferral Line No." := DeferralEntryNo;
+ OnBeforeInsertTemGenJournalLine(TempGenJournalLine, Partner)
+ TempGenJournalLine.Insert(false);
+ end;
+
+ [IntegrationEvent(false, false)]
+ Local procedure OnBeforeInsertTemGenJournalLine(var TempGenJournalLine: Record "Gen. Journal Line"; Partner: Enum "Service Partner")
+
+ And need event at next procedure for transfer the data in TempGenJournalLine to GenJnlLine before post the data
+
+ procedure PostGenJnlLine(var TempGenJournalLine: Record "Gen. Journal Line" temporary; PostingDate: Date; SourceCodeSetupContractDeferralsRelease: Code[10])
+ ...
+ OnBeforePostGenJnlLint(TempGenJournalLine, GenJnlLine);
+ GenJnlPostLine.RunWithCheck(GenJnlLine);
+ ...
+ OnBeforePostGenJnlLine(TempGenJournalLine, GenJnlLine);
+ GenJnlPostLine.RunWithCheck(GenJnlLine);
+ end;
+
+ [IntegrationEvent(false, false)]
+ Local procedure OnBeforePostGenJnlLint(TempGenJournalLine: Record "Gen. Journal Line"; var GenJournalLine: Record "Gen. Journal Line")
+
+ comments: ""
+
+ expected:
+ labels: "agent-not-processable"
+ comments: []
+
+ - instance_id: issue-29789
+ base_commit: "181dc724bef8901c9cab0c5a5a5a0e796958adc5"
+ input:
+ title: "[Event Request] Missing OnApplyStdCodesToPurchaseLinesOnAfterValidateType event"
+ description: |
+ ### Why do you need this change?
+
+ Table 172 "Standard Customer Sales Code" exposes `OnApplyStdCodesToSalesLinesOnAfterValidateType` inside `ApplyStdCodesToSalesLines`. This event fires immediately after `Type` is validated on the newly inserted sales line, allowing extensions to react to or complement the type validation — for example, to set custom default values, apply additional field logic, or integrate with third-party modules that depend on the line type being resolved before further fields are set.
+
+ Table 175 "Standard Vendor Purchase Code" has a symmetric procedure `ApplyStdCodesToPurchaseLines` but is missing the equivalent event. Extensions that extend both the sales and purchase recurring-code flow are unable to implement consistent behavior on the purchase side.
+
+ There is no existing event in `ApplyStdCodesToPurchaseLines` that fires at the same point in the flow (after type validation, before additional field assignments such as `No.` validation):
+
+ | Existing event | Position | Sufficient? |
+ |---|---|---|
+ | `OnBeforeApplyStdCodesToPurchaseLines` | Before loop | Too early |
+ | `OnApplyStdCodesToPurchaseLinesOnBeforeStdPurchLineFind` | Before per-line processing | Too early |
+ | `OnApplyStdCodesToPurchaseLinesOnAfterPurchLineInsert` | After insert, all fields set | Too late |
+ | `OnAfterApplyStdCodesToPurchaseLines` | After full loop | Too late |
+
+ ### Describe the request
+
+ ### EventRequest
+
+
+ [W1][Table][175][Standard Vendor Purchase Code]
+ [ApplyStdCodesToPurchaseLines]
+ ___
+
+ Add a missing integration event equivalent to
+ OnApplyStdCodesToSalesLinesOnAfterValidateType in Table 172.
+ The event should fire immediately after PurchLine.Validate(Type)
+ inside the ApplyStdCodesToPurchaseLines loop, passing the current
+ purchase line and the standard purchase line as parameters.
+ ___
+ ```al
+
+ [IntegrationEvent(false, false)]
+ local procedure OnApplyStdCodesToPurchaseLinesOnAfterValidateType(
+ var PurchaseLine: Record "Purchase Line";
+ StandardPurchaseLine: Record "Standard Purchase Line")
+ begin
+ end;
+ ```
+
+ ### Proposed placement (pseudocode context)
+
+ ```al
+ // inside ApplyStdCodesToPurchaseLines loop:
+ PurchLine.Validate(Type, StdPurchLine.Type);
+ OnApplyStdCodesToPurchaseLinesOnAfterValidateType(PurchLine, StdPurchLine); // <-- new
+ PurchLine.Validate("No.", StdPurchLine."No.");
+ ```
+
+ ### Symmetry reference — Table 172 existing event
+
+ ```al
+ // Table 172 "Standard Customer Sales Code";
+ [IntegrationEvent(false, false)]
+ local procedure OnApplyStdCodesToSalesLinesOnAfterValidateType(
+ var SalesLine: Record "Sales Line";
+ StandardSalesLine: Record "Standard Sales Line")
+ begin
+ end;
+ ```
+
+ The requested event is a direct purchase-side mirror of this existing sales-side event.
+
+ Internal work item: [AB#624115](https://dynamicssmb2.visualstudio.com/1fcb79e7-ab07-432a-a3c6-6cf5a88ba4a5/_workitems/edit/624115)
+
+ comments: ""
+
+ expected:
+ labels: ""
+ state: "closed"
+ comments:
+ - author: Agent
+ body: |
+ ✅ **Good news! This is already implemented in the codebase.**
+
+ The requested change already exists:
+
+ **Existing implementation:**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnApplyStdCodesToPurchaseLinesOnLoopStdPurchLine(var StandardVendorPurchaseCode: Record "Standard Vendor Purchase Code"; var StandardPurchaseLine: Record "Standard Purchase Line"; var PurchaseLine: Record "Purchase Line"; var PurchaseHeader: Record "Purchase Header"; var StandardPurchaseCode: Record "Standard Purchase Code"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ **File:** `StandardVendorPurchaseCode.Table.al`
+
+ You can use this existing event in your extension. Closing this issue as the request is already satisfied.
diff --git a/docs/_data/extensibility-request.json b/docs/_data/extensibility-request.json
new file mode 100644
index 000000000..f744d8bdb
--- /dev/null
+++ b/docs/_data/extensibility-request.json
@@ -0,0 +1,4 @@
+{
+ "runs": [],
+ "aggregate": []
+}
diff --git a/pyproject.toml b/pyproject.toml
index e00138488..8cb94f23d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
"textual>=7.0",
"numpy>=2.3.5",
"scipy>=1.16.3",
+ "autoevals>=0.0.106",
]
[project.scripts]
diff --git a/src/bcbench/agent/copilot/agent.py b/src/bcbench/agent/copilot/agent.py
index 2fdc98078..a4088d3ff 100644
--- a/src/bcbench/agent/copilot/agent.py
+++ b/src/bcbench/agent/copilot/agent.py
@@ -7,10 +7,10 @@
import yaml
-from bcbench.agent.copilot.metrics import parse_metrics
-from bcbench.agent.shared import build_mcp_config, build_prompt
+from bcbench.agent.copilot.metrics import parse_metrics, parse_metrics_ext
+from bcbench.agent.shared import build_mcp_config, build_prompt, build_prompt_ext
from bcbench.config import get_config
-from bcbench.dataset import DatasetEntry
+from bcbench.dataset import DatasetEntry, ExtensibilityDatasetEntry
from bcbench.exceptions import AgentError, AgentTimeoutError
from bcbench.logger import get_logger
from bcbench.operations import setup_agent_skills, setup_custom_agent, setup_instructions_from_config
@@ -75,6 +75,7 @@ def run_copilot_agent(
cmd_args,
cwd=str(repo_path),
stderr=subprocess.PIPE, # only capture stderr where metrics are printed
+ input=prompt.encode("utf-8"),
timeout=_config.timeout.agent_execution,
check=True,
)
@@ -104,3 +105,90 @@ def run_copilot_agent(
except Exception as e:
logger.exception(f"Unexpected error running Copilot CLI: {e}")
raise
+
+
+def run_copilot_agent_ext(
+ entry: ExtensibilityDatasetEntry, model: str, category: EvaluationCategory, repo_path: Path, output_dir: Path, al_mcp: bool = False
+) -> tuple[AgentMetrics | None, ExperimentConfiguration]:
+ """Run GitHub Copilot CLI agent on a single dataset entry.
+
+ Returns:
+ Tuple of (AgentMetrics, ExperimentConfiguration) with metrics and configuration used during the experiment
+ """
+ config_file = Path(__file__).parent.parent / "shared" / "config.yaml"
+ copilot_config = yaml.safe_load(config_file.read_text())
+
+ logger.info(f"Running GitHub Copilot CLI on: {entry.instance_id}")
+
+ prompt: str = build_prompt_ext(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
+ mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp)
+ instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
+ custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
+ config = ExperimentConfiguration(mcp_servers=mcp_server_names, custom_instructions=instructions_enabled, custom_agent=custom_agent)
+
+ logger.info(f"Executing Copilot CLI in directory: {repo_path}")
+ logger.debug(f"Using prompt:\n{prompt}")
+
+ copilot_cmd = shutil.which("copilot.cmd") or shutil.which("copilot")
+ if not copilot_cmd:
+ raise AgentError("Copilot CLI not found in PATH. Please ensure it is installed and available.")
+
+ try:
+ cmd_args = [
+ copilot_cmd,
+ "--allow-all-tools", # required for non-interactive mode
+ "--allow-all-paths", # might be required for non-interactive mode, seems to hang when trying to access files outside allowed dirs
+ "--disable-builtin-mcps",
+ f"--model={model}",
+ "--log-level=debug",
+ "--disable-parallel-tools-execution",
+ f"--log-dir={output_dir.resolve()}",
+ ]
+ if not instructions_enabled:
+ cmd_args.append("--no-custom-instructions")
+ if mcp_config_json:
+ cmd_args.append(f"--additional-mcp-config={mcp_config_json}")
+ if custom_agent:
+ cmd_args.append(f"--agent={custom_agent}")
+
+        # The prompt is delivered to Copilot CLI via stdin (the `input=` argument of
+        # subprocess.run below) rather than as a trailing CLI argument or a prompt
+        # file: stdin avoids depending on prompt-file flags that differ between CLI
+        # versions and sidesteps OS argument-length limits for long prompts.
+
+ logger.debug(f"Copilot command args: {cmd_args}")
+
+ result = subprocess.run(
+ cmd_args,
+ cwd=str(repo_path),
+ stderr=subprocess.PIPE, # only capture stderr where metrics are printed
+ input=prompt.encode("utf-8"),
+ timeout=_config.timeout.agent_execution,
+ check=True,
+ )
+
+ if result.stderr:
+ sys.stdout.buffer.write(result.stderr)
+ sys.stdout.buffer.flush()
+ logger.info(f"Copilot CLI run complete for: {entry.instance_id}")
+
+ stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else ""
+ stderr_lines = stderr.splitlines()
+
+ # Find the most recent session log for tool usage parsing
+ session_logs = list(output_dir.glob("process-*.log"))
+ session_log_path = max(session_logs, key=lambda p: p.stat().st_mtime) if session_logs else None
+
+ metrics = parse_metrics_ext(stderr_lines, session_log_path=session_log_path)
+
+ return metrics, config
+ except subprocess.TimeoutExpired:
+ logger.error(f"Copilot CLI timed out after {_config.timeout.agent_execution} seconds")
+ metrics = AgentMetrics(execution_time=_config.timeout.agent_execution)
+ raise AgentTimeoutError("Copilot CLI timed out", metrics=metrics, config=config) from None
+ except subprocess.CalledProcessError as e:
+ logger.error(f"Copilot CLI execution failed with error {e.stderr}")
+ raise AgentError(f"Copilot CLI execution failed: {e}") from None
+ except Exception as e:
+ logger.exception(f"Unexpected error running Copilot CLI: {e}")
+ raise
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/agents/Argus.agent.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/agents/Argus.agent.md
new file mode 100644
index 000000000..3e9e0cb28
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/agents/Argus.agent.md
@@ -0,0 +1,55 @@
+---
+name: Argus
+description: 'Extensibility Analysis Agent specialized in analyzing GitHub extensibility issues.'
+tools: ['read/readFile', 'search/fileSearch', 'agent', 'todo']
+---
+
+This agent acts as an Extensibility Analysis Agent. Its purpose is to analyze GitHub extensibility issues by collecting data, checking eligibility, determining request types, verifying requirements, analyzing the codebase, and finally assigning teams and applying labels/comments.
+
+Execute ALL the following steps (1-7) sequentially.
+
+1. **Step 1: Initialize**
+ Use the view tool to read `.github/instructions/Argus/step0-getting-started.md` and follow all startup checks described there. Format input data as `GH_REQUEST`.
+
+2. **Step 2: Eligibility Check**
+ Use the view tool to read `.github/instructions/Argus/step2-eligibility-check.md` and follow all instructions using `GH_REQUEST`.
+ - Produce output: `{"IsEligible": boolean, "IsStale": boolean, "FailureReason": string}`
+ - If `IsEligible` is `false`: proceed directly to step 7.
+
+3. **Step 3: Request Types**
+ Use the view tool to read `.github/instructions/Argus/step3-request-types.md` and follow all instructions using `GH_REQUEST`.
+ - Produce output: `{"Success": boolean, "TYPE": string, "SUBTYPE": string, "FailureLabel": string, "FailureReason": string}`
+ - Store `TYPE` and `SUBTYPE` for use in later steps.
+ - If `Success` is `false`: proceed directly to step 7.
+
+4. **Step 4: Requirements Check**
+ Use the view tool to read `.github/instructions/Argus/step4-requirements-check.md` and follow all instructions using `GH_REQUEST`, `TYPE`, and `SUBTYPE`.
+ - Use targeted `grep` and view tool calls for any file lookups required by the instructions.
+ - Produce output: `{"Success": boolean, "FailureLabel": string, "FailureReason": string}`
+ - If `Success` is `false`: proceed directly to step 7.
+
+5. **Step 5: Codebase Analysis**
+ Use the view tool to read `.github/instructions/Argus/step5-codebase-analysis.md` and follow all instructions using `GH_REQUEST`, `TYPE`, and `SUBTYPE`.
+ - **CRITICAL for codebase search**: All searches MUST be scoped to `App/Layers/` — never search outside this path. Find files by filename glob first (e.g. `glob("App/Layers/**/W1/**/*RecurringJobJnl*.al")`). AL files follow `CamelCaseName.ObjectType.al` naming. Only if glob fails, use a single targeted grep by object name (NOT numeric ID) scoped to `App/Layers/` (e.g. `grep("Recurring Job Jnl", "App/Layers/**/W1/**/*.al")`). Never use `type="al"`, bare `**/*.al`, patterns without the `App/Layers/` prefix, or search by numeric ID (e.g. "page 289") — these scan the entire codebase and cause severe performance issues. **If an object is not found within `App/Layers/` after glob + one grep, STOP IMMEDIATELY — do NOT search elsewhere — return `agent-not-processable` and proceed to step 7.**
+ - Produce output: `{"Success": boolean, "OBJECT_LIST": array, "SUGGESTED_IMPLEMENTATION": string, "FailureLabel": string, "FailureReason": string}`
+ - Store `OBJECT_LIST` and `SUGGESTED_IMPLEMENTATION` for use in later steps.
+ - If `Success` is `false`: proceed directly to step 7.
+
+6. **Step 6: Team Assignment**
+ Use the view tool to read `.github/instructions/Argus/step6-team-assignment.md` and follow all instructions using `OBJECT_LIST`.
+ - Use the view tool to read any mapping files referenced in the instructions.
+ - Produce output: `{"Success": boolean, "TEAM_LABEL": string, "FailureLabel": string, "FailureReason": string}`
+ - Store `TEAM_LABEL` for use in step 7.
+ - If `Success` is `false`: proceed directly to step 7.
+
+7. **Step 7: Finalize**
+ Use the view tool to read `.github/instructions/Argus/step7-labels-comments.md` and follow all instructions. Use all collected data from previous steps (including any failure reasons if applicable) to avoid refetching.
+ **CRITICAL**: Your final output for this step MUST be a single ```json code fence containing EXACTLY this structure — no other keys, no nesting, no renaming:
+ ```json
+ {
+ "labels_to_apply": ["label1", "label2"],
+ "comment_to_post": "full comment text",
+ "state_of_issue": "open"
+ }
+ ```
+ Do NOT use alternative key names like `RecommendedLabels`, `final_labels`, `GitHubComment`, `comment_template`, etc. The keys MUST be literally `labels_to_apply`, `comment_to_post`, `state_of_issue`.
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_alternative_suggestions.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_alternative_suggestions.yaml
new file mode 100644
index 000000000..616029b86
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_alternative_suggestions.yaml
@@ -0,0 +1,209 @@
+event_request_alternative_suggestions:
+ description: "Alternative suggestions for event-request type - recommending existing solutions or better approaches"
+
+ common_rules:
+ - id: similar_events_exist
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "Similar events already available nearby"
+ action: "suggest_alternative"
+ rejection_note: "Similar event exists at this location: {event_name}. Consider adding parameters to the existing event instead of creating a new one."
+ requires_confirmation: true
+ guidance: |
+ When analyzing an event request, check if a similar event already exists at or near the requested location.
+ If found, suggest adding parameters to the existing event instead of creating a new one.
+
+ **Detection Criteria:**
+ - Event fires at the same logical point (before/after same operation)
+ - Event provides access to the same or related records
+
+ **Why Consolidate Events:**
+ - Reduces event overhead (fewer event invocations)
+ - Simplifies subscriber implementation (one event to subscribe to)
+ - Easier to maintain and understand
+ - Avoids confusion about which event to use
+
+ **Example - Similar Event Exists:**
+
+ Author requests: "Add OnBeforePostSalesLine event with CustomerNo parameter"
+
+ Existing event found:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforePostSalesLine(var SalesLine: Record "Sales Line"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ ✅ **Recommended Solution** - Add parameter to existing event:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforePostSalesLine(var SalesLine: Record "Sales Line"; var IsHandled: Boolean; CustomerNo: Code[20])
+ begin
+ end;
+ ```
+
+ ❌ **Not Recommended** - Creating a new similar event:
+ ```al
+ // Don't create this - too similar to existing event
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforePostSalesLineWithCustomer(var SalesLine: Record "Sales Line"; CustomerNo: Code[20])
+ begin
+ end;
+ ```
+
+ **Agent Response Template:**
+ ```
+ A similar event `{existing_event_name}` already exists at this location.
+
+ Instead of creating a new event, we recommend adding the requested parameter(s) to the existing event:
+ - Current: `{current_signature}`
+ - Proposed: `{proposed_signature}`
+
+ Do you agree with this approach?
+ ```
+
+ - id: existing_event_may_satisfy_request
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "An existing nearby event may satisfy the author's use case"
+ action: "request_clarification"
+ warning_message: "An existing event may satisfy this request. Clarification needed before adding a new event."
+ requires_confirmation: true
+ guidance: |
+ When analyzing an event request, check if an existing event at a different (but nearby) location
+ could satisfy the author's use case. This differs from `similar_events_exist` - here the event
+ is at a DIFFERENT location but might still achieve the same goal.
+
+ **When to Apply This Rule:**
+ - Author wants to modify a record before a specific operation
+ - An existing event fires at a nearby point with the same record available
+ - It's unclear if the timing difference between events matters for the use case
+
+ **Analysis Steps:**
+ 1. Identify the author's stated goal (what they want to modify and why)
+ 2. Check if an existing event provides access to the same record
+ 3. Analyze if the existing event fires at a point where modifications would achieve the same result
+ 4. Consider if data flow to subsequent procedures is affected by event placement
+
+ **Example Scenario:**
+
+ Author requests: "Add OnBeforeValidateQuantity event in Sales Line table to modify Quantity"
+
+ Agent finds existing event:
+ ```al
+ // Existing event in OnValidate trigger for Quantity field
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterValidateQuantity(var SalesLine: Record "Sales Line")
+ begin
+ end;
+ ```
+
+ **Questions to Ask:**
+ - Does the author need to modify Quantity BEFORE validation runs, or would modifying it AFTER work?
+ - Will the existing OnAfterValidateQuantity event allow the author to achieve their goal?
+ - What specific behavior requires the event to be BEFORE vs AFTER?
+
+ **Agent Response Template:**
+ ```
+ We noticed there is an existing event `OnAfterValidateQuantity` that fires after the Quantity field is validated.
+
+ This event provides access to the same `Sales Line` record that you want to modify.
+
+ **Question:** Could you use the existing `OnAfterValidateQuantity` event for your scenario?
+
+ If not, please explain:
+ 1. What specific modifications do you need to make?
+ 2. Why must these modifications happen BEFORE validation rather than after?
+ 3. What behavior would differ if you used the existing event instead?
+ ```
+
+ - id: integration_event_include_sender_true
+ applies_to: ["event-request"]
+ severity: "blocking"
+ description: "IntegrationEvent with IncludeSender=true (first parameter is true)"
+ action: "suggest_alternative_or_request_justification"
+ requires_confirmation: true
+ warning_message: "The event signature has IncludeSender set to true. For codeunits, consider using 'this' keyword instead. For all object types, a detailed justification is required."
+ guidance: |
+ This rule has two behaviors depending on the object type:
+
+ ---
+
+ ## Behavior A: Codeunit Objects — Suggest 'this' Keyword
+
+ When the event is in a **codeunit**, suggest replacing IncludeSender=true with an explicit 'this' parameter.
+
+ **What is 'this' keyword:**
+ The 'this' keyword in AL provides a reference to the current instance of an object. It can be passed as a parameter to events, allowing subscribers to access the publisher object's public members.
+
+ **Why 'this' is Better Than IncludeSender=true:**
+ - More explicit and intentional - developer consciously passes the reference
+ - Clearer code - the parameter is visible in the event call
+
+ **Example - Using IncludeSender=true (Less Preferred):**
+ ```al
+ codeunit 50100 "Sales Order Processor"
+ {
+ // ❌ Less preferred - IncludeSender=true affects all subscribers
+ [IntegrationEvent(true, false)]
+ local procedure OnBeforeProcessOrder(var SalesHeader: Record "Sales Header"; var IsHandled: Boolean)
+ begin
+ end;
+ }
+ ```
+ **Example - Using 'this' Keyword (Preferred):**
+ ```al
+ codeunit 50100 "Sales Order Processor"
+ {
+ // ✅ Preferred - explicit 'this' parameter
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeProcessOrder(var SalesHeader: Record "Sales Header"; var IsHandled: Boolean; Sender: Codeunit "Sales Order Processor")
+ begin
+ end;
+
+ local procedure ProcessOrder(var SalesHeader: Record "Sales Header")
+ var
+ IsHandled: Boolean;
+ begin
+ OnBeforeProcessOrder(SalesHeader, IsHandled, this); // Explicitly passing 'this'
+ end;
+ }
+ ```
+
+ **Agent Response Template (Codeunit):**
+ ```
+ Instead of IncludeSender=true, we recommend using the 'this' keyword explicitly:
+
+ **Suggested Implementation:**
+ - Pass 'this' when calling the event
+
+ This approach is more explicit and follows modern AL patterns.
+ Do you agree with this approach, or do you have a specific reason for requiring IncludeSender=true?
+ ```
+
+ ---
+
+ ## Behavior B: All Object Types — Require Detailed Justification
+
+ For **all object types** (including codeunits, pages, reports, tables, etc.), if the author requests IncludeSender=true **without providing a justification**, ask for one before proceeding.
+
+ **When to Apply:**
+ - The request includes IncludeSender=true (first parameter of [IntegrationEvent] is true)
+ - The author has not explained why IncludeSender is needed
+
+ **What Counts as Sufficient Justification:**
+ - A concrete use case that requires the subscriber to call back into the publisher object
+ - Explanation of why an explicit parameter cannot be used instead
+ - Reference to a specific scenario where the subscriber needs the sender instance
+
+ **Agent Response Template (Justification Required):**
+ ```
+ The requested event has IncludeSender set to true. Before proceeding, please provide a detailed justification:
+
+ 1. **Why is IncludeSender=true needed?** What does the subscriber need to do with the sender instance?
+ 2. **Why can't an explicit parameter be used instead?** (e.g., passing the relevant data directly as a parameter)
+ 3. **What is the concrete use case** that requires access to the publisher object from the subscriber?
+
+ This information is needed to evaluate whether IncludeSender=true is the appropriate design choice here.
+ ```
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_blockers.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_blockers.yaml
new file mode 100644
index 000000000..cf126dd8b
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_blockers.yaml
@@ -0,0 +1,72 @@
+event_request_blockers:
+ description: "Blocker rules specific to event-request type - auto-reject scenarios"
+
+ rules:
+ - id: consecutive_events_not_allowed
+ applies_to: ["event-request"]
+ severity: "blocking"
+ description: "Consecutive event calls (one directly after another) are not allowed"
+ action: "suggest_alternative"
+ rejection_note: "Adding a new event directly adjacent to an existing event is not allowed. The existing event should be updated with additional parameters instead."
+ guidance: |
+ **Blocking Rule:** Two event calls placed consecutively (one directly after another with no meaningful code between them) are not acceptable. If someone requests a new event just above or below an existing one, the solution is to update the existing event with new parameters.
+
+ **Rationale:**
+ - Consecutive events create unnecessary overhead (two event invocations instead of one)
+ - Subscribers would need to subscribe to multiple events for related data
+ - Increases complexity for extension developers
+ - Indicates the existing event is missing parameters rather than needing a new event
+ - Violates the principle of event consolidation
+
+ **Detection Criteria:**
+ - New event requested immediately before an existing event (no code between)
+ - New event requested immediately after an existing event (no code between)
+ - Only whitespace, comments, or variable declarations between events
+
+ **Example - NOT Allowed (consecutive events):**
+ ```al
+ local procedure ProcessSalesOrder(var SalesHeader: Record "Sales Header")
+ begin
+ // Some processing code...
+
+ // ❌ NOT ALLOWED - Two events directly next to each other
+ OnBeforeProcessSalesOrderNewEvent(SalesHeader, CustomerNo); // New event requested
+ OnBeforeProcessSalesOrder(SalesHeader, IsHandled); // Existing event
+
+ if not IsHandled then begin
+ // Processing logic
+ end;
+ end;
+ ```
+
+ **Example - Correct Solution (update existing event):**
+ ```al
+ local procedure ProcessSalesOrder(var SalesHeader: Record "Sales Header")
+ begin
+ // Some processing code...
+
+ // ✅ CORRECT - Existing event updated with new parameter
+ OnBeforeProcessSalesOrder(SalesHeader, IsHandled, CustomerNo); // Added CustomerNo parameter
+
+ if not IsHandled then begin
+ // Processing logic
+ end;
+ end;
+
+ // Updated event signature with new parameter at the END
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeProcessSalesOrder(var SalesHeader: Record "Sales Header"; var IsHandled: Boolean; var CustomerNo: Code[20])
+ begin
+ end;
+ ```
+
+ **What Counts as "Consecutive":**
+ - Direct adjacency (no code between events)
+ - Only whitespace between events
+ - Only comments between events
+ - Only variable declarations between events (no executable code)
+
+ **What Does NOT Count as "Consecutive":**
+ - Meaningful executable code between events (assignments, function calls, conditionals)
+ - Different logical sections of a procedure
+ - Events in different procedures/triggers
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_implementation.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_implementation.yaml
new file mode 100644
index 000000000..0e8f7dc4f
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_implementation.yaml
@@ -0,0 +1,331 @@
+event_request_implementation:
+ description: "Implementation rules for event-request type - best practices and patterns"
+
+ rules:
+ - id: never_suggest_ishandled_unless_requested
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Agent must NEVER suggest IsHandled events unless explicitly requested by the author"
+ action: "internal_warning"
+ rule: "When implementing event requests, ONLY add IsHandled parameter if the author EXPLICITLY requested it using keywords like 'IsHandled', 'bypass', 'skip', 'handled', or 'prevent execution'. Regular event requests get regular events WITHOUT IsHandled."
+ guidance: |
+ **CRITICAL RULE:** The agent must NEVER suggest or implement an IsHandled event pattern unless the author explicitly requested it.
+
+ **Detection of IsHandled Request:**
+ The author must use one of these explicit indicators:
+ - "IsHandled" keyword in the request
+ - "bypass" or "skip" the standard logic
+ - "prevent" or "stop" execution
+ - "handled" parameter
+ - Explicitly mentions wanting to replace/override the standard behavior
+
+ **If NOT explicitly requested:**
+ - Implement a REGULAR IntegrationEvent without IsHandled parameter
+ - Event should provide access to data for extension purposes
+ - Event should NOT include bypass/skip functionality
+
+ **Example - Regular Event Request (NO IsHandled keywords):**
+ Author says: "Add an event publisher in TestPurchaseLines"
+
+ ❌ WRONG - Agent adds IsHandled:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeCheckIsCancellationSupported(...; var IsHandled: Boolean)
+ ```
+
+ ✅ CORRECT - Agent adds regular event:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeCheckIsCancellationSupported(...; var IsCancellationSupported: Boolean)
+ ```
+
+ **Example - IsHandled Request (explicit keywords):**
+ Author says: "Add IsHandled event to bypass the credit limit check"
+
+ ✅ CORRECT - Agent adds IsHandled:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeCreditLimitCheck(...; var IsHandled: Boolean)
+ ```
+
+ **Rationale:**
+ - IsHandled events allow bypassing critical business logic
+ - They carry higher risk and require explicit author intent
+ - Regular events are safer and provide extensibility without bypass capability
+ - Agent should not escalate the scope of a request beyond what was asked
+
+ - id: adding_var_to_event_parameter
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Adding 'var' modifier to existing event parameter is allowed (non-breaking change)"
+ action: "internal_warning"
+ detailed_explanation: |
+ **Not applicable for NEW events.**
+ **Allowed Change:** Adding `var` to an existing event parameter is NOT a breaking change.
+
+ **Rationale:**
+ - Existing subscribers continue to work (they just won't modify the value)
+ - Expands functionality without breaking backward compatibility
+ - Allows new subscribers to modify the parameter while old ones remain unaffected
+
+ **Example - Allowed Change:**
+
+ Original event:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ Changed to (ALLOWED):
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(var PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ - id: new_parameters_at_end
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "New parameters in existing events must be added at the end of the parameter list"
+ action: "internal_warning"
+ exception_note: "When adding new parameters to an existing event, ALWAYS place them at the end of the parameter list, regardless of where the author suggested placing them."
+ detailed_explanation: |
+ **Mandatory Rule:** New parameters in existing events must ALWAYS be added at the END of the parameter list.
+
+ **Rationale:**
+ - Adding parameters in the middle of the signature would be hard to track
+ - It is easier to maintain and understand changes when new parameters are added at the end
+
+ **Example - Correct Implementation:**
+
+ Original event:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnBeforeProcess(var Customer: Record Customer; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ Author requests to add `NewParam: Text` parameter.
+
+ ❌ Wrong (even if author suggested this):
+ ```al
+ local procedure OnBeforeProcess(var Customer: Record Customer; NewParam: Text; var IsHandled: Boolean)
+ ```
+
+ ✅ Correct (always add at the end):
+ ```al
+ local procedure OnBeforeProcess(var Customer: Record Customer; var IsHandled: Boolean; NewParam: Text)
+ ```
+
+ - id: temporary_record_parameter_naming
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Temporary record parameters must have 'Temp' prefix"
+ check_method: "parameter_naming_analysis"
+ action: "internal_warning"
+ rule: "When adding a temporary record parameter to an event, the parameter name MUST start with 'Temp' prefix."
+ guidance: |
+ **Mandatory Rule:** All temporary record parameters in event signatures must have the 'Temp' prefix.
+
+ **Rationale:**
+ - Clearly indicates to subscribers that the record is temporary
+ - Prevents confusion about data persistence
+ - Follows BC naming conventions for temporary variables
+ - Makes code more readable and self-documenting
+
+ **Example - Correct Implementation:**
+
+ ❌ Wrong:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterProcess(var InvtOrderTracking: Record "Invt. Order Tracking" temporary)
+ begin
+ end;
+ ```
+
+ ✅ Correct:
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterProcess(var TempInvtOrderTracking: Record "Invt. Order Tracking" temporary)
+ begin
+ end;
+ ```
+
+ - id: multiple_calls_same_event_location
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "Multiple event calls requested for the same location require separate event publishers"
+ action: "internal_warning"
+ rule: "When a request asks for multiple event calls at the same location (e.g., same procedure/trigger), each call MUST have its own separate event publisher with a unique name."
+ guidance: |
+ **Mandatory Rule:** If a request includes multiple calls for the same event location, each call must be implemented as a separate event publisher with a distinct name.
+
+ **Rationale:**
+ - Each event should have a single, clear purpose
+ - Separate events allow subscribers to choose which specific event to handle
+ - Improves code readability and maintainability
+ - Allows for better performance optimization (subscribers only listen to events they need)
+ - Provides clearer semantics for what each event represents
+
+ **Naming Convention for Separate Events:**
+ - Use descriptive suffixes that indicate the specific purpose
+ - Follow pattern: `On[Before/After][Operation][SpecificPurpose]`
+ - Examples:
+ - `OnBeforePostValidateCustomer`
+ - `OnBeforePostCheckInventory`
+ - `OnAfterInsertUpdateDimensions`
+ - `OnAfterInsertNotifyWarehouse`
+
+ - id: event_naming_convention
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Event names must follow the standard naming pattern regardless of author's suggested name"
+ action: "internal_warning"
+ rule: "Agent MUST apply the standard event naming convention even if the author proposes a different name."
+ guidance: |
+ **Mandatory Rule:** All events must follow the standard naming pattern based on their placement location.
+
+ ---
+
+ ## Option 1: Events at Beginning/End of Procedure or Trigger
+
+ **Pattern:** `OnBefore/OnAfter[ProcedureName/TriggerName]`
+
+ Use this pattern when the event fires at the very beginning or end of a procedure/trigger.
+
+ | Location | Timing | Correct Event Name |
+ |----------|--------|-------------------|
+ | PostSalesLine procedure | Beginning | `OnBeforePostSalesLine` |
+ | PostSalesLine procedure | End | `OnAfterPostSalesLine` |
+ | OnInsert trigger | Beginning | `OnBeforeOnInsert` |
+ | OnModify trigger | End | `OnAfterOnModify` |
+ | OnDelete trigger | Beginning | `OnBeforeOnDelete` |
+ | OnPreReport trigger | Beginning | `OnBeforeOnPreReport` |
+ | OnPostReport trigger | End | `OnAfterOnPostReport` |
+ | "Sell-to Customer No." OnValidate | Beginning | `OnBeforeValidateSellToCustomerNo` |
+ | Quantity OnValidate | End | `OnAfterValidateQuantity` |
+
+ ---
+
+ ## Option 2: Events in the Middle of Procedure or Trigger
+
+ **Pattern:** `On[ProcedureName/TriggerName]OnBefore/OnAfter[ActionContext]`
+
+ Use this pattern when the event fires at a specific point inside the procedure/trigger (not at the beginning/end).
+
+ | Location | Action Context | Correct Event Name |
+ |----------|----------------|-------------------|
+ | PostSalesLine procedure | Before validation | `OnPostSalesLineOnBeforeValidation` |
+ | PostSalesLine procedure | After calculation | `OnPostSalesLineOnAfterCalculation` |
+ | Code procedure | Before check | `OnCodeOnBeforeCheck` |
+ | OnInsert trigger | After init defaults | `OnOnInsertOnAfterInitDefaults` |
+ | OnModify trigger | Before validate location | `OnOnModifyOnBeforeValidateLocationCode` |
+ | "Sales Invoice Line" OnAfterGetRecord | After calc amounts | `OnSalesInvoiceLineOnAfterGetRecordOnAfterCalcAmounts` |
+
+ ---
+
+ **Summary:**
+ - **Beginning/End:** `OnBefore/OnAfter` + ProcedureName/TriggerName
+ - **Middle:** `On` + ProcedureName/TriggerName + `OnBefore/OnAfter` + ActionContext
+
+ - id: event_parameter_naming_convention
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Event parameters must follow naming conventions - no abbreviations for records, descriptive names for simple types"
+ action: "internal_warning"
+ rule: "When defining event publisher parameters, record parameters must use the full table name without spaces, and simple parameters must use descriptive names without abbreviations."
+ guidance: |
+ **Mandatory Rule:** Event parameters must follow strict naming conventions regardless of what the author suggests.
+
+ ### Record Parameters - Remove Spaces, No Abbreviations
+
+ For record parameters, use the table name with spaces removed. Do NOT use abbreviations.
+
+ | Table Name (in AL) | Correct Parameter Name | ❌ Wrong |
+ |-------------------|------------------------|----------|
+ | "Sales Header" | `SalesHeader` | `SalesHdr`, `SH` |
+ | "Sales Line" | `SalesLine` | `SalesLn`, `SL` |
+ | "Item Ledger Entry" | `ItemLedgerEntry` | `ItemLedgEntry`, `ILE` |
+ | "G/L Entry" | `GLEntry` | `GLE`, `GenLedgEntry` |
+ | "Purchase Header" | `PurchaseHeader` | `PurchHdr`, `PH` |
+ | "Customer" | `Customer` | `Cust`, `C` |
+ | "Vendor" | `Vendor` | `Vend`, `V` |
+
+ ### Simple Parameters - Use Descriptive Names
+
+ | ❌ Wrong | ✅ Correct |
+ |----------|-----------|
+ | `DocNo` | `DocumentNo` |
+ | `Amt` | `Amount` or `TotalAmount` |
+ | `Qty` | `Quantity` |
+ | `Desc` | `Description` |
+ | `Date` | `PostingDate`, `DocumentDate`, etc. |
+
+ - id: manual_binding_for_heavily_used_procedures
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "Events in heavily-used procedures should include manual binding guidance note"
+ action: "internal_warning"
+ rule: "When suggesting an event in a procedure that is heavily used across the codebase for different scenarios, include a note about EventSubscriberInstance property and manual binding considerations."
+ guidance: |
+ **Guidance Rule:** When an event is being added to a procedure that is called from multiple
+ places across the codebase for different business scenarios, the agent should include a note
+ in the comment about manual binding considerations.
+
+ **Rationale:**
+ - Procedures can be called from multiple places for different purposes/scenarios
+ - Event subscribers bound with automatic binding will fire on EVERY call
+ - This may cause unintended side effects if subscriber was meant for a specific scenario
+ - Manual binding gives control over when the subscriber is active
+
+ **Documentation Note Template:**
+ ```
+ 📝 **Note:** This procedure is called from multiple places in the codebase for different scenarios.
+ If you need your event subscriber to fire only in specific scenarios, consider using
+ [Manual Event Subscription Binding](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/properties/devenv-eventsubscriberinstance-property#manual-binding).
+ ```
+
+ - id: ishandled_with_onafter_event_pattern
+ applies_to: ["event-request"]
+ severity: "notice"
+ description: "IsHandled events must preserve OnAfter event execution; OnAfter requests must check for existing IsHandled patterns"
+ action: "internal_warning"
+ rule: "When adding an IsHandled event that skips the whole procedure, or when adding OnAfter event to a procedure with existing IsHandled, ensure OnAfter event executes in ALL conditions (even when IsHandled=true)."
+ guidance: |
+ **Mandatory Rule:** OnBefore and OnAfter procedure events must BOTH execute in all conditions. When IsHandled=true skips the main procedure code, the OnAfter event must still fire.
+
+ **Scenarios Covered:**
+
+ **Scenario 1: Adding IsHandled event when OnAfter exists**
+ When someone requests an IsHandled event to skip the whole procedure AND there is already an OnAfter event (or one should be added), implement using `if not IsHandled then begin ... end;` pattern instead of `if IsHandled then exit;`
+
+ **Scenario 2: Adding OnAfter event (regular event without IsHandled) when IsHandled with exit exists**
+ When someone requests an OnAfter[ProcedureName] event (regular event, no IsHandled parameter) AND there is already an OnBefore with `if IsHandled then exit;`, the agent MUST:
+ 1. Identify the existing `if IsHandled then exit;` pattern
+ 2. Refactor it to `if not IsHandled then begin ... end;` pattern
+ 3. Add the OnAfter event AFTER the `end;` of the if block
+
+ **Example - CORRECT Pattern (OnAfter always fires):**
+ ```al
+ procedure GetReservationQty(var QtyReserved: Decimal; var QtyReservedBase: Decimal)
+ var
+ IsHandled: Boolean;
+ begin
+ IsHandled := false;
+ OnBeforeGetReservationQty(Rec, QtyReserved, QtyReservedBase, IsHandled);
+
+ // ✅ CORRECT - Using if not IsHandled then begin...end pattern
+ if not IsHandled then begin
+ CalcFields("Reserved Quantity", "Reserved Qty. (Base)");
+ QtyReserved := "Reserved Quantity";
+ QtyReservedBase := "Reserved Qty. (Base)";
+ end;
+
+ // ✅ OnAfter ALWAYS executes, regardless of IsHandled value
+ OnAfterGetReservationQty(Rec, QtyReserved, QtyReservedBase);
+ end;
+ ```
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_alternative_suggestions.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_alternative_suggestions.yaml
new file mode 100644
index 000000000..3355ce98e
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_alternative_suggestions.yaml
@@ -0,0 +1,6 @@
+event_request_ishandled_alternative_suggestions:
+ description: "Alternative suggestion rules specific to event-request/ishandled subtype - check if existing functionality can satisfy the request"
+
+ rules: []
+ # No specific alternative suggestion rules for ishandled subtype yet.
+ # Type-level alternative suggestion rules still apply.
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_blockers.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_blockers.yaml
new file mode 100644
index 000000000..6cf8aee09
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_blockers.yaml
@@ -0,0 +1,184 @@
+event_request_ishandled_blockers:
+ description: "Blocker rules specific to event-request/ishandled subtype - auto-reject scenarios for IsHandled events"
+
+ rules:
+ - id: ishandled_in_ondelete_trigger
+ applies_to: ["event-request/ishandled"]
+ severity: "blocking"
+ description: "IsHandled event at the beginning of OnDelete trigger"
+ action: "auto_reject"
+ warning_message: "IsHandled events at the beginning of OnDelete triggers are not allowed. Bypassing delete operations entirely can cause data inconsistency and orphaned records."
+ guidance: |
+ Instead of using an IsHandled event at the beginning of the OnDelete trigger, consider these alternatives:
+
+ **Recommended approaches:**
+ 1. Use standard OnBeforeDelete event (without IsHandled) to add custom logic before deletion
+ 2. Use OnAfterDelete event to handle post-deletion operations
+ 3. If you need to prevent deletion conditionally, use TestField or Error statements before the delete operation
+
+ This approach:
+ - ✅ Allows extensions to add logic before/after deletion
+ - ✅ Maintains data consistency
+ - ✅ Prevents orphaned records
+ - ✅ Follows standard extensibility patterns
+
+ - id: ishandled_unsafe_code_block
+ applies_to: ["event-request/ishandled"]
+ severity: "blocking"
+ description: "IsHandled event with unsafe/critical code block"
+ action: "suggest_alternative"
+ warning_message: "The code block that would be bypassed contains critical operations that cannot be safely skipped."
+ requires_confirmation: true
+ guidance: |
+ **Blocking Rule:** When an IsHandled event is requested, the agent MUST analyze the code block that would be bypassed to identify unsafe or critical operations that cannot be safely skipped.
+
+ **What Makes a Code Block Unsafe to Bypass:**
+
+ 1. **Database Operations Without Compensation**
+ - INSERT/MODIFY/DELETE operations that other code depends on
+ - Operations that maintain referential integrity
+ - Ledger entry creation or posting operations
+
+ 2. **State Management Operations**
+ - Status field updates that control workflow
+ - Lock/unlock operations
+ - Commit statements that other operations depend on
+
+ 3. **Critical Validation Logic**
+ - Security checks or permission validations
+ - Data integrity validations
+ - Business rule enforcements that prevent data corruption
+
+ ---
+
+ **Example 1 - UNSAFE: Bypassing Ledger Entry Creation**
+ Author requests: "Add IsHandled event to skip the G/L posting in PostSalesInvoice"
+ ```al
+ local procedure PostSalesInvoice(var SalesHeader: Record "Sales Header")
+ var
+ GenJnlLine: Record "Gen. Journal Line";
+ begin
+ // ❌ UNSAFE TO BYPASS - These operations maintain financial integrity
+ GenJnlLine.Init();
+ GenJnlLine."Document Type" := GenJnlLine."Document Type"::Invoice;
+ GenJnlLine."Account Type" := GenJnlLine."Account Type"::Customer;
+ GenJnlLine.Amount := SalesHeader."Amount Including VAT";
+ GenJnlPostLine.RunWithCheck(GenJnlLine); // Creates G/L entries
+
+ SalesHeader.Status := SalesHeader.Status::Posted;
+ SalesHeader.Modify();
+ end;
+ ```
+ **Why Unsafe:** Skipping this block would:
+ - Leave financial records incomplete (invoice without G/L entries)
+ - Cause imbalanced ledgers
+ - Break audit trails
+ - Potentially mark document as posted without actual posting
+
+ **Agent Response:**
+ ```
+ ⛔ **Cannot add IsHandled event here.**
+
+ The code block contains critical G/L posting operations that cannot be safely bypassed:
+ - `GenJnlPostLine.RunWithCheck()` creates ledger entries
+ - Status update depends on successful posting
+
+ **Alternatives:**
+ 1. Add a regular event BEFORE posting to modify values
+ 2. Add a regular event AFTER posting for post-processing
+ 3. If you need to change HOW posting works, consider extending the posting codeunit instead
+ ```
+ ---
+ **Example 2 - UNSAFE: Bypassing Referential Integrity Operations**
+ Author requests: "Add IsHandled event to skip dimension validation in PostPurchaseLine"
+
+ ```al
+ local procedure PostPurchaseLine(var PurchLine: Record "Purchase Line")
+ begin
+ // ❌ UNSAFE TO BYPASS - Maintains referential integrity
+ DimensionMgt.CheckDimValuePosting(
+ TableArray, NoArray,
+ PurchLine."Dimension Set ID");
+
+ // Creates dimension set entries linked to the posted document
+ PostedDimSetID := DimensionMgt.CreateDimSetFromJobTaskDim(
+ PurchLine."Job No.",
+ PurchLine."Job Task No.",
+ PurchLine."Dimension Set ID");
+
+ PurchInvLine."Dimension Set ID" := PostedDimSetID;
+ PurchInvLine.Insert();
+ end;
+ ```
+ **Why Unsafe:** Skipping this block would:
+ - Allow invalid dimension combinations to be posted
+ - Create posted documents without proper dimension linkage
+ - Break dimension reporting and analysis
+ ---
+
+ **Agent Analysis Checklist:**
+
+ | Check | Question | If YES |
+ |-------|----------|--------|
+ | Database writes | Does the block INSERT/MODIFY/DELETE records that other code depends on? | ⛔ Unsafe |
+ | Ledger operations | Does it create ledger entries or posting operations? | ⛔ Unsafe |
+ | Status changes | Does it update status fields that control workflow? | ⚠️ Review carefully |
+ | External calls | Does it call external services or APIs? | ⚠️ Review carefully |
+ | Validation | Does it perform security or integrity validations? | ⛔ Unsafe |
+ | Number series | Does it consume document numbers? | ⛔ Unsafe |
+ | Only assignments | Does it only assign/calculate values without side effects? | ✅ Safe |
+
+ ---
+ **Alternative Suggestions When Blocking:**
+ 1. **Use regular event instead** - Allows extensions to ADD logic without BYPASSING
+ 2. **Add event at different location** - Before or after the critical block
+ 3. **Request parameter access** - Add parameters to existing events to modify behavior
+ 4. **Suggest code refactoring** - Extract safe portions into separate procedures
+
+ - id: ishandled_modification_to_existing_event
+ applies_to: ["event-request/ishandled"]
+ severity: "blocking"
+ description: "Request to add IsHandled pattern to an existing event"
+ action: "auto_reject"
+ warning_message: "Adding IsHandled pattern to existing events is not allowed. This fundamentally changes the purpose of the event and can break existing subscribers."
+ guidance: |
+ **⛔ BLOCKER: Cannot Add IsHandled to Existing Events**
+
+ When an author requests to add IsHandled functionality to an event that already exists in the codebase, this must be **automatically rejected**.
+
+ **Why This Is Not Allowed:** Changes the fundamental purpose and behavior of the existing event
+
+ **Detection Criteria:**
+ The agent must check if:
+ 1. An event with the requested name already exists in the codebase
+ 2. The request is to add IsHandled pattern to this existing event
+ 3. The event doesn't currently support IsHandled functionality
+
+ **Agent Response Template:**
+ ```
+ ⛔ **Request Rejected: Cannot Add IsHandled to Existing Event**
+
+ The event '[EventName]' already exists without IsHandled functionality. Adding IsHandled to an existing event is not allowed as it changes the event's purpose.
+
+ **Recommended Alternatives:**
+ 1. **Add a NEW event** with IsHandled before or after the existing event
+ - Example: OnBeforeDoSomething() with IsHandled, followed by the existing OnDoSomething()
+
+ 2. **Use alternatives to IsHandled pattern:**
+ - Add parameters to influence behavior without bypassing
+ - Use conditional event triggering
+ - Refactor code to be more extensible
+
+ 3. **Create a separate request** for one of the above alternatives
+
+ **Example:**
+ If you want to allow bypassing logic around the existing event, create a NEW event:
+ - Existing: `OnAfterCalculateTotal()`
+ - New: `OnBeforeCalculateTotal(var IsHandled: Boolean)` (raised before existing event)
+ ```
+
+ **Important Notes:**
+ - Even if the change seems "minor", it's still not allowed
+ - This rule applies to ALL existing events, regardless of age or usage
+ - The author should create a NEW request with an alternative approach
+ - Point the author to alternative suggestion rules for guidance on better patterns
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_implementation.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_implementation.yaml
new file mode 100644
index 000000000..b7c094788
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_implementation.yaml
@@ -0,0 +1,45 @@
+event_request_ishandled_implementation:
+ description: "Implementation rules specific to event-request/ishandled subtype"
+
+ rules:
+ - id: ishandled_initialization_required
+ applies_to: ["event-request/ishandled"]
+ severity: "blocking"
+ description: "IsHandled parameter must be initialized to false before calling the event"
+ action: "internal_warning"
+ rule: "When adding a new event with IsHandled parameter or adding IsHandled parameter to an existing event, ALWAYS initialize IsHandled to false before the event call."
+ guidance: |
+ **Mandatory Rule:** The IsHandled parameter must ALWAYS be initialized to `false` immediately before calling an event that has an IsHandled parameter.
+
+ **Rationale:**
+ - Ensures predictable behavior - IsHandled starts as false (not handled)
+ - Prevents undefined/garbage values from affecting event flow
+ - Subscribers expect IsHandled to be false when they receive the event
+ - Makes code intent clear - the default is to NOT skip the code block
+ - Follows BC standard patterns for IsHandled events
+
+ **Correct Pattern:**
+ ```al
+ var
+ IsHandled: Boolean;
+ begin
+ IsHandled := false;
+ OnBeforeSalesPost(SalesHeader, IsHandled);
+ if IsHandled then
+ exit;
+
+ // Standard logic here
+ end;
+ ```
+
+ **❌ Wrong - Missing initialization:**
+ ```al
+ var
+ IsHandled: Boolean;
+ begin
+ // ❌ WRONG - IsHandled not initialized!
+ OnBeforeSalesPost(SalesHeader, IsHandled);
+ if IsHandled then
+ exit;
+ end;
+ ```
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_warnings.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_warnings.yaml
new file mode 100644
index 000000000..5bf613e47
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_ishandled_warnings.yaml
@@ -0,0 +1,61 @@
+event_request_ishandled_warnings:
+ description: "Warning rules specific to event-request/ishandled subtype"
+
+ rules:
+ - id: ishandled_in_loops
+ applies_to: ["event-request/ishandled"]
+ severity: "warning"
+ description: "IsHandled event placement inside loops"
+ action: "suggest_alternative"
+ warning_message: "IsHandled events inside loops can cause severe performance degradation as the bypass logic is evaluated on every iteration."
+ guidance: |
+ Instead of placing an IsHandled event inside the loop, consider adding a regular event **before the loop** after all filtering is complete.
+
+ **Recommended approach:**
+ 1. Apply all necessary filters to the record (e.g., `DetailedVendorLedgEntry.SetRange(...)`)
+ 2. Add a regular IntegrationEvent AFTER filtering, before the loop starts
+ 3. Subscribers can then modify the filtered set without bypassing critical code inside the loop
+
+ This approach:
+ - ✅ Avoids repeated IsHandled checks in the loop
+ - ✅ Allows extensions to modify filtering logic
+ - ✅ Maintains performance (event fires once, not per iteration)
+ - ✅ Achieves the same extensibility goal
+
+ - id: ishandled_large_code_block_extraction
+ applies_to: ["event-request/ishandled"]
+ severity: "warning"
+ description: "Large code blocks with IsHandled should be extracted to separate procedures"
+ action: "suggest_alternative"
+ rule: "When adding an IsHandled event that surrounds a larger block of code, consider extracting that code into a separate procedure and adding an OnBefore[ProcedureName] event with IsHandled."
+ guidance: |
+ **Recommended Practice:** When an IsHandled event would wrap a significant block of code (typically 5+ lines with related logic), the agent should evaluate whether extracting that code into a separate procedure would be a better approach.
+
+ **Rationale:**
+ - Improves code organization and readability
+ - Creates a clear, named operation that can be bypassed
+ - Makes the code's intent more explicit
+ - Easier for subscribers to understand what they're bypassing
+ - Better maintainability and testability
+ - Follows single responsibility principle
+
+ **Agent Analysis Required:**
+ Before suggesting extraction, the agent MUST verify:
+ 1. **Code Cohesion:** Are the values/variables in the code block logically connected?
+ 2. **Single Purpose:** Does the code block perform a single, identifiable operation?
+ 3. **Meaningful Name:** Can a descriptive procedure name be assigned that clearly describes the operation?
+ 4. **Parameter Set:** Can the required inputs/outputs be clearly defined as procedure parameters?
+
+ **When NOT to Extract:**
+ - Code block is very small (1-3 lines)
+ - Variables in the block are not logically related
+ - No meaningful procedure name can be derived
+ - Extraction would require passing too many unrelated parameters
+
+ **Decision Criteria for Agent:**
+ | Factor | Extract | Keep Inline |
+ |--------|---------|-------------|
+ | Lines of code | 5+ lines | 1-4 lines |
+ | Variables related | Yes, single purpose | No, mixed concerns |
+ | Meaningful name possible | Yes | No clear name |
+ | Parameter count | Reasonable (1-5) | Too many (6+) |
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_warnings.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_warnings.yaml
new file mode 100644
index 000000000..c94196822
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/event_request_warnings.yaml
@@ -0,0 +1,106 @@
+event_request_warnings:
+ description: "Warning rules for event-request type - require justification or clarification"
+
+ rules:
+ - id: recordref_parameter_in_event
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "RecordRef parameter used in event signature"
+ action: "request_clarification"
+ rejection_reason: "RecordRef parameters should not be added to events without good reason."
+ guidance: |
+      RecordRef is a dynamic reference type that should not be used as event parameters without good justification. Potential risks include:
+ - Reduces type safety (subscribers can pass any record type)
+ - Makes subscribers more complex (requires dynamic type casting)
+ - In some cases allows attackers to access critical/private data and perform unauthorized operations
+
+ **Required Justification:**
+ 1. Explain exactly how RecordRef will be used by subscribers
+ 2. Justify why record parameter is insufficient
+      3. Explain what other alternatives you considered before choosing RecordRef
+
+ - id: xrec_parameter_in_event
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "xRec parameter used in event signature"
+ action: "request_justification"
+ rejection_note: "xRec parameter requires detailed justification as its behavior depends on the calling context."
+ guidance: |
+ xRec is a special variable that contains the previous values of a record before modification.
+ Using xRec in event parameters requires careful consideration because:
+
+ **Context Dependency:**
+ - xRec behavior varies significantly depending on where it's used (pages vs other objects)
+ - In table or codeunit: Contains the record values before the current operation
+ - In page: May contain unexpected or stale data depending on calling context
+ - Can be unreliable if called from different contexts than expected
+
+ **Required Justification:**
+ 1. Explain exactly how xRec will be used by subscribers
+ 2. Justify why current record (Rec) is insufficient
+ 3. Explain why previous values are specifically needed
+ 4. Confirm understanding of xRec behavior in the specific context
+
+ **Risk:** xRec may contain unexpected or stale data depending on the calling context,
+ leading to unreliable subscriber behavior.
+
+ - id: events_in_loops
+ applies_to: ["event-request"]
+ severity: "warning"
+ description: "Event placement inside loops (performance impact)"
+ action: "request_justification"
+ warning_message: "The proposed event location is inside a loop, which may cause performance issues if subscribers perform heavy operations."
+ requires_confirmation: true
+ guidance: |
+ Events placed inside loops can cause significant performance issues because:
+ - The event fires on EVERY iteration of the loop
+ - Subscribers may perform database operations, calculations, or external calls
+ - Performance degrades linearly (or worse) with the number of iterations
+ - Can cause timeouts on large datasets
+
+ **Detection:**
+ - Event location is inside `repeat...until`, `while...do`, `for...to`, or `foreach` loops
+ - Event location is inside a `FindSet()` or `FindFirst()` loop pattern
+
+ **Performance Impact Example:**
+ ```al
+ // ❌ WARNING - Event fires for EVERY line (could be 1000+ times)
+ if SalesLine.FindSet() then
+ repeat
+ OnProcessSalesLine(SalesLine); // Fires on every iteration!
+ ProcessLine(SalesLine);
+ until SalesLine.Next() = 0;
+ ```
+
+ **Better Alternatives:**
+
+ **Option 1 - Event BEFORE the loop (modify filters/setup):**
+ ```al
+ // ✅ BETTER - Event fires ONCE before loop starts
+ OnBeforeProcessSalesLines(SalesHeader, SalesLine);
+ if SalesLine.FindSet() then
+ repeat
+ ProcessLine(SalesLine);
+ until SalesLine.Next() = 0;
+ ```
+
+ **Option 2 - Event AFTER the loop (post-processing):**
+ ```al
+ // ✅ BETTER - Event fires ONCE after all lines processed
+ if SalesLine.FindSet() then
+ repeat
+ ProcessLine(SalesLine);
+ until SalesLine.Next() = 0;
+ OnAfterProcessSalesLines(SalesHeader, TotalAmount);
+ ```
+
+ **When Events in Loops May Be Acceptable:**
+ - Small, guaranteed dataset (e.g., max 5-10 iterations)
+ - Subscribers are expected to perform only lightweight operations
+ - No alternative location achieves the same extensibility goal
+ - Author explicitly justifies the need
+
+ **Required Justification from Author:**
+ 1. Why must the event fire on each iteration?
+ 2. What operations will subscribers perform?
+ 3. Have you considered alternatives (before/after loop)?
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/general_blockers.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/general_blockers.yaml
new file mode 100644
index 000000000..49f75a53f
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/general_blockers.yaml
@@ -0,0 +1,220 @@
+general_blockers:
+ description: "Blocker rules that apply to ALL request types - auto-reject scenarios"
+
+ rules:
+ - id: obsolete_code
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "Target code marked as obsolete or inside NOT CLEAN region"
+ check_patterns: ["[Obsolete", "ObsoleteState", "ObsoleteReason", "NOT CLEAN"]
+ action: "auto_reject"
+ rejection_reason: "The target code is marked as obsolete and cannot be modified"
+ guidance: |
+ Code marked with `[Obsolete]` attribute or inside `// NOT CLEAN` regions is deprecated
+ and should not be modified. Such code is scheduled for removal or replacement.
+
+ **Example 1 - Obsolete procedure:**
+ ```al
+ [Obsolete('Use GetCustomerBalance instead', '25.0')]
+ procedure GetBalance(CustomerNo: Code[20]): Decimal
+ begin
+ // This procedure is obsolete - do not add events here
+ exit(Customer.GetBalanceLCY());
+ end;
+ ```
+
+ **Example 2 - NOT CLEAN region:**
+ ```al
+ #if not CLEAN28
+ local procedure LegacyCalculation(var Amount: Decimal)
+ begin
+ // This procedure is obsolete - do not add events here
+ Amount := Amount * 1.25;
+ end;
+ #endif
+ ```
+
+ - id: public_signature_change
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "Requires public procedure signature change"
+ action: "auto_reject"
+ rejection_reason: "Implementation would require changing a public procedure signature, which may break existing code"
+ exceptions: "Adding name to existing exit parameter to public procedures is allowed"
+ guidance: |
+ Changing the signature of public procedures is not allowed as it may break existing code.
+ Extensions and other dependent code rely on the exact signature of public procedures.
+
+ **What is NOT Allowed:**
+ - Adding new parameters
+ - Removing existing parameters
+ - Changing parameter types
+ - Changing parameter order
+ - Changing return type
+ - Adding/removing `var` modifier on parameters
+
+ **What IS Allowed:**
+ - Adding a name to an unnamed return value (non-breaking change)
+
+ **Example 1 - NOT Allowed (adding parameter):**
+ ```al
+ // Before:
+ procedure PostSalesDocument(var SalesHeader: Record "Sales Header")
+ begin
+ // ...
+ end;
+
+ // ❌ NOT Allowed - adding new parameter breaks existing callers:
+ procedure PostSalesDocument(var SalesHeader: Record "Sales Header"; Preview: Boolean)
+ begin
+ // ...
+ end;
+ ```
+
+ **Example 2 - NOT Allowed (changing parameter type):**
+ ```al
+ // Before:
+ procedure GetCustomerName(CustomerNo: Code[20]): Text[100]
+ begin
+ // ...
+ end;
+
+ // ❌ NOT Allowed - changing Code[20] to Code[50] breaks existing callers:
+ procedure GetCustomerName(CustomerNo: Code[50]): Text[100]
+ begin
+ // ...
+ end;
+ ```
+
+ **Example 3 - Allowed (naming return value):**
+ ```al
+ // Before:
+ procedure IsInvoiceDocType(): Boolean
+ begin
+ exit(true);
+ end;
+
+ // ✅ Allowed - adding name to return value is non-breaking:
+ procedure IsInvoiceDocType() Result: Boolean
+ begin
+ Result := true;
+ end;
+ ```
+
+ - id: sensitive_data_exposure
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "Implementation would expose sensitive or private data"
+ action: "auto_reject"
+ rejection_reason: "The implementation would expose sensitive/private data that should not be made accessible"
+ guidance: |
+ This rule applies to any request that would result in exposing sensitive data:
+ - Making a procedure public that accesses/handles sensitive data
+ - Adding event parameters that expose private/sensitive information
+ - Changing access levels on variables containing sensitive data
+
+ **Examples of Sensitive Data:**
+ - DotNet components
+ - SecretText variables
+ - Personal identifiable information (PII)
+ - Financial credentials or tokens
+ - Password hashes or authentication data
+ - Internal system configuration
+ - Audit trail data that should remain protected
+
+ - id: multi_change_policy
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "All-or-nothing rule for requests with multiple changes"
+ rejection_reason: "If ANY proposed change is blocked or infeasible, REJECT ALL changes in the request"
+ guidance: |
+ When a request contains multiple changes (e.g., multiple events, multiple procedure modifications),
+ all changes must be feasible for the request to be approved. If any single change is blocked,
+ the entire request must be rejected.
+
+ **Rationale:**
+ - Multiple changes in a single request are often interdependent
+ - Partial implementation may leave the system in an inconsistent state
+ - The author expects all requested changes to be implemented together
+ - Prevents confusion about which parts were implemented
+
+ **How to Detect Multi-Change Requests:**
+ - Request mentions multiple procedures/triggers
+ - Request asks for events in multiple locations
+ - Request includes numbered items (1, 2, 3...) or bullet points
+ - Request uses "and" to connect multiple actions
+
+ **Example - Multi-Change Request:**
+ ```
+ Please add the following events:
+ 1. OnBeforePost in Codeunit 80 "Sales-Post"
+ 2. OnAfterValidate in Table 36 "Sales Header"
+ 3. OnBeforeInsert in Table 37 "Sales Line"
+ ```
+
+ **If change #2 is blocked (e.g., obsolete code):**
+ - ❌ Do NOT implement changes #1 and #3 only
+ - ✅ Reject the entire request
+ - ✅ Explain which change is blocked and why
+ - ✅ Ask author to submit separate requests or modify the blocked change
+
+ - id: potential_security_risk
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "Potential exposure of sensitive data or security mechanisms based on keywords"
+ action: "include_human"
+ rejection_reason: "The requested change involves code handling sensitive security data. This requires manual security review."
+ guidance: |
+ **Security Risk Assessment:**
+ Procedures or events containing keywords related to security, authentication, or sensitive data require careful review.
+
+ **Risky Keywords:**
+ - password, secret, token, key
+ - permission, user, login, auth
+ - credential, encryption
+
+ **Validation Steps:**
+ 1. Check if the code exposes raw secrets or credentials.
+ 2. Check if the change allows bypassing security checks (e.g., permission validation).
+ 3. If the code is purely for logic (e.g., "ValidateUserStatus") and doesn't expose secrets, it MAY be safe.
+ 4. If in doubt, flag for manual review (apply 'agent-not-processable' label).
+
+ - id: entity_not_found
+ applies_to: ["all"]
+ severity: "blocking"
+ description: "Target procedure/trigger/variable not found in expected location"
+ action: "request_clarification"
+ rejection_reason: "The specified procedure/trigger/variable was not found in the codebase"
+ exceptions: "In case of adding NEW code (e.g., an event) inside a trigger, it is acceptable that the trigger does not yet exist"
+ guidance: |
+ If the agent cannot locate the specified procedure, trigger, or variable in the expected
+ object or location, it must request clarification from the author.
+
+ **Possible Reasons for Not Found:**
+ - Typographical errors in names
+ - Incorrect object type or ID specified
+ - The codebase has changed since the request was made
+ - The requested entity does not exist
+
+ **Exception - Triggers That Don't Exist Yet:**
+ When the request is to add a new event inside a trigger (e.g., OnInsert, OnAfterGetRecord, OnValidate, OnOpenPage, etc.),
+ it is acceptable if the trigger does not currently exist in the object. The agent should:
+ - Create the trigger with the requested event inside it
+ - This is a valid implementation pattern in AL
+
+ **Example - Acceptable (trigger doesn't exist):**
+ ```al
+ // Request: "Add OnBeforeInsert event in Table 18 Customer"
+ // Table 18 currently has no OnInsert trigger
+
+ // ✅ Agent creates the trigger with the event:
+ trigger OnInsert()
+ begin
+ OnBeforeInsertCustomer(Rec);
+ end;
+ ```
+
+ **Agent Actions (when NOT an exception):**
+ 1. Apply `missing-info` label
+ 2. Do NOT apply team label or `agent-analyzed`
+ 3. Post a comment asking the author to verify the target location and names
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/request_for_external_implementation.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/request_for_external_implementation.yaml
new file mode 100644
index 000000000..ff1f568c9
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/codebase-rules/request_for_external_implementation.yaml
@@ -0,0 +1,94 @@
+request_for_external_implementation:
+ description: "Implementation rules specific to request-for-external type - ensure alignment with coding standards"
+
+ rules:
+ - id: procedure_access_level
+ applies_to: ["request-for-external"]
+ severity: "notice"
+ description: "Making procedure publically accessible"
+ action: "internal_warning"
+ guidance: |
+ **Implementation:** Making a procedure externally accessible implies:
+ - Removing `local` scope modifier (if present) or
+ - Removing `[Scope('OnPrem')]` attribute (if present)
+
+ **Example 1 - Removing local scope:**
+ ```al
+ // Before:
+ local procedure CalculateDiscount(var SalesLine: Record "Sales Line")
+ begin
+ // Internal logic
+ end;
+
+ // After:
+ procedure CalculateDiscount(var SalesLine: Record "Sales Line")
+ begin
+ // Now accessible externally
+ end;
+ ```
+
+ **Example 2 - Removing OnPrem scope:**
+ ```al
+ // Before:
+ [Scope('OnPrem')]
+ procedure ValidateCustomer(CustomerNo: Code[20])
+ begin
+ // OnPrem only logic
+ end;
+
+ // After:
+ procedure ValidateCustomer(CustomerNo: Code[20])
+ begin
+ // Now accessible externally
+ end;
+ ```
+
+ - id: variable_access_level
+ applies_to: ["request-for-external"]
+ severity: "notice"
+ description: "Variable access level change from var to protected var"
+ action: "internal_warning"
+ guidance: |
+ When changing variable access level from 'var' to 'protected var', ensure proper code structure:
+
+ **Structure Requirements:**
+ 1. If object currently has NO 'protected var' section, create it AFTER all 'var' declarations
+ 2. Place all protected var declarations in the dedicated 'protected var' section
+ 3. Proper order: var declarations → protected var section → procedure declarations
+
+ **Example - Correct Structure:**
+ ```al
+ codeunit 50100 "Example Codeunit"
+ {
+ var
+ ExistingGlobalVar: Text[100];
+ AnotherGlobalVar: Integer;
+
+ protected var
+ NewlyProtectedVar1: Text[50];
+ NewlyProtectedVar2: Boolean;
+
+ procedure MyProcedure()
+ begin
+ // ...
+ end;
+ }
+ ```
+
+ **Common Mistake - Don't do this:**
+ ```al
+ // ❌ Wrong - mixing protected var with var
+ var
+ ExistingGlobalVar: Text[100];
+ protected var
+ NewlyProtectedVar1: Text[50];
+ var
+ AnotherGlobalVar: Integer; // Wrong location
+ ```
+
+ **Migration Steps:**
+ 1. Identify all variables that need to be protected
+ 2. Check if object already has 'protected var' section
+ 3. If no protected var section exists: Add 'protected var' section after all 'var' declarations
+ 4. Move/add variables to protected var section
+ 5. Ensure no var declarations appear after protected var section
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/comment-templates/comment_templates.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/comment-templates/comment_templates.yaml
new file mode 100644
index 000000000..7ed8e067a
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/comment-templates/comment_templates.yaml
@@ -0,0 +1,490 @@
+comment_templates:
+
+ # =============================================================================
+ # APPROVAL COMMENTS (Success Path)
+ # =============================================================================
+ #
+ # ⚠️ CRITICAL RULES FOR APPROVAL COMMENTS:
+ # 1. Follow the template EXACTLY - do not add extra sections
+ # 2. NEVER add "Analysis Summary", "Team Assignment", or similar metadata
+ # 3. NEVER add "Correction Required" or explain differences from author's proposal
+ # 4. NEVER add footer like "This issue has been analyzed by Argus Agent"
+ # 5. NEVER add "Thank you" messages or greetings
+ # 6. Show ONLY what will be implemented - no explanations of WHY
+ # 7. If request passed validation, just show the final implementation code
+ # =============================================================================
+
+ approved_event_request:
+ description: "Posted when event-request issue is approved"
+ applies_to: ["event-request"]
+ template: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ {implementation_description}
+
+ {code_location_description}
+
+ ```al
+ {code_with_event_call}
+ ```
+
+ **Event Publisher (add at the end of the codeunit with other events):**
+
+ ```al
+ {event_publisher_code}
+ ```
+
+ ---
+
+ **File:** `{filename_only}`
+
+ placeholders:
+ implementation_description:
+ description: "Brief description of what needs to be added/changed"
+ example: "Add a new integration event in the `Code()` procedure of Codeunit 5880 \"Phys. Invt. Order-Finish\" before `TempInvtOrderTrackingBuffer.Modify()` in the loop."
+
+ code_location_description:
+ description: "Description of where in the code the change goes"
+ example: "**Location in `Code()` procedure (around line 149):**"
+
+ code_with_event_call:
+ description: "Code snippet showing event call in context"
+ example: |
+ TempInvtOrderTrackingBuffer.Reset();
+ if TempInvtOrderTrackingBuffer.Find('-') then
+ repeat
+ TempInvtOrderTrackingBuffer."Qty. To Transfer" :=
+ TempInvtOrderTrackingBuffer."Qty. Recorded (Base)" - TempInvtOrderTrackingBuffer."Qty. Expected (Base)";
+ OnCodeOnBeforeModifyInvtOrderTracking(TempInvtOrderTrackingBuffer); // <-- New event
+ TempInvtOrderTrackingBuffer.Modify();
+ until TempInvtOrderTrackingBuffer.Next() = 0;
+
+ event_publisher_code:
+ description: "The event publisher procedure to add"
+ example: |
+ [IntegrationEvent(false, false)]
+ local procedure OnCodeOnBeforeModifyInvtOrderTracking(var TempInvtOrderTracking: Record "Invt. Order Tracking" temporary)
+ begin
+ end;
+
+ filename_only:
+ description: "Just the filename without full path (e.g., PhysInvtOrderFinish.Codeunit.al)"
+ format: "{ObjectName}.{ObjectType}.al"
+ example: "PhysInvtOrderFinish.Codeunit.al"
+
+ approved_request_for_external:
+ description: "Posted when request-for-external issue is approved"
+ applies_to: ["request-for-external"]
+ template: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ {implementation_description}
+
+ **Current code:**
+
+ ```al
+ {current_code}
+ ```
+
+ **Proposed change:**
+
+ ```al
+ {proposed_code}
+ ```
+
+ ---
+
+ **File:** `{filename_only}`
+
+ approved_enum_request:
+ description: "Posted when enum-request issue is approved"
+ applies_to: ["enum-request"]
+ template: |
+ ✅ **Analysis complete - approved for implementation**
+
+ ---
+
+ **Implementation:**
+
+ {implementation_description}
+
+ ```al
+ {enum_code}
+ ```
+
+ ---
+
+ **File:** `{filename_only}`
+
+ # =============================================================================
+ # ALREADY IMPLEMENTED COMMENTS
+ # =============================================================================
+
+ already_implemented:
+ description: "Posted when ALL requested changes already exist in the codebase (100% match)"
+ applies_to: ["all"]
+ template: |
+ ✅ **Good news! This is already implemented in the codebase.**
+
+ The requested change already exists:
+
+ **Existing implementation:**
+
+ ```al
+ {existing_code}
+ ```
+
+ **File:** `{filename_only}`
+
+ You can use this existing {implementation_type} in your extension. Closing this issue as the request is already satisfied.
+
+ placeholders:
+ existing_code:
+ description: "The existing code that matches the request"
+ example: |
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(var PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+
+ filename_only:
+ description: "Just the filename without full path"
+ example: "PurchPostPrint.Codeunit.al"
+
+ implementation_type:
+ description: "Type of implementation (event, procedure, enum value, etc.)"
+ examples:
+ - "event"
+ - "procedure"
+ - "enum value"
+
+ partial_already_implemented:
+ description: "Posted when some requests already exist and others are approved for implementation"
+ applies_to: ["all"]
+ template: |
+ ✅ **Analysis complete - partial implementation already exists**
+
+ ---
+
+ **Already Implemented (no changes needed):**
+
+ {already_implemented_section}
+
+ ---
+
+ **Approved for Implementation:**
+
+ {pending_implementation_section}
+
+ ---
+
+ **Note:** This issue will remain open for the pending implementation(s). The existing code above is already available for use in your extension.
+
+ placeholders:
+ already_implemented_section:
+ description: "Section showing all changes that already exist"
+ format: |
+ For each already_implemented_change:
+
+ **{n}. {description}**
+
+ ```al
+ {existing_code}
+ ```
+
+ **File:** `{filename_only}`
+ example: |
+ **1. Add var to PurchaseHeader parameter**
+
+ ```al
+ [IntegrationEvent(false, false)]
+ local procedure OnAfterConfirmPost(var PurchaseHeader: Record "Purchase Header"; var IsHandled: Boolean)
+ begin
+ end;
+ ```
+
+ **File:** `PurchPostPrint.Codeunit.al`
+
+ pending_implementation_section:
+ description: "Section showing all changes approved for implementation (follows standard approval format)"
+ format: |
+ For each pending_change:
+
+ **{n}. {implementation_description}**
+
+ {code_location_description}
+
+ ```al
+ {code_with_event_call}
+ ```
+
+ **Event Publisher:**
+
+ ```al
+ {event_publisher_code}
+ ```
+
+ **File:** `{filename_only}`
+
+ # =============================================================================
+ # MISSING INFORMATION COMMENTS
+ # =============================================================================
+
+ missing_info_requirements:
+ description: "Posted when issue fails requirements validation"
+ applies_to: ["all"]
+ template: |
+ Hi @{author_name},
+
+ Thanks for submitting this extensibility request. To help us process it efficiently, please update the issue to include the required information.
+
+ 📚 [Extensibility Guidelines](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/devenv-contribute-extensibility)
+
+ **Missing Information:**
+
+ {missing_items_list}
+
+ Please update the issue with the missing details. We'll automatically re-analyze once updated.
+
+ placeholders:
+ author_name:
+ description: "GitHub username of issue author"
+ missing_items_list:
+ description: "Bullet list of missing requirements"
+ format: "- **{requirement_name}**: {explanation}"
+
+
+ missing_info_procedure_not_found:
+ description: "Posted when target procedure/trigger not found in codebase"
+ applies_to: ["event-request"]
+ template: |
+ Hi @{author_name},
+
+ We couldn't locate the specified procedure/trigger in the codebase.
+
+ **Target:** `{target_procedure}` in `{target_object}`
+
+ {suggestion_section}
+
+ Please verify the procedure/trigger name and update the issue. We'll automatically re-analyze once updated.
+
+ placeholders:
+ target_procedure:
+ description: "The procedure name that wasn't found"
+ target_object:
+ description: "The object where procedure was expected"
+ suggestion_section:
+ description: "Fuzzy match suggestions if available"
+ format: |
+ **Did you mean one of these?**
+ {fuzzy_matches}
+
+ missing_info_multiple_types:
+ description: "Posted when issue contains multiple distinct request types"
+ applies_to: ["all"]
+ template: |
+ Hi @{author_name},
+
+ This issue contains multiple distinct request types:
+
+ {detected_types_list}
+
+ Please create separate issues for each type to ensure proper processing and tracking.
+
+ We'll automatically process each issue once they are created separately.
+
+ # =============================================================================
+ # ALTERNATIVE SUGGESTION COMMENTS
+ # =============================================================================
+
+ suggest_alternative_event:
+ description: "Posted when similar event already exists"
+ applies_to: ["event-request"]
+ template: |
+ Hi @{author_name},
+
+ A similar event already exists in this location that may serve your purpose:
+
+ **Existing event:** `{existing_event_name}`
+
+ ```al
+ {existing_event_signature}
+ ```
+
+ **Location:** `{filename_only}`
+
+ Would this existing event work for your scenario? If not, please explain why a new event is needed.
+
+ Please respond with confirmation or additional details. We'll re-analyze once you respond.
+
+ suggest_alternative_approach:
+ description: "Posted when alternative approach is recommended"
+ applies_to: ["event-request"]
+ template: |
+ Hi @{author_name},
+
+ {alternative_explanation}
+
+ **Recommended approach:**
+
+ {recommended_approach}
+
+ Please confirm if this alternative approach works for your scenario, or explain why the original approach is required.
+
+ suggest_alternative_location:
+ description: "Posted when agent finds the solution should be in a different location than requested"
+ applies_to: ["event-request", "enum-request", "request-for-external"]
+ template: |
+ Hi @{author_name},
+
+ Thank you for your extensibility request. After analyzing the codebase, I found that the functionality you're looking to extend is actually located in a different place than specified.
+
+ **Your request:** {original_request_summary}
+
+ **What I found:** {analysis_finding}
+
+ **Suggested alternative:**
+
+ {suggested_implementation}
+
+ ```al
+ {suggested_code}
+ ```
+
+ **File:** `{filename_only}`
+
+ {manual_binding_note}
+
+      Please confirm if this alternative approach works for your scenario,
+      or let us know if you specifically need the change exactly as requested, along with the reasons why.
+
+ placeholders:
+ original_request_summary:
+ description: "Brief summary of what the author originally requested"
+ example: "Add OnAfterIsCancellationSupported event in TestPurchaseLines procedure of Codeunit 1313"
+ analysis_finding:
+ description: "Explanation of what the codebase analysis revealed"
+ example: "The `IsCancellationSupported()` logic is actually implemented in Table 123 \"Purch. Inv. Line\", which is called from `TestPurchaseLines`."
+ suggested_implementation:
+ description: "Description of the recommended implementation"
+ example: "Add the event in Table 123 \"Purch. Inv. Line\" in the `IsCancellationSupported()` procedure"
+ suggested_code:
+ description: "Code showing the suggested implementation"
+ filename_only:
+ description: "Filename where the suggested implementation would go"
+ manual_binding_note:
+ description: "Optional note about manual binding for public procedures"
+ format: |
+ 📝 **Note:** This event is in a public procedure that may be called from multiple places. If you need to control when your event subscriber fires, consider using [Manual Event Subscription Binding](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/properties/devenv-eventsubscriberinstance-property#manual-binding).
+ when_to_include: "Include when target procedure is public (not local or internal)"
+
+ # =============================================================================
+ # REJECTION COMMENT
+ # =============================================================================
+
+ rejected_request:
+ description: "Posted when request is rejected due to blocking rules"
+ applies_to: ["all"]
+ template: |
+ This request cannot be implemented.
+
+ **Reason:** {rejection_reason}
+
+ placeholders:
+ rejection_reason:
+ description: "Brief explanation of why the request was rejected"
+ examples:
+ - "The target code is marked as obsolete and cannot be modified"
+ - "Implementation would require changing a public procedure signature, which may break existing code"
+ - "The implementation would expose sensitive/private data that should not be made accessible"
+
+ # =============================================================================
+ # STALE ISSUE COMMENT
+ # =============================================================================
+
+ stale_issue_closure:
+ description: "Posted when closing stale issue (30+ days with missing-info)"
+ applies_to: ["all"]
+ template: |
+ No activities, will be closed
+
+# =============================================================================
+# FORMATTING RULES
+# =============================================================================
+
+formatting_rules:
+
+ filename_format:
+ description: "How to format file references in comments"
+ rule: "Use only the filename, not the full path"
+ format: "{ObjectName}.{ObjectType}.al"
+ examples:
+ correct: "PhysInvtOrderFinish.Codeunit.al"
+ incorrect: "App\\Layers\\W1\\BaseApp\\Inventory\\Counting\\Document\\PhysInvtOrderFinish.Codeunit.al"
+
+ code_blocks:
+ description: "How to format code in comments"
+ language: "al"
+ format: |
+ ```al
+ {code}
+ ```
+
+ headers:
+ description: "Use bold markdown for section headers"
+ format: "**{header_text}:**"
+
+ status_icons:
+ approved: "✅"
+ missing_info: "⚠️"
+ blocked: "❌"
+ stale: "⏱️"
+ reprocessing: "🔄"
+
+ manual_binding_note:
+ description: "When to include manual binding guidance in comments"
+ rule: "Include this note when adding events to procedures that are heavily used across the codebase for different scenarios"
+ format: |
+ 📝 **Note:** This procedure is called from multiple places in the codebase for different scenarios. If you need your event subscriber to fire only in specific scenarios, consider using [Manual Event Subscription Binding](https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/properties/devenv-eventsubscriberinstance-property#manual-binding).
+ when_to_include:
+ - "Procedure is called from multiple places in the codebase"
+ - "Procedure serves different business scenarios (e.g., used by both Sales and Purchase flows)"
+ - "Procedure is a utility/helper function used broadly"
+ - "Public procedures that can be called from extensions"
+ - "The event doesn't have inherent filtering (e.g., specific document type parameter)"
+ when_to_exclude:
+ - "Procedure has very limited/specific usage (only called from one logical flow)"
+ - "Event parameters naturally filter the scenario (e.g., specific document type parameter)"
+ - "Event is in a page/report trigger where context is clear"
+ - "Procedure is only relevant to one business process"
+
+# =============================================================================
+# EXCLUDED ELEMENTS
+# =============================================================================
+
+excluded_from_comments:
+ description: "Elements that should NOT be included in approval comments"
+ items:
+ - "Thank you for your extensibility request. The requested event is feasible and has been analyzed."
+ - "Team: {team_name}"
+ - "Full file paths (use filename only)"
+ - "Internal implementation rules or guidelines (e.g., 'new parameters must be added at the end')"
+ - "Notes about BC extensibility guidelines or best practices"
+ - "Any rule explanations - rules should be applied silently, not explained in comments"
+ - "Suggestions for additional changes outside the scope of the request (e.g., 'the same change could be applied to other similar objects')"
+ - "Recommendations to extend the request to other files, objects, or pages not explicitly mentioned by the author"
+
+ rationale:
+ thank_you_message: "Redundant - the approval header is sufficient"
+ team_assignment: "Team is indicated by label, not needed in comment"
+ full_paths: "Clutters the comment - filename is sufficient for identification"
+ internal_rules: "Implementation rules are for agent behavior, not user-facing documentation. Apply rules silently without explaining them in comments."
+ out_of_scope_suggestions: "Agent must only address exactly what is requested. Never suggest additional changes to other objects, files, or pages that were not explicitly asked for by the author. The goal is to satisfy the request as stated, not to expand its scope."
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/enum_request_requirements.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/enum_request_requirements.yaml
new file mode 100644
index 000000000..7a4fc21c5
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/enum_request_requirements.yaml
@@ -0,0 +1,51 @@
+# Enum Request Specific Requirements Configuration
+# Defines ADDITIONAL requirements specific to enum-request type issues
+# NOTE: General requirements (title, description, meaningful justification) are defined in ea_config_general_requirements.yaml
+# Version: 1.0
+
+enum_request:
+ enabled: true
+ description: "Requests to add new enum values or create new enums - ADDITIONAL requirements beyond general ones"
+ detection_keywords: ["enum", "option", "option value", "enumextension"]
+ max_iterations: 3
+
+ subtypes:
+ new_enum:
+ enabled: true
+ description: "Request to create entirely new enum"
+
+ additional_requirements:
+ - id: enum_definition
+ name: "Enum Definition"
+ description: "Complete enum definition with all proposed values and captions"
+ mandatory: true
+ validation_hints: ["enum", "definition", "values", "caption", "ordinal"]
+
+ - id: usage_context
+ name: "Usage Context"
+ description: "Where and how the enum will be used (tables, pages, reports)"
+ mandatory: true
+ validation_hints: ["usage", "context", "table", "field", "page"]
+
+ extend_enum:
+ enabled: true
+ description: "Request to add values to existing enum"
+
+ additional_requirements:
+ - id: target_enum
+ name: "Target Enum"
+ description: "Exact enum name/ID and proposed new values with captions"
+ mandatory: true
+ validation_hints: ["enum", "name", "id", "value", "caption"]
+
+ - id: compatibility_check
+ name: "Compatibility Check"
+ description: "Confirmation that new values won't break existing functionality"
+ mandatory: true
+ validation_hints: ["compatibility", "existing", "functionality", "break", "impact"]
+
+# Validation Messages
+validation_messages:
+ insufficient_detail: "The provided information is insufficient for processing. Please add:"
+ #requirement_reference: "For more details, see our Extensible Enums documentation: {link}"
+ resubmit_after_update: "Please update the issue with the missing details. We'll automatically re-analyze once updated."
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/event_request_requirements.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/event_request_requirements.yaml
new file mode 100644
index 000000000..191b4ee98
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/event_request_requirements.yaml
@@ -0,0 +1,135 @@
+# Event Request Specific Requirements Configuration
+# Defines ADDITIONAL requirements specific to event-request type issues
+# NOTE: General requirements (title, description, meaningful justification) are defined in ea_config_general_requirements.yaml
+# Version: 1.2
+
+event_request:
+ enabled: true
+ description: "Event publisher/subscriber requests - ADDITIONAL requirements beyond general ones"
+ detection_keywords: ["publisher", "subscriber", "IntegrationEvent", "event", "OnBefore", "OnAfter"]
+
+ # ⚠️ CRITICAL RULE: Agent must NEVER suggest IsHandled unless EXPLICITLY requested
+ ishandled_detection_rule:
+ description: "IsHandled sub-type is ONLY detected when author EXPLICITLY requests it"
+ explicit_keywords_required: ["IsHandled", "bypass", "skip", "prevent execution", "handled parameter"]
+ behavior: |
+ - If NONE of the explicit keywords are present → classify as "regular" sub-type
+ - Agent must NEVER infer or suggest IsHandled on its own
+ - Regular events do NOT include IsHandled parameter
+ - This prevents escalating request scope beyond what author asked
+
+ # Common requirements for ALL event request subtypes
+ common_requirements:
+ - id: proposed_code_change
+ name: "Proposed Code Change"
+ description: "Exact code addition/modification with surrounding context (5-10 lines)"
+ mandatory: true
+ exceptions:
+ - condition: "Adding parameter to existing event"
+ description: "When request is to add a parameter to an existing event"
+ mandatory: false
+ requirements:
+ - "Clearly specify parameter name"
+ - "Identify target event name"
+ - "Specify exact location (codeunit/procedure name and number)"
+ - "Optional: Specify parameter type if not obvious from context"
+ example: "Add parameter 'ServShipmentNo' to existing event 'OnAfterFinalizePostingOnBeforeCommit' in procedure PostWithLines of Codeunit 5980 'Service-Post'"
+ rationale: "For parameter additions, the implementation is straightforward - add the parameter to both the event declaration and invocation. Explicit code is helpful but not mandatory if the request is clear."
+ validation_hints: ["code", "procedure", "trigger", "event", "publisher", "context"]
+ #documentation_link: "{link}"
+
+ subtypes:
+ ishandled:
+ enabled: true
+ description: "IsHandled event requests with bypass functionality - ONLY when EXPLICITLY requested"
+ detection_patterns: ["IsHandled", "is handled", "Handled", "bypass", "skip", "prevent execution"]
+ detection_rule: "MUST be EXPLICITLY requested by author - agent cannot suggest or infer"
+ max_iterations: 5
+
+ additional_requirements:
+ - id: invocation_example
+ name: "Invocation Example (recommended)"
+ description: "How event will be raised and sample subscriber implementation"
+ mandatory: false
+ validation_hints: ["invoke", "subscriber", "example", "sample", "usage"]
+
+ - id: alternatives_evaluated
+ name: "Alternatives Evaluated"
+ description: "Which existing events/patterns were tried and why insufficient"
+ mandatory: true
+ validation_hints: ["alternative", "tried", "existing", "evaluated", "why not"]
+
+ - id: justification_for_ishandled
+ name: "Justification for IsHandled"
+ description: "Why standard event/redesign/contribution isn't feasible"
+ mandatory: true
+ validation_hints: ["justify", "why", "ishandled", "necessary", "bypass"]
+ quality_standards:
+ reject_generic_statements: true
+ unacceptable_patterns:
+ - "we need to bypass"
+ - "customers requested"
+ - "important for business"
+ - "flexibility required"
+ requires_specifics:
+ - "Specific technical reason why standard event insufficient"
+ - "Concrete business logic that must be skipped/replaced"
+ - "Exact scenario where bypass is essential"
+ - "Why alternative approaches won't work"
+ good_example: "We must bypass the standard credit limit check because our vertical industry (healthcare) requires real-time insurance pre-authorization instead. The standard check would block orders that are actually covered by insurance, but there's no way to inject our authorization logic before the credit check fails."
+ bad_example: "We need IsHandled to implement our custom logic."
+
+ - id: performance_considerations
+ name: "Performance Considerations"
+ description: "Expected execution frequency and potential impact assessment"
+ mandatory: true
+ validation_hints: ["performance", "frequency", "impact", "execution", "load"]
+
+ - id: data_sensitivity_review
+ name: "Data Sensitivity Review"
+ description: "Confirmation of no sensitive data exposure or rationale if necessary"
+ mandatory: true
+ validation_hints: ["sensitive", "data", "security", "exposure", "private"]
+
+ - id: multi_extension_interaction
+ name: "Multi-Extension Interaction"
+ description: "Risks if multiple extensions subscribe and conflict mitigation"
+ mandatory: true
+ validation_hints: ["multi", "extension", "conflict", "subscribe", "risk"]
+
+ regular:
+ enabled: true
+ description: "Regular event requests without IsHandled"
+ max_iterations: 3
+
+ additional_requirements:
+ - id: use_case_example
+ name: "Use Case Example"
+ description: "How event will be used and business scenario"
+ mandatory: false
+ validation_hints: ["use case", "example", "scenario", "business", "how"]
+
+ - id: meaningful_justification_for_parameter_addition
+ name: "Meaningful Justification (Relaxed for Parameter Additions)"
+ description: "For adding parameters to existing events, justification requirements are relaxed"
+ applies_to: "parameter_addition_to_existing_event"
+ mandatory: false
+ rationale: |
+ When adding a parameter to an existing event, the justification is inherently clear:
+ - The user needs access to data that exists in scope
+ - The data is not currently exposed through the event
+ - This is a straightforward extensibility enhancement
+
+ The requirement "I need parameter X" is sufficient UNLESS there are specific concerns:
+ - Security/sensitive data exposure
+ - Performance implications
+ - Breaking changes
+
+ Agent should only request additional justification if one of these concerns applies.
+ validation_approach: "Evaluate for potential issues, not just completeness of justification"
+
+# Requirement Validation Messages
+validation_messages:
+ insufficient_detail: "The provided information is insufficient for processing. Please add:"
+ requirement_reference: "For more details, see our Extensibility Guidelines: {link}"
+ resubmit_after_update: "Please update the issue with the missing details. We'll automatically re-analyze once updated."
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/extensibility_enhancement_requirements.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/extensibility_enhancement_requirements.yaml
new file mode 100644
index 000000000..fa757b553
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/extensibility_enhancement_requirements.yaml
@@ -0,0 +1,37 @@
+# Extensibility Enhancement Specific Requirements Configuration
+# Defines ADDITIONAL requirements specific to extensibility-enhancement type issues
+# NOTE: General requirements (title, description, meaningful justification) are defined in ea_config_general_requirements.yaml
+# Version: 1.0
+
+extensibility_enhancement:
+ enabled: true
+ description: "General extensibility improvements and enhancements - ADDITIONAL requirements beyond general ones"
+ detection_keywords: ["extensibility", "enhancement", "improvement", "access", "protected", "variable"]
+ max_iterations: 3
+
+ additional_requirements:
+ - id: proposed_enhancement
+ name: "Proposed Enhancement"
+ description: "Specific enhancement with exact code changes or design modifications"
+ mandatory: true
+ validation_hints: ["enhancement", "code", "change", "design", "modification"]
+ documentation_link: "https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/devenv-extensibility-patterns"
+
+ - id: technical_justification
+ name: "Technical Justification"
+ description: "Technical reasoning why the enhancement is necessary and beneficial"
+ mandatory: true
+ validation_hints: ["technical", "reasoning", "necessary", "beneficial", "why"]
+ quality_standards:
+ reject_generic_statements: true
+ requires_specifics:
+ - "Technical limitation of current approach"
+ - "Performance or architectural benefits"
+ - "Specific scenarios where current design fails"
+ - "Impact on extension development patterns"
+
+# Validation Messages
+validation_messages:
+ insufficient_detail: "The provided information is insufficient for processing. Please add:"
+ #requirement_reference: "For more details, see our Extensibility Guidelines: {link}"
+ resubmit_after_update: "Please update the issue with the missing details. We'll automatically re-analyze once updated."
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/general_requirements.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/general_requirements.yaml
new file mode 100644
index 000000000..159dec152
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/general_requirements.yaml
@@ -0,0 +1,144 @@
+# General Requirements Configuration
+# Defines common requirements that apply to ALL request types
+# Version: 1.0
+
+general_requirements:
+ description: "Common requirements that must be met by all extensibility requests"
+
+ mandatory_requirements:
+ - id: clear_title
+ name: "Clear Title"
+ description: "Issue title must clearly describe the request type and target"
+ mandatory: true
+ validation_hints: ["title", "clear", "descriptive", "specific"]
+ quality_standards:
+ reject_generic_statements: true
+ unacceptable_patterns:
+ - "Need help"
+ - "Extension issue"
+ - "Problem with"
+ - "Question about"
+ - "Request"
+ - "Enhancement needed"
+ requires_specifics:
+ - "Include request type (Event, Public Access, Enum, etc.)"
+ - "Mention specific object/procedure name"
+ - "Be descriptive and actionable"
+ good_examples:
+ - "Add OnBeforePostSalesInvoice event to Sales Invoice posting"
+ - "Change ValidateCustomerData procedure from local to public in Codeunit 123"
+ - "Add Electronic Signature option to Document Approval Type enum"
+ bad_examples:
+ - "Need event"
+ - "Extension request"
+ - "Problem with sales"
+
+ - id: clear_description
+ name: "Clear Description"
+ description: "Issue description must provide comprehensive context and details"
+ mandatory: true
+ validation_hints: ["description", "context", "details", "comprehensive"]
+ quality_standards:
+ minimum_elements:
+ - "What you're trying to accomplish"
+ - "Current limitation or problem"
+ - "Proposed solution"
+ - "Business/technical justification"
+ good_structure: |
+ **Problem:** [What can't be done today]
+ **Proposed Solution:** [Exact change requested]
+ **Justification:** [Why this is needed - see meaningful justification standards]
+ **Implementation:** [Specific code/design changes]
+
+ - id: meaningful_justification
+ name: "Meaningful Justification"
+ description: "All requests must include meaningful business/technical justification with specific scenarios"
+ mandatory: true
+ validation_hints: ["justification", "business", "scenario", "specific", "meaningful"]
+ quality_standards:
+ reject_generic_statements: true
+ unacceptable_patterns:
+ - "We need this for our extension"
+ - "This is important for us"
+ - "It has been requested by our customers"
+ - "We need it for customization"
+ - "This would be useful"
+ - "Customers are asking for this"
+ - "We need better extensibility"
+ - "Required for our business"
+ - "Essential for our solution"
+ - "Standard requirement"
+ requires_specifics:
+ - "Technical intent or specific business scenario"
+ - "What functionality is needed (e.g., filter data, validate records, access information)"
+ - "Enough context to understand the extensibility gap being addressed"
+ validation_criteria:
+ - "Should include concrete business scenario OR technical intent"
+ - "Should explain current technical limitation OR desired functionality"
+ - "Accept statements showing clear technical need (e.g., 'set additional filters', 'validate before X', 'access Y data')"
+ - "Reject only completely generic statements with no technical context"
+ good_examples_by_type:
+ event_request: "We need to validate custom tax rules before invoice posting. Currently, there's no way to intercept the posting process before tax calculation occurs, which causes incorrect tax amounts when country-specific rules differ from standard BC logic."
+ request_for_external: "Our extension needs to call ValidateCustomerData() from external codeunit. This procedure is currently local but contains shared validation logic that multiple extensions require. Making it public would eliminate code duplication across 3 different customer-specific extensions."
+ request_for_external_simple: "I need this to avoid making duplicate code, as I need to check the same scenario."
+ enum_request: "We need to add 'Electronic Signature' option to Document Approval Type enum. Our compliance extension must track electronic signatures as a distinct approval method, but current enum values don't cover this regulatory requirement in our industry."
+ extensibility_enhancement: "The Purchase Header table's OnValidate trigger for Vendor No. field doesn't expose the vendor record to extensions. We need access to vendor data during validation to implement vendor-specific business rules without re-querying the database."
+ bug: "When posting a sales invoice with multiple lines containing items that have different VAT rates, the VAT calculation is incorrect. Expected: Each line should calculate VAT based on its item's VAT rate. Actual: All lines use the VAT rate from the first line."
+
+ # Special case: Request-for-external with code duplication avoidance
+ request_for_external_simplified_acceptance:
+ description: "For request-for-external, simpler justification is acceptable when procedure is safe"
+ acceptable_simple_justifications:
+ - "I need this to avoid making duplicate code"
+ - "I need to reuse this logic in my extension"
+ - "I need to call the same check/validation"
+ - "To avoid copying this code to my extension"
+ condition: "Agent must verify procedure is NOT risky (no sensitive data, no security implications, no XPIA risk)"
+ rationale: "Code duplication avoidance is a valid technical reason - the intent is clear and the benefit is obvious"
+
+ bad_examples:
+ - "We need this event to implement customization for customer"
+ - "This is important for our business requirements"
+ - "Customers have requested this functionality"
+ - "We need this to be public so we can call it"
+ - "This enum needs more options for our extension"
+
+ - id: no_images_use_code_snippets
+ name: "No Images - Use Code Snippets"
+ description: "Images are not allowed - code must be provided as text using code snippets"
+ mandatory: true
+ validation_hints: ["image", "screenshot", "picture", "code snippet", "text"]
+ quality_standards:
+ reject_images: true
+ rationale: |
+ - Code in images cannot be analyzed by the agent
+ - Images cannot be searched or indexed
+ - Code snippets allow for proper syntax highlighting and copying
+ - Text-based code is accessible and easier to work with
+ required_format: |
+ All code must be provided as text using proper markdown code blocks with AL language syntax:
+ ```al
+ // Your AL code here
+ procedure Example()
+ begin
+ // Implementation
+ end;
+ ```
+ polite_message_template: |
+ Thank you for your request!
+
+ I noticed that you've included an image in your submission. To help us analyze your request more effectively, could you please replace the image with a text-based code snippet?
+
+ Please use the following format for AL code:
+
+ ```al
+ // Your AL code here
+ procedure YourProcedureName()
+ begin
+ // Your implementation
+ end;
+ ```
+
+ This will allow us to better understand your request and provide more accurate guidance.
+
+ Thank you for your cooperation!
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/request_for_external_requirements.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/request_for_external_requirements.yaml
new file mode 100644
index 000000000..dab414c08
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/input-requirements/request_for_external_requirements.yaml
@@ -0,0 +1,80 @@
+# Request for External Specific Requirements Configuration
+# Defines ADDITIONAL requirements specific to request-for-external type issues
+# NOTE: General requirements (title, description, meaningful justification) are defined in ea_config_general_requirements.yaml
+# Version: 1.0
+
+request_for_external:
+ enabled: true
+ description: "Requests to change procedure/property access from local/internal to public/global, or to remove OnPrem Scope restrictions - ADDITIONAL requirements beyond general ones"
+ detection_keywords: ["local", "global", "public", "accessibility", "access level", "external", "OnPrem", "Scope", "remove OnPrem", "remove Scope"]
+ max_iterations: 3
+
+ # Simplified validation for safe procedures
+ simplified_validation:
+ enabled: true
+ description: "When author wants to avoid code duplication and procedure is safe, accept with minimal justification"
+ trigger_conditions:
+ - "Author mentions avoiding duplicate code"
+ - "Author mentions reusing existing logic"
+ - "Author mentions calling the same validation/check"
+ trigger_keywords: ["duplicate", "duplication", "reuse", "same logic", "same scenario", "same check", "same validation", "avoid copying"]
+ procedure_safety_check:
+ description: "Agent must verify the procedure is safe before applying simplified validation"
+ risky_indicators:
+ - "Accesses or modifies private/sensitive data (passwords, tokens, secrets)"
+ - "Performs security-related operations (authentication, authorization)"
+ - "Could be exploited for XPIA (Cross-Partner Integration Attack)"
+ - "Modifies critical system state that should remain internal"
+ - "Handles encryption/decryption operations"
+ - "Accesses user credentials or session data"
+ - "Could bypass permission checks if made public"
+ safe_indicators:
+ - "Simple validation/check procedure"
+ - "Read-only data access"
+ - "Formatting or calculation helpers"
+ - "UI-related utilities"
+ - "Standard business logic that extensions need to reuse"
+ when_safe_and_avoiding_duplication:
+ accept_minimal_justification: true
+ skip_detailed_usage_scenario: true
+ rationale: "Code duplication avoidance is a valid technical reason when the procedure poses no security or stability risk"
+
+ additional_requirements:
+ - id: proposed_code_change
+ name: "Proposed Code Change"
+ description: "Exact access level change needed with current and proposed declarations"
+ mandatory: true
+ validation_hints: ["code", "procedure", "access", "level", "local", "public"]
+ flexible_validation: true
+ validation_notes: |
+ This requirement can be satisfied in two ways:
+ 1. Author explicitly provides current and proposed code structure, OR
+ 2. Agent can determine the target object/procedure/variable from the description and locate it in the codebase
+
+ Only mark as missing-info if:
+ - Author's description is too vague to identify the target, AND
+ - Codebase search cannot locate the referenced object/procedure/variable
+ #documentation_link: "https://learn.microsoft.com/en-us/dynamics365/business-central/dev-itpro/developer/devenv-al-procedures"
+
+ - id: usage_scenario
+ name: "Usage Scenario"
+ description: "How the public procedure will be used and from which context"
+ mandatory: true
+ validation_hints: ["usage", "scenario", "context", "how", "where"]
+ flexible_validation: true
+ validation_notes: |
+ This requirement can be relaxed when:
+ 1. Author explicitly mentions avoiding code duplication, AND
+ 2. Agent verifies the procedure is safe (no risky indicators)
+
+ In such cases, the justification "avoid duplicate code" or "reuse existing logic"
+ is sufficient - detailed usage scenario is NOT required.
+
+ The intent is clear: author wants to call the same logic from their extension
+ instead of copying it.
+
+# Validation Messages
+validation_messages:
+ insufficient_detail: "The provided information is insufficient for processing. Please add:"
+ #requirement_reference: "For more details, see our Extensibility Guidelines: {link}"
+ resubmit_after_update: "Please update the issue with the missing details. We'll automatically re-analyze once updated."
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step0-getting-started.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step0-getting-started.md
new file mode 100644
index 000000000..e513c4cc4
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step0-getting-started.md
@@ -0,0 +1,33 @@
+# Agent Setup & Rules (Minimal)
+
+## 1. Startup Checks
+Both checks are **required** and must pass. Before processing, verify:
+1. **Codebase**: `glob` for `**/SalesPost.Codeunit.al` returns at least one result.
+2. **Configs**: Ensure existence of YAML files (team-mapping, templates, and at least some requirements/rules). Do not read or open the files.
+
+**Failure**: Report error and halt. **Success**: Print "✅ Argus initialized".
+
+## 2. Scope & Constraints
+- **Agent Mode**: Read-only code. Append-only comments/labels. NO editing code/PRs.
+- **Output**: GitHub comments/labels + console logs only.
+
+## 3. Processing Criteria
+- **Workflow**:
+ - Process issues **sequentially** (one at a time).
+ - **Independence**: Reset context fully between issues. Failures in one issue do not halt the processing of others.
+ - **Logging**: Log "Now starting processing issue #[ID]" at the start and "Issue #[ID] is processed." upon completion.
+ - Skip ineligible issues silently (log internally).
+
+## 4. Format input data as GH_REQUEST (json object):
+{
+ "number": int (number from instance_id),
+ "title": string,
+ "description": string,
+ "type": string (default is Task),
+ "state": string (default is open),
+ "labels": string[],
+ "author": string (default is N/A),
+ "created_at": timestamp (default is current datime),
+ "updated_at": timestamp (default is current datime),
+ "comments": Comment[]
+}
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step1-collect-data.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step1-collect-data.md
new file mode 100644
index 000000000..612ad077a
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step1-collect-data.md
@@ -0,0 +1,29 @@
+# Issue Data Collection
+
+**Purpose:** Collecting data for the issue from repository `microsoft/ALAppExtensions`.
+
+## Core Logic
+1. **Fetch data** Use GitHub MCP server to fetch issue data, comments, and labels.
+ * **Fetch Details**: Use `github_issue_read` with `method: "get"`.
+ * **Fetch Comments**: Use `github_issue_read` with `method: "get_comments"`. Sort chronologically by `created_at`.
+ * **Fetch Labels**: Use `github_issue_read` with `method: "get_labels"`.
+2. **Store collected data** as `GH_REQUEST` (json object):
+```json
+{
+ "number": int,
+ "title": string,
+ "description": string,
+ "type": string,
+ "state": string,
+ "labels": string[],
+ "author": string,
+ "created_at": timestamp,
+ "updated_at": timestamp,
+ "comments": Comment[]
+}
+```
+3. **Output:**
+ Return a JSON object:
+ * `Success`: (boolean) True if retrieval successful
+ * `GH_REQUEST`: (object) Populated data object with GitHub issue data
+ * `FailureReason`: (string) Error message ("Failed to retrieve issue #N") if failed to retrieve issue details
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step2-eligibility-check.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step2-eligibility-check.md
new file mode 100644
index 000000000..197766dd8
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step2-eligibility-check.md
@@ -0,0 +1,32 @@
+# Issue Eligibility Check
+
+**Purpose:** Validate issue meets criteria for automated processing.
+
+## Core Logic
+An issue is **ELIGIBLE** if it passes **ALL** checks:
+1. **Issue State**: Must be `open`.
+2. **Issue Type**: Must be `Task`.
+3. **Recency**: If `missing-info` label is present, last activity must be < 30 days ago (failure marks as `IsStale`).
+4. **Labels**:
+ * **No Labels**: No Microsoft team member (excluding bot) involved.
+ * **"missing-info" Label Only**: Author must be the last commenter.
+ * **Other Labels**: Not eligible.
+
+## Output Format
+Return a JSON object:
+ * `IsEligible`: (boolean) True if **ALL** checks pass
+ * `IsStale`: (boolean) True if "missing-info" label exists AND no activity > 30 days
+ * `FailureReason`: (string) Reason for ineligibility (only if NOT Eligible)
+
+
+## Decision Matrix
+
+| Condition | IsEligible | IsStale | FailureReason |
+| :--- | :--- | :--- | :--- |
+| **All Checks Pass** | `true` | `false` | `` |
+| **Issue Closed** | `false` | `false` | "Issue is closed" |
+| **"missing-info" > 30 days** | `false` | `true` | "No activity for 30+ days" |
+| **Wrong Type** | `false` | `false` | "Type is not Task" |
+| **Team Involved** | `false` | `false` | "Microsoft team member involved" |
+| **Agent Last Comment** | `false` | `false` | "Waiting for author response" |
+| **Other Labels** | `false` | `false` | "Already processed" |
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step3-request-types.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step3-request-types.md
new file mode 100644
index 000000000..ea3c06d3c
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step3-request-types.md
@@ -0,0 +1,28 @@
+# Request Type Classification
+
+**Purpose:** Classify the issue into a specific extensibility request type based on keywords and intent.
+
+## Request Types & Logic
+
+| Type | Keywords | Sub-Types / Logic |
+|------|----------|-------------------|
+| **`event-request`** | "event", "publisher", "subscriber", "OnBefore", "OnAfter" | • **`ishandled`**: When the request asks for an `IsHandled` (or similar) parameter to allow skipping code. Note: Simply adding other parameters to an *existing* event is NOT `ishandled`.
• **`regular`**: Default. |
+| **`request-for-external`** | "local to global", "public", "accessibility", "remove OnPrem" | Change scope/visibility. |
+| **`enum-request`** | "enum", "option" | • **`new_enum`**: Create brand new enum.
• **`extend_enum`**: Add values to an existing enum (matches the `extend_enum` subtype in `enum_request_requirements.yaml`). |
+| **`extensibility-enhancement`** | "improve", "enhance", "add" | Catch-all for other enhancements. |
+
+## Critical Rules
+
+1. **IsHandled Logic**: Identify the `ishandled` sub-type only when the author explicitly requests bypassing/skipping existing logic (e.g., "bypass", "skip", "prevent execution"), even if the literal keyword `IsHandled` is not used. Never infer `IsHandled` when no such explicit request is present (see the critical rule in `event_request_requirements.yaml`).
+2. **Bug Handling**:
+ * If an issue describes a "bug" but explicitly requests an extensibility change (event, enum, accessibility) to resolve it, classify as that specific request type.
+ * If it is a pure bug report (unexpected behavior/error) without an extensibility request, return `Success: false` with label `agent-not-processable` and explanation.
+3. **Single Intent**: Multiple requests of the **same type** (e.g., 3 events) are allowed. If an issue contains multiple **distinct** request types (e.g., Event + Enum creation), return `Success: false` with label `missing-info` (request split) and explanation.
+
+## Output Format
+Return a JSON object:
+ * `Success`: (boolean) True if type determined, false otherwise
+ * `TYPE`: (string) Determined type.
+ * `SUBTYPE`: (string) Determined sub-types if exist.
+ * `FailureLabel`: (string) `missing-info` or `agent-not-processable` (only if `Success` is `false`)
+ * `FailureReason`: (string) If Success is false, provide explanation
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step4-requirements-check.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step4-requirements-check.md
new file mode 100644
index 000000000..f1fdf2331
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step4-requirements-check.md
@@ -0,0 +1,22 @@
+# Requirements Check (Minimal)
+
+**Purpose:** Validate issue against all requirement layers (General + Type + Sub-Type).
+
+## Core Logic
+1. **Load Configs:** Load `general_requirements.yaml` and type-specific YAMLs (e.g., `event_request_requirements.yaml`).
+2. **Execute Checks:** Run checks sequentially (General -> Type -> Sub-Type).
+ * **Fail Fast:** If any check triggers `agent-not-processable`, stop immediately and return failure.
+ * **Collect Failures:** For other failures (e.g., missing info), continue running all checks to report all missing items at once.
+3. **Mandatory Logging:**
+ * Format: `{PASS|FAIL} | {requirement_name} - {one sentence summary}`
+4. **Output:**
+ Return a JSON object:
+ * `Success`: (boolean) True if request met all requirements.
+ * `FailureLabel`: (string) `missing-info` or `agent-not-processable` (only if `Success` is `false`).
+ * `FailureReason`: (string) Consolidated explanation of all failures.
+
+## Configuration Sources
+* `input-requirements/general_requirements.yaml`
+* `input-requirements/{type}_requirements.yaml` (event_request, request_for_external, enum, etc.)
+
+**Note:** All validation rules, criteria, and max iterations are defined in these YAML files.
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step5-codebase-analysis.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step5-codebase-analysis.md
new file mode 100644
index 000000000..ef330e3c8
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step5-codebase-analysis.md
@@ -0,0 +1,114 @@
+# Codebase Analysis
+
+**Purpose:** Verify feasibility through progressive rule evaluation and generate implementation guidance.
+
+## Core Logic
+
+1. **Understand Intent:** Determine what the author wants to accomplish based on the request, including analysis of all existing comments.
+
+2. **Identify Targets:** Determine which objects should be updated based on the request.
+ - **Action:** Generate `ObjectList` (only objects where changes are required).
+
+3. **Locate Code:** For each object in `ObjectList`:
+ - Find the object in the codebase using a **glob by filename first** (fastest, does not read file content):
+ - AL files follow the naming convention `CamelCaseName.ObjectType.al`. Derive the filename from the object name.
+ - **CRITICAL: All searches MUST be scoped to `App/Layers/` — do NOT search outside this path.**
+ - Example: Page "Recurring Job Jnl." → `glob("App/Layers/**/W1/**/*RecurringJobJnl*.al")`
+ - Example: Codeunit "Phys. Invt. Order-Finish" → `glob("App/Layers/**/W1/**/*PhysInvtOrderFinish*.al")`
+ - **Only if glob yields no result**, fall back to a single targeted grep by object name (NOT by numeric ID): `grep("Recurring Job Jnl", "App/Layers/**/W1/**/*.al")`.
+ - **CRITICAL: Do NOT search by numeric ID** (e.g. do NOT grep for "page 289"). Do NOT use `type="al"` or bare `**/*.al` glob patterns — they scan the entire codebase and are extremely slow.
+ - **CRITICAL: Do NOT search outside `App/Layers/`** — never use `**/W1/**` or any pattern without the `App/Layers/` prefix.
+ - **STOP IMMEDIATELY** if the object is not found after glob + one targeted grep within `App/Layers/`. Do NOT search elsewhere. Do NOT expand the search scope. Return `{"Success": false, "FailureLabel": "agent-not-processable", "FailureReason": "Object not found in App/Layers/"}` and proceed directly to step 7.
+ - **Verify Target:** Confirm procedure/trigger logic.
+ - **Trigger missing?** Create new (Allowed).
+ - **Procedure missing?** Return `missing-info`.
+
+4. **Progressive Rule Evaluation:** For each inquiry, apply rules in the following order:
+
+ ### a) **BLOCKER RULES** (Stop on First Match)
+ - Load: `codebase-rules/{type}_blockers.yaml` + `codebase-rules/{type}_{subtype}_blockers.yaml` (if subtype specified) + `general_blockers.yaml`
+ - **Purpose:** Identify requests that cannot be processed (auto-reject scenarios).
+ - **Behavior:** If ANY blocker rule matches, STOP immediately and return failure.
+ - **Action:** Return `FailureLabel: "auto-reject"` with `FailureReason` from the rule.
+ - **Track Progress:** Count total blocker rules vs. rules checked.
+
+ ### b) **ALTERNATIVE SUGGESTION RULES** (Evaluate All)
+ - Load: `codebase-rules/{type}_alternative_suggestions.yaml` + `codebase-rules/{type}_{subtype}_alternative_suggestions.yaml` (if subtype specified) + `general_alternative_suggestions.yaml`
+ - **Purpose:** Check if existing functionality can satisfy the request (with or without small modifications).
+ - **Behavior:** Evaluate ALL suggestion rules before proceeding.
+ - **Action:** If ANY suggestion applies, STOP further execution. Return `FailureLabel: "missing-info"` with all suggestions listed in `FailureReason`. Author must confirm if suggestion is acceptable or if new implementation is still needed.
+ - **Track Progress:** Count total suggestion rules vs. rules checked.
+
+ ### c) **WARNING RULES** (Evaluate All)
+ - Load: `codebase-rules/{type}_warnings.yaml` + `codebase-rules/{type}_{subtype}_warnings.yaml` (if subtype specified) + `general_warnings.yaml`
+ - **Purpose:** Identify requests that require justification, clarification, or author confirmation.
+ - **Behavior:** Check ALL warning rules before returning. Accumulate all warnings.
+ - **Action:** If ANY warnings apply, STOP further execution. Return `FailureLabel: "missing-info"` with all warnings listed in `FailureReason`. Author must provide clarification/justification.
+ - **Track Progress:** Count total warning rules vs. rules checked.
+
+ ### d) **IMPLEMENTATION RULES** (Apply All)
+ - Load: `codebase-rules/{type}_implementation.yaml` + `codebase-rules/{type}_{subtype}_implementation.yaml` (if subtype specified) + `general_implementation.yaml`
+ - **Purpose:** Ensure suggested solution aligns with coding standards and best practices.
+ - **Prerequisite:** Only execute if NO suggestions and NO warnings were triggered in previous steps.
+ - **Behavior:** Apply ALL implementation rules to the proposed solution.
+ - **Action:** Generate `SuggestedImplementation` following all applicable rules.
+ - **Track Progress:** Count total implementation rules vs. rules checked.
+
+5. **Graceful Rule Handling:**
+ - If a rule file doesn't exist, log it and continue with available rules.
+ - Always apply general rules (if available) even when type-specific rules are missing.
+ - **CRITICAL:** Agent MUST include ALL available rules from loaded files in the analysis.
+
+6. **Check Existing Implementation:**
+ - **Exact Match:** Mark as `ALREADY_IMPLEMENTED` (check before applying rules).
+
+7. **Multi-Change Logic:** Apply all-or-nothing logic for mixed statuses.
+
+8. **Mandatory Logging:**
+ - Format: `{PASS|FAIL|SKIP} | {category}/{rule_id} - {one sentence summary}`
+ - Example: `PASS | blocker/obsolete_code - Target code is not obsolete`
+ - Example: `FAIL | warning/recordref_parameter - RecordRef parameter requires justification`
+ - Example: `SKIP | implementation/event_naming - Rule file not found`
+
+9. **Output:**
+ Return a JSON object:
+ - `Success`: (boolean) True if request met all requirements (no blockers, no alternative suggestions, no warnings).
+ - `OBJECT_LIST`: (Array) List of objects involved. Each item includes:
+ - `Type`: (string) e.g., Codeunit, Table.
+ - `Id`: (integer) Object ID.
+ - `Name`: (string) Object name.
+ - `Namespace`: (string).
+ - `SUGGESTED_IMPLEMENTATION`: (string) Explanation with code snippets of what is suggested to implement (only if `Success` is `true`).
+ - `FailureLabel`: (string) `missing-info`, `agent-not-processable`, or `auto-reject` (only if `Success` is `false`).
+ - `FailureReason`: (string) Consolidated explanation of all failures, suggestions, or warnings requiring author response.
+
+## Configuration Sources
+
+### Rule File Structure
+Rules are organized by category, type, and subtype:
+
+**General Rules (apply to all types):**
+- `codebase-rules/general_blockers.yaml`
+- `codebase-rules/general_alternative_suggestions.yaml`
+- `codebase-rules/general_warnings.yaml`
+- `codebase-rules/general_implementation.yaml`
+
+**Type-Specific Rules:**
+- `codebase-rules/{type}_blockers.yaml` (e.g., `event_request_blockers.yaml`)
+- `codebase-rules/{type}_alternative_suggestions.yaml`
+- `codebase-rules/{type}_warnings.yaml`
+- `codebase-rules/{type}_implementation.yaml`
+
+**Subtype-Specific Rules (if applicable):**
+- `codebase-rules/{type}_{subtype}_blockers.yaml` (e.g., `event_request_ishandled_blockers.yaml`)
+- `codebase-rules/{type}_{subtype}_alternative_suggestions.yaml`
+- `codebase-rules/{type}_{subtype}_warnings.yaml`
+- `codebase-rules/{type}_{subtype}_implementation.yaml`
+
+**Loading Strategy:**
+- General rules are always loaded
+- Type-specific rules are loaded based on the request type
+- Subtype-specific rules are loaded only when a subtype is specified in the request
+- Agent loads rules ad hoc per category (not all at once)
+
+**Note:** Not all category files may exist initially. Agent must handle missing files gracefully and log them.
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step6-team-assignment.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step6-team-assignment.md
new file mode 100644
index 000000000..5793768a3
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step6-team-assignment.md
@@ -0,0 +1,26 @@
+# Team Assignment
+
+**Purpose:** Assign ownership based on object namespaces.
+
+## Core Logic
+1. **Extract Namespaces:** Get namespaces from all target objects.
+ - **Failure Condition:** If none found, return `agent-not-processable` and stop.
+2. **Match Teams:** Compare extracted namespaces against configuration settings.
+ - **Preprocessing:** Remove "Microsoft" prefix if present (e.g., `Microsoft.Sales.History` -> `Sales.History`).
+ - **Strategy:** Iterative fallback matching:
+ 1. Try full remaining namespace.
+ 2. If no match, remove last segment and retry (e.g., `Sales.History` -> `Sales`).
+ 3. Repeat until match found or only root segment remains.
+3. **Determine Winner:**
+ - Count matches per team.
+ - **Tie-Breaker:** If counts are equal, select the alphabetically first team.
+ - **Failure Condition:** If no matches, return `agent-not-processable` and stop.
+4. **Output:**
+ Return a JSON object:
+ * `Success`: (boolean) True if owning team found.
+ * `TEAM_LABEL`: (string) Found team name `Finance` or `Integration` or `SCM` (only if `Success` is `true`).
+ * `FailureLabel`: (string) `agent-not-processable` (only if `Success` is `false`).
+ * `FailureReason`: (string) Explanation of failure.
+
+## Configuration Sources
+* `team-configuration/team_namespace_mapping.yaml`
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step7-labels-comments.md b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step7-labels-comments.md
new file mode 100644
index 000000000..66817cf72
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/step7-labels-comments.md
@@ -0,0 +1,55 @@
+# Labels & Comments
+
+**Purpose:** Finalize processing by providing proper labels, comments, and state.
+
+**Rule:** All comments must be generated using templates from `comment-templates/comment_templates.yaml` corresponding to the situation.
+
+**Output Format:** **MUST** return a JSON object (Final_Output) with the following structure:
+```json
+{
+ "labels_to_apply": ["label1", "label2"],
+ "comment_to_post": "Generated comment text using template, with a proper explanation",
+ "state_of_issue": "open" or "closed"
+}
+```
+
+## 1. Prerequisites (Mandatory)
+- Verify **ALL** previous steps completed if outcome is `FEASIBLE`.
+- If outcome is `MISSING_INFO` or `AGENT_NOT_PROCESSABLE`, partial completion is valid.
+
+## 2. Decision Logic
+
+### A. Success (Feasible)
+- **Labels:** Team (e.g., "Finance") + Type (e.g., "event-request"). Added as a pair.
+- **Comment:** "✅ Analysis complete - approved for implementation". Include existing/pending code.
+- **Status:** Open.
+
+### B. Missing Info
+- **Labels:** `missing-info` **ONLY**. (No Type/Team labels).
+- **Comment:** Explain what is missing/needed.
+- **Status:** Open.
+
+### C. Agent Not Processable
+- **Labels:** `agent-not-processable` **ONLY**.
+- **Comment:** None.
+- **Status:** Open.
+
+### D. Auto Reject
+- **Labels:** None.
+- **Comment:** "This request cannot be implemented." + Reason.
+- **Status:** **Close** (Reason: not planned).
+
+### E. Already Implemented
+- **Labels:** None.
+- **Comment:** "✅ Already implemented." Show code snippets.
+- **Status:** **Close** (Reason: completed).
+
+### F. Stale Issue (30+ days inactive)
+- **Labels:** Maintain `missing-info`.
+- **Comment:** "Closing due to inactivity."
+- **Status:** **Close** (Reason: not planned).
+
+## 3. Output Requirements (Mandatory to generate and log a JSON-format output Final_Output)
+1. **labels_to_apply:** List only the labels to be added/set. If transitioning from one state to another (e.g., from `missing-info` to feasible), include only the final labels (stale labels will be removed automatically).
+2. **comment_to_post:** Generate using templates from `comment-templates/comment_templates.yaml`. Select the template matching the request type and outcome (e.g., `approved_event_request` for feasible requests). Always generate a comment even if there is no proper template to use.
+3. **state_of_issue:** Set to `"open"` or `"closed"` based on the decision logic above (closed for outcomes D, E, F).
diff --git a/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/team-configuration/team_namespace_mapping.yaml b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/team-configuration/team_namespace_mapping.yaml
new file mode 100644
index 000000000..52e003a94
--- /dev/null
+++ b/src/bcbench/agent/copilot/instructions/microsoftInternal-NAV/instructions/Argus/team-configuration/team_namespace_mapping.yaml
@@ -0,0 +1,111 @@
+# Team Namespace Mapping Configuration
+# Maps namespaces to team ownership for automated team assignment
+# Version: 1.0
+
+team_namespace_mapping:
+
+ Finance:
+ namespaces:
+ - "AccountantPortal"
+ - "Bank"
+ - "Booking"
+ - "CashFlow"
+ - "Finance.CashFlow"
+ - "CostAccounting"
+ - "Finance"
+ - "FixedAssets"
+ - "Foundation"
+ - "Globalization"
+ - "GeneralLedger"
+ - "MoneyTransfer"
+ - "PaymentMethods"
+ - "Payroll"
+ - "PostingSetup"
+ - "Sales.Peppol"
+ - "SubscriptionBilling"
+ - "TaxEngine"
+ - "ValueAddedTax"
+ match_strategy: "iterative_fallback"
+ case_sensitive: false
+ description: "Finance team - handles financial/accounting related code"
+
+ SCM:
+ namespaces:
+ - "Assembly"
+ - "CRM"
+ - "HumanResources"
+ - "Inventory"
+ - "Inventory.Posting"
+ - "Manufacturing"
+ - "Pricing"
+ - "Projects"
+ - "Purchases"
+ - "PurchaseDocuments"
+ - "Sales"
+ - "Sales.Document"
+ - "SalesInvoicing"
+ - "Service"
+ - "ServiceManagement"
+ - "Warehouse"
+ - "WarehouseManagement"
+ - "ProductionOrders"
+ - "ServiceOrders"
+ match_strategy: "iterative_fallback"
+ case_sensitive: false
+ description: "SCM team - handles supply chain, manufacturing, service, and related code"
+
+ Integration:
+ namespaces:
+ - "AI"
+ - "API"
+ - "Automation"
+ - "Azure"
+ - "AzureML"
+ - "BusinessCentral"
+ - "DataAdministration"
+ - "DateTime"
+ - "Device"
+ - "Diagnostics"
+ - "EMail"
+ - "Email"
+ - "EServices"
+ - "ExternalServices"
+ - "Foundation"
+ - "Integration"
+ - "Intercompany"
+ - "JSON"
+ - "REST"
+ - "SOAP"
+ - "Security"
+ - "System"
+ - "SystemApplication"
+ - "Telemetry"
+ - "Utilities"
+ - "WebServices"
+ - "XML"
+ match_strategy: "iterative_fallback"
+ case_sensitive: false
+ description: "Integration team - handles system integration, API, automation, and utilities"
+
+# Assignment Algorithm
+assignment_algorithm:
+ step1: "Extract namespaces from target objects in issue"
+ step2: "Search codebase to locate actual object and determine namespace"
+ step3: "Match each namespace against team namespace mappings (iterative fallback: remove Microsoft prefix, then try full, drop segment, repeat)"
+ step4: "Count namespace matches per team"
+ step5: "Assign team with highest match count"
+ step6: "If tie, sort alphabetically and select first team (Finance > Integration > SCM)"
+ step7: "If no matches found, mark as agent-not-processable"
+
+# Tie-Breaker Rules
+tie_breaker:
+ enabled: true
+ strategy: "alphabetical"
+ order: ["Finance", "Integration", "SCM"] # Alphabetical order
+ description: "When multiple teams have equal match counts, select team appearing first alphabetically"
+
+# Fallback Handling
+fallback:
+ no_objects_identified: "agent-not-processable"
+ no_namespace_match: "agent-not-processable"
+ single_object_multiple_namespaces: "count_all_matches"
diff --git a/src/bcbench/agent/copilot/metrics.py b/src/bcbench/agent/copilot/metrics.py
index 652265932..c1e5db35b 100644
--- a/src/bcbench/agent/copilot/metrics.py
+++ b/src/bcbench/agent/copilot/metrics.py
@@ -1,10 +1,11 @@
+import json
import re
from collections import Counter
from pathlib import Path
from typing import Sequence
from bcbench.logger import get_logger
-from bcbench.types import AgentMetrics
+from bcbench.types import AgentMetrics, ExtAgentMetrics
logger = get_logger(__name__)
@@ -131,3 +132,115 @@ def parse_token_count(s: str) -> int:
except Exception as e:
logger.error(f"Failed to parse metrics from output: {e}")
return None
+
+
+# Pattern to find JSON code fences with real newlines
+_JSON_FENCE_REAL_NEWLINES = re.compile(r"```json\s*(\{[\s\S]*?\})\s*```")
+
+# Pattern to find JSON code fences with literal \n (escaped newlines in JSON string values
+# found in session log files where assistant content is stored as JSON strings)
+_JSON_FENCE_ESCAPED_NEWLINES = re.compile(r"```json\\n(\{.*?\})\\n```")
+
+
+def _unescape_json_string(s: str) -> str:
+ """Unescape a JSON-encoded string value (handles \\n, \\", \\\\ etc.)."""
+ try:
+ return json.loads(f'"{s}"')
+ except json.JSONDecodeError:
+ return s
+
+
+# Required keys for the Final_Output JSON schema from step7-labels-comments
+_FINAL_OUTPUT_KEYS = {"labels_to_apply", "comment_to_post", "state_of_issue"}
+
+
+def _normalize_final_output(raw: dict) -> dict:
+ return {
+ "labels_to_apply": raw.get("labels_to_apply", []),
+ "comment_to_post": raw.get("comment_to_post", ""),
+ "state_of_issue": raw.get("state_of_issue", ""),
+ }
+
+
+def _extract_last_json_from_fences(text: str) -> dict:
+ """Extract the Final_Output JSON from code fences in text.
+
+ Returns a dict with exactly {labels_to_apply, comment_to_post, state_of_issue}.
+ Missing keys default to empty values.
+ """
+ candidates: list[str] = []
+
+ # Match fences with real newlines
+ candidates.extend(m.group(1) for m in _JSON_FENCE_REAL_NEWLINES.finditer(text))
+
+ # Match fences with literal \n (session log format where content is inside JSON strings)
+ for m in _JSON_FENCE_ESCAPED_NEWLINES.finditer(text):
+ # Content is JSON-escaped — unescape via json.loads to handle \n, \", \\\\ etc.
+ candidates.append(_unescape_json_string(m.group(1)))
+
+ # Parse all candidates
+ parsed: list[dict] = []
+ for json_str in candidates:
+ try:
+ obj = json.loads(json_str)
+ if isinstance(obj, dict):
+ parsed.append(obj)
+ except json.JSONDecodeError:
+ continue
+
+ # Only match blocks that have at least one exact Final_Output key
+ matches = [p for p in parsed if _FINAL_OUTPUT_KEYS & p.keys()]
+ if matches:
+ return _normalize_final_output(matches[-1])
+
+ return _normalize_final_output({})
+
+
+def parse_metrics_ext(output_lines: Sequence[str], session_log_path: Path | None = None) -> ExtAgentMetrics | None:
+ """Parse extended metrics from Copilot CLI output and session logs.
+
+ This extends parse_metrics() by additionally extracting JSON output from code fences.
+
+ Args:
+ output_lines: Lines from Copilot CLI stderr output
+ session_log_path: Optional path to session log file for tool usage parsing
+
+ Returns:
+ ExtAgentMetrics with all base metrics plus json_output field, or None if parsing fails
+ """
+ # Parse base metrics using the standard parser
+ base_metrics = parse_metrics(output_lines, session_log_path)
+ if base_metrics is None:
+ return None
+
+ # Extract Final_Output JSON (labels_to_apply, comment_to_post, state_of_issue)
+ output_text = "".join(output_lines)
+ json_output = _normalize_final_output({})
+
+ try:
+ # First, try to find JSON in stderr output (real newlines)
+ json_output = _extract_last_json_from_fences(output_text)
+
+ # If all values are empty and we have a session log, search there
+ if not any(json_output.values()) and session_log_path and session_log_path.exists():
+ try:
+ session_content = session_log_path.read_text(encoding="utf-8")
+ json_output = _extract_last_json_from_fences(session_content)
+ if any(json_output.values()):
+ logger.debug(f"Found JSON output in session log: {session_log_path}")
+ except Exception as e:
+ logger.warning(f"Failed to read session log for JSON extraction: {e}")
+ except Exception as e:
+ logger.warning(f"Failed to parse JSON output: {e}")
+
+ logger.info(f"Extracted JSON output: {json_output}")
+
+ return ExtAgentMetrics(
+ execution_time=base_metrics.execution_time,
+ llm_duration=base_metrics.llm_duration,
+ turn_count=base_metrics.turn_count,
+ prompt_tokens=base_metrics.prompt_tokens,
+ completion_tokens=base_metrics.completion_tokens,
+ tool_usage=base_metrics.tool_usage,
+ json_output=json.dumps(json_output),
+ )
diff --git a/src/bcbench/agent/shared/__init__.py b/src/bcbench/agent/shared/__init__.py
index 6b199fe8f..fee2ece2f 100644
--- a/src/bcbench/agent/shared/__init__.py
+++ b/src/bcbench/agent/shared/__init__.py
@@ -1,6 +1,6 @@
"""Shared code for CLI-based agents (Claude, Copilot)."""
from bcbench.agent.shared.mcp import build_mcp_config
-from bcbench.agent.shared.prompt import build_prompt
+from bcbench.agent.shared.prompt import build_prompt, build_prompt_ext
-__all__ = ["build_mcp_config", "build_prompt"]
+__all__ = ["build_mcp_config", "build_prompt", "build_prompt_ext"]
diff --git a/src/bcbench/agent/shared/config.yaml b/src/bcbench/agent/shared/config.yaml
index 2f271f326..9428c26c6 100644
--- a/src/bcbench/agent/shared/config.yaml
+++ b/src/bcbench/agent/shared/config.yaml
@@ -54,6 +54,22 @@ prompt:
{{task}}
{% endif %}
+ extensibility-request-template: |
+ You are working with a Business Central (AL) code repository at {{repo_path}}.
+
+ Task: Analyze and process the extensibility request using the custom agent - "Argus extensibility agent"
+
+ Request details:
+ {{task}}
+
+ Important constraints:
+ - Do NOT modify any existing code or test files
+ - Do NOT commit any changes to the repository
+ - Do NOT try to build or run tests
+
+ CRITICAL OUTPUT REQUIREMENT:
+ After completing the analysis, you MUST produce a comprehensive JSON output.
+
# controls:
# 1. whether to copy custom instructions from `src/bcbench/agent/shared/instructions//`
# - Copilot: copies to repo/.github/ and renames AGENTS.md to copilot-instructions.md
@@ -63,7 +79,7 @@ prompt:
# NOTE: the canonical source file is AGENTS.md; it is automatically renamed
# to the agent-specific filename (AgentType.instruction_filename) during setup
instructions:
- enabled: false
+ enabled: true
# controls:
# 1. whether to copy skills from `src/bcbench/agent/shared/instructions//skills/`
@@ -78,8 +94,8 @@ skills:
# - Claude: copies to repo/.claude/agents/
# 2. whether to pass --agent= to copilot or claude
agents:
- enabled: false
- name: ALTest
+ enabled: true
+ name: Argus
mcp:
servers:
diff --git a/src/bcbench/agent/shared/prompt.py b/src/bcbench/agent/shared/prompt.py
index b819ada9f..f4bb96f43 100644
--- a/src/bcbench/agent/shared/prompt.py
+++ b/src/bcbench/agent/shared/prompt.py
@@ -2,7 +2,7 @@
from jinja2 import Template
-from bcbench.dataset import DatasetEntry
+from bcbench.dataset import DatasetEntry, ExtensibilityDatasetEntry
from bcbench.types import EvaluationCategory
@@ -25,3 +25,18 @@ def build_prompt(entry: DatasetEntry, repo_path: Path, config: dict, category: E
is_problem_statement=is_problem_statement, # only relevant for test-generation
al_mcp=al_mcp, # whether AL MCP server is enabled
)
+
+
+def build_prompt_ext(entry: ExtensibilityDatasetEntry, repo_path: Path, config: dict, category: EvaluationCategory, al_mcp: bool = False) -> str:
+ prompt_config = config.get("prompt", {})
+ template_str = prompt_config.get(f"{category.value}-template")
+ include_project_paths = prompt_config.get("include_project_paths")
+
+ template = Template(template_str)
+ return template.render(
+ repo_path=repo_path,
+ task=entry.get_task(),
+ project_paths=", ".join(entry.project_paths),
+ include_project_paths=include_project_paths,
+ al_mcp=al_mcp, # whether AL MCP server is enabled
+ )
diff --git a/src/bcbench/commands/evaluate.py b/src/bcbench/commands/evaluate.py
index 6e41b2f92..44f3ceb4f 100644
--- a/src/bcbench/commands/evaluate.py
+++ b/src/bcbench/commands/evaluate.py
@@ -7,6 +7,7 @@
from typing_extensions import Annotated
from bcbench.agent import run_claude_code, run_copilot_agent, run_mini_agent
+from bcbench.agent.copilot.agent import run_copilot_agent_ext
from bcbench.cli_options import (
ClaudeCodeModel,
ContainerName,
@@ -22,10 +23,12 @@
)
from bcbench.config import get_config
from bcbench.dataset import DatasetEntry, load_dataset_entries
+from bcbench.dataset.dataset_entry import ExtensibilityDatasetEntry
+from bcbench.dataset.dataset_loader import load_ext_dataset_entries
from bcbench.evaluate import EvaluationPipeline, create_pipeline
from bcbench.logger import get_logger
from bcbench.results import BaseEvaluationResult
-from bcbench.types import AgentMetrics, EvaluationContext, ExperimentConfiguration
+from bcbench.types import AgentMetrics, EvaluationCategory, EvaluationContext, ExperimentConfiguration
logger = get_logger(__name__)
_config = get_config()
@@ -98,7 +101,7 @@ def evaluate_copilot(
password: ContainerPassword,
category: EvaluationCategoryOption,
model: CopilotModel = "claude-haiku-4.5",
- dataset_path: DatasetPath = _config.paths.dataset_path,
+ dataset_path: DatasetPath | None = None,
repo_path: RepoPath = _config.paths.testbed_path,
output_dir: OutputDir = _config.paths.evaluation_results_path,
run_id: RunId = "copilot_test_run",
@@ -109,8 +112,16 @@ def evaluate_copilot(
To only run the agent to generate a patch without building/testing, use 'bcbench run copilot' instead.
"""
- entries: list[DatasetEntry] = load_dataset_entries(dataset_path, entry_id=entry_id)
- entry: DatasetEntry = entries[0]
+ if category == EvaluationCategory.EXTENSIBILITY_REQUEST:
+ if dataset_path is None:
+ dataset_path = _config.paths.ext_dataset_path
+ entries: list[ExtensibilityDatasetEntry] = load_ext_dataset_entries(dataset_path, entry_id=entry_id)
+ entry: ExtensibilityDatasetEntry = entries[0]
+ else:
+ if dataset_path is None:
+ dataset_path = _config.paths.dataset_path
+ entries: list[DatasetEntry] = load_dataset_entries(dataset_path, entry_id=entry_id)
+ entry: DatasetEntry = entries[0]
logger.info(f"Loaded {entry_id} entry from dataset")
run_dir: Path = output_dir / run_id
@@ -133,18 +144,31 @@ def evaluate_copilot(
)
pipeline = create_pipeline(category)
- pipeline.execute(
- context,
- lambda ctx: run_copilot_agent(
- entry=ctx.entry,
- repo_path=ctx.repo_path,
- category=category,
- model=ctx.model,
- output_dir=ctx.result_dir,
- al_mcp=al_mcp,
- container_name=ctx.container_name,
- ),
- )
+ if category == EvaluationCategory.EXTENSIBILITY_REQUEST:
+ pipeline.execute(
+ context,
+ lambda ctx: run_copilot_agent_ext(
+ entry=ctx.entry,
+ repo_path=ctx.repo_path,
+ model=ctx.model,
+ category=category,
+ output_dir=ctx.result_dir,
+ al_mcp=al_mcp,
+ ),
+ )
+ else:
+ pipeline.execute(
+ context,
+ lambda ctx: run_copilot_agent(
+ entry=ctx.entry,
+ repo_path=ctx.repo_path,
+ category=category,
+ model=ctx.model,
+ output_dir=ctx.result_dir,
+ al_mcp=al_mcp,
+ container_name=ctx.container_name,
+ ),
+ )
logger.info("Evaluation complete!")
logger.info(f"Results saved to: {run_dir}")
diff --git a/src/bcbench/commands/run.py b/src/bcbench/commands/run.py
index a026d1435..624fff3e5 100644
--- a/src/bcbench/commands/run.py
+++ b/src/bcbench/commands/run.py
@@ -7,6 +7,7 @@
from bcbench.agent.claude import run_claude_code
from bcbench.agent.copilot import run_copilot_agent
+from bcbench.agent.copilot.agent import run_copilot_agent_ext
from bcbench.agent.copilot.metrics import parse_session_log
from bcbench.agent.mini import run_mini_agent
from bcbench.cli_options import (
@@ -20,9 +21,10 @@
RepoPath,
)
from bcbench.config import get_config
-from bcbench.dataset import DatasetEntry, load_dataset_entries
+from bcbench.dataset import DatasetEntry, ExtensibilityDatasetEntry, load_dataset_entries, load_ext_dataset_entries
from bcbench.logger import get_logger
from bcbench.operations import setup_repo_postbuild, setup_repo_prebuild
+from bcbench.types import EvaluationCategory
logger = get_logger(__name__)
_config = get_config()
@@ -67,7 +69,7 @@ def run_copilot(
category: EvaluationCategoryOption,
container_name: ContainerName,
model: CopilotModel = "claude-haiku-4.5",
- dataset_path: DatasetPath = _config.paths.dataset_path,
+ dataset_path: DatasetPath | None = None,
repo_path: RepoPath = _config.paths.testbed_path,
output_dir: OutputDir = _config.paths.evaluation_results_path,
al_mcp: Annotated[bool, typer.Option("--al-mcp", help="Enable AL MCP server")] = False,
@@ -80,12 +82,34 @@ def run_copilot(
Example:
uv run bcbench run copilot microsoft__BCApps-5633 --category bug-fix --repo-path /path/to/BCApps
"""
- entry: DatasetEntry = load_dataset_entries(dataset_path, entry_id=entry_id)[0]
+
+ if category == EvaluationCategory.EXTENSIBILITY_REQUEST:
+ if dataset_path is None:
+ dataset_path = _config.paths.ext_dataset_path
+ entry: ExtensibilityDatasetEntry = load_ext_dataset_entries(dataset_path, entry_id=entry_id)[0]
+ else:
+ if dataset_path is None:
+ dataset_path = _config.paths.dataset_path
+ entry: DatasetEntry = load_dataset_entries(dataset_path, entry_id=entry_id)[0]
setup_repo_prebuild(entry, repo_path)
- setup_repo_postbuild(entry, repo_path, category)
+ if category != EvaluationCategory.EXTENSIBILITY_REQUEST:
+ setup_repo_postbuild(entry, repo_path, category)
+
+ if category == EvaluationCategory.EXTENSIBILITY_REQUEST:
+ metrics, _ = run_copilot_agent_ext(entry=entry, repo_path=repo_path, model=model, category=category, output_dir=output_dir, al_mcp=al_mcp)
+ logger.info(f"Agent metrics: {metrics}")
- run_copilot_agent(entry=entry, repo_path=repo_path, model=model, category=category, output_dir=output_dir, al_mcp=al_mcp, container_name=container_name)
+ # Compare against expected
+ from bcbench.evaluate.extensibility import compare_extensibility_output
+
+ resolved, errors = compare_extensibility_output(entry, metrics)
+ if resolved:
+ logger.info(f"✓ Entry {entry_id} matches expected output")
+ else:
+ logger.warning(f"✗ Entry {entry_id} does not match expected: {errors}")
+ else:
+ run_copilot_agent(entry=entry, repo_path=repo_path, model=model, category=category, output_dir=output_dir, al_mcp=al_mcp, container_name=container_name)
@run_app.command("claude")
diff --git a/src/bcbench/config.py b/src/bcbench/config.py
index 06fdffd11..b2d69c37b 100644
--- a/src/bcbench/config.py
+++ b/src/bcbench/config.py
@@ -36,6 +36,9 @@ class PathConfig:
bc_bench_root: Path
dataset_path: Path
+ ext_dataset_path: Path
+ ext_dataset_dir: Path
+ ext_dataset_default_setup_path: Path
dataset_dir: Path
problem_statement_dir: Path
testbed_path: Path
@@ -52,6 +55,9 @@ def from_root(cls, root: Path) -> PathConfig:
bc_bench_root=root,
dataset_dir=root / "dataset",
dataset_path=root / "dataset" / "bcbench.jsonl",
+ ext_dataset_dir=root / "dataset" / "extensibility_requests",
+ ext_dataset_path=root / "dataset" / "extensibility_requests" / "extensibility_dataset.yaml",
+ ext_dataset_default_setup_path=root / "dataset" / "extensibility_requests" / "default_setup.json",
problem_statement_dir=root / "dataset" / "problemstatement",
testbed_path=root.parent / "NAV",
ps_script_path=root / "scripts",
diff --git a/src/bcbench/dataset/__init__.py b/src/bcbench/dataset/__init__.py
index 6af72372f..e76d108d5 100644
--- a/src/bcbench/dataset/__init__.py
+++ b/src/bcbench/dataset/__init__.py
@@ -1,12 +1,14 @@
"""Dataset module for querying, validating and analyze dataset entries."""
-from bcbench.dataset.dataset_entry import DatasetEntry, TestEntry
-from bcbench.dataset.dataset_loader import load_dataset_entries
+from bcbench.dataset.dataset_entry import DatasetEntry, ExtensibilityDatasetEntry, TestEntry
+from bcbench.dataset.dataset_loader import load_dataset_entries, load_ext_dataset_entries
from bcbench.dataset.reviewer import run_dataset_reviewer
__all__ = [
"DatasetEntry",
+ "ExtensibilityDatasetEntry",
"TestEntry",
"load_dataset_entries",
+ "load_ext_dataset_entries",
"run_dataset_reviewer",
]
diff --git a/src/bcbench/dataset/dataset_entry.py b/src/bcbench/dataset/dataset_entry.py
index 76a1361a0..94697c759 100644
--- a/src/bcbench/dataset/dataset_entry.py
+++ b/src/bcbench/dataset/dataset_entry.py
@@ -11,7 +11,7 @@
_config = get_config()
-__all__ = ["DatasetEntry", "TestEntry"]
+__all__ = ["DatasetEntry", "ExtensibilityDataset", "ExtensibilityDatasetEntry", "TestEntry"]
class TestEntry(BaseModel):
@@ -126,3 +126,47 @@ def extract_project_name(self) -> str:
# Fallback to the last meaningful part
return parts[-1] if parts else ""
+
+
+class ExtensibilityDataset(BaseModel):
+ entries: list[ExtensibilityDatasetEntry]
+
+
+class ExtensibilityDatasetEntry(BaseModel):
+ """Representation of a Business Central benchmark dataset entry."""
+
+ repo: str | None = None
+ base_commit: str | None = None
+ environment_setup_version: str | None = None
+ instance_id: str
+ input: dict
+ expected: dict
+ project_paths: list[str] = []
+
+ @model_validator(mode="after")
+ def populate_defaults(self) -> ExtensibilityDatasetEntry:
+ """Populate default values from default_setup.json if not set."""
+ defaults_path = _config.paths.ext_dataset_default_setup_path
+
+ if defaults_path.exists():
+ defaults = json.loads(defaults_path.read_text(encoding="utf-8"))
+
+ if self.repo is None:
+ self.repo = defaults.get("repo")
+
+ if self.base_commit is None:
+ self.base_commit = defaults.get("base_commit")
+
+ if self.environment_setup_version is None:
+ self.environment_setup_version = defaults.get("environment_setup_version")
+
+ if not self.project_paths:
+ self.project_paths = defaults.get("project_paths", [])
+ return self
+
+ def get_task(self) -> str:
+ """Get the full task description including hints."""
+ return json.dumps(self.input)
+
+ def extract_project_name(self) -> str:
+ return ""
diff --git a/src/bcbench/dataset/dataset_loader.py b/src/bcbench/dataset/dataset_loader.py
index 8eb1b3f12..ccd42987f 100644
--- a/src/bcbench/dataset/dataset_loader.py
+++ b/src/bcbench/dataset/dataset_loader.py
@@ -2,10 +2,12 @@
from pathlib import Path
-from bcbench.dataset.dataset_entry import DatasetEntry
+import yaml
+
+from bcbench.dataset.dataset_entry import DatasetEntry, ExtensibilityDataset, ExtensibilityDatasetEntry
from bcbench.exceptions import EntryNotFoundError
-__all__ = ["load_dataset_entries"]
+__all__ = ["load_dataset_entries", "load_ext_dataset_entries"]
def load_dataset_entries(dataset_path: Path, entry_id: str | None = None, random: int | None = None) -> list[DatasetEntry]:
@@ -49,3 +51,30 @@ def load_dataset_entries(dataset_path: Path, entry_id: str | None = None, random
return random_module.sample(entries, min(random, len(entries)))
return entries
+
+
+def load_ext_dataset_entries(dataset_path: Path, entry_id: str | None = None) -> list[ExtensibilityDatasetEntry]:
+ """
+    Load extensibility dataset entries from a YAML file.
+
+ Examples:
+ # Load a single entry by ID
+ entries = load_ext_dataset_entries(path, entry_id="issue-29447")
+ """
+ if not dataset_path.exists():
+ raise FileNotFoundError(f"Dataset file not found: {dataset_path}")
+
+ with open(dataset_path, encoding="utf-8") as file:
+ entries: list[ExtensibilityDatasetEntry] = ExtensibilityDataset(**yaml.safe_load(file)).entries
+
+    if entry_id:
+        # Return only the entry whose instance_id matches.
+        for entry in entries:
+            if entry.instance_id == entry_id:
+                return [entry]
+        # The requested id was not found in the dataset file.
+        raise EntryNotFoundError(entry_id)
+
+    # No id filter requested: return every entry,
+    # in the order they appear in the YAML file.
+    return entries
diff --git a/src/bcbench/evaluate/__init__.py b/src/bcbench/evaluate/__init__.py
index 79c109648..7f78bc81d 100644
--- a/src/bcbench/evaluate/__init__.py
+++ b/src/bcbench/evaluate/__init__.py
@@ -1,5 +1,6 @@
"""Evaluation module for running pipelines and creating results."""
from bcbench.evaluate.base import EvaluationPipeline, create_pipeline
+from bcbench.evaluate.extensibility import compare_extensibility_output
-__all__ = ["EvaluationPipeline", "create_pipeline"]
+__all__ = ["EvaluationPipeline", "compare_extensibility_output", "create_pipeline"]
diff --git a/src/bcbench/evaluate/base.py b/src/bcbench/evaluate/base.py
index 87ec251b2..990c56c4d 100644
--- a/src/bcbench/evaluate/base.py
+++ b/src/bcbench/evaluate/base.py
@@ -104,6 +104,7 @@ def save_result(self, context: EvaluationContext, result: BaseEvaluationResult)
def create_pipeline(category: EvaluationCategory) -> EvaluationPipeline:
"""Factory function to create evaluation pipeline based on category."""
from bcbench.evaluate.bugfix import BugFixPipeline
+ from bcbench.evaluate.extensibility import ExtensibilityPipeline
from bcbench.evaluate.testgeneration import TestGenerationPipeline
match category:
@@ -113,6 +114,9 @@ def create_pipeline(category: EvaluationCategory) -> EvaluationPipeline:
case EvaluationCategory.TEST_GENERATION:
logger.info(f"Using TestGenerationPipeline for category: {category}")
return TestGenerationPipeline()
+ case EvaluationCategory.EXTENSIBILITY_REQUEST:
+ logger.info(f"Using ExtensibilityPipeline for category: {category}")
+ return ExtensibilityPipeline()
case _:
raise ValueError(f"Unknown evaluation category: {category}")
raise RuntimeError("Unreachable: no pipeline returned")
diff --git a/src/bcbench/evaluate/extensibility.py b/src/bcbench/evaluate/extensibility.py
new file mode 100644
index 000000000..199f1a26d
--- /dev/null
+++ b/src/bcbench/evaluate/extensibility.py
@@ -0,0 +1,214 @@
+import json
+import os
+from collections.abc import Callable
+
+from autoevals import LLMClassifier
+
+from bcbench.config import get_config
+from bcbench.dataset import ExtensibilityDatasetEntry
+from bcbench.evaluate.base import EvaluationPipeline
+from bcbench.logger import get_logger, github_log_group
+from bcbench.operations.setup_operations import setup_repo_prebuild
+from bcbench.results.extensibility import ExtensibilityResult
+from bcbench.types import EvaluationContext, ExtAgentMetrics
+
+logger = get_logger(__name__)
+_config = get_config()
+
+__all__ = ["ExtensibilityPipeline", "compare_extensibility_output"]
+
+
+class IssueStateMatch:
+ def __call__(self, *, expected: str, output: dict, **kwargs) -> bool:
+        output_state = output.get("state_of_issue")
+ return expected == output_state
+
+
+def _labels_match(expected: dict, output: dict) -> bool:
+ expected_labels_str = expected.get("labels", "")
+ expected_labels = {label.strip().lower() for label in expected_labels_str.split(",")} if expected_labels_str else set()
+ output_labels = {label.lower() for label in output.get("labels", [])}
+ return expected_labels == output_labels
+
+
+def _create_github_models_client():
+ import subprocess
+
+ from openai import OpenAI
+
+ token = os.environ.get("GITHUB_TOKEN") or subprocess.check_output(["gh", "auth", "token"], text=True).strip()
+
+ return OpenAI(
+ base_url="https://models.github.ai/inference",
+ api_key=token,
+ )
+
+
+class IssueCommentMatch:
+ def __init__(self, **kwargs):
+ client = _create_github_models_client()
+ self._classifier = LLMClassifier(
+ name=self.__class__.__name__,
+ model="openai/gpt-4.1",
+ choice_scores={"Y": 1.0, "N": 0.0},
+ client=client,
+ prompt_template="""You are evaluating whether a *generated* GitHub BC extensibility issue comment is an acceptable
+substitute for the *expected* comment, given the original issue.
+
+Consider:
+- Does the generated comment correctly address the same concern?
+- Is it at least as helpful and specific as the expected one?
+- Is it technically accurate w.r.t. the issue description?
+
+Here is the data:
+[Issue title]
+{{input.title}}
+
+[Issue body]
+{{input.description}}
+
+[Issue comments]
+{{input.comments}}
+
+[Expected comments]
+{{expected.comments}}
+
+[Model (generated) comment]
+{{output.comment}}
+
+Respond with a single letter:
+
+Y - The model comment is an adequate replacement for the expected comment.
+N - The model comment is not an adequate replacement.
+""",
+ )
+
+ def __call__(self, *, input: str | dict, output: dict, expected: dict, **kwargs):
+ return self._classifier(
+ input=json.loads(input) if isinstance(input, str) else input,
+ output=output,
+ expected=expected,
+ **kwargs,
+ )
+
+
+def compare_extensibility_output(
+ entry: ExtensibilityDatasetEntry,
+ metrics: ExtAgentMetrics | None,
+ *,
+ run_comment_eval: bool = True,
+) -> tuple[bool, list[str]]:
+ resolved = False
+ error_messages: list[str] = []
+
+ expected = entry.expected
+ input_data = entry.get_task()
+
+ if not metrics or not metrics.json_output:
+ error_messages.append("Agent did not produce JSON output")
+ logger.warning(error_messages[-1])
+ return False, error_messages
+
+ try:
+ agent_output = metrics.json_output
+ if isinstance(agent_output, str):
+ agent_output = json.loads(agent_output)
+
+ output = {
+ "state_of_issue": agent_output.get("state_of_issue"),
+ "labels": agent_output.get("labels_to_apply", []),
+ "comment": agent_output.get("comment_to_post", ""),
+ }
+
+ logger.info(f"Expected: {expected}")
+ logger.info(f"Agent output: {output}")
+
+ # Issue state
+ expected_state = expected.get("state", "open")
+ state_ok = IssueStateMatch()(expected=expected_state, output=output)
+ logger.info(f" IssueStateMatch: {'PASS' if state_ok else 'FAIL'} (expected '{expected_state}', got '{output.get('state_of_issue')}')")
+
+ # Labels
+ labels_ok = _labels_match(expected=expected, output=output)
+ logger.info(f" LabelsMatch: {'PASS' if labels_ok else 'FAIL'} (expected '{expected.get('labels', '')}', got '{', '.join(output.get('labels', []))}')")
+
+ # Comment (LLM judge — may fail without API key)
+ comment_ok = False
+ comment_score = 0.0
+ expected_comment = expected.get("comments", "")
+ generated_comment = output.get("comment", "")
+ if not expected_comment:
+ comment_ok = not generated_comment
+ comment_score = 1.0 if comment_ok else 0.0
+ logger.info(f" CommentMatch: expected empty, generated {'empty' if comment_ok else 'non-empty'}")
+ elif run_comment_eval:
+ try:
+ comment_result = IssueCommentMatch()(input=input_data, expected=expected, output=output)
+ comment_score = comment_result.score if comment_result else 0.0
+ comment_ok = comment_score == 1.0
+ except Exception as llm_err:
+ logger.warning(f"IssueCommentMatch evaluator failed: {llm_err}")
+ error_messages.append(f"Comment eval error: {llm_err}")
+ else:
+ logger.info(" CommentMatch: skipped (run_comment_eval=False)")
+ comment_ok = True # don't penalize when skipped
+
+ logger.info(f" CommentMatch: {comment_score}")
+
+ # Collect errors
+ if not state_ok:
+ error_messages.append(f"IssueState: expected '{expected_state}', got '{output.get('state_of_issue')}'")
+ if not labels_ok:
+ error_messages.append(f"Labels: expected {expected.get('labels')}, got {output.get('labels')}")
+        if not comment_ok and not any("Comment eval error" in msg for msg in error_messages):
+ error_messages.append(f"Comment: score {comment_score}")
+
+ resolved = state_ok and labels_ok and comment_ok
+
+ if resolved:
+ logger.info("✓ All evaluators passed")
+ else:
+ logger.warning(f"✗ Some evaluators failed: {error_messages}")
+
+ except (json.JSONDecodeError, KeyError, TypeError) as e:
+ error_messages.append(f"Failed to parse/validate JSON output: {e}")
+ logger.error(error_messages[-1])
+
+ return resolved, error_messages
+
+
+class ExtensibilityPipeline(EvaluationPipeline):
+ def setup(self, context: EvaluationContext) -> None:
+ setup_repo_prebuild(context.entry, context.repo_path)
+
+ def run_agent(self, context: EvaluationContext, agent_runner: Callable) -> None:
+ with github_log_group(f"{context.agent_name} -- Entry: {context.entry.instance_id}"):
+ context.metrics, context.experiment = agent_runner(context)
+
+ def evaluate(self, context: EvaluationContext) -> None:
+ if isinstance(context.entry, ExtensibilityDatasetEntry):
+ ext_metrics = context.metrics if isinstance(context.metrics, ExtAgentMetrics) else None
+ resolved, error_messages = compare_extensibility_output(context.entry, ext_metrics)
+ else:
+ resolved, error_messages = False, ["Entry is not an ExtensibilityDatasetEntry"]
+
+ # Extract json_output string for the result
+ json_output_str: str | None = None
+ if context.metrics and isinstance(context.metrics, ExtAgentMetrics):
+ json_output_str = context.metrics.json_output
+
+ # Create result based on validation
+ error_summary = "; ".join(error_messages) if error_messages else "Validation failed"
+
+ if resolved:
+ result = ExtensibilityResult.create_success(context, "", json_output=json_output_str)
+ logger.info(f"✓ Successfully validated {context.entry.instance_id}")
+ else:
+ result = ExtensibilityResult.create_test_failure(context, "", error_msg=error_summary, json_output=json_output_str)
+ logger.warning(f"✗ Validation failed for {context.entry.instance_id}: {error_summary}")
+
+ if result is not None:
+ result.save(context.result_dir, f"{context.entry.instance_id}{_config.file_patterns.result_pattern}")
+ else:
+ logger.error(f"No result generated for {context.entry.instance_id}")
+ raise RuntimeError(f"No result generated for {context.entry.instance_id}")
diff --git a/src/bcbench/results/base.py b/src/bcbench/results/base.py
index a1344565b..dff374803 100644
--- a/src/bcbench/results/base.py
+++ b/src/bcbench/results/base.py
@@ -112,10 +112,11 @@ def create_result_from_json(payload: dict[str, Any]) -> BaseEvaluationResult:
payload: Dictionary containing result data
Returns:
- BugFixResult or TestGenerationResult instance based on category
+ BugFixResult or TestGenerationResult or ExtensibilityResult instance based on category
"""
# Import here to avoid circular dependencies
from bcbench.results.bugfix import BugFixResult
+ from bcbench.results.extensibility import ExtensibilityResult
from bcbench.results.testgeneration import TestGenerationResult
category = EvaluationCategory(payload["category"])
@@ -125,5 +126,7 @@ def create_result_from_json(payload: dict[str, Any]) -> BaseEvaluationResult:
return BugFixResult.model_validate(payload)
case EvaluationCategory.TEST_GENERATION:
return TestGenerationResult.model_validate(payload)
+ case EvaluationCategory.EXTENSIBILITY_REQUEST:
+ return ExtensibilityResult.model_validate(payload)
case _:
raise ValueError(f"Unknown evaluation category: {category}")
diff --git a/src/bcbench/results/bceval_export.py b/src/bcbench/results/bceval_export.py
index 043e0f84d..a070981c3 100644
--- a/src/bcbench/results/bceval_export.py
+++ b/src/bcbench/results/bceval_export.py
@@ -6,9 +6,10 @@
from pathlib import Path
from typing import Any
-from bcbench.dataset import DatasetEntry, load_dataset_entries
+from bcbench.dataset import DatasetEntry, ExtensibilityDatasetEntry, load_dataset_entries, load_ext_dataset_entries
from bcbench.logger import get_logger
from bcbench.results.base import BaseEvaluationResult
+from bcbench.results.extensibility import ExtensibilityResult
from bcbench.results.testgeneration import TestGenerationResult
from bcbench.types import EvaluationCategory
@@ -17,7 +18,13 @@
def write_bceval_results(results: list[BaseEvaluationResult], out_dir: Path, run_id: str, dataset_path: Path, output_filename: str) -> None:
"""Write results into a JSONL file for bceval consumption."""
- dataset_entries: list[DatasetEntry] = load_dataset_entries(dataset_path)
+
+ is_extensibility = dataset_path.suffix.lower() == ".yaml"
+
+ if is_extensibility:
+ dataset_entries: list[ExtensibilityDatasetEntry] = load_ext_dataset_entries(dataset_path)
+ else:
+ dataset_entries: list[DatasetEntry] = load_dataset_entries(dataset_path)
output_file = out_dir / output_filename
with open(output_file, "w") as f:
@@ -28,7 +35,10 @@ def write_bceval_results(results: list[BaseEvaluationResult], out_dir: Path, run
logger.error(f"No matching dataset entry found for instance_id: {result.instance_id}")
continue
- input, expected = get_info_from_dataset_entry(matching_entries[0], result.category)
+ if is_extensibility:
+ input, expected = get_info_from_dataset_entry_ext(matching_entries[0])
+ else:
+ input, expected = get_info_from_dataset_entry(matching_entries[0], result.category)
metadata: dict[str, Any] = {
"model": result.model,
@@ -49,11 +59,14 @@ def write_bceval_results(results: list[BaseEvaluationResult], out_dir: Path, run
metadata["pre_patch_failed"] = result.pre_patch_failed
metadata["post_patch_passed"] = result.post_patch_passed
+ if isinstance(result, ExtensibilityResult):
+ metadata["json_output"] = result.json_output
+
bceval_result = {
"id": result.instance_id,
"input": input,
"expected": expected,
- "output": result.generated_patch,
+            "output": result.json_output if isinstance(result, ExtensibilityResult) else result.generated_patch,
"context": "",
"metadata": metadata,
"tags": [],
@@ -80,3 +93,15 @@ def get_info_from_dataset_entry(entry: DatasetEntry, category: EvaluationCategor
return entry.get_task(), entry.test_patch
case _:
raise ValueError(f"Unsupported evaluation category: {category}")
+
+
+def get_info_from_dataset_entry_ext(entry: ExtensibilityDatasetEntry) -> tuple[str, str]:
+ """
+ Extract relevant info from ExtensibilityDatasetEntry for bceval results.
+
+ Args:
+ entry: The ExtensibilityDatasetEntry instance
+ Returns:
+ A tuple of (input, expected output)
+ """
+ return entry.get_task(), json.dumps(entry.expected)
diff --git a/src/bcbench/results/extensibility.py b/src/bcbench/results/extensibility.py
new file mode 100644
index 000000000..00e7c2bba
--- /dev/null
+++ b/src/bcbench/results/extensibility.py
@@ -0,0 +1,7 @@
+from bcbench.results.base import BaseEvaluationResult
+
+
+class ExtensibilityResult(BaseEvaluationResult):
+ """Result class for extensibility evaluation category."""
+
+ json_output: str | None = None
diff --git a/src/bcbench/types.py b/src/bcbench/types.py
index 6489414ae..e8afc4053 100644
--- a/src/bcbench/types.py
+++ b/src/bcbench/types.py
@@ -12,7 +12,7 @@
from bcbench.logger import get_logger
if TYPE_CHECKING:
- from bcbench.dataset import DatasetEntry
+ from bcbench.dataset import DatasetEntry, ExtensibilityDatasetEntry
__all__ = ["AgentMetrics", "AgentType", "EvaluationCategory", "EvaluationContext", "ExperimentConfiguration"]
@@ -41,6 +41,31 @@ class AgentMetrics(BaseModel):
tool_usage: dict[str, int] | None = None
+class ExtAgentMetrics(BaseModel):
+ """Metrics collected during extensibility agent execution.
+
+ Separates runtime execution data from experiment configuration.
+ """
+
+ model_config = ConfigDict(frozen=True)
+
+ # Total execution time in seconds
+ execution_time: float | None = None
+ llm_duration: float | None = None
+
+ turn_count: int | None = None
+
+ # Token usage from LLM calls
+ prompt_tokens: int | None = None
+ completion_tokens: int | None = None
+
+ # Tool usage statistics from agent logs
+ tool_usage: dict[str, int] | None = None
+
+ # JSON output produced by the extensibility agent
+ json_output: str | None = None
+
+
class ExperimentConfiguration(BaseModel):
"""Configuration for agent experiment execution.
@@ -98,6 +123,7 @@ def get_target_dir(self, repo_path: Path) -> Path:
class EvaluationCategory(str, Enum):
BUG_FIX = "bug-fix"
TEST_GENERATION = "test-generation"
+ EXTENSIBILITY_REQUEST = "extensibility-request"
# CODE_REVIEW = "code-review"
# EVENT_REQUEST = "event-request"
@@ -111,7 +137,7 @@ class EvaluationContext:
"""
# Core configuration
- entry: DatasetEntry
+ entry: DatasetEntry | ExtensibilityDatasetEntry
repo_path: Path
result_dir: Path
diff --git a/tests/test_custom_instructions.py b/tests/test_custom_instructions.py
index 1a3eb54a2..eb42d44a2 100644
--- a/tests/test_custom_instructions.py
+++ b/tests/test_custom_instructions.py
@@ -50,7 +50,7 @@ def test_setup_custom_instructions():
# Verify file content matches
if item.is_file():
- assert target_item.read_text() == item.read_text(), f"Content mismatch for {item.name}"
+ assert target_item.read_text(encoding="utf-8") == item.read_text(encoding="utf-8"), f"Content mismatch for {item.name}"
elif item.is_dir():
# For directories, verify all files match recursively
for source_file in item.rglob("*"):
diff --git a/tests/test_type_exhaustiveness.py b/tests/test_type_exhaustiveness.py
index a229628b8..1cc2d8e64 100644
--- a/tests/test_type_exhaustiveness.py
+++ b/tests/test_type_exhaustiveness.py
@@ -29,6 +29,8 @@ def test_all_categories_have_pipelines():
def test_all_categories_handled_in_get_info_from_dataset_entry(sample_dataset_entry_with_problem_statement: DatasetEntry):
for category in EvaluationCategory:
+ if category == EvaluationCategory.EXTENSIBILITY_REQUEST:
+ continue # Uses get_info_from_dataset_entry_ext with ExtensibilityDatasetEntry
input_text, expected_output = get_info_from_dataset_entry(sample_dataset_entry_with_problem_statement, category)
assert isinstance(input_text, str)
assert isinstance(expected_output, str)
diff --git a/uv.lock b/uv.lock
index d64faada9..e34f90816 100644
--- a/uv.lock
+++ b/uv.lock
@@ -140,11 +140,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
]
+[[package]]
+name = "autoevals"
+version = "0.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "chevron" },
+ { name = "jsonschema" },
+ { name = "polyleven" },
+ { name = "pyyaml" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/78/77/666c447a095eedc175f3ba986dcb4e925a0218c5cbe08ff07b7c95672770/autoevals-0.1.0.tar.gz", hash = "sha256:ae884fe6107dbd6e05d840f51c2dba7eccfa01449e5ee5e83b6b4589508b2aca", size = 56223, upload-time = "2026-02-13T23:16:05.368Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5a/84/9d64763498cf820f021ec19708c0a066f1ef5f28d36601bedb2e1cbbd1b3/autoevals-0.1.0-py3-none-any.whl", hash = "sha256:573ab490966fd5f2265dc4842d0bfd7b729ee121c86bd72db4440badb7264587", size = 61308, upload-time = "2026-02-13T23:16:03.734Z" },
+]
+
[[package]]
name = "bcbench"
version = "0.4.0"
source = { editable = "." }
dependencies = [
+ { name = "autoevals" },
{ name = "jsonschema" },
{ name = "mini-swe-agent" },
{ name = "numpy" },
@@ -176,6 +192,7 @@ dev = [
[package.metadata]
requires-dist = [
+ { name = "autoevals", specifier = ">=0.0.106" },
{ name = "jsonschema", specifier = ">=4.0" },
{ name = "mini-swe-agent", specifier = ">=1.0.0" },
{ name = "numpy", specifier = ">=2.3.5" },
@@ -309,6 +326,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
+[[package]]
+name = "chevron"
+version = "0.14.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/1f/ca74b65b19798895d63a6e92874162f44233467c9e7c1ed8afd19016ebe9/chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf", size = 11440, upload-time = "2021-01-02T22:47:59.233Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/52/93/342cc62a70ab727e093ed98e02a725d85b746345f05d2b5e5034649f4ec8/chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443", size = 11595, upload-time = "2021-01-02T22:47:57.847Z" },
+]
+
[[package]]
name = "click"
version = "8.3.0"
@@ -1370,6 +1396,41 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
+[[package]]
+name = "polyleven"
+version = "0.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/c7/e0b3bbe72e0003e5d02726e0d406ea47d523a2aec9c41d831817a8e0bce1/polyleven-0.11.0.tar.gz", hash = "sha256:d74d348387cf340051711c0dd6af993b4c264daa78470098de16f4a2b725785c", size = 6407, upload-time = "2026-02-09T09:41:49.87Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ce/16/5aec69609adc373f10087eb69b0b9d177ae721632715a86348b429030514/polyleven-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0cb8ed97b536f9aada3ad45169ee7768c426498bf3fa608a4eabd055dfef795e", size = 7425, upload-time = "2026-02-09T09:41:06.542Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/5b/0542c723aa83833a5090114bc4e5a8e60293873fe60ee8221a5888d87370/polyleven-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2f975ab8cb81fd8eb5a647a3cefb0bb80bc307920a9307f66ab4019d88370ed2", size = 7505, upload-time = "2026-02-09T09:41:07.445Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/8d/c317217734a5bd2011f1128c1a9056477a5148d8d95527fcab2fe3955876/polyleven-0.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16986fd58911d6075b5f63ea001197141145b7a6df48bc4ce4530e79227e74a2", size = 21035, upload-time = "2026-02-09T09:41:08.32Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/8f/8a3e6e4a68dbd9de564fd3d16eee90e3f807a4380fd7192f40af4be47175/polyleven-0.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6a814629cc0468f9800b1333414a3be08fda9c5ce6b63e97154a9d21732e590", size = 21509, upload-time = "2026-02-09T09:41:09.285Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/da/4097998bea845f0b3a67112200aa08c19d4da0a17d761b35484d695c21e2/polyleven-0.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:88a35ec93ec3d81a7347fd49db314a914798a144dca3d22946d18bba9b597dec", size = 20536, upload-time = "2026-02-09T09:41:10.211Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/71/67b7679ede99589ec749290d938693b87cdb6bb327b062c46d2129a5e6ec/polyleven-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:50bb7d68b790194d552ee1256a02e205486b27eb22ab333eeb0003e0271c4846", size = 20775, upload-time = "2026-02-09T09:41:11.692Z" },
+ { url = "https://files.pythonhosted.org/packages/e9/3e/6f7fad4fee748ba365cb3e1ba2e061a74e18d987eb554ead4757127df2ab/polyleven-0.11.0-cp313-cp313-win32.whl", hash = "sha256:ce264f6a9daa3265299d8ffcb180d8256517a8d9235613a3b267172da0bc1e06", size = 11629, upload-time = "2026-02-09T09:41:12.652Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/cc/4877913dec8fb4f968a070c894254db5811b62128d3a69b05bcd1305b5c3/polyleven-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:4648732c8ad3955c8d7b1aa015d92936a150475aaa97ce704fe0c8e7fa7e0c4f", size = 10841, upload-time = "2026-02-09T09:41:13.682Z" },
+ { url = "https://files.pythonhosted.org/packages/59/e2/039cc477ce73d6184e12cf6341ac200bc9f4c5428254c399015ec30392e1/polyleven-0.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:166f6c9b161c6af92ff201c734d6437bc7ef74a32dab306c5d47a0bdb7a82d9f", size = 9424, upload-time = "2026-02-09T09:41:14.545Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/cf/a02d74f965127adb6a8fbd5030e2c98335ef2f8e7452b12a882883b2053a/polyleven-0.11.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3c18b8e44e5d04f1ffa7d41eb68da553833ab8663b7cfb1a505d85676db5c797", size = 7482, upload-time = "2026-02-09T09:41:15.431Z" },
+ { url = "https://files.pythonhosted.org/packages/fe/74/dfa9e9891cd85e679f230c5e740cba11b0bb11bd9fb298657ccf048ff70e/polyleven-0.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7ab547adc0ac72a2852d37337a4a839d4e2f713940b0e8a944d45c528e5e6538", size = 7508, upload-time = "2026-02-09T09:41:16.365Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/ef/399ae8d21f7b348514b7ad3bd7b9d530bf195fb0a8ec63cf7af7d17a4071/polyleven-0.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5808f62874187dfd4e30de5dd5f42a660562ec95a87cc64d5455ba0f4be8f175", size = 21056, upload-time = "2026-02-09T09:41:17.226Z" },
+ { url = "https://files.pythonhosted.org/packages/21/60/7eb97286a6171dd794a0e5b261175e8bfeb99a2b566bd9b8848ebc97f6df/polyleven-0.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9deb75346b4177d5e69496791e6156f705d9059961ce8f9520a0dc96532f10f2", size = 21535, upload-time = "2026-02-09T09:41:18.137Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/bc/6fa59257c2138e33a858f10236a2a6b381b87f61251c1df468be7c666338/polyleven-0.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ef28c4c6cdc71a32f0478772d2f07b2cd412fe7950182033b1c36c8a481b0834", size = 20560, upload-time = "2026-02-09T09:41:19.04Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/2d/85be9c91d05cb0127586640108f3110f6a3a98c9478f84713d4771c49761/polyleven-0.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94832ff5d04022ba6038c2ca0c9ea6906330cde3a3b1761739d772647d01da33", size = 20814, upload-time = "2026-02-09T09:41:20.001Z" },
+ { url = "https://files.pythonhosted.org/packages/da/91/5a99ae6cf16ff55a94c5686871ed20b816ad1690f823494c76dc3ce0f54b/polyleven-0.11.0-cp314-cp314-win32.whl", hash = "sha256:e6182ea6142904ea50cf82e2955d922156b5fcf9a8279925f312961f16710a58", size = 11966, upload-time = "2026-02-09T09:41:20.946Z" },
+ { url = "https://files.pythonhosted.org/packages/48/ec/9c6fcdeb1dd436523f8e2275407f588d6a66a524d7a793f554957373769c/polyleven-0.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:bf82bb8601582da8f2248293c1e6f4cce2025c79fd64fccddf67dd8538655b55", size = 11100, upload-time = "2026-02-09T09:41:21.863Z" },
+ { url = "https://files.pythonhosted.org/packages/42/7f/1e59881a56a4963b4546c7b558ab7979daddff586001f18b80f1f66cece9/polyleven-0.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:45487a1e4a8415e4ed45e6720b2a3ad9d240336f7afa136a625b8f802a1880c2", size = 9624, upload-time = "2026-02-09T09:41:22.749Z" },
+ { url = "https://files.pythonhosted.org/packages/47/5a/5eaa75427f17d4cdf8e2139988a3ec6b841b6e077ebc1fccb754c1f8b55e/polyleven-0.11.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c518ced3e7c05de4efbd12fd7b61d6d574eb170f431e0415689d9f143fe552ee", size = 7490, upload-time = "2026-02-09T09:41:23.677Z" },
+ { url = "https://files.pythonhosted.org/packages/50/47/5dd5fa13d315e0d5dc3e41bbaa16306ea56e74929ad29df54d5c24a84dcc/polyleven-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:fa49732cdecd985241db9f78d5fdba7170ba6375d2bf9ad040b05127dc96b877", size = 7514, upload-time = "2026-02-09T09:41:24.55Z" },
+ { url = "https://files.pythonhosted.org/packages/75/aa/838f1bc632144f4f5820b9dbd31e0c64de41a7b0970b5cbe6fc02746090f/polyleven-0.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b2aada9dd04e84389d90790f359447447a499d6d86807697d80732ed45547a43", size = 21123, upload-time = "2026-02-09T09:41:25.401Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/a9/d6f32263b863dfffeed9a67e80b53476cd0089f202b0510a80eb07f7425b/polyleven-0.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94311ee39e2db957415eacb36b96ae26dcc427c260465324de45fb8c870d4661", size = 21627, upload-time = "2026-02-09T09:41:27.219Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/68/4dee05a4217a3eb1f85cbc915f5fa269d79b86d2a8384be68bcd21de37cc/polyleven-0.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:45cfb234fece0c9df73276788fa529a25f91abf97dd0d9aed4f1b713b6d530e3", size = 20635, upload-time = "2026-02-09T09:41:28.137Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/c2/8486bdaebf47e6b764e8be227a7d2898463f2b4d91443ecdeee9ebeca6bc/polyleven-0.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9aaed455f498172769fd88f83c27bb8f43e0583d7b27d6b343154d471ec2145e", size = 20870, upload-time = "2026-02-09T09:41:29.07Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/13/b827188b55108bd816110a6f60b78aee0db045a98bf7b1f2e7bfb60f4039/polyleven-0.11.0-cp314-cp314t-win32.whl", hash = "sha256:2a59849c327279902e8b396666f6998234aa82aacc47abc103d93babaad46203", size = 11917, upload-time = "2026-02-09T09:41:29.997Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/18/c909bde1d1db7ead33329b941b0050c93cab9b811e44b49d04adb8c5f0f8/polyleven-0.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6ba2dcf3aff2909bbf3bdd9c1749f8de207f023fbb2c0b1d681c6bf3e78ceef1", size = 11073, upload-time = "2026-02-09T09:41:31.371Z" },
+ { url = "https://files.pythonhosted.org/packages/78/cf/51f7a0fab2d65c2b6908872f26bb03bb7e2357d195f2a59aec1a27489106/polyleven-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:05207bb66da15a2dc5c530e2f5cb5f0588d0a7e79b3bd542965f9e06e3fb14fe", size = 9601, upload-time = "2026-02-09T09:41:32.235Z" },
+]
+
[[package]]
name = "pre-commit"
version = "4.3.0"