Added eval metric explanation

shenrunzhang · shenrunzhang · commit 111258a315d0 · 2025-11-18T16:52:49.000-06:00
diff --git a/src/pages/LeaderboardPage.tsx b/src/pages/LeaderboardPage.tsx
@@ -166,7 +166,7 @@ const LeaderboardPage: React.FC = () => {
             className={`metric-filter-btn ${activeMetric === 'optimalAcc' ? 'active' : ''}`}
             onClick={() => setActiveMetric('optimalAcc')}
           >
-            Opt. Acc
+            Opt. Acc.
           </button>
           <button
             className={`metric-filter-btn ${activeMetric === 'latency' ? 'active' : ''}`}
@@ -398,6 +398,46 @@ const LeaderboardPage: React.FC = () => {
               </div>
             </div>
 
+            <div className="metric-card">
+              <div className="metric-summary">
+                <h3>Accuracy Score</h3>
+                <p>The average correctness across all of our dataset's queries.</p>
+              </div>
+
+              <div className="metric-details">
+                <h4>Definition</h4>
+                <p>
+                We calculate accuracy as the average correctness of the answers generated by the router's selected models across all of our dataset's queries.
+                </p>
+
+
+                <p>
+                  <strong>Range:</strong> [0, 100]
+                </p>
+              </div>
+            </div>
+
+            <div className="metric-card">
+              <div className="metric-summary">
+                <h3>Cost/1k Queries</h3>
+                <p>Measures the cost incurred by a router’s routing decisions per 1000 queries.</p>
+              </div>
+
+              <div className="metric-details">
+                <h4>Definition</h4>
+                <p>
+                This is the average token cost incurred by the router's selected models for 1000 queries from our dataset.
+                <br />
+                We obtain the per-token cost of each model a router chooses
+                from the official API pricing published by that model's provider. Less popular models that are not served by commercial providers are deployed by us for our experiments.
+                In such cases, we approximate their costs using the pricing tiers published by commercial hosting
+                platforms.
+                </p>
+
+              </div>
+            </div>
+
+
             {/* 2️⃣ Optimal Selection Score */}
             <div className="metric-card">
               <div className="metric-summary">