diff --git a/dashboard/app/documentation/evaluation-metrics/gauges-with-context/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges-with-context/page.tsx
similarity index 56%
rename from dashboard/app/documentation/evaluation-metrics/gauges-with-context/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges-with-context/page.tsx
index fc6480ab4..1b2fe6405 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges-with-context/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges-with-context/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Card, CardContent } from "@/components/ui/card"
import { ac1GaugeSegments } from "@/components/ui/scorecard-evaluation"
@@ -6,11 +7,7 @@ import { Gauge, Segment } from "@/components/gauge"
import { GaugeThresholdComputer } from "@/utils/gauge-thresholds"
import EvaluationCard from '@/components/EvaluationCard'
import { Button as DocButton } from "@/components/ui/button"
-
-export const metadata: Metadata = {
- title: "Context-Aware Gauges - Plexus Documentation",
- description: "Detailed explanation of how Plexus uses context-aware Accuracy and Agreement gauges for robust evaluation."
-}
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
const createExampleScore = (
id: string,
@@ -55,6 +52,389 @@ const fixedAccuracyGaugeSegments: Segment[] = [
];
export default function GaugesWithContextPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+ );
+ }
+
+ // English content (default)
const articleTopicLabelerExampleData = {
id: 'article-topic-labeler',
score_name: 'Article Topic Labeler Performance',
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/accuracy/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/accuracy/page.tsx
similarity index 78%
rename from dashboard/app/documentation/evaluation-metrics/gauges/accuracy/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/accuracy/page.tsx
index d8d53156a..22008da21 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/accuracy/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/accuracy/page.tsx
@@ -1,6 +1,8 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
// It might be useful to add a simple visual example if desired,
// but for now, we'll focus on textual explanation and links.
// import { Gauge, Segment } from "@/components/gauge"
@@ -14,12 +16,7 @@ import {
alwaysSafeEmailClassDistribution,
alwaysSafeEmailConfusionMatrix,
alwaysSafeEmailPredictedDistribution
-} from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Accuracy Gauge - Plexus Documentation",
- description: "Understanding the Plexus Accuracy gauge and how its dynamic contextualization aids in interpreting classification performance."
-}
+} from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
// Example segments if we decide to add a simple visual:
// const fixedAccuracyGaugeSegments: Segment[] = [
@@ -56,6 +53,69 @@ const AccuracyGauge = ({ value, title, segments }: {
// ];
export default function AccuracyGaugePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+ );
+ }
// Added segment calculations for class number visualization
const thresholds2Class = GaugeThresholdComputer.computeThresholds({ C1: 1, C2: 1}); // Simplified for example
const dynamicSegments2Class = GaugeThresholdComputer.createSegments(thresholds2Class);
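// For the balanced two-class distribution above ({ C1: 1, C2: 1 }), random
// guessing is right about 50% of the time; the computed thresholds encode that
// chance baseline so the lowest gauge segment starts near 0.5 (inferred from
// the page's narrative about chance baselines, not from GaugeThresholdComputer's source).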
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/agreement/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/agreement/page.tsx
similarity index 50%
rename from dashboard/app/documentation/evaluation-metrics/gauges/agreement/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/agreement/page.tsx
index 209992db0..09ac65642 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/agreement/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/agreement/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
import { Gauge, Segment } from "@/components/gauge"
@@ -12,12 +13,8 @@ import {
alwaysSafeEmailPredictedDistribution,
// We'll use the data for the 'Always Safe' example to show AC1 in action
// We don't need the accuracy-specific segments here, as ac1GaugeSegments are standard.
-} from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Agreement Gauge - Plexus Documentation",
- description: "Understanding the Plexus Agreement Gauge (e.g., Gwet's AC1) and how it provides a chance-corrected measure of performance."
-}
+} from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
// Component to display a standalone Agreement Gauge for illustration
const AgreementGaugeDisplay = ({ value, title }: {
@@ -37,6 +34,151 @@ const AgreementGaugeDisplay = ({ value, title }: {
);
export default function AgreementGaugePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+ The Plexus Agreement Gauge
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx
similarity index 79%
rename from dashboard/app/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx
index a7f5a6e59..01839ac3f 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-imbalance/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
import EvaluationCard from '@/components/EvaluationCard'
@@ -24,12 +25,8 @@ import {
articleTopicLabelerConfusionMatrix,
articleTopicLabelerPredictedDistribution,
articleTopicLabelerFullContextSegments
-} from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Understanding Class Imbalance in Evaluations - Plexus Documentation",
- description: "How class imbalance can distort accuracy metrics and how Plexus provides clearer insights for imbalanced datasets."
-}
+} from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
const AccuracyGauge = ({ value, title, segments }: {
value: number,
@@ -47,6 +44,79 @@ const AccuracyGauge = ({ value, title, segments }: {
);
export default function ClassImbalanceProblemPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
+ El Desafío: Interpretando la Precisión con Datos Desbalanceados
+
+ Podrías estar aquí porque una evaluación en Plexus resaltó un desbalance de clases en tu conjunto de datos. Esta es una situación común donde algunas categorías (o clases) de datos son mucho más frecuentes que otras. Por ejemplo, en un conjunto de datos de emails, los emails "normales" podrían superar vastamente en número a los emails "spam". O, en manufactura, los items no defectuosos podrían ser mucho más comunes que los defectuosos.
+
+
+ Aunque tener datos desbalanceados no es un error en sí mismo, puede hacer que los puntajes de precisión tradicionales sean altamente engañosos. Exploremos por qué el desbalance de clases es un factor crítico en entender el verdadero rendimiento de tu clasificador y cómo interpretar correctamente las métricas de evaluación en estos escenarios.
+
+
+
+
+ Cuando la Precisión Engaña: La Trampa de la Clase Mayoritaria
+
+ El problema principal con el desbalance de clases es que un clasificador puede lograr un puntaje alto de precisión simplemente prediciendo siempre la clase mayoritaria, incluso si no ha aprendido nada sobre distinguir entre clases, especialmente las raras. Esto crea una falsa sensación de buen rendimiento.
+
+
+
+
+
+ Precisión Cruda: {alwaysSafeEmailAccuracy}% !
+
+
+
+ ¡Parece Genial, Pero Tiene Fallas Críticas!
+
+ Este filtro detecta CERO emails prohibidos. Parece preciso solo porque etiqueta correctamente la clase mayoritaria del 97%.
+
+
+ >
+ }
+ />
+
+
+ En el ejemplo anterior, un filtro que marca cada email como "seguro" logra 97% de precisión. ¡Esto suena impresionante! Sin embargo, falla completamente en su tarea principal: identificar contenido prohibido. La alta precisión viene puramente del desbalance de datos.
+
+
+
+
+ Percepción Clave: El Desbalance Infla la Precisión Ingenua
+
+ Los puntajes de precisión cruda son profundamente engañosos con datos desbalanceados. Una alta precisión podría simplemente reflejar la proporción de la clase mayoritaria, no capacidad predictiva genuina a través de todas las clases. Lo que parece un rendimiento excelente podría indicar un modelo que ha aprendido muy poco, o peor, es completamente inefectivo para clases minoritarias.
+
+
+
+
+
+
+ Vista General de Métricas de Evaluación
+
+
+ Detallado: Indicadores con Contexto
+
+
+ Ver Más Ejemplos de Métricas
+
+
+
+
+ );
+ }
+
const imbal_scenario1_dist = { C1: 50, C2: 50 }; // Balanced
const imbal_scenario1_thresholds = GaugeThresholdComputer.computeThresholds(imbal_scenario1_dist);
const imbal_scenario1_segments = GaugeThresholdComputer.createSegments(imbal_scenario1_thresholds);
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/class-number/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-number/page.tsx
similarity index 60%
rename from dashboard/app/documentation/evaluation-metrics/gauges/class-number/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-number/page.tsx
index 20e014afb..851ebd77e 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/class-number/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/class-number/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
import EvaluationCard from '@/components/EvaluationCard'
@@ -22,12 +23,8 @@ import {
articleTopicLabelerConfusionMatrix,
articleTopicLabelerPredictedDistribution,
articleTopicLabelerFullContextSegments,
-} from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Interpreting Accuracy with Varying Number of Classes - Plexus Documentation",
- description: "Understanding how the number of classes impacts accuracy interpretation and how Plexus addresses this challenge."
-}
+} from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
// Simplified AccuracyGauge component for this page if needed, or use the main one if it fits.
// For now, let's assume we might want a local one for focused examples.
@@ -47,6 +44,142 @@ const AccuracyGaugeDisplay = ({ value, title, segments }: {
)
export default function NumberOfClassesProblemPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
+ El Desafío: Número de Clases y Precisión
+
+ Los puntajes de precisión cruda pueden ser engañosos. Uno de los factores más significativos que afecta cómo interpretamos la precisión es el número de clases que un clasificador está tratando de predecir. Esta página se enfoca específicamente en este desafío y cómo Plexus ayuda a proporcionar claridad.
+
+
+
+
+ Por Qué Importa el Número de Clases: Un Cuento de Dos Juegos
+
+ Imagina dos juegos de adivinanza. En el primero, predices el lanzamiento de una moneda (2 opciones: Cara o Cruz). En el segundo, predices el palo de una carta sacada (4 opciones: Corazones, Diamantes, Tréboles, Espadas).
+ Si adivinas aleatoriamente en ambos juegos, tu precisión esperada es vastamente diferente:
+
+
+ Lanzamiento de Moneda (2 Clases): Tienes 1 posibilidad entre 2 (50%) de estar correcto aleatoriamente.
+ Palo de Carta (4 Clases): Tienes 1 posibilidad entre 4 (25%) de estar correcto aleatoriamente.
+
+
+ Esta ilustración simple resalta un problema central: un puntaje de precisión cruda (ej., 60%) significa cosas muy diferentes dependiendo del número de clases.
+ 60% de precisión es solo ligeramente mejor que la casualidad para un lanzamiento de moneda, pero significativamente mejor que la casualidad para predecir un palo de carta.
+
+
+
+
+
+
+ {fairCoinData.accuracy}% de precisión.
+
+ El indicador contextual muestra que esto está cerca del nivel de casualidad del 50% para 2 clases.
+
+
+ >
+ }
+ />
+
+
+
+ {cardSuitData.accuracy}% de precisión.
+
+ El indicador contextual muestra que esto está cerca del nivel de casualidad del 25% para 4 clases.
+
+
+ >
+ }
+ />
+
+
+
+
+ Percepción Clave: La Línea Base Cambia con el Conteo de Clases
+
+ La línea base de casualidad aleatoria baja conforme el número de clases aumenta (asumiendo clases balanceadas). Una precisión del 50% es pobre para un problema de 2 clases pero excelente para un problema de 10 clases (donde la casualidad es 10%). Sin entender esta línea base cambiante, la precisión cruda es ininterpretable.
+
+
+
+
+
+ Visualizando el Impacto: 65% de Precisión a Través de Diferentes Conteos de Clases
+
+ Cada escenario abajo muestra una precisión del 65%. El indicador izquierdo usa una escala fija, no contextualizada. El indicador derecho ajusta dinámicamente sus segmentos coloreados basado en el número de clases (asumiendo una distribución balanceada para esta ilustración), proporcionando contexto inmediato.
+
+
+
+
+ Dos Clases
+
+
+
+ Contextual: 65% es 'convergiendo', justo arriba del 50% de casualidad.
+
+
+
+ Tres Clases
+
+
+
+ Contextual: 65% es 'viable', bien arriba del ~33% de casualidad.
+
+
+
+ Cuatro Clases
+
+
+
+ Contextual: 65% es 'genial', significativamente arriba del 25% de casualidad.
+
+
+
+ Doce Clases
+
+
+
+ Contextual: 65% es sobresaliente, superando por mucho el ~8.3% de casualidad.
+
+
+
+
+ La Conclusión
+
+ El mismo puntaje de precisión del 65% transiciona de mediocre a excelente conforme el número de clases aumenta. Los indicadores fijos son engañosos. Los indicadores contextuales, que se adaptan al número de clases, son esenciales para la interpretación correcta.
+
+
+
+
+
+
+ Vista General de Métricas de Evaluación
+
+
+ Detallado: Indicadores con Contexto
+
+
+ Ver Más Ejemplos de Métricas
+
+
+
+
+ );
+ }
+
return (
The Challenge: Number of Classes and Accuracy
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/precision/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/precision/page.tsx
similarity index 54%
rename from dashboard/app/documentation/evaluation-metrics/gauges/precision/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/precision/page.tsx
index 7b8b29d2e..e0b851b62 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/precision/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/precision/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
import { Gauge, Segment } from "@/components/gauge"
@@ -13,12 +14,8 @@ import {
alwaysSafeEmailConfusionMatrix, // Will be for 'Always Safe', need to derive for 'Always Prohibited'
alwaysSafeEmailPredictedDistribution,
}
- from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Precision Gauge - Plexus Documentation",
- description: "Understanding the Plexus Precision Gauge and its role in evaluating classifier performance, especially concerning False Positives."
-}
+ from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
// Component to display a standalone Precision Gauge for illustration
const PrecisionGaugeDisplay = ({ value, title }: {
@@ -66,6 +63,121 @@ const alwaysProhibitedEmailData = {
};
export default function PrecisionGaugePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
+ El Indicador de Precisión de Plexus
+
+ La precisión es una métrica clave que responde la pregunta: "De todos los items que el clasificador etiquetó como positivos, ¿qué proporción eran realmente positivos?" Mide la exactitud o corrección de las predicciones positivas. Un puntaje de precisión alto indica que el clasificador tiene una tasa baja de Falsos Positivos (FP).
+
+
+
+
+ ¿Por Qué es Importante la Precisión?
+
+ Enfocarse en la precisión es crucial en escenarios donde el costo de un Falso Positivo es alto. Un Falso Positivo ocurre cuando el modelo predice incorrectamente una instancia negativa como positiva. Ejemplos incluyen:
+
+
+ Detección de Spam: Marcar un email legítimo (ham) como spam. Esto podría llevar a que los usuarios pierdan comunicaciones importantes.
+ Detección de Fraude: Marcar incorrectamente una transacción legítima como fraudulenta, causando inconvenientes y pérdida potencial de confianza para el usuario.
+ Moderación de Contenido: Remover o marcar erróneamente contenido apropiado como inapropiado, llevando a preocupaciones de censura o frustración del usuario.
+
+
+ En estos casos, se desea alta precisión para minimizar estos errores costosos, incluso si significa que algunas instancias positivas podrían perderse (menor recuperación).
+
+
+
+
+ Cómo Funciona el Indicador de Precisión de Plexus
+
+ El Indicador de Precisión en Plexus muestra el puntaje de precisión calculado, yendo de 0% a 100%. La fórmula es:
+
+
+ Precisión = Verdaderos Positivos / (Verdaderos Positivos + Falsos Positivos)
+
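+ {/* Illustrative arithmetic with hypothetical counts: 85 true positives (VP)
+ and 15 false positives (FP) give Precisión = 85 / (85 + 15) = 0.85, i.e. 85%,
+ matching the example gauge shown below. */}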
+
+ Los segmentos visuales en el Indicador de Precisión (ej., colores indicando niveles de rendimiento) típicamente representan puntos de referencia generales de rendimiento. Un puntaje de precisión del 90% se entiende generalmente como que 9 de cada 10 items marcados como positivos por el modelo eran realmente positivos. Mientras que el desbalance extremo de clases puede hacer que lograr alta precisión sea desafiante, la interpretación del puntaje de precisión en sí mismo es bastante directa. Los segmentos ayudan a categorizar visualmente este rendimiento (ej., pobre, regular, bueno, excelente).
+
+
+
+ Ejemplo: Indicador de Precisión
+
+
+ Una precisión del 85% indica que el 85% de los items predichos como positivos eran realmente positivos.
+
+
+
+
+
+ Precisión en Acción: Escenarios de Ejemplo
+
+ Veamos cómo se desarrolla la precisión en diferentes escenarios usando nuestro contexto de filtro de email, donde "Prohibido" es la clase positiva que queremos detectar.
+
+
+
+
+
+
+
+ El filtro "Siempre Prohibido" tiene una precisión de solo 3%. Esto significa que por cada 100 emails que marca como prohibidos, 97 de ellos son realmente seguros. Esto sería inutilizable en la práctica debido al número abrumador de falsas alarmas, a pesar de su recuperación perfecta para la clase prohibida.
+
+
+
+
+ Contraste: El Objetivo de Alta Precisión
+
+ En un buen filtro de spam (donde "Spam" es la clase positiva), el objetivo sería muy alta precisión. Quieres estar muy seguro de que si un email está marcado como Spam, realmente es Spam. Esto minimiza la oportunidad de que emails importantes, no-spam se pierdan.
+
+
+
+
+
+ Precisión y Recuperación: El Intercambio
+
+ La precisión y la Recuperación a menudo tienen una relación inversa. Mejorar una puede a veces llevar a una disminución en la otra. Por ejemplo, si haces que un clasificador sea más agresivo en identificar instancias positivas (para aumentar la recuperación), podría empezar a cometer más errores en instancias negativas, así reduciendo la precisión.
+
+
+ Entender este intercambio es clave. La elección de si optimizar para precisión o recuperación (o un balance como el puntaje F1) depende del problema específico y los costos relativos de Falsos Positivos versus Falsos Negativos.
+
+
+
+
+ Puntos Clave para la Precisión
+
+ La precisión mide la exactitud de las predicciones positivas: VP / (VP + FP).
+ Alta precisión significa una tasa baja de Falsos Positivos.
+ Crucial cuando el costo de los Falsos Positivos es alto.
+ El Indicador de Precisión de Plexus muestra este puntaje de 0-100%.
+ A menudo se considera en conjunto con la Recuperación debido a su intercambio.
+
+
+
+
+
+ Volver a la Vista General de Métricas de Evaluación
+
+
+ Aprende sobre el Indicador de Recuperación
+
+
+ Más sobre Indicadores con Contexto
+
+
+
+
+ );
+ }
+
return (
The Plexus Precision Gauge
diff --git a/dashboard/app/documentation/evaluation-metrics/gauges/recall/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/recall/page.tsx
similarity index 51%
rename from dashboard/app/documentation/evaluation-metrics/gauges/recall/page.tsx
rename to dashboard/app/[locale]/documentation/evaluation-metrics/gauges/recall/page.tsx
index 08123590b..7ecc47c0b 100644
--- a/dashboard/app/documentation/evaluation-metrics/gauges/recall/page.tsx
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/gauges/recall/page.tsx
@@ -1,4 +1,5 @@
-import { Metadata } from "next"
+'use client';
+
import Link from "next/link"
import { Button as DocButton } from "@/components/ui/button"
import { Gauge, Segment } from "@/components/gauge"
@@ -11,12 +12,8 @@ import {
alwaysSafeEmailClassDistribution,
alwaysSafeEmailConfusionMatrix,
alwaysSafeEmailPredictedDistribution,
-} from "@/app/documentation/evaluation-metrics/examples-data"
-
-export const metadata: Metadata = {
- title: "Recall Gauge - Plexus Documentation",
- description: "Understanding the Plexus Recall Gauge (Sensitivity) and its importance in evaluating classifier completeness, especially concerning False Negatives."
-}
+} from "@/app/[locale]/documentation/evaluation-metrics/examples-data"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
// Component to display a standalone Recall Gauge for illustration
const RecallGaugeDisplay = ({ value, title }: {
@@ -46,6 +43,121 @@ const recallForProhibitedInAlwaysSafe = (0 / (0 + 30)) * 100; // 0%
const precisionForProhibitedInAlwaysSafe = 0; // 0 / (0 + 0) which is undefined, typically shown as 0 in this context
export default function RecallGaugePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
+ El Indicador de Recuperación de Plexus
+
+ La recuperación, también conocida como Sensibilidad o Tasa de Verdaderos Positivos (TVP), responde la pregunta: "De todos los items que fueron realmente positivos, ¿qué proporción identificó correctamente el clasificador?" Mide la integridad o exhaustividad del clasificador para encontrar todas las instancias positivas. Un puntaje alto de recuperación indica que el clasificador tiene una tasa baja de Falsos Negativos (FN).
+
+
+
+
+ ¿Por Qué es Importante la Recuperación?
+
+ Enfocarse en la recuperación es crítico en escenarios donde el costo de un Falso Negativo es alto. Un Falso Negativo ocurre cuando el modelo predice incorrectamente una instancia positiva como negativa. Ejemplos incluyen:
+
+
+ Diagnóstico Médico: No detectar una enfermedad seria en un paciente que realmente la tiene. Esto podría retrasar el tratamiento y tener consecuencias graves para la salud.
+ Detección de Fraude: Perder una transacción fraudulenta, llevando a pérdidas financieras.
+ Sistemas de Seguridad: Un vehículo autónomo no detectando un obstáculo, o un sistema de seguridad no detectando a un intruso.
+
+
+ En estos casos, la alta recuperación es primordial para asegurar que se pierdan la menor cantidad posible de instancias positivas, incluso si eso significa aceptar un mayor número de Falsos Positivos (menor precisión).
+
+
+
+
+ Cómo Funciona el Indicador de Recuperación de Plexus
+
+ El Indicador de Recuperación en Plexus muestra el puntaje de recuperación calculado, yendo de 0% a 100%. La fórmula es:
+
+
+ Recuperación = Verdaderos Positivos / (Verdaderos Positivos + Falsos Negativos)
+
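+ {/* Illustrative arithmetic with hypothetical counts: 75 true positives (VP)
+ and 25 false negatives (FN) give Recuperación = 75 / (75 + 25) = 0.75, i.e.
+ the 75% shown in the example gauge below. */}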
+
+ Los segmentos visuales en el Indicador de Recuperación generalmente representan puntos de referencia estándar de rendimiento. Un puntaje de recuperación del 90% significa que el clasificador identificó exitosamente el 90% de todas las instancias positivas reales. Como la precisión, la interpretación directa es sencilla, y los segmentos categorizan visualmente este rendimiento.
+
+
+
+ Ejemplo: Indicador de Recuperación
+
+
+ Una recuperación del 75% indica que el clasificador encontró el 75% de todas las instancias positivas reales.
+
+
+
+
+
+ Recuperación en Acción: Escenarios de Ejemplo
+
+ Examinemos la recuperación usando nuestro contexto de filtro de email, donde "Prohibido" es la clase positiva que queremos detectar.
+
+
+
+
+
+
+
+ El filtro "Siempre Seguro" tiene 0% de recuperación para la clase "Prohibido". Esto significa que falla en identificar cualquiera de los emails prohibidos. Aunque logra 97% de precisión al etiquetar correctamente los emails seguros, es inútil para su tarea principal de atrapar contenido prohibido debido a su falla catastrófica en recuperación para esa clase.
+
+
+
+
+ Contraste: El Objetivo de Alta Recuperación
+
+ En el tamizaje médico crítico (donde "Enfermedad Presente" es positivo), el objetivo es muy alta recuperación. Quieres identificar tantos casos verdaderos como sea posible, incluso si eso significa que algunos individuos sanos sean marcados para más pruebas (Falsos Positivos, llevando a menor precisión para la clase "Enfermedad Presente").
+
+
+
+
+
+ Recuperación y Precisión: El Intercambio
+
+ La recuperación y la Precisión a menudo exhiben una relación inversa. Aumentar la recuperación (ej., haciendo que un clasificador sea más sensible a casos positivos) puede a veces llevar a más Falsos Positivos, así reduciendo la precisión.
+
+
+ El puntaje F1 es una métrica común que combina precisión y recuperación en un solo número (la media armónica), proporcionando una medida balanceada. Elegir si priorizar recuperación, precisión, o un balance depende enormemente de la aplicación específica y las consecuencias de diferentes tipos de errores.
+
+
+
+
+ Puntos Clave para la Recuperación
+
+ La recuperación (Sensibilidad) mide la habilidad de encontrar todas las instancias positivas reales: VP / (VP + FN).
+ Alta recuperación significa una tasa baja de Falsos Negativos.
+ Crucial cuando el costo de los Falsos Negativos es alto.
+ El Indicador de Recuperación de Plexus muestra este puntaje de 0-100%.
+ A menudo se considera en conjunto con la Precisión; el puntaje F1 balancea ambos.
+
+
+
+
+
+ Volver a la Vista General de Métricas de Evaluación
+
+
+ Aprende sobre el Indicador de Precisión
+
+
+ Más sobre Indicadores con Contexto
+
+
+
+
+ );
+ }
+
return (
The Plexus Recall Gauge
diff --git a/dashboard/app/[locale]/documentation/evaluation-metrics/page.tsx b/dashboard/app/[locale]/documentation/evaluation-metrics/page.tsx
new file mode 100644
index 000000000..b6ce65e63
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/evaluation-metrics/page.tsx
@@ -0,0 +1,712 @@
+'use client';
+
+import { Button as DocButton } from "@/components/ui/button"
+import Link from "next/link"
+import { GaugeThresholdComputer } from "@/utils/gauge-thresholds"
+import EvaluationCard from '@/components/EvaluationCard'
+import { Segment } from "@/components/gauge"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+// Helper function to create sample score data for examples
+const createExampleScore = (
+ id: string,
+ name: string,
+ ac1: number,
+ accuracy: number,
+ itemCount: number,
+ mismatches: number,
+ labelDistribution?: Record<string, number>
+) => ({
+ id,
+ score_name: name,
+ cc_question_id: `example-${id}`,
+ ac1,
+ item_count: itemCount,
+ mismatches,
+ accuracy,
+ label_distribution: labelDistribution
+})
+
+// Define fixed segments for the illustrative accuracy gauges in the initial scenarios (kept for initial coin flip examples if those are retained in narrative)
+const fixedAccuracyGaugeSegments: Segment[] = [
+ { start: 0, end: 50, color: 'var(--gauge-inviable)' },
+ { start: 50, end: 70, color: 'var(--gauge-converging)' },
+ { start: 70, end: 80, color: 'var(--gauge-almost)' },
+ { start: 80, end: 90, color: 'var(--gauge-viable)' },
+ { start: 90, end: 100, color: 'var(--gauge-great)' },
+];
+
+export default function EvaluationMetricsPage() {
+ const { locale } = useTranslationContext();
+
+ // Article Topic Labeler - Our consistent example through the document
+ const articleTopicLabelerExampleData = {
+ id: 'article-topic-labeler',
+ score_name: 'Article Topic Labeler Performance',
+ cc_question_id: 'example-topic-labeler',
+ accuracy: 62.0,
+ item_count: 100,
+ mismatches: 38, // 100 - 62
+ gwetAC1: 0.512, // Lower AC1 reflecting 62% accuracy
+ label_distribution: {
+ 'News': 40,
+ 'Sports': 15,
+ 'Business': 15,
+ 'Technology': 15,
+ 'Lifestyle': 15
+ }
+ };
+
+ const articleTopicLabelerClassDistribution = [
+ { label: "News", count: 40 },
+ { label: "Sports", count: 15 },
+ { label: "Business", count: 15 },
+ { label: "Technology", count: 15 },
+ { label: "Lifestyle", count: 15 }
+ ];
+
+ const articleTopicLabelerConfusionMatrix = {
+ labels: ["News", "Sports", "Business", "Technology", "Lifestyle"],
+ matrix: [
+ { actualClassLabel: "News", predictedClassCounts: { "News": 28, "Sports": 3, "Business": 3, "Technology": 3, "Lifestyle": 3 } },
+ { actualClassLabel: "Sports", predictedClassCounts: { "News": 3, "Sports": 9, "Business": 1, "Technology": 1, "Lifestyle": 1 } },
+ { actualClassLabel: "Business", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 8, "Technology": 2, "Lifestyle": 1 } },
+ { actualClassLabel: "Technology", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 2, "Technology": 8, "Lifestyle": 1 } },
+ { actualClassLabel: "Lifestyle", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 1, "Technology": 1, "Lifestyle": 9 } },
+ ],
+ };
+
+ const articleTopicLabelerPredictedDistribution = [
+ { label: "News", count: 40 },
+ { label: "Sports", count: 15 },
+ { label: "Business", count: 15 },
+ { label: "Technology", count: 15 },
+ { label: "Lifestyle", count: 15 }
+ ];
+
+ // Segments for the final Article Topic Labeler example (fully contextualized)
+ const articleTopicLabelerFullContextSegments = GaugeThresholdComputer.createSegments(
+ GaugeThresholdComputer.computeThresholds(articleTopicLabelerExampleData.label_distribution)
+ );
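+ // Context for the segments above: with the 40/15/15/15/15 label distribution,
+ // always guessing the majority class ("News") already scores 40%, versus a 20%
+ // uniform-chance baseline for five balanced classes, so the contextualized
+ // boundaries sit higher than for a balanced problem (inferred from the page's
+ // narrative, not from GaugeThresholdComputer's source).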
+
+ // Coin flip examples for the narrative
+ const fairCoinData = createExampleScore(
+ 'fair-coin',
+ 'Randomly Guessing Coin Flips (50/50)',
+ -0.04,
+ 48.0,
+ 100,
+ 52,
+ { 'Heads': 50, 'Tails': 50 }
+ )
+
+ const alwaysHeadsData = createExampleScore(
+ 'always-heads',
+ 'Always Guessing "Heads" (50/50)',
+ 0.02,
+ 51.0,
+ 100,
+ 49,
+ { 'Heads': 51, 'Tails': 49 }
+ )
+
+ const fairCoinDistribution = [
+ { label: "Heads", count: 51 },
+ { label: "Tails", count: 49 }
+ ];
+
+ const predictedFairCoinData = [
+ { label: "Heads", count: 50 },
+ { label: "Tails", count: 50 }
+ ];
+
+ const predictedAlwaysHeadsData = [
+ { label: "Heads", count: 100 },
+ { label: "Tails", count: 0 }
+ ];
+
+ const fairCoinConfusionMatrix = {
+ labels: ["Heads", "Tails"],
+ matrix: [
+ { actualClassLabel: "Heads", predictedClassCounts: { "Heads": 24, "Tails": 26 } },
+ { actualClassLabel: "Tails", predictedClassCounts: { "Heads": 26, "Tails": 24 } },
+ ],
+ };
+
+ const alwaysHeadsConfusionMatrix = {
+ labels: ["Heads", "Tails"],
+ matrix: [
+ { actualClassLabel: "Heads", predictedClassCounts: { "Heads": 51, "Tails": 0 } },
+ { actualClassLabel: "Tails", predictedClassCounts: { "Heads": 49, "Tails": 0 } },
+ ],
+ };
+
+ // Card Suit Guessing Example Data for narrative
+ const cardSuitData = createExampleScore(
+ 'card-suit-guessing',
+ 'Predicting a Card Suit (4 Classes, Random Guessing)',
+ -0.03,
+ 23.0,
+ 208,
+ 160,
+ { '♥️': 52, '♦️': 52, '♣️': 52, '♠️': 52 }
+ );
+
+ const cardSuitActualDistribution = [
+ { label: "♥️", count: 52 },
+ { label: "♦️", count: 52 },
+ { label: "♣️", count: 52 },
+ { label: "♠️", count: 52 }
+ ];
+
+ const cardSuitConfusionMatrix = {
+ labels: ["♥️", "♦️", "♣️", "♠️"],
+ matrix: [
+ { actualClassLabel: "♥️", predictedClassCounts: { "♥️": 12, "♦️": 13, "♣️": 13, "♠️": 14 } },
+ { actualClassLabel: "♦️", predictedClassCounts: { "♥️": 13, "♦️": 12, "♣️": 14, "♠️": 13 } },
+ { actualClassLabel: "♣️", predictedClassCounts: { "♥️": 13, "♦️": 14, "♣️": 12, "♠️": 13 } },
+ { actualClassLabel: "♠️", predictedClassCounts: { "♥️": 14, "♦️": 13, "♣️": 13, "♠️": 12 } },
+ ],
+ };
+
+ const cardSuitPredictedDistribution = [
+ { label: "♥️", count: 12+13+13+14 },
+ { label: "♦️", count: 13+12+14+13 },
+ { label: "♣️", count: 13+14+12+13 },
+ { label: "♠️", count: 14+13+13+12 }
+ ];
+
+ if (locale === 'es') {
+ return (
+
+
+ Interpretando Métricas de Evaluación: El Desafío
+
+ Entender métricas como la precisión es clave para evaluar el rendimiento de IA. Sin embargo, los números crudos pueden ser engañosos sin el contexto apropiado. Esta página explora errores comunes e introduce el enfoque de Plexus para una evaluación más clara y confiable.
+
+
+
+
+ La Gran Pregunta: ¿Es Bueno Este Clasificador?
+
+ Al desarrollar un sistema de IA, necesitamos indicadores para saber si nuestro modelo está funcionando bien. Consideremos un "Etiquetador de Temas de Artículos" que clasifica artículos en cinco categorías: Noticias, Deportes, Negocios, Tecnología y Estilo de Vida. Evaluado en 100 artículos, logra 62% de precisión.
+
+
+
+
+
+ ¿Es buena una precisión del 62%?
+
+ Este número parece mediocre. El indicador no contextualizado sugiere que solo está 'convergiendo'. ¿Pero es esto rendimiento pobre, o hay más en la historia?
+
+
+ >
+ }
+ />
+
+
+ Intuitivamente, el 62% parece algo débil—casi 4 de cada 10 artículos están mal. Pero para juzgar esto, necesitamos una línea base: ¿qué precisión lograría adivinar aleatoriamente?
+
+
+
+
+ Trampa 1: Ignorar la Línea Base (Acuerdo por Casualidad)
+
+ La precisión cruda no tiene sentido sin conocer la tasa de acuerdo por casualidad. Considera predecir 100 lanzamientos de moneda:
+
+
+
+ Con un lanzamiento de moneda justo, la adivinación aleatoria debería lograr cerca del 50% de precisión. Cualquier cosa significativamente mejor que esto indica habilidad real.
+
+
+
+
+ Trampa 2: No Considerar el Desbalance de Clases
+
+ El desbalance de clases puede hacer que la precisión alta sea engañosa. Un clasificador puede lograr alta precisión simplemente prediciendo siempre la clase mayoritaria.
+
+
+
+
+ La Solución de Plexus: Indicadores Contextualizados
+
+ Plexus aborda estos desafíos con un enfoque de dos partes:
+
+
+
+
+ Indicadores de Precisión Contextualizados: Ajustan dinámicamente sus escalas visuales basándose en el número de clases y distribución de clases específicas del problema.
+
+
+ Indicadores de Acuerdo Conscientes del Contexto: Métricas como el AC1 de Gwet que internamente contabilizan el acuerdo por casualidad y proporcionan puntajes estandarizados comparables.
+
+
+
+
+
+
+
+ Próximos Pasos
+
+ Explora los indicadores individuales y conceptos relacionados:
+
+
+
+
+
+ Indicadores Individuales
+
+
+ Indicador de Precisión
+
+
+ Indicador de Acuerdo
+
+
+ Indicador de Precisión (Métrica)
+
+
+ Indicador de Recuperación
+
+
+
+
+
+
+ Conceptos Clave
+
+
+ Indicadores con Contexto
+
+
+ Número de Clases
+
+
+ Desbalance de Clases
+
+
+ Ejemplos Detallados
+
+
+
+
+
+
+
+ );
+ }
+
+ return (
+
+
+ Interpreting Evaluation Metrics: The Challenge
+
+ Understanding metrics like accuracy is key to evaluating AI performance. However, raw numbers can be deceptive without proper context. This page explores common pitfalls and introduces Plexus's approach to clearer, more reliable evaluation.
+
+
+
+
+ The Big Question: Is This Classifier Good?
+
+ When developing an AI system, we need gauges to tell if our model is performing well. Let's consider an "Article Topic Labeler" that classifies articles into five categories: News, Sports, Business, Technology, and Lifestyle. Evaluated on 100 articles, it achieves 62% accuracy.
+
+
+
+
+
+ Is 62% accuracy good?
+
+ This number seems mediocre. The uncontextualized gauge suggests it's just 'converging'. But is this poor performance, or is there more to the story?
+
+
+ >
+ }
+ />
+
+
+ Intuitively, 62% seems somewhat weak—nearly 4 out of 10 articles are wrong. But to judge this, we need a baseline: what accuracy would random guessing achieve?
+
+
+
+
+ Pitfall 1: Ignoring the Baseline (Chance Agreement)
+
+ Raw accuracy is meaningless without knowing the chance agreement rate. Consider predicting 100 coin flips:
+
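+ {/* Expected-accuracy arithmetic: a fair coin makes any guess right with
+ probability 0.5, so over 100 flips every strategy lands near 50 correct,
+ as the two example cards below show. */}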
+
+
+
+ ~50% accuracy achieved.
+
+ But is this good? Without knowing the chance baseline, we can't tell.
+
+
+ }
+ />
+
+
+ ~51% accuracy achieved.
+
+ Slightly better, but still hovering around the 50% chance rate.
+
+
+ }
+ />
+
+
+
+
+ Key Insight: The Baseline Problem
+
+ Both strategies hover around 50% accuracy. This is the base random-chance agreement rate for a binary task. Without understanding this baseline, raw accuracy numbers are uninterpretable. Any reported accuracy must be compared against what random chance would yield for that specific problem.
+
+
+
+
+
+ Pitfall 2: The Moving Target of Multiple Classes
+
+ The chance agreement rate isn't fixed; it changes with the number of classes. For example, consider guessing the suit of a randomly drawn card from a standard 4-suit deck:
+
+
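+ {/* Baseline arithmetic: four equally likely suits mean random guessing is
+ right 1 time in 4, a 25% baseline versus 50% for a coin flip. */}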
+
+
+
+ ~23% accuracy in this run.
+
+ The fixed gauge makes this look terrible. Is it?
+
+
+
+
+ Misleading Raw View
+
+ For a 4-class problem, 25% is the actual random chance baseline. The raw gauge is deceptive here.
+
+
+ >
+ }
+ />
+
+
+
+ Key Insight: Number of Classes Shifts the Baseline
+
+ The baseline random-chance agreement rate dropped from 50% (for 2 classes like coin flips) to 25% (for 4 classes like card suits). This is a critical concept: as the number of equally likely options increases, the accuracy you'd expect from random guessing decreases. Therefore, a 30% accuracy is much better for a 10-class problem (10% chance) than for a 2-class problem (50% chance).
+
+
+
+
+
+
+ {locale === 'es' ? 'Trampa 3: La Ilusión del Desbalance de Clases' : 'Pitfall 3: The Illusion of Class Imbalance'}
+
+
+ {locale === 'es'
+ ? 'La distribución de clases en tus datos (balance de clases) añade otra capa de complejidad. Si un conjunto de datos está desbalanceado, un clasificador puede lograr alta precisión simplemente prediciendo siempre la clase mayoritaria, incluso si no tiene habilidad real.'
+ : 'The distribution of classes in your data (class balance) adds another layer of complexity. If a dataset is imbalanced, a classifier can achieve high accuracy by simply always predicting the majority class, even if it has no real skill.'
+ }
+
+
+
+
+ {locale === 'es' ? '~52% de precisión.' : '~52% accuracy.'}
+
+
+ {locale === 'es'
+ ? 'La estrategia no explota el desbalance conocido 75/25 de la baraja.'
+ : 'Strategy doesn\'t exploit the deck\'s known 75/25 imbalance.'
+ }
+
+
+ }
+ />
+
+
+
+ {locale === 'es' ? '¡75% de precisión!' : '75% accuracy!'}
+
+
+
+
+ {locale === 'es' ? '¡Engañosamente Alto!' : 'Deceptively High!'}
+
+
+ {locale === 'es'
+ ? 'Este 75% se logra explotando el desbalance (siempre adivinando la mayoría), no por habilidad.'
+ : 'This 75% is achieved by exploiting the imbalance (always guessing majority), not by skill.'
+ }
+
+
+ >
+ }
+ />
+
+
+ {locale === 'es'
+ ? 'Un ejemplo más extremo: un filtro de correo electrónico afirma tener 97% de precisión al detectar contenido prohibido. Sin embargo, si solo el 3% de los correos realmente contiene tal contenido, un filtro que etiqueta *cada correo* como "seguro" (atrapando cero violaciones) logrará 97% de precisión.'
+ : 'A more extreme example: an email filter claims 97% accuracy at detecting prohibited content. However, if only 3% of emails actually contain such content, a filter that labels *every single email* as "safe" (catching zero violations) will achieve 97% accuracy.'
+ }
+
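+ {/* The arithmetic behind the claim: of 100 emails, 97 are safe and 3 are
+ prohibited, so labeling everything "safe" scores 97 / 100 = 97% accuracy
+ while catching 0 of the 3 prohibited emails. */}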
+
+
+
+ {locale === 'es' ? '¡97% de precisión! ¿Suena genial?' : '97% accuracy! Sounds great?'}
+
+
+
+
+ {locale === 'es' ? '¡FALLA CRÍTICA!' : 'CRITICAL FLAW!'}
+
+
+ {locale === 'es'
+ ? 'Este modelo detecta CERO contenido prohibido. Es peor que inútil, proporcionando una falsa sensación de seguridad.'
+ : 'This model detects ZERO prohibited content. It\'s worse than useless, providing a false sense of security.'
+ }
+
+
+ >
+ }
+ />
+
+
+ {locale === 'es' ? 'Percepción Clave: El Desbalance Infla la Precisión Ingenua' : 'Key Insight: Imbalance Inflates Naive Accuracy'}
+
+
+ {locale === 'es'
+ ? 'Los puntajes de precisión cruda son profundamente engañosos sin considerar el desbalance de clases. Una alta precisión podría simplemente reflejar la proporción de la clase mayoritaria, no el poder predictivo real. Una precisión del 97% podría ser excelente para un problema balanceado, mediocre para uno moderadamente desbalanceado, o indicativo de falla completa en la detección de eventos raros.'
+ : 'Raw accuracy scores are deeply misleading without considering class imbalance. A high accuracy might simply reflect the majority class proportion, not actual predictive power. A 97% accuracy could be excellent for a balanced problem, mediocre for a moderately imbalanced one, or indicative of complete failure in rare event detection.'
+ }
+
+
+
+
+
+
+ {locale === 'es' ? 'La Solución de Plexus: Un Enfoque Unificado para la Claridad' : 'Plexus\'s Solution: A Unified Approach to Clarity'}
+
+
+ {locale === 'es'
+ ? 'Para superar estas trampas comunes y proporcionar una verdadera comprensión del rendimiento del clasificador, Plexus emplea una estrategia de dos frentes que combina métricas crudas contextualizadas con puntajes de acuerdo inherentemente conscientes del contexto:'
+ : 'To overcome these common pitfalls and provide a true understanding of classifier performance, Plexus employs a two-pronged strategy that combines contextualized raw metrics with inherently context-aware agreement scores:'
+ }
+
+
+
+
+ {locale === 'es' ? 'Indicadores de Precisión Contextualizados:' : 'Contextualized Accuracy Gauges:'}
+ {locale === 'es'
+ ? 'No solo mostramos la precisión cruda; la mostramos en una escala visual dinámica. Los segmentos coloreados de nuestros indicadores de Precisión se adaptan basados en el número de clases *y* su distribución en tus datos específicos. Esto inmediatamente te ayuda a interpretar si un puntaje de precisión es bueno, malo, o indiferente *para ese contexto de problema particular*.'
+ : 'We don\'t just show raw accuracy; we show it on a dynamic visual scale. The colored segments of our Accuracy gauges adapt based on the number of classes *and* their distribution in your specific data. This immediately helps you interpret if an accuracy score is good, bad, or indifferent *for that particular problem context*.'
+ }
+
+
+
+ {locale === 'es' ? 'Indicadores de Acuerdo Inherentemente Conscientes del Contexto:' : 'Inherently Context-Aware Agreement Gauges:'}
+ {locale === 'es'
+ ? 'Junto con la precisión, presentamos prominentemente un indicador de Acuerdo (típicamente usando AC1 de Gwet). Esta métrica está específicamente diseñada para calcular una medida de acuerdo corregida por casualidad. *Internamente* considera el número de clases y su distribución, proporcionando un puntaje estandarizado (0 = casualidad, 1 = perfecto) que refleja habilidad más allá de la adivinanza aleatoria. Este puntaje es directamente comparable entre diferentes problemas y conjuntos de datos.'
+ : 'Alongside accuracy, we prominently feature an Agreement gauge (typically using Gwet\'s AC1). This metric is specifically designed to calculate a chance-corrected measure of agreement. It *internally* accounts for the number of classes and their distribution, providing a standardized score (0 = chance, 1 = perfect) that reflects skill beyond random guessing. This score is directly comparable across different problems and datasets.'
+ }
+
+
+
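+ {/* For reference, a standard formulation of Gwet's AC1 (the dashboard's own
+ implementation may differ in detail): AC1 = (Pa - Pe) / (1 - Pe), where Pa
+ is the observed agreement and, for K classes with mean marginal proportions
+ pi_k, Pe = (1 / (K - 1)) * sum over k of pi_k * (1 - pi_k). Pe shrinks as K
+ grows, which is what makes AC1 comparable across class counts. */}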
+ {locale === 'es'
+ ? 'Veamos cómo este enfoque unificado clarifica el rendimiento de nuestro Etiquetador de Temas de Artículos (que tenía 62% de precisión cruda, 5 clases, y una distribución desbalanceada con 40% "Noticias"):'
+ : 'Let\'s see how this unified approach clarifies the performance of our Article Topic Labeler (which had 62% raw accuracy, 5 classes, and an imbalanced distribution with 40% "News"):'
+ }
+
+
+
+
+
+
+ {locale === 'es' ? 'El Poder de Dos Indicadores' : 'The Power of Two Gauges'}
+
+
+ {locale === 'es'
+ ? 'Este enfoque combinado ofrece una comprensión robusta e intuitiva:'
+ : 'This combined approach offers robust and intuitive understanding:'
+ }
+
+
+
+ {locale === 'es'
+ ? 'El Indicador de Precisión Contextualizado clarifica lo que significa la precisión cruda del 62% para *las complejidades de esta tarea específica* (5 clases, desbalanceado).'
+ : 'The Contextualized Accuracy Gauge clarifies what the raw 62% accuracy means for *this specific task\'s complexities* (5 classes, imbalanced).'
+ }
+
+
+ {locale === 'es'
+ ? 'El Indicador de Acuerdo proporciona un puntaje único y estandarizado (AC1 de 0.512) midiendo rendimiento *por encima de la casualidad*, directamente comparable entre diferentes problemas.'
+ : 'The Agreement Gauge provides a single, standardized score (AC1 of 0.512) measuring performance *above chance*, directly comparable across different problems.'
+ }
+
+
+
+ {locale === 'es'
+ ? 'Juntos, previenen malinterpretaciones de la precisión cruda y ofrecen una verdadera percepción del rendimiento de un clasificador.'
+ : 'Together, they prevent misinterpretations of raw accuracy and offer true insight into a classifier\'s performance.'
+ }
+
+
+
+
+
+ {locale === 'es' ? 'Profundiza en las Soluciones' : 'Dive Deeper into the Solutions'}
+
+
+ {locale === 'es'
+ ? 'Para entender las mecánicas detalladas de cómo Plexus contextualiza los indicadores de Precisión y cómo funciona el indicador de Acuerdo a través de varios escenarios, explora nuestra guía dedicada:'
+ : 'To understand the detailed mechanics of how Plexus contextualizes Accuracy gauges and how the Agreement gauge works across various scenarios, explore our dedicated guide:'
+ }
+
+
+
+ {locale === 'es' ? 'Entendiendo Indicadores con Contexto' : 'Understanding Gauges with Context'}
+
+
+
+
+
+
+
+ {locale === 'es' ? 'Próximos Pasos' : 'Next Steps'}
+
+
+ {locale === 'es'
+ ? 'Explora más documentación para mejorar tu comprensión:'
+ : 'Explore further documentation to enhance your understanding:'
+ }
+
+
+
+
+ {locale === 'es' ? 'Detallado: Indicadores con Contexto' : 'Detailed: Gauges with Context'}
+
+
+
+
+ {locale === 'es' ? 'Ver Más Ejemplos' : 'View More Examples'}
+
+
+
+
+ {locale === 'es' ? 'Aprender sobre Evaluaciones' : 'Learn about Evaluations'}
+
+
+
+
+ {locale === 'es' ? 'Explorar Reportes' : 'Explore Reports'}
+
+
+
+
+
+
+
+ );
+}
\ No newline at end of file
diff --git a/dashboard/app/documentation/layout.tsx b/dashboard/app/[locale]/documentation/layout.tsx
similarity index 100%
rename from dashboard/app/documentation/layout.tsx
rename to dashboard/app/[locale]/documentation/layout.tsx
diff --git a/dashboard/app/[locale]/documentation/methods/add-edit-score/page.tsx b/dashboard/app/[locale]/documentation/methods/add-edit-score/page.tsx
new file mode 100644
index 000000000..2a0728026
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/add-edit-score/page.tsx
@@ -0,0 +1,536 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function AddEditScorePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
+ Agregar/Editar una Puntuación
+
+ Aprende cómo crear y gestionar puntuaciones individuales dentro de cuadros de puntuación usando la interfaz del dashboard de Plexus.
+
+
+
+
+ Agregar Puntuaciones en el Dashboard
+
+ Las puntuaciones son criterios de evaluación individuales dentro de un cuadro de puntuación. El dashboard proporciona
+ una interfaz intuitiva para crear y configurar puntuaciones.
+
+
+
+
+
+ Guía Paso a Paso
+
+
+ Acceder a la Creación de Puntuaciones:
+ Abre tu cuadro de puntuación y haz clic en "Agregar Puntuación" o edita un cuadro de puntuación existente.
+
+
+ Elegir Tipo de Puntuación:
+ Selecciona entre los tipos de puntuación disponibles:
+
+ Análisis de Sentimientos
+ Calidad de Contenido
+ Verificación Gramatical
+ Métricas Personalizadas
+
+
+
+ Configurar Parámetros:
+ Configura la puntuación:
+
+ Nombre y descripción de la puntuación
+ Peso (importancia en el cuadro de puntuación general)
+ Umbral (puntuación mínima aceptable)
+ Parámetros personalizados específicos al tipo de puntuación
+
+
+
+ Vista Previa y Prueba:
+ Usa la función de vista previa para probar la puntuación contra contenido de muestra.
+
+
+ Guardar Puntuación:
+ Haz clic en "Agregar Puntuación" para incluirla en tu cuadro de puntuación.
+
+
+
+
+
+
+ Editar Puntuaciones Existentes
+
+
+ Localizar la Puntuación:
+ Encuentra la puntuación que deseas modificar dentro de tu cuadro de puntuación.
+
+
+ Acceder al Modo de Edición:
+ Haz clic en el ícono de edición junto a la puntuación.
+
+
+ Modificar Configuraciones:
+ Actualiza la configuración de la puntuación según sea necesario.
+
+
+ Guardar Cambios:
+ Haz clic en "Guardar" para aplicar tus modificaciones.
+
+
+
+
+
+
+
+ Gestión de Versiones de Puntuaciones
+
+ Las puntuaciones en Plexus soportan versionado, permitiéndote rastrear cambios y gestionar diferentes implementaciones:
+
+
+
+
+
+ Crear Nuevas Versiones
+
+ Cuando editas una puntuación y guardas cambios, se crea automáticamente una nueva versión.
+ Puedes agregar notas para documentar los cambios realizados en cada versión.
+
+
+
+
+ Versiones Campeón
+
+ Cada puntuación tiene una versión "campeón" designada que se usa para evaluaciones.
+ Puedes promover cualquier versión a estado campeón cuando estés satisfecho con su rendimiento.
+
+
+
+
+ Versiones Destacadas
+
+ Marca versiones importantes como "destacadas" para resaltarlas en el historial de versiones.
+ Esto ayuda a rastrear hitos significativos en el desarrollo de tu puntuación.
+
+
+
+
+
+
+ Consejos de Configuración de Puntuaciones
+
+
+
+ Balance de Pesos
+
+ Considera cuidadosamente la importancia relativa de cada puntuación al establecer pesos.
+ El total de todos los pesos en un cuadro de puntuación debe ser igual a 1.0.
+
+
+
+
+ Establecimiento de Umbrales
+
+ Establece umbrales apropiados basados en tus requisitos de calidad y prueba
+ con muestras de contenido representativas.
+
+
+
+
+ Tipos de Puntuaciones
+
+ Elige tipos de puntuación que se alineen con tus objetivos de evaluación. Combina diferentes
+ tipos para crear evaluaciones integrales.
+
+
+
+
+
+
+ Usar la CLI
+
+ Para la gestión automatizada de puntuaciones, puedes usar la CLI de Plexus:
+
+
+
+ {`# Ver información detallada sobre una puntuación
+plexus scorecards score "Nombre de Puntuación" --account "nombre-cuenta"
+plexus scorecards score "clave-puntuacion" --account "nombre-cuenta"
+
+# Mostrar historial de versiones y configuración
+plexus scorecards score "Nombre de Puntuación" --account "nombre-cuenta" --show-versions --show-config
+
+# Listar todas las puntuaciones para un cuadro de puntuación específico
+plexus scorecards list-scores --scorecard-id "id-cuadro-puntuacion"
+
+# Próximamente:
+# Ver historial de versiones para una puntuación
+plexus scorecards history --account-key "clave-cuenta" --score-key "clave-puntuacion"
+
+# Promover una versión a campeón
+plexus scorecards promote --account-key "clave-cuenta" --score-id "id-puntuacion" --version-id "id-version"
+
+# Agregar una nueva puntuación a un cuadro de puntuación
+plexus scores add --scorecard-id "id-cuadro" --name "Puntuación de Calidad" --type quality --weight 0.5
+
+# Listar todas las puntuaciones en un cuadro de puntuación
+plexus scores list --scorecard "Aseguramiento de Calidad"
+
+# Ver configuración de puntuación
+plexus scores info --score "Verificación Gramatical"`}
+
+
+
+
+
+ Búsqueda Eficiente de Puntuaciones
+
+ El comando score soporta múltiples métodos de búsqueda:
+
+
+ Por ID: plexus scorecards score "id-puntuacion"
+ Por clave: plexus scorecards score "clave-puntuacion"
+ Por nombre: plexus scorecards score "Nombre de Puntuación"
+ Por ID externo: plexus scorecards score "id-externo"
+
+
+ Puedes limitar la búsqueda a una cuenta específica o cuadro de puntuación para resultados más rápidos.
+
+
+
+
+
+
+ Referencia del SDK de Python
+
+ Para la gestión programática de puntuaciones, puedes usar el SDK de Python:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="tu-clave-api")
+
+# Obtener un cuadro de puntuación usando cualquier identificador (nombre, clave, ID, o ID externo)
+scorecard = plexus.scorecards.get("Aseguramiento de Calidad")
+
+# Obtener una puntuación usando cualquier identificador
+score = plexus.scores.get("Verificación Gramatical")
+
+# Obtener todas las puntuaciones en un cuadro de puntuación
+scores = scorecard.get_scores()
+
+# Obtener configuración de puntuación
+config = score.get_configuration()
+
+# Obtener resultados de evaluación de puntuación
+results = score.get_results(limit=10)`}
+
+
+
+ Al igual que la CLI, el SDK de Python también soporta el sistema de identificadores flexible, permitiéndote referenciar recursos usando diferentes tipos de identificadores.
+
+
+
+
+ Configuración YAML
+
+ Las puntuaciones pueden configurarse usando YAML para personalización avanzada:
+
+
+
+ {`name: Puntuación de Calidad
+key: puntuacion-calidad
+externalId: score_123
+type: LangGraphScore
+parameters:
+ check_grammar: true
+ check_style: true
+ min_word_count: 100
+threshold: 0.8
+weight: 0.5`}
+
+
+
+ Próximamente: La capacidad de extraer y subir configuraciones YAML usando la CLI para edición offline y control de versiones.
+
+
+
+
+ Próximamente
+
+ Se están desarrollando características adicionales para puntuaciones. Regresa pronto para:
+
+
+ Nuevos tipos de puntuaciones y métricas
+ Algoritmos de puntuación avanzados
+ Parámetros de evaluación personalizados
+ Analíticas de rendimiento de puntuaciones
+ Operaciones masivas de puntuaciones
+ Sincronización YAML para edición offline
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
+ Add/Edit a Score
+
+ Learn how to create and manage individual scores within scorecards using the Plexus dashboard interface.
+
+
+
+
+ Adding Scores in the Dashboard
+
+ Scores are individual evaluation criteria within a scorecard. The dashboard provides
+ an intuitive interface for creating and configuring scores.
+
+
+
+
+
+ Step-by-Step Guide
+
+
+ Access Score Creation:
+ Open your scorecard and click "Add Score" or edit an existing scorecard.
+
+
+ Choose Score Type:
+ Select from available score types:
+
+ Sentiment Analysis
+ Content Quality
+ Grammar Check
+ Custom Metrics
+
+
+
+ Configure Parameters:
+ Set up the score configuration:
+
+ Score name and description
+ Weight (importance in overall scorecard)
+ Threshold (minimum acceptable score)
+ Custom parameters specific to the score type
+
+
+
+ Preview and Test:
+ Use the preview feature to test the score against sample content.
+
+
+ Save Score:
+ Click "Add Score" to include it in your scorecard.
+
+
+
+
+
+
+ Editing Existing Scores
+
+
+ Locate the Score:
+ Find the score you want to modify within your scorecard.
+
+
+ Access Edit Mode:
+ Click the edit icon next to the score.
+
+
+ Modify Settings:
+ Update the score's configuration as needed.
+
+
+ Save Changes:
+ Click "Save" to apply your modifications.
+
+
+
+
+
+
+
+ Score Version Management
+
+ Scores in Plexus support versioning, allowing you to track changes and manage different implementations:
+
+
+
+
+
+ Creating New Versions
+
+ When you edit a score and save changes, a new version is automatically created.
+ You can add notes to document the changes made in each version.
+
+
+
+
+ Champion Versions
+
+ Each score has a designated "champion" version that is used for evaluations.
+ You can promote any version to champion status when you're satisfied with its performance.
+
+
+
+
+ Featured Versions
+
+ Mark important versions as "featured" to highlight them in the version history.
+ This helps track significant milestones in your score's development.
+
+
+
+
+
+
+ Score Configuration Tips
+
+
+
Weight Balancing
+
+ Carefully consider the relative importance of each score when setting weights.
+                    The total of all weights in a scorecard should equal 1.0; a quick programmatic check is sketched below, after these tips.
+
+
+
+
Threshold Setting
+
+ Set appropriate thresholds based on your quality requirements and test
+ with representative content samples.
+
+
+
+
Score Types
+
+ Choose score types that align with your evaluation goals. Combine different
+ types to create comprehensive assessments.
+
+
+
+
+
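+
+                  A quick way to sanity-check weight balancing is to total the weights with the SDK. This is a minimal sketch, assuming each score object returned by get_scores() exposes a numeric weight attribute (an assumption; that attribute is not shown in the SDK reference below):
+
+                  {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Total the weights across a scorecard and flag any imbalance.
+scorecard = plexus.scorecards.get("Quality Assurance")
+total = sum(score.weight for score in scorecard.get_scores())  # weight attribute assumed
+
+if abs(total - 1.0) > 1e-9:
+    print(f"Weights sum to {total}, expected 1.0 -- rebalance before evaluating")`}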
+
+ Using the CLI
+
+ For automated score management, you can use the Plexus CLI:
+
+
+
+ {`# View detailed information about a score
+plexus scorecards score "Score Name" --account "account-name"
+plexus scorecards score "score-key" --account "account-name"
+
+# Show version history and configuration
+plexus scorecards score "Score Name" --account "account-name" --show-versions --show-config
+
+# List all scores for a specific scorecard
+plexus scorecards list-scores --scorecard-id "scorecard-id"
+
+# Coming soon:
+# View version history for a score
+plexus scorecards history --account-key "account-key" --score-key "score-key"
+
+# Promote a version to champion
+plexus scorecards promote --account-key "account-key" --score-id "score-id" --version-id "version-id"
+
+# Add a new score to a scorecard
+plexus scores add --scorecard-id "card-id" --name "Quality Score" --type quality --weight 0.5
+
+# List all scores in a scorecard
+plexus scores list --scorecard "Quality Assurance"
+
+# View score configuration
+plexus scores info --score "Grammar Check"`}
+
+
+
+
+
Efficient Score Lookup
+
+ The score command supports multiple lookup methods:
+
+
+ By ID: plexus scorecards score "score-id"
+ By key: plexus scorecards score "score-key"
+ By name: plexus scorecards score "Score Name"
+ By external ID: plexus scorecards score "external-id"
+
+
+ You can scope the search to a specific account or scorecard for faster results.
+
+
+
+
+
+
+ Python SDK Reference
+
+ For programmatic score management, you can use the Python SDK:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Get a scorecard using any identifier (name, key, ID, or external ID)
+scorecard = plexus.scorecards.get("Quality Assurance")
+
+# Get a score using any identifier
+score = plexus.scores.get("Grammar Check")
+
+# Get all scores in a scorecard
+scores = scorecard.get_scores()
+
+# Get score configuration
+config = score.get_configuration()
+
+# Get score evaluation results
+results = score.get_results(limit=10)`}
+
+
+
+                  Like the CLI, the Python SDK supports the flexible identifier system, allowing you to reference resources by name, key, ID, or external ID.
+
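+
+                  For example, each of the following calls resolves the same score, assuming "grammar-check" and "score_456" are that score's key and external ID (illustrative values only, not real identifiers):
+
+                  {`# Illustrative only: the key and external ID values below are hypothetical.
+score = plexus.scores.get("Grammar Check")    # by name
+score = plexus.scores.get("grammar-check")    # by key
+score = plexus.scores.get("score_456")        # by external ID`}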
+
+
+
+ YAML Configuration
+
+ Scores can be configured using YAML for advanced customization:
+
+
+
+ {`name: Quality Score
+key: quality-score
+externalId: score_123
+type: LangGraphScore
+parameters:
+ check_grammar: true
+ check_style: true
+ min_word_count: 100
+threshold: 0.8
+weight: 0.5`}
+
+
+
+ Coming soon: The ability to pull and push YAML configurations using the CLI for offline editing and version control.
+
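+
+                  In the meantime, a local copy of a configuration can be linted before it is applied. A minimal sketch using PyYAML; the required-field list is inferred from the example above, not a published schema:
+
+                  {`import yaml
+
+with open("quality-score.yaml") as f:
+    config = yaml.safe_load(f)
+
+# Fields taken from the example configuration above (assumed, not a formal schema).
+for field in ("name", "key", "type", "threshold", "weight"):
+    if field not in config:
+        raise ValueError(f"missing required field: {field}")
+
+assert 0.0 <= config["threshold"] <= 1.0, "threshold should be a fraction between 0 and 1"`}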
+
+
+
+ Coming Soon
+
+ Additional score features are being developed. Check back soon for:
+
+
+ New score types and metrics
+ Advanced scoring algorithms
+ Custom evaluation parameters
+ Score performance analytics
+ Bulk score operations
+ YAML synchronization for offline editing
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/add-edit-scorecard/page.tsx b/dashboard/app/[locale]/documentation/methods/add-edit-scorecard/page.tsx
new file mode 100644
index 000000000..8d2f4d4c2
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/add-edit-scorecard/page.tsx
@@ -0,0 +1,438 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function AddEditScorecardPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Agregar/Editar un Cuadro de Puntuación
+
+ Aprende cómo crear y administrar cuadros de puntuación usando la interfaz del dashboard de Plexus.
+
+
+
+
+ Crear un Cuadro de Puntuación en el Dashboard
+
+ Los cuadros de puntuación definen los criterios para evaluar tu contenido. El dashboard proporciona
+ una interfaz intuitiva para crear y administrar cuadros de puntuación.
+
+
+
+
+
Guía Paso a Paso
+
+
+ Acceder a Cuadros de Puntuación:
+ Navega a la sección "Cuadros de Puntuación" en el menú de navegación principal.
+
+
+ Crear Nuevo Cuadro de Puntuación:
+ Haz clic en el botón "Nuevo Cuadro de Puntuación" en la esquina superior derecha.
+
+
+ Información Básica:
+ Completa los detalles del cuadro de puntuación:
+
+ Nombre del cuadro de puntuación
+ Descripción
+ Categoría/etiquetas (opcional)
+
+
+
+ Agregar Puntuaciones:
+ Haz clic en "Agregar Puntuación" para incluir criterios de evaluación:
+
+ Seleccionar tipo de puntuación
+ Configurar parámetros de puntuación
+ Establecer peso y umbral
+
+
+
+ Guardar Cuadro de Puntuación:
+ Haz clic en "Crear" para guardar tu nuevo cuadro de puntuación.
+
+
+
+
+
+
Editar un Cuadro de Puntuación
+
+
+ Encontrar el Cuadro de Puntuación:
+ Localiza el cuadro de puntuación que deseas modificar en la lista de Cuadros de Puntuación.
+
+
+ Entrar en Modo de Edición:
+ Haz clic en el ícono de editar o selecciona "Editar" del menú de acciones.
+
+
+ Realizar Cambios:
+ Modifica los detalles del cuadro de puntuación, agrega/elimina puntuaciones, o ajusta pesos.
+
+
+ Guardar Actualizaciones:
+ Haz clic en "Guardar Cambios" para aplicar tus modificaciones.
+
+
+
+
+
+
+
+ Consejos para la Gestión de Cuadros de Puntuación
+
+
+
Organización
+
+ Usa nombres y descripciones significativos para mantener tus cuadros de puntuación organizados.
+ Considera usar etiquetas para agrupar cuadros de puntuación relacionados.
+
+
+
+
Pesos de Puntuación
+
+ Equilibra los pesos de las puntuaciones para reflejar la importancia relativa de cada criterio
+ en tu proceso de evaluación.
+
+
+
+
Plantillas
+
+ Guarda configuraciones de cuadros de puntuación comúnmente utilizadas como plantillas para reutilización rápida.
+
+
+
+
+
+
+ Usar la CLI
+
+ Para la gestión automatizada de cuadros de puntuación, puedes usar la CLI de Plexus:
+
+
+
+ {`# Listar cuadros de puntuación con rendimiento optimizado
+plexus scorecards list "account-name" --fast
+
+# Ver un cuadro de puntuación específico por filtrado
+plexus scorecards list "account-name" --name "Calidad de Contenido"
+
+# Ver información detallada sobre una puntuación
+plexus scorecards score "score-name" --account "account-name" --show-versions
+
+# Próximamente:
+# Crear un nuevo cuadro de puntuación
+plexus scorecards create --name "Calidad de Contenido" --description "Evalúa la calidad del contenido"
+
+# Obtener información detallada sobre un cuadro de puntuación específico
+plexus scorecards info --scorecard "Calidad de Contenido"
+
+# Listar todas las puntuaciones en un cuadro de puntuación
+plexus scorecards list-scores --scorecard "Calidad de Contenido"
+
+# Extraer configuración del cuadro de puntuación a YAML
+plexus scorecards pull --scorecard "Calidad de Contenido" --output ./mis-cuadros-puntuacion
+
+# Subir configuración del cuadro de puntuación desde YAML
+plexus scorecards push --scorecard "Calidad de Contenido" --file ./mi-cuadro-puntuacion.yaml --note "Configuración actualizada"
+
+# Eliminar un cuadro de puntuación
+plexus scorecards delete --scorecard "Calidad de Contenido"`}
+
+
+
+
+
Consideraciones de Rendimiento
+
+ La CLI ahora usa consultas GraphQL optimizadas para obtener datos de cuadros de puntuación de manera eficiente:
+
+
+
+ Enfoque de Consulta Única: En lugar de hacer consultas separadas para las secciones y puntuaciones de cada cuadro de puntuación,
+                    el sistema ahora obtiene todos los datos en una sola consulta GraphQL integral.
+
+
+ Modo Rápido: Usa la opción --fast para omitir la obtención de secciones y puntuaciones cuando solo necesitas información básica del cuadro de puntuación.
+
+
+ Ocultar Puntuaciones: Usa --hide-scores para excluir detalles de puntuación de la salida mientras aún obtienes datos básicos del cuadro de puntuación.
+
+
+
+
+
+
+
+ Referencia del SDK de Python
+
+ Para la gestión programática de cuadros de puntuación, puedes usar el SDK de Python:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="tu-clave-api")
+
+# Obtener un cuadro de puntuación usando cualquier identificador (nombre, clave, ID, o ID externo)
+scorecard = plexus.scorecards.get("Calidad de Contenido")
+
+# Listar todos los cuadros de puntuación
+scorecards = plexus.scorecards.list()
+
+# Obtener todas las puntuaciones en un cuadro de puntuación
+scores = scorecard.get_scores()
+
+# Exportar cuadro de puntuación a YAML
+yaml_config = scorecard.to_yaml()
+with open("cuadro-puntuacion.yaml", "w") as f:
+ f.write(yaml_config)
+
+# Importar cuadro de puntuación desde YAML
+with open("cuadro-puntuacion.yaml", "r") as f:
+ yaml_content = f.read()
+
+nuevo_scorecard = plexus.scorecards.from_yaml(yaml_content)`}
+
+
+
+ Al igual que la CLI, el SDK de Python también soporta el sistema de identificadores flexible, permitiéndote referenciar cuadros de puntuación usando diferentes tipos de identificadores.
+
+
+
+
+ Próximamente
+
+ Se están desarrollando características adicionales para cuadros de puntuación. Regresa pronto para:
+
+
+ Opciones avanzadas de configuración de puntuación
+ Control de versiones de cuadros de puntuación
+ Características de edición colaborativa
+ Analíticas de rendimiento
+ Sincronización YAML para edición offline
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Add/Edit a Scorecard
+
+ Learn how to create and manage scorecards using the Plexus dashboard interface.
+
+
+
+
+ Creating a Scorecard in the Dashboard
+
+ Scorecards define the criteria for evaluating your content. The dashboard provides
+ an intuitive interface for creating and managing scorecards.
+
+
+
+
+
Step-by-Step Guide
+
+
+ Access Scorecards:
+ Navigate to the "Scorecards" section in the main navigation menu.
+
+
+ Create New Scorecard:
+ Click the "New Scorecard" button in the top-right corner.
+
+
+ Basic Information:
+ Fill in the scorecard details:
+
+ Scorecard name
+ Description
+ Category/tags (optional)
+
+
+
+ Add Scores:
+ Click "Add Score" to include evaluation criteria:
+
+ Select score type
+ Configure score parameters
+ Set weight and threshold
+
+
+
+ Save Scorecard:
+ Click "Create" to save your new scorecard.
+
+
+
+
+
+
Editing a Scorecard
+
+
+ Find the Scorecard:
+ Locate the scorecard you want to modify in the Scorecards list.
+
+
+ Enter Edit Mode:
+ Click the edit icon or select "Edit" from the actions menu.
+
+
+ Make Changes:
+ Modify scorecard details, add/remove scores, or adjust weights.
+
+
+ Save Updates:
+ Click "Save Changes" to apply your modifications.
+
+
+
+
+
+
+
+ Scorecard Management Tips
+
+
+
Organization
+
+ Use meaningful names and descriptions to keep your scorecards organized.
+ Consider using tags to group related scorecards.
+
+
+
+
Score Weights
+
+ Balance score weights to reflect the relative importance of each criterion
+ in your evaluation process.
+
+
+
+
Templates
+
+ Save commonly used scorecard configurations as templates for quick reuse.
+
+
+
+
+
+
+ Using the CLI
+
+ For automated scorecard management, you can use the Plexus CLI:
+
+
+
+ {`# List scorecards with optimized performance
+plexus scorecards list "account-name" --fast
+
+# View a specific scorecard by filtering
+plexus scorecards list "account-name" --name "Content Quality"
+
+# View detailed information about a score
+plexus scorecards score "score-name" --account "account-name" --show-versions
+
+# Coming soon:
+# Create a new scorecard
+plexus scorecards create --name "Content Quality" --description "Evaluates content quality"
+
+# Get detailed information about a specific scorecard
+plexus scorecards info --scorecard "Content Quality"
+
+# List all scores in a scorecard
+plexus scorecards list-scores --scorecard "Content Quality"
+
+# Pull scorecard configuration to YAML
+plexus scorecards pull --scorecard "Content Quality" --output ./my-scorecards
+
+# Push scorecard configuration from YAML
+plexus scorecards push --scorecard "Content Quality" --file ./my-scorecard.yaml --note "Updated configuration"
+
+# Delete a scorecard
+plexus scorecards delete --scorecard "Content Quality"`}
+
+
+
+
+
Performance Considerations
+
+ The CLI now uses optimized GraphQL queries to efficiently fetch scorecard data:
+
+
+
+ Single Query Approach: Instead of making separate queries for each scorecard's sections and scores,
+ the system now fetches all data in one comprehensive GraphQL query.
+
+
+ Fast Mode: Use the --fast option to skip fetching sections and scores when you only need basic scorecard info.
+
+
+ Hide Scores: Use --hide-scores to exclude score details from output while still getting basic scorecard data.
+
+
+
+
+
+
+
+ Python SDK Reference
+
+ For programmatic scorecard management, you can use the Python SDK:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Get a scorecard using any identifier (name, key, ID, or external ID)
+scorecard = plexus.scorecards.get("Content Quality")
+
+# List all scorecards
+scorecards = plexus.scorecards.list()
+
+# Get all scores in a scorecard
+scores = scorecard.get_scores()
+
+# Export scorecard to YAML
+yaml_config = scorecard.to_yaml()
+with open("scorecard.yaml", "w") as f:
+ f.write(yaml_config)
+
+# Import scorecard from YAML
+with open("scorecard.yaml", "r") as f:
+ yaml_content = f.read()
+
+new_scorecard = plexus.scorecards.from_yaml(yaml_content)`}
+
+
+
+ Like the CLI, the Python SDK also supports the flexible identifier system, allowing you to reference scorecards using different types of identifiers.
+
+
+
+
+ Coming Soon
+
+ Additional scorecard features are being developed. Check back soon for:
+
+
+ Advanced score configuration options
+ Scorecard version control
+ Collaborative editing features
+ Performance analytics
+ YAML synchronization for offline editing
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/add-edit-source/page.tsx b/dashboard/app/[locale]/documentation/methods/add-edit-source/page.tsx
new file mode 100644
index 000000000..12da5c7b3
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/add-edit-source/page.tsx
@@ -0,0 +1,308 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function AddEditSourcePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Agregar/Editar una Fuente
+
+ Aprende cómo crear y gestionar fuentes en Plexus usando la interfaz del Panel de Control.
+
+
+
+
+ Agregar una Fuente en el Panel de Control
+
+ El Panel de Control de Plexus proporciona una interfaz intuitiva para crear y gestionar tus fuentes.
+ Sigue estos pasos para agregar una nueva fuente:
+
+
+
+
+
Guía Paso a Paso
+
+
+ Navegar a Fuentes:
+ Haz clic en "Fuentes" en el menú de navegación principal para acceder a la página de gestión de fuentes.
+
+
+ Crear Nueva Fuente:
+ Haz clic en el botón "Agregar Fuente" en la esquina superior derecha de la página.
+
+
+ Elegir Tipo de Fuente:
+ Selecciona el tipo de fuente que deseas crear (ej. Texto, Audio).
+
+
+ Configurar Ajustes:
+ Completa la información requerida:
+
+ Nombre de la fuente
+ Descripción (opcional)
+ Contenido o carga de archivo
+ Ajustes adicionales específicos al tipo de fuente
+
+
+
+ Guardar:
+ Haz clic en "Crear" para guardar tu nueva fuente.
+
+
+
+
+
+
Editar una Fuente Existente
+
+
+ Localizar la Fuente:
+ Encuentra la fuente que deseas editar en la lista de Fuentes.
+
+
+ Acceder al Modo de Edición:
+ Haz clic en el ícono de edición (lápiz) junto al nombre de la fuente.
+
+
+ Realizar Cambios:
+ Actualiza la información de la fuente según sea necesario.
+
+
+ Guardar Cambios:
+ Haz clic en "Guardar" para aplicar tus actualizaciones.
+
+
+
+
+
+
+
+ Consejos de Gestión de Fuentes
+
+
+
Organización
+
+ Usa nombres claros y descriptivos junto con etiquetas opcionales para mantener tus fuentes organizadas
+ y fácilmente buscables.
+
+
+
+
Operaciones por Lote
+
+ Selecciona múltiples fuentes para realizar operaciones por lote como eliminación o actualización de etiquetas.
+
+
+
+
+
+
+ Usar la CLI
+
+ Para automatización y scripts, puedes usar la CLI de Plexus para gestionar fuentes:
+
+
+
+ {`# Crear una nueva fuente
+plexus sources create --name "Mi Fuente" --type text --content "Contenido de ejemplo"
+
+# Actualizar una fuente existente
+plexus sources update source-id --name "Nombre Actualizado" --content "Contenido actualizado"`}
+
+
+
+
+ Referencia del SDK de Python
+
+ Para acceso programático, puedes usar el SDK de Python:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="tu-clave-api")
+
+# Crear una nueva fuente
+source = plexus.sources.create(
+ name="Mi Fuente",
+ type="text",
+ data="Contenido de ejemplo"
+)
+
+# Actualizar una fuente existente
+source = plexus.sources.update(
+ source_id="source-id",
+ name="Nombre de Fuente Actualizado",
+ data="Contenido actualizado"
+)`}
+
+
+
+
+ Próximamente
+
+ Se están desarrollando documentación y características adicionales. Vuelve pronto para:
+
+
+ Técnicas avanzadas de gestión de fuentes
+ Capacidades de importación/exportación masiva
+ Plantillas de fuentes personalizadas
+ Ejemplos de integración
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Add/Edit a Source
+
+ Learn how to create and manage sources in Plexus using the dashboard interface.
+
+
+
+
+ Adding a Source in the Dashboard
+
+ The Plexus dashboard provides an intuitive interface for creating and managing your sources.
+ Follow these steps to add a new source:
+
+
+
+
+
Step-by-Step Guide
+
+
+ Navigate to Sources:
+ Click on "Sources" in the main navigation menu to access the sources management page.
+
+
+ Create New Source:
+ Click the "Add Source" button in the top-right corner of the page.
+
+
+ Choose Source Type:
+ Select the type of source you want to create (e.g., Text, Audio).
+
+
+ Configure Settings:
+ Fill in the required information:
+
+ Source name
+ Description (optional)
+ Content or file upload
+ Additional settings specific to the source type
+
+
+
+ Save:
+ Click "Create" to save your new source.
+
+
+
+
+
+
Editing an Existing Source
+
+
+ Locate the Source:
+ Find the source you want to edit in the Sources list.
+
+
+ Access Edit Mode:
+ Click the edit icon (pencil) next to the source name.
+
+
+ Make Changes:
+ Update the source's information as needed.
+
+
+ Save Changes:
+ Click "Save" to apply your updates.
+
+
+
+
+
+
+
+ Source Management Tips
+
+
+
Organization
+
+ Use clear, descriptive names and optional tags to keep your sources organized
+ and easily searchable.
+
+
+
+
Batch Operations
+
+ Select multiple sources to perform batch operations like deletion or tag updates.
+
+
+
+
+
+
+ Using the CLI
+
+ For automation and scripting, you can use the Plexus CLI to manage sources:
+
+
+
+ {`# Create a new source
+plexus sources create --name "My Source" --type text --content "Sample content"
+
+# Update an existing source
+plexus sources update source-id --name "Updated Name" --content "Updated content"`}
+
+
+
+
+ Python SDK Reference
+
+ For programmatic access, you can use the Python SDK:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Create a new source
+source = plexus.sources.create(
+ name="My Source",
+ type="text",
+ data="Sample content"
+)
+
+# Update an existing source
+source = plexus.sources.update(
+ source_id="source-id",
+ name="Updated Source Name",
+ data="Updated content"
+)`}
+
+
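+
+                  The create call also scripts well when loading many sources at once (bulk import helpers are listed under Coming Soon below). A minimal sketch, continuing from the client above and assuming a local directory of plain-text files:
+
+                  {`from pathlib import Path
+
+# Create one text source per .txt file in a local directory.
+for path in Path("./content").glob("*.txt"):
+    plexus.sources.create(
+        name=path.stem,
+        type="text",
+        data=path.read_text()
+    )`}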
+
+
+ Coming Soon
+
+ Additional documentation and features are being developed. Check back soon for:
+
+
+ Advanced source management techniques
+ Bulk import/export capabilities
+ Custom source templates
+ Integration examples
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/evaluate-score/page.tsx b/dashboard/app/[locale]/documentation/methods/evaluate-score/page.tsx
new file mode 100644
index 000000000..89cabe66f
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/evaluate-score/page.tsx
@@ -0,0 +1,298 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function EvaluateScorePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Evaluar una Puntuación
+
+ Aprende cómo ejecutar evaluaciones usando puntuaciones individuales o cuadros de puntuación completos.
+
+
+
+
+ Ejecutar una Evaluación
+
+ Puedes evaluar contenido usando puntuaciones individuales o cuadros de puntuación completos. El proceso de evaluación
+ analiza tu contenido contra los criterios definidos y proporciona resultados detallados.
+
+
+
+
+
Usar el Dashboard
+
+ Selecciona tu contenido fuente
+ Elige un cuadro de puntuación o puntuación individual
+ Haz clic en "Ejecutar Evaluación"
+ Monitorea el progreso de la evaluación
+ Revisa los resultados
+
+
+
+
+
Usar el SDK
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="tu-clave-api")
+
+# Evaluar usando una puntuación específica (acepta ID, nombre, clave, o ID externo)
+evaluation = plexus.evaluations.create(
+ source_id="id-fuente",
+ score="Verificación Gramatical" # Puede usar nombre, clave, ID, o ID externo
+)
+
+# O evaluar usando un cuadro de puntuación completo (acepta ID, nombre, clave, o ID externo)
+evaluation = plexus.evaluations.create(
+ source_id="id-fuente",
+ scorecard="Calidad de Contenido" # Puede usar nombre, clave, ID, o ID externo
+)
+
+# Obtener resultados de evaluación
+results = evaluation.get_results()
+
+# Imprimir valores de puntuación
+for score in results.scores:
+ print(f"{score.name}: {score.value}")`}
+
+
+
+ El SDK soporta el sistema de identificadores flexible, permitiéndote referenciar cuadros de puntuación y puntuaciones usando diferentes tipos de identificadores (nombre, clave, ID, o ID externo).
+
+
+
+
+
Usar la CLI
+
+ {`# Evaluar usando un cuadro de puntuación
+plexus evaluate accuracy --scorecard "Calidad de Contenido" --number-of-samples 100
+
+# Listar resultados de evaluación
+plexus evaluations list
+
+# Ver resultados detallados para una evaluación específica
+plexus evaluations list-results --evaluation id-evaluacion`}
+
+
+
+ La CLI soporta el sistema de identificadores flexible, permitiéndote referenciar cuadros de puntuación usando diferentes tipos de identificadores (nombre, clave, ID, o ID externo).
+
+
+
+
+
+
+ Entender los Resultados
+
+
+
Valores de Puntuación
+
+ Resultados numéricos o categóricos para cada criterio evaluado.
+
+
+
+
Explicaciones
+
+ Razonamiento detallado detrás del resultado de evaluación de cada puntuación.
+
+
+
+
Sugerencias
+
+ Recomendaciones para mejora basadas en los resultados de evaluación.
+
+
+
+
+
+
+ Evaluaciones por Lotes
+
+ Puedes evaluar múltiples fuentes a la vez usando procesamiento por lotes:
+
+
+
+ {`# Crear una evaluación por lotes
+batch = plexus.evaluations.create_batch(
+ source_ids=["fuente-1", "fuente-2", "fuente-3"],
+ scorecard="Aseguramiento de Calidad" # Puede usar nombre, clave, ID, o ID externo
+)
+
+# Monitorear progreso del lote
+status = batch.get_status()
+
+# Obtener resultados cuando esté completo
+results = batch.get_results()`}
+
+
+
+ Al igual que las evaluaciones individuales, las evaluaciones por lotes también soportan el sistema de identificadores flexible para cuadros de puntuación y puntuaciones.
+
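+
+                  Como los lotes se ejecutan de forma asíncrona, un bucle simple de sondeo y espera funciona para trabajos pequeños. Un esquema mínimo, suponiendo que get_status() devuelve valores terminales "COMPLETED"/"FAILED" (las cadenas de estado son una suposición):
+
+                  {`import time
+
+# Sondear hasta que el lote alcance un estado terminal y luego recolectar resultados.
+while batch.get_status() not in ("COMPLETED", "FAILED"):
+    time.sleep(10)
+
+results = batch.get_results()`}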
+
+
+
+ Próximamente
+
+ Se está desarrollando documentación detallada sobre evaluaciones. Regresa pronto para:
+
+
+ Opciones avanzadas de evaluación
+ Formato personalizado de resultados
+ Optimización de rendimiento de evaluaciones
+ Técnicas de análisis de resultados
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Evaluate a Score
+
+ Learn how to run evaluations using individual scores or complete scorecards.
+
+
+
+
+ Running an Evaluation
+
+ You can evaluate content using individual scores or entire scorecards. The evaluation
+ process analyzes your content against the defined criteria and provides detailed results.
+
+
+
+
+
Using the Dashboard
+
+ Select your source content
+ Choose a scorecard or individual score
+ Click "Run Evaluation"
+ Monitor the evaluation progress
+ Review the results
+
+
+
+
+
Using the SDK
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Evaluate using a specific score (accepts ID, name, key, or external ID)
+evaluation = plexus.evaluations.create(
+ source_id="source-id",
+ score="Grammar Check" # Can use name, key, ID, or external ID
+)
+
+# Or evaluate using an entire scorecard (accepts ID, name, key, or external ID)
+evaluation = plexus.evaluations.create(
+ source_id="source-id",
+ scorecard="Content Quality" # Can use name, key, ID, or external ID
+)
+
+# Get evaluation results
+results = evaluation.get_results()
+
+# Print score values
+for score in results.scores:
+ print(f"{score.name}: {score.value}")`}
+
+
+
+ The SDK supports the flexible identifier system, allowing you to reference scorecards and scores using different types of identifiers (name, key, ID, or external ID).
+
+
+
+
+
Using the CLI
+
+ {`# Evaluate using a scorecard
+plexus evaluate accuracy --scorecard "Content Quality" --number-of-samples 100
+
+# List evaluation results
+plexus evaluations list
+
+# View detailed results for a specific evaluation
+plexus evaluations list-results --evaluation evaluation-id`}
+
+
+
+ The CLI supports the flexible identifier system, allowing you to reference scorecards using different types of identifiers (name, key, ID, or external ID).
+
+
+
+
+
+
+ Understanding Results
+
+
+
Score Values
+
+ Numerical or categorical results for each evaluated criterion.
+
+
+
+
Explanations
+
+ Detailed reasoning behind each score's evaluation result.
+
+
+
+
Suggestions
+
+                    Recommendations for improvement based on the evaluation results; a loop that prints all three parts is sketched just below.
+
+
+
+
+
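+
+                  Building on the SDK example above, one loop can surface all three parts of a result. This is a sketch only: the explanation and suggestions attributes are assumptions based on the descriptions above, not confirmed SDK fields.
+
+                  {`results = evaluation.get_results()
+
+for score in results.scores:
+    print(f"{score.name}: {score.value}")   # score value, as shown earlier
+    print(f"  why: {score.explanation}")    # assumed attribute
+    print(f"  try: {score.suggestions}")    # assumed attribute`}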
+
+ Batch Evaluations
+
+ You can evaluate multiple sources at once using batch processing:
+
+
+
+ {`# Create a batch evaluation
+batch = plexus.evaluations.create_batch(
+ source_ids=["source-1", "source-2", "source-3"],
+ scorecard="Quality Assurance" # Can use name, key, ID, or external ID
+)
+
+# Monitor batch progress
+status = batch.get_status()
+
+# Get results when complete
+results = batch.get_results()`}
+
+
+
+ Like individual evaluations, batch evaluations also support the flexible identifier system for scorecards and scores.
+
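+
+                  Because batches run asynchronously, a simple poll-and-wait loop works for small jobs. A minimal sketch, assuming get_status() returns terminal "COMPLETED"/"FAILED" values (the status strings are an assumption):
+
+                  {`import time
+
+# Poll until the batch reaches a terminal state, then collect results.
+while batch.get_status() not in ("COMPLETED", "FAILED"):
+    time.sleep(10)
+
+results = batch.get_results()`}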
+
+
+
+ Coming Soon
+
+ Detailed documentation about evaluations is currently being developed. Check back soon for:
+
+
+ Advanced evaluation options
+ Custom result formatting
+ Evaluation performance optimization
+ Result analysis techniques
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/monitor-tasks/page.tsx b/dashboard/app/[locale]/documentation/methods/monitor-tasks/page.tsx
new file mode 100644
index 000000000..dbfda92ed
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/monitor-tasks/page.tsx
@@ -0,0 +1,278 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function MonitorTasksPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Monitorear Tareas
+
+ Aprende cómo rastrear y gestionar tareas en tu implementación de Plexus.
+
+
+
+
+ Monitoreo de Tareas
+
+ Las tareas representan unidades individuales de trabajo en Plexus, como evaluaciones,
+ procesamiento de fuentes, o entrenamiento de modelos. Puedes monitorear tareas a través del
+ dashboard web y la interfaz de línea de comandos.
+
+
+
+
+
Usar el Dashboard
+
+ El dashboard web proporciona una interfaz visual para monitorear tareas:
+
+
+ Navega a la sección de Tareas en el dashboard
+ Ve tareas activas y completadas en tiempo real
+ Usa filtros para encontrar tareas específicas por tipo o estado
+ Monitorea el progreso de tareas con barras de progreso visuales
+ Ve información detallada de tareas incluyendo etapas y registros
+ Rastrea el rendimiento de tareas y uso de recursos
+
+
+
+
+
Usar la CLI
+
+ La CLI de Plexus proporciona herramientas poderosas para monitorear tareas directamente desde tu terminal:
+
+
+ {`# Listar tareas para una cuenta (muestra las 10 más recientes por defecto)
+plexus tasks list --account tu-clave-cuenta
+
+# Mostrar todas las tareas en lugar de solo las más recientes
+plexus tasks list --account tu-clave-cuenta --all
+
+# Filtrar tareas por estado
+plexus tasks list --account tu-clave-cuenta --status RUNNING
+plexus tasks list --account tu-clave-cuenta --status COMPLETED
+plexus tasks list --account tu-clave-cuenta --status FAILED
+
+# Filtrar tareas por tipo
+plexus tasks list --account tu-clave-cuenta --type evaluation
+
+# Combinar filtros
+plexus tasks list --account tu-clave-cuenta --status RUNNING --type evaluation
+
+# Limitar el número de tareas mostradas
+plexus tasks list --account tu-clave-cuenta --limit 5`}
+
+
+ La salida de la CLI muestra información integral de tareas en una vista bien formateada:
+
+
+ Detalles básicos de tarea (ID, tipo, estado, objetivo, comando)
+ IDs asociados (cuenta, cuadro de puntuación, puntuación)
+ Etapa actual e información del trabajador
+ Información completa de tiempo (creado, iniciado, completado, estimado)
+ Indicadores de estado codificados por color (azul para ejecutándose, verde para completado, rojo para fallido)
+ Mensajes de error y detalles cuando estén disponibles
+ Metadatos de tarea y registros de salida
+
+
+
+
+
+
+ Zona de Peligro: Eliminación de Tareas
+
+
+ ⚠️ Advertencia: La eliminación de tareas es una operación permanente. Las tareas eliminadas no pueden recuperarse.
+                    Solo usa estos comandos cuando estés absolutamente seguro de la eliminación.
+
+
+
+
+ La CLI proporciona comandos para eliminación de tareas con medidas de seguridad integradas:
+
+
+
+ {`# Eliminar una tarea específica por ID
+plexus tasks delete --account tu-clave-cuenta --task-id "id-tarea"
+
+# Eliminar todas las tareas fallidas para una cuenta
+plexus tasks delete --account tu-clave-cuenta --status FAILED
+
+# Eliminar todas las tareas de un tipo específico para una cuenta
+plexus tasks delete --account tu-clave-cuenta --type evaluation
+
+# Eliminar TODAS las tareas para una cuenta específica
+plexus tasks delete --account tu-clave-cuenta --all
+
+# Eliminar TODAS las tareas en TODAS las cuentas (USAR CON EXTREMA PRECAUCIÓN)
+plexus tasks delete --all
+
+# Omitir confirmación con -y/--yes (USAR CON EXTREMA PRECAUCIÓN)
+plexus tasks delete --all -y`}
+
+
+
+
Características de Seguridad:
+
+ La bandera --all es requerida para eliminación masiva
+ El alcance de la cuenta está claramente indicado en las confirmaciones
+ La confirmación se muestra por defecto (puede omitirse con -y)
+ La vista previa de tareas a eliminar siempre se muestra
+ Las etapas de tarea asociadas se limpian automáticamente
+ La barra de progreso muestra el estado de eliminación
+
+
+
Antes de eliminar tareas, considera:
+
+ ¿Hay operaciones dependientes que podrían verse afectadas?
+ ¿Necesitas mantener los registros de tareas para propósitos de auditoría?
+ ¿Has respaldado algún resultado importante de tareas?
+ ¿Estás apuntando a las tareas correctas con tus filtros?
+ Si usas --all sin --account, ¿estás seguro de que quieres eliminar tareas en TODAS las cuentas?
+
+
+
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Monitor Tasks
+
+ Learn how to track and manage tasks in your Plexus deployment.
+
+
+
+
+ Task Monitoring
+
+ Tasks represent individual units of work in Plexus, such as evaluations,
+ source processing, or model training. You can monitor tasks through both
+ the web dashboard and the command line interface.
+
+
+
+
+
Using the Dashboard
+
+ The web dashboard provides a visual interface for monitoring tasks:
+
+
+ Navigate to the Tasks section in the dashboard
+ View active and completed tasks in real-time
+ Use filters to find specific tasks by type or status
+ Monitor task progress with visual progress bars
+ View detailed task information including stages and logs
+ Track task performance and resource usage
+
+
+
+
+
Using the CLI
+
+ The Plexus CLI provides powerful tools for monitoring tasks directly from your terminal:
+
+
+ {`# List tasks for an account (shows 10 most recent by default)
+plexus tasks list --account your-account-key
+
+# Show all tasks instead of just the most recent
+plexus tasks list --account your-account-key --all
+
+# Filter tasks by status
+plexus tasks list --account your-account-key --status RUNNING
+plexus tasks list --account your-account-key --status COMPLETED
+plexus tasks list --account your-account-key --status FAILED
+
+# Filter tasks by type
+plexus tasks list --account your-account-key --type evaluation
+
+# Combine filters
+plexus tasks list --account your-account-key --status RUNNING --type evaluation
+
+# Limit the number of tasks shown
+plexus tasks list --account your-account-key --limit 5`}
+
+
+ The CLI output displays comprehensive task information in a well-formatted view:
+
+
+ Basic task details (ID, type, status, target, command)
+ Associated IDs (account, scorecard, score)
+ Current stage and worker information
+ Complete timing information (created, started, completed, estimated)
+ Color-coded status indicators (blue for running, green for completed, red for failed)
+ Error messages and details when available
+ Task metadata and output logs
+
+
+
+
+
+
+ Danger Zone: Task Deletion
+
+
+ ⚠️ Warning: Task deletion is a permanent operation. Deleted tasks cannot be recovered.
+ Only use these commands when you are absolutely certain about the deletion.
+
+
+
+
+ The CLI provides commands for task deletion with built-in safety measures:
+
+
+
+ {`# Delete a specific task by ID
+plexus tasks delete --account your-account-key --task-id "task-id"
+
+# Delete all failed tasks for an account
+plexus tasks delete --account your-account-key --status FAILED
+
+# Delete all tasks of a specific type for an account
+plexus tasks delete --account your-account-key --type evaluation
+
+# Delete ALL tasks for a specific account
+plexus tasks delete --account your-account-key --all
+
+# Delete ALL tasks across ALL accounts (USE WITH EXTREME CAUTION)
+plexus tasks delete --all
+
+# Skip confirmation prompt with -y/--yes (USE WITH EXTREME CAUTION)
+plexus tasks delete --all -y`}
+
+
+
+
Safety Features:
+
+ The --all flag is required for bulk deletion
+ Account scope is clearly indicated in confirmations
+ Confirmation prompt is shown by default (can be skipped with -y)
+ Preview of tasks to be deleted is always shown
+ Associated task stages are automatically cleaned up
+ Progress bar shows deletion status
+
+
+
Before deleting tasks, consider:
+
+ Are there any dependent operations that might be affected?
+ Do you need to keep the task records for auditing purposes?
+ Have you backed up any important task results?
+ Are you targeting the correct tasks with your filters?
+ If using --all without --account, are you certain you want to delete tasks across ALL accounts?
+
+
+
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/page.tsx b/dashboard/app/[locale]/documentation/methods/page.tsx
new file mode 100644
index 000000000..bf9831c3f
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/page.tsx
@@ -0,0 +1,214 @@
+'use client';
+
+import { Button as DocButton } from "@/components/ui/button"
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+import Link from "next/link"
+
+export default function MethodsPage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Métodos
+
+ Bienvenido a nuestra sección de guías paso a paso. Aquí encontrarás instrucciones detalladas y prácticas para todas las operaciones comunes en Plexus. Ya sea que estés configurando tu primera fuente, creando cuadros de puntuación o ejecutando evaluaciones, estas guías te guiarán a través de cada proceso paso a paso.
+
+
+
+
+ Gestión de Fuentes
+
+
+
Agregar y Editar Fuentes
+
+ Aprende cómo crear nuevas fuentes y gestionar las existentes a través del panel de control.
+
+
+
Ver Guía de Gestión de Fuentes
+
+
+
+
+
Perfilado de Fuentes
+
+ Entiende cómo analizar tus fuentes para obtener insights sobre sus características.
+
+
+
Aprender sobre Perfilado
+
+
+
+
+
+
+ Configuración de Evaluaciones
+
+
+
Crear Cuadros de Puntuación
+
+ Configura criterios de evaluación completos con cuadros de puntuación personalizados.
+
+
+
Explorar Creación de Cuadros
+
+
+
+
+
Configurar Puntuaciones
+
+ Define métricas de evaluación individuales y sus parámetros.
+
+
+
Configurar Ajustes de Puntuación
+
+
+
+
+
+
+ Ejecutar Evaluaciones
+
+
+
Evaluar Contenido
+
+ Procesa tus fuentes usando cuadros de puntuación para generar insights.
+
+
+
Comenzar a Evaluar Contenido
+
+
+
+
+
Gestión de Tareas
+
+ Rastrea y gestiona tareas de evaluación a través de su ciclo de vida.
+
+
+
Monitorear tus Tareas
+
+
+
+
+
+
+ Próximos Pasos
+
+ ¿Listo para comenzar? Empieza con la gestión de fuentes para configurar tu contenido para evaluación.
+
+
+
+ Comenzar Gestión de Fuentes
+
+
+ Revisar Conceptos Fundamentales
+
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Methods
+
+ Welcome to our step-by-step guides section. Here you'll find detailed, practical instructions for all common operations in Plexus. Whether you're setting up your first source, creating scorecards, or running evaluations, these guides will walk you through each process step by step.
+
+
+
+
+ Source Management
+
+
+
Adding and Editing Sources
+
+ Learn how to create new sources and manage existing ones through the dashboard.
+
+
+
View Source Management Guide
+
+
+
+
+
Source Profiling
+
+ Understand how to analyze your sources to gain insights into their characteristics.
+
+
+
Learn About Profiling
+
+
+
+
+
+
+ Evaluation Setup
+
+
+
Creating Scorecards
+
+ Set up comprehensive evaluation criteria with custom scorecards.
+
+
+
Explore Scorecard Creation
+
+
+
+
+
Configuring Scores
+
+ Define individual evaluation metrics and their parameters.
+
+
+
Configure Score Settings
+
+
+
+
+
+
+ Running Evaluations
+
+
+
Evaluating Content
+
+ Process your sources using scorecards to generate insights.
+
+
+
Start Evaluating Content
+
+
+
+
+
Task Management
+
+ Track and manage evaluation tasks through their lifecycle.
+
+
+
Monitor Your Tasks
+
+
+
+
+
+
+ Next Steps
+
+ Ready to get started? Begin with source management to set up your content for evaluation.
+
+
+
+ Start Managing Sources
+
+
+ Review Core Concepts
+
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/methods/profile-source/page.tsx b/dashboard/app/[locale]/documentation/methods/profile-source/page.tsx
new file mode 100644
index 000000000..a241fd74f
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/methods/profile-source/page.tsx
@@ -0,0 +1,322 @@
+'use client';
+
+import { useTranslationContext } from '@/app/contexts/TranslationContext'
+
+export default function ProfileSourcePage() {
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Perfilar una Fuente
+
+ Aprende cómo analizar y perfilar tus fuentes usando la interfaz del panel de control de Plexus.
+
+
+
+
+ Perfilado de Fuentes en el Panel de Control
+
+ El perfilado de fuentes te ayuda a entender las características y patrones en tus datos
+ antes de ejecutar evaluaciones. El panel de control proporciona herramientas completas para analizar
+ tus fuentes.
+
+
+
+
+
Guía Paso a Paso
+
+
+ Acceder a Detalles de Fuente:
+ Navega a tu fuente en la lista de Fuentes y haz clic en ella para ver detalles.
+
+
+ Iniciar Perfilado:
+ Haz clic en el botón "Perfilar Fuente" en la vista de detalles de la fuente.
+
+
+ Configurar Análisis:
+ Selecciona las opciones de perfilado que deseas ejecutar:
+
+ Análisis de contenido
+ Detección de patrones
+ Métricas de calidad
+ Opciones de análisis personalizado
+
+
+
+ Ejecutar Perfil:
+ Haz clic en "Iniciar Análisis" para comenzar el proceso de perfilado.
+
+
+ Revisar Resultados:
+ Una vez completo, examina los resultados detallados del perfilado en el panel de control.
+
+
+
+
+
+
+
+ Entendiendo los Resultados del Perfil
+
+
+
Análisis de Contenido
+
+ Ve desgloses detallados del contenido de tu fuente, incluyendo estructura, formato
+ y características clave. El panel de control presenta esta información a través de
+ visualizaciones interactivas y reportes detallados.
+
+
+
+
Detección de Patrones
+
+ Explora patrones identificados y anomalías a través de la vista de análisis de patrones
+ del panel de control. Esto te ayuda a entender temas comunes y problemas potenciales
+ en tu contenido.
+
+
+
+
Métricas de Calidad
+
+ Revisa mediciones de calidad completas a través de gráficos intuitivos y
+ desgloses detallados de métricas en la interfaz del panel de control.
+
+
+
+
+
+
+ Consejos de Gestión de Perfiles
+
+
+
Guardar Perfiles
+
+ Guarda configuraciones de perfil como plantillas para reutilización rápida en múltiples fuentes.
+
+
+
+
Comparar Resultados
+
+ Usa la vista de comparación del panel de control para analizar resultados de perfil entre diferentes
+ fuentes o períodos de tiempo.
+
+
+
+
+
+
+ Usar la CLI
+
+ Para flujos de trabajo automatizados de perfilado, puedes usar la CLI de Plexus:
+
+
+
+ {`# Ejecutar un perfil en una fuente
+plexus sources profile source-id --analysis-type full
+
+# Obtener resultados del perfil
+plexus sources profile-results source-id`}
+
+
+
+
+ Referencia del SDK de Python
+
+ Para perfilado programático, puedes usar el SDK de Python:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="tu-clave-api")
+
+# Ejecutar un perfil en una fuente
+profile = plexus.sources.profile(
+ source_id="source-id",
+ options={
+ "content_analysis": True,
+ "pattern_detection": True,
+ "quality_metrics": True
+ }
+)
+
+# Obtener resultados del perfil
+results = profile.get_results()`}
+
+
+
+
+ Próximamente
+
+ Se están desarrollando características adicionales de perfilado. Vuelve pronto para:
+
+
+ Opciones avanzadas de visualización
+ Plantillas de perfilado personalizadas
+ Generación automatizada de insights
+ Compartir perfiles y colaboración
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Profile a Source
+
+ Learn how to analyze and profile your sources using the Plexus dashboard interface.
+
+
+
+
+ Profiling Sources in the Dashboard
+
+ Source profiling helps you understand the characteristics and patterns in your data
+ before running evaluations. The dashboard provides comprehensive tools for analyzing
+ your sources.
+
+
+
+
+
Step-by-Step Guide
+
+
+ Access Source Details:
+ Navigate to your source in the Sources list and click on it to view details.
+
+
+ Start Profiling:
+ Click the "Profile Source" button in the source details view.
+
+
+ Configure Analysis:
+ Select the profiling options you want to run:
+
+ Content analysis
+ Pattern detection
+ Quality metrics
+ Custom analysis options
+
+
+
+ Run Profile:
+ Click "Start Analysis" to begin the profiling process.
+
+
+ Review Results:
+ Once complete, examine the detailed profiling results in the dashboard.
+
+
+
+
+
+
+
+ Understanding Profile Results
+
+
+
Content Analysis
+
+ View detailed breakdowns of your source content, including structure, format,
+ and key characteristics. The dashboard presents this information through
+ interactive visualizations and detailed reports.
+
+
+
+
Pattern Detection
+
+ Explore identified patterns and anomalies through the dashboard's pattern
+ analysis view. This helps you understand common themes and potential issues
+ in your content.
+
+
+
+
Quality Metrics
+
+ Review comprehensive quality measurements through intuitive charts and
+ detailed metric breakdowns in the dashboard interface.
+
+
+
+
+
+
+ Profile Management Tips
+
+
+
Saving Profiles
+
+ Save profile configurations as templates for quick reuse across multiple sources.
+
+
+
+
Comparing Results
+
+                    Use the dashboard's comparison view to analyze profile results across different
+                    sources or time periods; a scripted equivalent is sketched under the SDK reference below.
+
+
+
+
+
+
+ Using the CLI
+
+ For automated profiling workflows, you can use the Plexus CLI:
+
+
+
+ {`# Run a profile on a source
+plexus sources profile source-id --analysis-type full
+
+# Get profile results
+plexus sources profile-results source-id`}
+
+
+
+
+ Python SDK Reference
+
+ For programmatic profiling, you can use the Python SDK:
+
+
+
+ {`from plexus import Plexus
+
+plexus = Plexus(api_key="your-api-key")
+
+# Run a profile on a source
+profile = plexus.sources.profile(
+ source_id="source-id",
+ options={
+ "content_analysis": True,
+ "pattern_detection": True,
+ "quality_metrics": True
+ }
+)
+
+# Get profile results
+results = profile.get_results()`}
+
+
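+
+                  The same calls can be scripted to compare profiles across sources, mirroring the dashboard's comparison view mentioned above. A minimal sketch that treats the results as opaque objects:
+
+                  {`# Profile several sources with identical options and collect results side by side.
+options = {
+    "content_analysis": True,
+    "pattern_detection": True,
+    "quality_metrics": True
+}
+
+results_by_source = {}
+for source_id in ["source-1", "source-2"]:
+    profile = plexus.sources.profile(source_id=source_id, options=options)
+    results_by_source[source_id] = profile.get_results()`}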
+
+
+ Coming Soon
+
+ Additional profiling features are being developed. Check back soon for:
+
+
+ Advanced visualization options
+ Custom profiling templates
+ Automated insights generation
+ Profile sharing and collaboration
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/documentation/page.tsx b/dashboard/app/[locale]/documentation/page.tsx
new file mode 100644
index 000000000..eb8d2bdf8
--- /dev/null
+++ b/dashboard/app/[locale]/documentation/page.tsx
@@ -0,0 +1,241 @@
+'use client';
+
+import { Button as DocButton } from "@/components/ui/button"
+import { useTranslations, useTranslationContext } from '@/app/contexts/TranslationContext'
+import Link from "next/link"
+
+export default function DocumentationPage() {
+ const t = useTranslations('documentation');
+ const { locale } = useTranslationContext();
+
+ if (locale === 'es') {
+ return (
+
+
Documentación
+
+ Bienvenido a la documentación de Plexus. Aquí encontrarás guías completas y documentación
+ para ayudarte a comenzar a trabajar con Plexus lo más rápido posible.
+
+
+
+
+ Primeros Pasos
+
+
+
Conceptos Fundamentales
+
+ Aprende sobre los conceptos y componentes fundamentales que impulsan Plexus.
+
+
+
Explorar Fundamentos
+
+
+
+
+
Guías Paso a Paso
+
+ Sigue guías detalladas para operaciones y flujos de trabajo comunes.
+
+
+
Ver Métodos
+
+
+
+
+
+
+ Componentes de la Plataforma
+
+
+
Nodos de Trabajo
+
+ Configura y gestiona nodos de trabajo para procesar tu contenido a escala.
+
+
+
Aprender sobre Workers
+
+
+
+
+
+ Herramienta CLI plexus
+
+
+ Utiliza la interfaz de línea de comandos para gestionar tu implementación de Plexus.
+
+
+
Explorar CLI
+
+
+
+
+
SDK de Python
+
+ Integra Plexus en tus aplicaciones Python de manera programática.
+
+
+
Explorar Referencia SDK
+
+
+
+
+
+
+ Inicio Rápido
+
+ La forma más rápida de comenzar con Plexus es:
+
+
+
+ Revisar los Fundamentos
+ Comprende los conceptos básicos que conforman Plexus.
+
+
+ Crear tu Primera Fuente
+ Agrega contenido para analizar usando el panel de control.
+
+
+ Configurar un Cuadro de Puntuación
+ Define cómo quieres evaluar tu contenido.
+
+
+ Ejecutar una Evaluación
+ Procesa tu contenido y visualiza los resultados.
+
+
+
+
+
+ Próximos Pasos
+
+ ¿Listo para comenzar? Empieza con los fundamentos para entender los conceptos básicos de Plexus.
+
+
+
+ Comenzar con Fundamentos
+
+
+ Ir a Creación de Fuentes
+
+
+
+
+
+ );
+ }
+
+ // English content (default)
+ return (
+
+
Documentation
+
+ Welcome to the Plexus documentation. Here you'll find comprehensive guides and documentation
+ to help you start working with Plexus as quickly as possible.
+
+
+
+
+ Getting Started
+
+
+
Core Concepts
+
+ Learn about the fundamental concepts and components that power Plexus.
+
+
+
Explore Basics
+
+
+
+
+
Step-by-Step Guides
+
+ Follow detailed guides for common operations and workflows.
+
+
+
View Methods
+
+
+
+
+
+
+ Platform Components
+
+
+
Worker Nodes
+
+ Set up and manage worker nodes to process your content at scale.
+
+
+
Learn About Workers
+
+
+
+
+
+ plexus CLI Tool
+
+
+ Use the command-line interface to manage your Plexus deployment.
+
+
+
Explore CLI
+
+
+
+
+
Python SDK
+
+ Integrate Plexus into your Python applications programmatically.
+
+
+
Browse SDK Reference
+
+
+
+
+
+
+ Quick Start
+
+ The fastest way to get started with Plexus is to:
+
+
+
+ Review the Basics
+ Understand the core concepts that make up Plexus.
+
+
+ Create Your First Source
+ Add some content to analyze using the dashboard.
+
+
+ Set Up a Scorecard
+ Define how you want to evaluate your content.
+
+
+ Run an Evaluation
+ Process your content and view the results.
+
+
+
+
+
+ Next Steps
+
+ Ready to get started? Begin with the basics to understand Plexus's core concepts.
+
+
+
+ Start with Basics
+
+
+ Jump to Source Creation
+
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/evaluations/[id]/__tests__/page.test.tsx b/dashboard/app/[locale]/evaluations/[id]/__tests__/page.test.tsx
similarity index 100%
rename from dashboard/app/evaluations/[id]/__tests__/page.test.tsx
rename to dashboard/app/[locale]/evaluations/[id]/__tests__/page.test.tsx
diff --git a/dashboard/app/evaluations/[id]/client-layout.tsx b/dashboard/app/[locale]/evaluations/[id]/client-layout.tsx
similarity index 100%
rename from dashboard/app/evaluations/[id]/client-layout.tsx
rename to dashboard/app/[locale]/evaluations/[id]/client-layout.tsx
diff --git a/dashboard/app/evaluations/[id]/layout.tsx b/dashboard/app/[locale]/evaluations/[id]/layout.tsx
similarity index 100%
rename from dashboard/app/evaluations/[id]/layout.tsx
rename to dashboard/app/[locale]/evaluations/[id]/layout.tsx
diff --git a/dashboard/app/evaluations/[id]/page.tsx b/dashboard/app/[locale]/evaluations/[id]/page.tsx
similarity index 100%
rename from dashboard/app/evaluations/[id]/page.tsx
rename to dashboard/app/[locale]/evaluations/[id]/page.tsx
diff --git a/dashboard/app/evaluations/page.tsx b/dashboard/app/[locale]/evaluations/page.tsx
similarity index 100%
rename from dashboard/app/evaluations/page.tsx
rename to dashboard/app/[locale]/evaluations/page.tsx
diff --git a/dashboard/app/feedback-queues/page.tsx b/dashboard/app/[locale]/feedback-queues/page.tsx
similarity index 100%
rename from dashboard/app/feedback-queues/page.tsx
rename to dashboard/app/[locale]/feedback-queues/page.tsx
diff --git a/dashboard/app/feedback/page.tsx b/dashboard/app/[locale]/feedback/page.tsx
similarity index 100%
rename from dashboard/app/feedback/page.tsx
rename to dashboard/app/[locale]/feedback/page.tsx
diff --git a/dashboard/app/items/page.tsx b/dashboard/app/[locale]/items/page.tsx
similarity index 100%
rename from dashboard/app/items/page.tsx
rename to dashboard/app/[locale]/items/page.tsx
diff --git a/dashboard/app/lab/README-metadata.md b/dashboard/app/[locale]/lab/README-metadata.md
similarity index 100%
rename from dashboard/app/lab/README-metadata.md
rename to dashboard/app/[locale]/lab/README-metadata.md
diff --git a/dashboard/app/lab/activity/layout.tsx b/dashboard/app/[locale]/lab/activity/layout.tsx
similarity index 100%
rename from dashboard/app/lab/activity/layout.tsx
rename to dashboard/app/[locale]/lab/activity/layout.tsx
diff --git a/dashboard/app/lab/activity/page.tsx b/dashboard/app/[locale]/lab/activity/page.tsx
similarity index 100%
rename from dashboard/app/lab/activity/page.tsx
rename to dashboard/app/[locale]/lab/activity/page.tsx
diff --git a/dashboard/app/lab/alerts/page.tsx b/dashboard/app/[locale]/lab/alerts/page.tsx
similarity index 100%
rename from dashboard/app/lab/alerts/page.tsx
rename to dashboard/app/[locale]/lab/alerts/page.tsx
diff --git a/dashboard/app/lab/analysis/page.tsx b/dashboard/app/[locale]/lab/analysis/page.tsx
similarity index 100%
rename from dashboard/app/lab/analysis/page.tsx
rename to dashboard/app/[locale]/lab/analysis/page.tsx
diff --git a/dashboard/app/lab/batches/[id]/layout.tsx b/dashboard/app/[locale]/lab/batches/[id]/layout.tsx
similarity index 100%
rename from dashboard/app/lab/batches/[id]/layout.tsx
rename to dashboard/app/[locale]/lab/batches/[id]/layout.tsx
diff --git a/dashboard/app/lab/batches/[id]/page.tsx b/dashboard/app/[locale]/lab/batches/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/batches/[id]/page.tsx
rename to dashboard/app/[locale]/lab/batches/[id]/page.tsx
diff --git a/dashboard/app/lab/batches/page.tsx b/dashboard/app/[locale]/lab/batches/page.tsx
similarity index 100%
rename from dashboard/app/lab/batches/page.tsx
rename to dashboard/app/[locale]/lab/batches/page.tsx
diff --git a/dashboard/app/lab/data/page.tsx b/dashboard/app/[locale]/lab/data/page.tsx
similarity index 100%
rename from dashboard/app/lab/data/page.tsx
rename to dashboard/app/[locale]/lab/data/page.tsx
diff --git a/dashboard/app/[locale]/lab/datasets/layout.tsx b/dashboard/app/[locale]/lab/datasets/layout.tsx
new file mode 100644
index 000000000..2b41d8be2
--- /dev/null
+++ b/dashboard/app/[locale]/lab/datasets/layout.tsx
@@ -0,0 +1,23 @@
+import React from 'react'
+import type { Metadata } from 'next'
+
+export const metadata: Metadata = {
+ title: "Datasets",
+ description: "Manage and explore your datasets for AI evaluation.",
+ openGraph: {
+ title: "Datasets",
+ description: "Manage and explore your datasets for AI evaluation.",
+ },
+ twitter: {
+ title: "Datasets",
+ description: "Manage and explore your datasets for AI evaluation.",
+ }
+}
+
+export default function DatasetsLayout({
+ children,
+}: {
+ children: React.ReactNode
+}) {
+ return children
+}
\ No newline at end of file
diff --git a/dashboard/app/[locale]/lab/datasets/page.tsx b/dashboard/app/[locale]/lab/datasets/page.tsx
new file mode 100644
index 000000000..8a882f701
--- /dev/null
+++ b/dashboard/app/[locale]/lab/datasets/page.tsx
@@ -0,0 +1,7 @@
+"use client";
+
+import DatasetsDashboard from '@/components/datasets-dashboard';
+
+export default function DatasetsPage() {
+  return <DatasetsDashboard />;
+}
\ No newline at end of file
diff --git a/dashboard/app/lab/evaluations/[id]/layout.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/layout.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/layout.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/layout.tsx
diff --git a/dashboard/app/lab/evaluations/[id]/page.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/page.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/page.tsx
diff --git a/dashboard/app/lab/evaluations/[id]/score-results/[scoreResultId]/layout.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/score-results/[scoreResultId]/layout.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/score-results/[scoreResultId]/layout.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/score-results/[scoreResultId]/layout.tsx
diff --git a/dashboard/app/lab/evaluations/[id]/score-results/[scoreResultId]/page.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/score-results/[scoreResultId]/page.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/score-results/[scoreResultId]/page.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/score-results/[scoreResultId]/page.tsx
diff --git a/dashboard/app/lab/evaluations/[id]/score-results/layout.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/score-results/layout.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/score-results/layout.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/score-results/layout.tsx
diff --git a/dashboard/app/lab/evaluations/[id]/score-results/page.tsx b/dashboard/app/[locale]/lab/evaluations/[id]/score-results/page.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/[id]/score-results/page.tsx
rename to dashboard/app/[locale]/lab/evaluations/[id]/score-results/page.tsx
diff --git a/dashboard/app/lab/evaluations/layout.tsx b/dashboard/app/[locale]/lab/evaluations/layout.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/layout.tsx
rename to dashboard/app/[locale]/lab/evaluations/layout.tsx
diff --git a/dashboard/app/lab/evaluations/page.tsx b/dashboard/app/[locale]/lab/evaluations/page.tsx
similarity index 100%
rename from dashboard/app/lab/evaluations/page.tsx
rename to dashboard/app/[locale]/lab/evaluations/page.tsx
diff --git a/dashboard/app/lab/feedback-queues/page.tsx b/dashboard/app/[locale]/lab/feedback-queues/page.tsx
similarity index 100%
rename from dashboard/app/lab/feedback-queues/page.tsx
rename to dashboard/app/[locale]/lab/feedback-queues/page.tsx
diff --git a/dashboard/app/lab/items/[id]/page.tsx b/dashboard/app/[locale]/lab/items/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/items/[id]/page.tsx
rename to dashboard/app/[locale]/lab/items/[id]/page.tsx
diff --git a/dashboard/app/lab/items/page.tsx b/dashboard/app/[locale]/lab/items/page.tsx
similarity index 100%
rename from dashboard/app/lab/items/page.tsx
rename to dashboard/app/[locale]/lab/items/page.tsx
diff --git a/dashboard/app/lab/layout.tsx b/dashboard/app/[locale]/lab/layout.tsx
similarity index 100%
rename from dashboard/app/lab/layout.tsx
rename to dashboard/app/[locale]/lab/layout.tsx
diff --git a/dashboard/app/lab/metadata-template.txt b/dashboard/app/[locale]/lab/metadata-template.txt
similarity index 100%
rename from dashboard/app/lab/metadata-template.txt
rename to dashboard/app/[locale]/lab/metadata-template.txt
diff --git a/dashboard/app/lab/reports/[id]/page.tsx b/dashboard/app/[locale]/lab/reports/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/reports/[id]/page.tsx
rename to dashboard/app/[locale]/lab/reports/[id]/page.tsx
diff --git a/dashboard/app/lab/reports/edit/[id]/page.tsx b/dashboard/app/[locale]/lab/reports/edit/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/reports/edit/[id]/page.tsx
rename to dashboard/app/[locale]/lab/reports/edit/[id]/page.tsx
diff --git a/dashboard/app/lab/reports/edit/page.tsx b/dashboard/app/[locale]/lab/reports/edit/page.tsx
similarity index 100%
rename from dashboard/app/lab/reports/edit/page.tsx
rename to dashboard/app/[locale]/lab/reports/edit/page.tsx
diff --git a/dashboard/app/lab/reports/page.tsx b/dashboard/app/[locale]/lab/reports/page.tsx
similarity index 100%
rename from dashboard/app/lab/reports/page.tsx
rename to dashboard/app/[locale]/lab/reports/page.tsx
diff --git a/dashboard/app/lab/scorecards/[id]/layout.tsx b/dashboard/app/[locale]/lab/scorecards/[id]/layout.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/[id]/layout.tsx
rename to dashboard/app/[locale]/lab/scorecards/[id]/layout.tsx
diff --git a/dashboard/app/lab/scorecards/[id]/page.tsx b/dashboard/app/[locale]/lab/scorecards/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/[id]/page.tsx
rename to dashboard/app/[locale]/lab/scorecards/[id]/page.tsx
diff --git a/dashboard/app/lab/scorecards/[id]/scores/[scoreId]/layout.tsx b/dashboard/app/[locale]/lab/scorecards/[id]/scores/[scoreId]/layout.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/[id]/scores/[scoreId]/layout.tsx
rename to dashboard/app/[locale]/lab/scorecards/[id]/scores/[scoreId]/layout.tsx
diff --git a/dashboard/app/lab/scorecards/[id]/scores/[scoreId]/page.tsx b/dashboard/app/[locale]/lab/scorecards/[id]/scores/[scoreId]/page.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/[id]/scores/[scoreId]/page.tsx
rename to dashboard/app/[locale]/lab/scorecards/[id]/scores/[scoreId]/page.tsx
diff --git a/dashboard/app/lab/scorecards/[id]/scores/layout.tsx b/dashboard/app/[locale]/lab/scorecards/[id]/scores/layout.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/[id]/scores/layout.tsx
rename to dashboard/app/[locale]/lab/scorecards/[id]/scores/layout.tsx
diff --git a/dashboard/app/lab/scorecards/layout.tsx b/dashboard/app/[locale]/lab/scorecards/layout.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/layout.tsx
rename to dashboard/app/[locale]/lab/scorecards/layout.tsx
diff --git a/dashboard/app/lab/scorecards/page.tsx b/dashboard/app/[locale]/lab/scorecards/page.tsx
similarity index 100%
rename from dashboard/app/lab/scorecards/page.tsx
rename to dashboard/app/[locale]/lab/scorecards/page.tsx
diff --git a/dashboard/app/lab/settings/account/page.tsx b/dashboard/app/[locale]/lab/settings/account/page.tsx
similarity index 85%
rename from dashboard/app/lab/settings/account/page.tsx
rename to dashboard/app/[locale]/lab/settings/account/page.tsx
index d3cbc7226..bfe1fd615 100644
--- a/dashboard/app/lab/settings/account/page.tsx
+++ b/dashboard/app/[locale]/lab/settings/account/page.tsx
@@ -12,6 +12,8 @@ import { Switch } from "@/components/ui/switch"
import { Label } from "@/components/ui/label"
import { Button } from "@/components/ui/button"
import { useToast } from "@/components/ui/use-toast"
+import { LanguageSelector } from "@/components/ui/language-selector"
+import { useTranslations } from '@/app/contexts/TranslationContext'
import { useAccount } from "@/app/contexts/AccountContext"
type Account = Schema["Account"]["type"]
@@ -39,6 +41,8 @@ const MENU_ITEMS = [
]
export default function LabAccountSettings() {
+ const t = useTranslations('settings.account')
+ const tCommon = useTranslations('common')
const { authStatus } = useAuthenticator((context) => [context.authStatus])
const router = useRouter()
const { toast } = useToast()
@@ -85,15 +89,15 @@ export default function LabAccountSettings() {
await refreshAccount()
toast({
- title: "Success",
- description: "Account settings saved successfully"
+ title: tCommon('success'),
+ description: t('settingsSaved')
})
router.push("/lab/settings")
} catch (error) {
console.error("Error saving settings:", error)
toast({
- title: "Error",
- description: "Failed to save account settings",
+ title: tCommon('error'),
+ description: t('settingsSaveError'),
variant: "destructive"
})
} finally {
@@ -108,7 +112,7 @@ export default function LabAccountSettings() {
if (!selectedAccount) {
return (
-      No account selected
+      {t('noAccountSelected')}
)
}
@@ -116,17 +120,17 @@ export default function LabAccountSettings() {
return (
-      Account Settings
+      {t('title')}
- Customize your account menu visibility settings.
+ {t('description')}
- Menu Visibility for {selectedAccount.name}
+ {t('menuVisibilityTitle', { accountName: selectedAccount.name })}
- Choose which menu items to show or hide in the sidebar.
+ {t('menuVisibilityDescription')}
@@ -146,7 +150,7 @@ export default function LabAccountSettings() {
onClick={handleSave}
disabled={isSaving}
>
- {isSaving ? "Saving..." : "Save Changes"}
+ {isSaving ? t('saving') : t('saveChanges')}
diff --git a/dashboard/app/[locale]/lab/settings/page.tsx b/dashboard/app/[locale]/lab/settings/page.tsx
new file mode 100644
index 000000000..d1dc46f15
--- /dev/null
+++ b/dashboard/app/[locale]/lab/settings/page.tsx
@@ -0,0 +1,53 @@
+'use client'
+
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
+import { LanguageSelector } from "@/components/ui/language-selector"
+import { useTranslations } from '@/app/contexts/TranslationContext'
+import Link from 'next/link'
+
+export default function LabSettings() {
+ const t = useTranslations('settings');
+ const tCommon = useTranslations('common');
+
+ return (
+
+
+      {t('title')}
+
+ {t('description')}
+
+
+
+
+
+ {t('user')}
+ {t('customize')}
+
+
+
+ {t('userDescription')}
+
+
+ {t('manageVisibility')}
+
+
+
+
+
+
+
+ {t('account.title')}
+ {t('account.description')}
+
+
+ {t('organizationDescription')}
+
+
+ {t('account.title')}
+
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/lab/tasks/[id]/page.tsx b/dashboard/app/[locale]/lab/tasks/[id]/page.tsx
similarity index 100%
rename from dashboard/app/lab/tasks/[id]/page.tsx
rename to dashboard/app/[locale]/lab/tasks/[id]/page.tsx
diff --git a/dashboard/app/lab/tasks/layout.tsx b/dashboard/app/[locale]/lab/tasks/layout.tsx
similarity index 100%
rename from dashboard/app/lab/tasks/layout.tsx
rename to dashboard/app/[locale]/lab/tasks/layout.tsx
diff --git a/dashboard/app/[locale]/layout.tsx b/dashboard/app/[locale]/layout.tsx
new file mode 100644
index 000000000..678c1a8a2
--- /dev/null
+++ b/dashboard/app/[locale]/layout.tsx
@@ -0,0 +1,82 @@
+import type { Metadata } from "next";
+import { Inter } from "next/font/google";
+import { Jersey_20 } from "next/font/google";
+import "../globals.css";
+import ClientLayout from "../client-layout";
+import { HydrationOverlay } from "@builder.io/react-hydration-overlay";
+import "@aws-amplify/ui-react/styles.css";
+import { AccountProvider } from "../contexts/AccountContext"
+import { SidebarProvider } from "../contexts/SidebarContext"
+import { TranslationProvider } from "../contexts/TranslationContext"
+import {notFound} from 'next/navigation';
+import {locales} from '../../i18n';
+
+const inter = Inter({ subsets: ["latin"] });
+const jersey20 = Jersey_20({
+ subsets: ["latin"],
+ weight: "400",
+ variable: "--font-jersey-20"
+});
+
+export const metadata: Metadata = {
+ title: "Plexus - No-Code AI Agents at Scale",
+ description: "Run AI agents over your data with no code. Plexus is a battle-tested platform for building agent-based AI workflows that analyze streams of content and take action.",
+ openGraph: {
+ title: "Plexus - No-Code AI Agents at Scale",
+ description: "Run AI agents over your data with no code. Plexus is a battle-tested platform for building agent-based AI workflows that analyze streams of content and take action.",
+ url: "https://plexus.anth.us",
+ siteName: "Plexus",
+ images: [
+ {
+ url: "/og-image.png",
+ width: 1200,
+ height: 630,
+ alt: "Plexus - No-Code AI Agents at Scale"
+ }
+ ],
+ locale: "en_US",
+ type: "website",
+ },
+ twitter: {
+ card: "summary_large_image",
+ title: "Plexus - No-Code AI Agents at Scale",
+ description: "Run AI agents over your data with no code. Plexus is a battle-tested platform for building agent-based AI workflows that analyze streams of content and take action.",
+ creator: "@Anthus_AI",
+ images: ["/og-image.png"],
+ }
+};
+
+export default async function LocaleLayout({
+ children,
+ params: {locale}
+}: {
+ children: React.ReactNode;
+ params: {locale: string};
+}) {
+ // Validate that the incoming `locale` parameter is valid
+ if (!locales.includes(locale as any)) {
+ notFound();
+ }
+
+ // Load messages for the locale synchronously
+ const messages = locale === 'es'
+ ? (await import('../../messages/es.json')).default
+ : (await import('../../messages/en.json')).default;
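+  // (Assumed workflow: adding a locale means extending `locales` in ../../i18n and adding a messages/<locale>.json branch above.)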
+
+ return (
+    <html lang={locale}>
+      <body className={`${inter.className} ${jersey20.variable}`}>
+        <HydrationOverlay>
+          <TranslationProvider messages={messages} locale={locale}>
+            <AccountProvider>
+              <SidebarProvider>
+                <ClientLayout>
+                  {children}
+                </ClientLayout>
+              </SidebarProvider>
+            </AccountProvider>
+          </TranslationProvider>
+        </HydrationOverlay>
+      </body>
+    </html>
+ );
+}
\ No newline at end of file
diff --git a/dashboard/app/menu-items.ts b/dashboard/app/[locale]/menu-items.ts
similarity index 100%
rename from dashboard/app/menu-items.ts
rename to dashboard/app/[locale]/menu-items.ts
diff --git a/dashboard/app/page.module.css b/dashboard/app/[locale]/page.module.css
similarity index 100%
rename from dashboard/app/page.module.css
rename to dashboard/app/[locale]/page.module.css
diff --git a/dashboard/app/[locale]/page.tsx b/dashboard/app/[locale]/page.tsx
new file mode 100644
index 000000000..568eb0e66
--- /dev/null
+++ b/dashboard/app/[locale]/page.tsx
@@ -0,0 +1,281 @@
+'use client'
+
+import React from 'react'
+import { StandardSection } from '@/components/landing/StandardSection'
+import { UseCases } from '@/components/landing/UseCases'
+import { CTASection } from '@/components/landing/CTASection'
+import { Footer } from '@/components/landing/Footer'
+import { Layout } from '@/components/landing/Layout'
+import { Download, Brain, Workflow as WorkflowIcon, ArrowRight, Cpu, FlaskRoundIcon as Flask, Cloud, Network } from 'lucide-react'
+import dynamic from 'next/dynamic'
+import ItemListWorkflow from '@/components/workflow/layouts/item-list-workflow'
+import MetricsGauges from '@/components/MetricsGauges'
+import { Button } from '@/components/ui/button'
+import Link from 'next/link'
+
+const CLOCKWISE_SEQUENCE = [0, 1, 3, 2] // accuracy -> precision -> specificity -> sensitivity
+
+const MultiModelWorkflowClient = dynamic(
+ () => import('@/components/workflow/layouts/multi-model-workflow'),
+ { ssr: false }
+)
+
+const WorkflowClient = dynamic(
+ () => import('@/components/workflow/base/workflow-base'),
+ { ssr: false }
+)
+
+const MultiTypeWorkflowClient = dynamic(
+ () => import('@/components/workflow/layouts/multi-type-workflow'),
+ { ssr: false }
+)
+
+const ItemListWorkflowClient = dynamic(
+ () => import('@/components/workflow/layouts/item-list-workflow'),
+ { ssr: false }
+)
+
+export default function LandingPage() {
+ const [selectedMetricIndex, setSelectedMetricIndex] = React.useState(0)
+ const [rotationIndex, setRotationIndex] = React.useState(0)
+
+ React.useEffect(() => {
+ const interval = setInterval(() => {
+ setRotationIndex(prev => (prev + 1) % CLOCKWISE_SEQUENCE.length)
+ }, 2000)
+
+ return () => clearInterval(interval)
+ }, [])
+
+ // Map rotation index to actual gauge index for clockwise movement
+ const selectedIndex = CLOCKWISE_SEQUENCE[rotationIndex]
+
+ return (
+
+
+
+ Plexus is a battle-tested platform for building AI workflows that analyze streams of content and take action.
+
+
+ Your team can use your data to set up step-by-step processes that automate information management, without dealing with code development and deployment.
+
+
+ >
+ }
+ rightContent={
+
+
+
+ }
+ />
+
+ }
+ rightContent={
+
+
+ Run a scorecard on each item of your data, with multiple scores per scorecard.
+
+
+ Are your agents saying the right things? Are your inbound leads qualified?
+
+
+ Classify, predict, extract, and act on your data.
+
+
+ }
+ />
+
+
+ AI changes every week! Don't lock yourself into one solution.
+ Plexus is a workbench for applying any newfangled AI model to
+ solve your problems. Or simpler and cheaper ML models. Or
+ logical rules -- anything your solution requires.
+ {"\n\n"}
+ OpenAI, Anthropic, Google, Deepseek, Azure, AWS Bedrock, Hugging Face, PyTorch, TensorFlow —
+ Plexus supports them all.
+
+ }
+ rightContent={
+
+
+
+ }
+ />
+
+
+ }
+ rightContent={
+
+
+ You can't just write prompts, put them into production, and hope they work; you need a way to evaluate them quantitatively to see if they meet your needs. You can't optimize a metric you're not measuring.
+
+
+ Each use case demands its own success metrics: Is this a regulatory compliance question where we need high sensitivity? Do we need to use balanced accuracy because the data is unbalanced? Plexus gives you the gauges you need.
+
+
+ }
+ />
+
+
+ Your answers should match your questions. Sometimes a simple yes/no will do,
+ other times you need a 5-star rating, a percentage score, or just a thumbs up.
+ Plexus gives you the flexibility to express your results in the format that makes sense
+ for your use case.
+ {"\n\n"}
+ Binary classifiers, multi-class classifiers, scalar values, entity extraction, quote extraction,
+ and more. The framework is flexible enough to support anything your solution requires.
+
+ }
+ rightContent={
+
+
+
+ }
+ />
+
+
+
+
+ Built by practitioners on the front lines of AI deployment. Our features
+ evolve as rapidly as AI itself, delivering battle-tested tools that
+ transform cutting-edge capabilities into real business value.
+
+
+
+
+
+
+ Multi-model
+
+
+ Use any AI/ML model, from GPT-4 or Claude, to your own fine-tuned local Llama, to custom BERT-based classifiers.
+
+
+
+
+
+ Lab workflow
+
+
+ Create and align your own custom classifiers using sophisticated tools for analyzing datasets and evaluating results.
+
+
+
+
+
+ Serverless
+
+
+ Plexus is a lightning-fast, fully DevOps / IaC / NoSQL project that doesn't depend on servers or databases.
+
+
+
+
+
+ Task dispatch
+
+
+ Connect any node as a worker for running agents, evaluations, or reports, from AWS to Azure to local computers.
+
+
+
+
+
+
+
+
+
+ )
+}
\ No newline at end of file
diff --git a/dashboard/app/platform/page.tsx b/dashboard/app/[locale]/platform/page.tsx
similarity index 100%
rename from dashboard/app/platform/page.tsx
rename to dashboard/app/[locale]/platform/page.tsx
diff --git a/dashboard/app/reports/[id]/client-layout.tsx b/dashboard/app/[locale]/reports/[id]/client-layout.tsx
similarity index 100%
rename from dashboard/app/reports/[id]/client-layout.tsx
rename to dashboard/app/[locale]/reports/[id]/client-layout.tsx
diff --git a/dashboard/app/reports/[id]/layout.tsx b/dashboard/app/[locale]/reports/[id]/layout.tsx
similarity index 100%
rename from dashboard/app/reports/[id]/layout.tsx
rename to dashboard/app/[locale]/reports/[id]/layout.tsx
diff --git a/dashboard/app/reports/[id]/page.tsx b/dashboard/app/[locale]/reports/[id]/page.tsx
similarity index 100%
rename from dashboard/app/reports/[id]/page.tsx
rename to dashboard/app/[locale]/reports/[id]/page.tsx
diff --git a/dashboard/app/reports/layout.tsx b/dashboard/app/[locale]/reports/layout.tsx
similarity index 100%
rename from dashboard/app/reports/layout.tsx
rename to dashboard/app/[locale]/reports/layout.tsx
diff --git a/dashboard/app/reports/page.tsx b/dashboard/app/[locale]/reports/page.tsx
similarity index 100%
rename from dashboard/app/reports/page.tsx
rename to dashboard/app/[locale]/reports/page.tsx
diff --git a/dashboard/app/scorecards/[scorecardId]/scores/[scoreId]/edit/page.tsx b/dashboard/app/[locale]/scorecards/[scorecardId]/scores/[scoreId]/edit/page.tsx
similarity index 100%
rename from dashboard/app/scorecards/[scorecardId]/scores/[scoreId]/edit/page.tsx
rename to dashboard/app/[locale]/scorecards/[scorecardId]/scores/[scoreId]/edit/page.tsx
diff --git a/dashboard/app/scorecards/page.tsx b/dashboard/app/[locale]/scorecards/page.tsx
similarity index 100%
rename from dashboard/app/scorecards/page.tsx
rename to dashboard/app/[locale]/scorecards/page.tsx
diff --git a/dashboard/app/settings/account/page.tsx b/dashboard/app/[locale]/settings/account/page.tsx
similarity index 100%
rename from dashboard/app/settings/account/page.tsx
rename to dashboard/app/[locale]/settings/account/page.tsx
diff --git a/dashboard/app/settings/page.tsx b/dashboard/app/[locale]/settings/page.tsx
similarity index 100%
rename from dashboard/app/settings/page.tsx
rename to dashboard/app/[locale]/settings/page.tsx
diff --git a/dashboard/app/solutions/call-center-qa/page.tsx b/dashboard/app/[locale]/solutions/call-center-qa/page.tsx
similarity index 100%
rename from dashboard/app/solutions/call-center-qa/page.tsx
rename to dashboard/app/[locale]/solutions/call-center-qa/page.tsx
diff --git a/dashboard/app/solutions/enterprise/page.tsx b/dashboard/app/[locale]/solutions/enterprise/page.tsx
similarity index 100%
rename from dashboard/app/solutions/enterprise/page.tsx
rename to dashboard/app/[locale]/solutions/enterprise/page.tsx
diff --git a/dashboard/app/solutions/optimizer-agents/page.tsx b/dashboard/app/[locale]/solutions/optimizer-agents/page.tsx
similarity index 100%
rename from dashboard/app/solutions/optimizer-agents/page.tsx
rename to dashboard/app/[locale]/solutions/optimizer-agents/page.tsx
diff --git a/dashboard/app/solutions/platform/page.tsx b/dashboard/app/[locale]/solutions/platform/page.tsx
similarity index 100%
rename from dashboard/app/solutions/platform/page.tsx
rename to dashboard/app/[locale]/solutions/platform/page.tsx
diff --git a/dashboard/app/solutions/resources/page.tsx b/dashboard/app/[locale]/solutions/resources/page.tsx
similarity index 100%
rename from dashboard/app/solutions/resources/page.tsx
rename to dashboard/app/[locale]/solutions/resources/page.tsx
diff --git a/dashboard/app/contexts/TranslationContext.tsx b/dashboard/app/contexts/TranslationContext.tsx
new file mode 100644
index 000000000..346a10dcc
--- /dev/null
+++ b/dashboard/app/contexts/TranslationContext.tsx
@@ -0,0 +1,77 @@
+"use client";
+
+import React, { createContext, useContext, ReactNode } from 'react';
+
+interface TranslationContextType {
+  t: (key: string, variables?: Record<string, any>) => string;
+ locale: string;
+}
+
+const TranslationContext = createContext<TranslationContextType | undefined>(undefined);
+
+interface TranslationProviderProps {
+ children: ReactNode;
+  messages: Record<string, any>;
+ locale: string;
+}
+
+export function TranslationProvider({ children, messages, locale }: TranslationProviderProps) {
+  const t = (key: string, variables?: Record<string, any>): string => {
+    const keys = key.split('.');
+    let value: any = messages;
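+    // Walk the nested messages object: a key like "a.b.c" resolves to messages.a.b.c.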
+
+ for (const k of keys) {
+ if (value && typeof value === 'object' && k in value) {
+ value = value[k];
+ } else {
+ return key; // Return key if translation not found
+ }
+ }
+
+ let result = typeof value === 'string' ? value : key;
+
+ // Handle variable interpolation
+ if (variables && typeof result === 'string') {
+ Object.keys(variables).forEach(varKey => {
+ const placeholder = `{${varKey}}`;
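+      // e.g. "{accountName}"; the braces are matched literally by the RegExp below, not as a quantifier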
+ result = result.replace(new RegExp(placeholder, 'g'), String(variables[varKey]));
+ });
+ }
+
+ return result;
+ };
+
+ return (
+    <TranslationContext.Provider value={{ t, locale }}>
+      {children}
+    </TranslationContext.Provider>
+ );
+}
+
+export function useTranslations(namespace?: string) {
+ const context = useContext(TranslationContext);
+ if (!context) {
+ throw new Error('useTranslations must be used within a TranslationProvider');
+ }
+
+  return (key: string, variables?: Record<string, any>) => {
+ const fullKey = namespace ? `${namespace}.${key}` : key;
+ return context.t(fullKey, variables);
+ };
+}
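+
+// Usage sketch, using keys that appear elsewhere in this changeset ('Acme' is a placeholder value):
+//   const t = useTranslations('settings.account');
+//   t('title')                                        // -> messages.settings.account.title
+//   t('menuVisibilityTitle', { accountName: 'Acme' }) // "{accountName}" is interpolated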
+
+export function useLocale() {
+ const context = useContext(TranslationContext);
+ if (!context) {
+ throw new Error('useLocale must be used within a TranslationProvider');
+ }
+ return context.locale;
+}
+
+export function useTranslationContext() {
+ const context = useContext(TranslationContext);
+ if (!context) {
+ throw new Error('useTranslationContext must be used within a TranslationProvider');
+ }
+ return context;
+}
\ No newline at end of file
diff --git a/dashboard/app/documentation/advanced/mcp-server/page.tsx b/dashboard/app/documentation/advanced/mcp-server/page.tsx
deleted file mode 100644
index 6fee9a88b..000000000
--- a/dashboard/app/documentation/advanced/mcp-server/page.tsx
+++ /dev/null
@@ -1,255 +0,0 @@
-'use client';
-
-import Link from "next/link";
-
-export default function McpServerPage() {
- return (
-
-
-
-
Using the Plexus MCP Server
-
- Connect AI assistants like Claude to your Plexus data and functionality using the Model Context Protocol (MCP) server.
-
-
-
-
- What is MCP?
-
- The Model Context Protocol (MCP) is an open standard designed by Anthropic that allows AI models, such as Claude,
- to securely interact with external tools and data sources. For an AI assistant, an MCP server acts as a gateway,
- enabling it to access and use capabilities from other systems. In the context of Plexus, this means you can
- empower an AI to work with your scorecards, evaluations, and reports directly. This allows for more dynamic and
- powerful ways to interact with your Plexus instance.
- For a deeper dive into the protocol itself, see the official Anthropic Model Context Protocol announcement.
-
-
-
-
- Plexus MCP Server Overview
-
- The Plexus MCP server is a pre-built tool that you can run on your system. Once running, it allows AI assistants
- that support MCP (like the Claude desktop app) to connect to your Plexus environment. This connection lets the AI
- perform various actions within Plexus on your behalf, such as listing scorecards, retrieving report details, or
- even initiating new evaluations. The server is typically run via a wrapper script (plexus_fastmcp_wrapper.py)
- which handles environment setup and ensures smooth communication with the AI client.
-
-
-
-
- Getting the Server Code
-
- To run the Plexus MCP server, you'll first need to obtain the server code. This is available in the main Plexus GitHub repository.
- You can clone or download it from: https://github.com/AnthusAI/Plexus.
- The necessary scripts (plexus_fastmcp_wrapper.py and plexus_fastmcp_server.py) are typically located at MCP/ within the repository.
- You will primarily need these files and to ensure their dependencies can be met in your Python environment.
-
-
-
-
- Setting Up an MCP Client (e.g., Claude Desktop App)
-
- To use the Plexus MCP server, you need an MCP client. For example, if you are using the Claude desktop application,
- you would configure it by creating or editing an mcp.json file. This file tells Claude (or another client)
- how to find and communicate with your running Plexus MCP server.
-
-
- Here is an example configuration for your mcp.json file. You will need to replace the placeholder paths
- (/path/to/...) with the actual paths relevant to your system and where you have cloned the Plexus repository.
-
-
-
-{`{
- "mcpServers": {
- "plexus-mcp-service": {
- "command": "/path/to/your/conda/envs/py311/bin/python",
- "args": [
- "/path/to/your/Plexus/MCP/plexus_fastmcp_wrapper.py",
- "--host", "127.0.0.1",
- "--port", "8002",
- "--transport", "stdio",
- "--env-file", "/path/to/your/Plexus/.env",
- "--target-cwd", "/path/to/your/Plexus/"
- ],
- "env": {
- "PYTHONUNBUFFERED": "1",
- "PYTHONPATH": "/path/to/your/Plexus"
- }
- }
- }
-}`}
-
-
- Key parts of this configuration:
-
- command: The full path to the Python interpreter within your Plexus conda environment (e.g., py311).
- args: Specifies the wrapper script to run (plexus_fastmcp_wrapper.py) and its parameters.
- The --host and --port arguments configure the local server settings.
- The --transport stdio argument is standard for client-server communication.
- The --env-file argument must point directly to your .env file (which contains API keys).
- The --target-cwd should point to your Plexus project root directory.
- env.PYTHONPATH: Should point to the root of your Plexus project directory to ensure the server can find all necessary Python modules.
-
-
- The location of the mcp.json file can vary depending on the client. For the Claude desktop app, consult its documentation for the correct location (often in a configuration directory within your user profile).
-
-
-
-
- Available Tools & Capabilities
- Once the Plexus MCP server is running (via the wrapper script) and your AI assistant is connected, you can instruct the assistant to use the following tools:
-
-
-
Scorecard Management
-
-
- list_plexus_scorecards : Ask the AI to list available scorecards in your Plexus Dashboard.
- You can optionally tell it to filter by an account name/key, a partial scorecard name, or a scorecard key. For example: "List Plexus scorecards for the 'Sales' account that include 'Q3' in the name."
-
-
- get_plexus_scorecard_info : Request detailed information about a specific scorecard.
- Provide the AI with an identifier for the scorecard (like its name, key, or ID). It will return the scorecard's description, sections, and the scores within each section. For example: "Get info for the 'Customer Satisfaction Q3' scorecard."
-
-
- get_plexus_score_details : Get specific details for a particular score within a scorecard, including its configuration and version history.
- You'll need to specify both the scorecard and the score. You can also ask for a specific version of the score. For example: "Show me the details for the 'Responsiveness' score in the 'Support Tickets' scorecard, especially its champion version."
-
-
-
-
-
-
Evaluation Tools
-
-
- run_plexus_evaluation : Instruct the AI to start a new scorecard evaluation.
- You need to provide the scorecard name and optionally a specific score name and the number of samples. The server will dispatch this task to your Plexus backend. Note that the MCP server itself doesn't track the progress; you would monitor the evaluation in the Plexus Dashboard as usual. For example: "Run a Plexus evaluation for the 'Lead Quality' scorecard using 100 samples."
-
-
-
-
-
-
Reporting Tools
-
-
- list_plexus_reports : Ask for a list of generated reports. You can filter by account or by a specific report configuration ID if you know it.
- The AI will return a list showing report names, IDs, and when they were created. For example: "List the latest Plexus reports for the main account."
-
-
- get_plexus_report_details : Retrieve detailed information about a specific report by providing its ID.
- This includes the report's parameters, output, and any generated blocks. For example: "Get the details for Plexus report ID '123-abc-456'."
-
-
- get_latest_plexus_report : A convenient way to get the details of the most recently generated report.
- You can optionally filter by account or report configuration ID. For example: "Show me the latest report generated from the 'Weekly Performance' configuration."
-
-
- list_plexus_report_configurations : Get a list of all available report configurations for an account.
- This is useful for knowing what reports you *can* generate. For example: "What report configurations are available for the 'Marketing' account?"
-
-
-
-
-
-
Utility Tools
-
-
- think : A planning tool used internally by the AI to structure reasoning before using other tools.
- This helps the AI organize its approach to complex tasks that may require multiple steps or tool calls.
-
-
-
-
-
-
- Environment Requirements for Running the Server
-
-
-
Software
-
- Python 3.11 or newer (required by the fastmcp library the server uses).
- An existing Plexus installation and access to its dashboard credentials.
- The python-dotenv Python package (used by the server to load your API keys from the .env file).
-
-
-
-
.env File with Plexus Credentials
-
- The server needs to access your Plexus API. Create a file named .env. The --env-file parameter in your mcp.json should point directly to this file.
- It's typically located in your main Plexus project root directory (e.g., Plexus/.env).
-
-
Required Variables in .env:
-
- PLEXUS_API_URL: The API endpoint URL for your Plexus instance.
- PLEXUS_API_KEY: Your API key for authenticating with Plexus.
- PLEXUS_DASHBOARD_URL: The main URL of your Plexus dashboard (used for generating links).
-
-
Optional Variables in .env:
-
- PLEXUS_ACCOUNT_KEY: If you work with multiple accounts, you can set a default account key here.
- LOG_LEVEL: You can set this to DEBUG, INFO, WARNING, or ERROR to control the server's logging verbosity.
-
-
-
-
-
-
- Running the Server
-
- Once you have the code and your .env file is set up, you should run the server using the plexus_fastmcp_wrapper.py script as configured in your mcp.json file.
- The MCP client (e.g., Claude Desktop App) will execute the command specified in mcp.json when it attempts to connect to the "plexus-mcp-service".
-
-
- You typically don't run the plexus_fastmcp_wrapper.py script manually from the terminal for client use. Instead, ensure your mcp.json is correctly configured, and the client application will start the server process as needed.
-
-
- Make sure your Plexus Python environment (e.g., conda activate py311) is correctly referenced by the full path to python in the command field of your mcp.json.
- The wrapper script handles passing the necessary environment variables and paths to the underlying plexus_fastmcp_server.py.
-
-
-
-
- Troubleshooting Common Issues
-
- Connection Errors: Double-check all paths in your mcp.json file (command, args, env.PYTHONPATH). Ensure they accurately point to your Python executable, the plexus_fastmcp_wrapper.py script, your .env file, and your project directory.
- Authentication Errors: Verify that the --env-file path in mcp.json correctly points to your .env file and that this file contains the correct PLEXUS_API_URL and PLEXUS_API_KEY.
-
-
-
-
- Server Logs
-
- The Plexus MCP server setup (via plexus_fastmcp_wrapper.py) directs operational logs and error messages to stderr.
- MCP clients like the Claude desktop app typically capture and display these stderr logs, or store them in a dedicated log file.
-
-
- For instance, the Claude desktop app often stores MCP interaction logs in ~/Library/Logs/Claude/mcp.log on macOS. Monitoring this file is key for diagnosing issues if the client doesn't display them directly.
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/advanced/page.tsx b/dashboard/app/documentation/advanced/page.tsx
deleted file mode 100644
index c34a5809a..000000000
--- a/dashboard/app/documentation/advanced/page.tsx
+++ /dev/null
@@ -1,86 +0,0 @@
-import { Button as DocButton } from "@/components/ui/button"
-import Link from "next/link"
-import { Metadata } from "next"
-
-export const metadata: Metadata = {
- title: "Advanced - Plexus Documentation",
- description: "Advanced tools and concepts for power users of the Plexus platform."
-}
-
-export default function AdvancedPage() {
- return (
-
-
Advanced Tools & Concepts
-
- Explore advanced tools and concepts that enable deeper integration and customization of Plexus
- for technical users and developers.
-
-
-
-
- Command Line Interface
-
-
- The plexus CLI tool provides powerful command-line access to all Plexus functionality,
- perfect for automation and advanced workflows.
-
-
-
Explore CLI Tool
-
-
-
-
-
- Worker Infrastructure
-
-
- Learn how to set up and manage Plexus worker nodes to process tasks efficiently
- across your infrastructure.
-
-
-
Learn About Workers
-
-
-
-
-
- Python SDK
-
-
- Integrate Plexus directly into your Python applications with our comprehensive SDK,
- enabling programmatic access to all platform features.
-
-
-
Browse SDK Reference
-
-
-
-
-
- Universal Code Snippets
-
-
- Learn about Plexus's universal YAML code format designed for seamless communication
- between humans, AI models, and other systems.
-
-
-
Explore Universal Code Snippets
-
-
-
-
-
- Plexus MCP Server
-
-
- Enable AI agents and tools to interact with Plexus functionality using the Model Context Protocol (MCP).
-
-
-
Explore MCP Server
-
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/advanced/sdk/page.tsx b/dashboard/app/documentation/advanced/sdk/page.tsx
deleted file mode 100644
index ca3ec73de..000000000
--- a/dashboard/app/documentation/advanced/sdk/page.tsx
+++ /dev/null
@@ -1,77 +0,0 @@
-'use client';
-
-export default function SdkPage() {
- return (
-
-
Python SDK Reference
-
- Explore the Python SDK for programmatic access to Plexus functionality.
-
-
-
-
- Overview
-
- The Plexus Python SDK provides a simple and intuitive way to interact with Plexus
- programmatically. Use it to automate workflows, manage resources, and integrate
- Plexus into your applications.
-
-
-
-
- Installation
-
- Install the Plexus SDK using pip:
-
-
- pip install plexus-sdk
-
-
-
-
- Quick Start
-
- Here's a simple example to get you started:
-
-
- {`from plexus import Plexus
-
-# Initialize the client
-plexus = Plexus(api_key="your-api-key")
-
-# Create a new source
-source = plexus.sources.create(
- name="My Source",
- type="text",
- data="Sample content"
-)
-
-# Run an evaluation
-evaluation = plexus.evaluations.create(
- source_id=source.id,
- scorecard_id="your-scorecard-id"
-)`}
-
-
-
-
- Complete Documentation
-
- For complete API reference, authentication guides, advanced usage examples, and best practices,
- visit our comprehensive Python SDK documentation:
-
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/advanced/worker-nodes/page.tsx b/dashboard/app/documentation/advanced/worker-nodes/page.tsx
deleted file mode 100644
index e9eeaf9d5..000000000
--- a/dashboard/app/documentation/advanced/worker-nodes/page.tsx
+++ /dev/null
@@ -1,184 +0,0 @@
-'use client';
-
-export default function WorkerNodesPage() {
- return (
-
-
-
-
Worker Nodes
-
- Learn how to deploy and manage Plexus worker nodes across any infrastructure to process your evaluation tasks.
-
-
-
-
- Overview
-
- Plexus worker nodes are long-running daemon processes that handle evaluation tasks and other operations.
- You can run these workers on any computer with Python installed - whether it's in the cloud (AWS, Azure, GCP)
- or on your own premises.
-
-
- Workers are managed using the Plexus CLI tool, which makes it easy to start, configure, and monitor worker
- processes across your infrastructure.
-
-
-
-
- Starting a Worker
-
- Use the plexus command worker command to start a worker process. Here's a basic example:
-
-
-
-
- {`plexus command worker \\
- --concurrency 4 \\
- --queue celery \\
- --loglevel INFO`}
-
-
-
-
-
--concurrency: Number of worker processes (default: 4)
-
--queue: Queue to process (default: celery)
-
--loglevel: Logging level (default: INFO)
-
-
-
-
- Worker Specialization
-
- Workers can be specialized to handle specific types of tasks using target patterns. This allows you to
- dedicate certain workers to particular workloads:
-
-
-
-
- {`# Worker that only processes dataset-related tasks
-plexus command worker \\
- --target-patterns "datasets/*" \\
- --concurrency 4
-
-# Worker for GPU-intensive tasks
-plexus command worker \\
- --target-patterns "*/gpu-required" \\
- --concurrency 2
-
-# Worker handling multiple task types
-plexus command worker \\
- --target-patterns "datasets/*,training/*" \\
- --concurrency 8`}
-
-
-
-
- Target patterns use the format domain/subdomain and support wildcards. Some examples:
-
-
- datasets/call-criteria - Only process call criteria dataset tasks
- training/call-criteria - Only handle call criteria training tasks
- */gpu-required - Process any tasks requiring GPU resources
- datasets/* - Handle all dataset-related tasks
-
-
-
-
- Deployment Examples
-
- Here are some common deployment scenarios:
-
-
-
-
-
AWS EC2
-
-
- {`# Run in a screen session for persistence
-screen -S plexus-worker
-plexus command worker \\
- --concurrency 8 \\
- --loglevel INFO
-# Ctrl+A, D to detach`}
-
-
-
-
-
-
Local Development
-
-
- {`# Run with increased logging for debugging
-plexus command worker \\
- --concurrency 2 \\
- --loglevel DEBUG`}
-
-
-
-
-
-
GPU Worker
-
-
- {`# Dedicated GPU worker with specific targeting
-plexus command worker \\
- --concurrency 1 \\
- --target-patterns "*/gpu-required" \\
- --loglevel INFO`}
-
-
-
-
-
-
-
- Best Practices
-
- Use a process manager (like systemd, supervisor, or screen) to keep workers running
- Set concurrency based on available CPU cores and memory
- Use target patterns to optimize resource utilization
- Monitor worker logs for errors and performance issues
- Deploy workers close to your data sources when possible
- Consider using auto-scaling groups in cloud environments
-
-
-
-
- Additional Resources
-
- For more information about worker deployment and management:
-
-
- See the CLI documentation for detailed command reference
- Check the built-in help with plexus command worker --help
- View worker logs with --loglevel DEBUG for troubleshooting
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/concepts/items/page.tsx b/dashboard/app/documentation/concepts/items/page.tsx
deleted file mode 100644
index 45b137e83..000000000
--- a/dashboard/app/documentation/concepts/items/page.tsx
+++ /dev/null
@@ -1,105 +0,0 @@
-export default function ItemsPage() {
- return (
-
-
Items
-
- Learn about Items, the core content units that Plexus analyzes and scores.
-
-
-
-
- What are Items?
-
- Items are individual pieces of content that you want to analyze or evaluate using Plexus.
- They can be any type of content that your AI, ML, or logical scoring techniques can process, such as:
-
-
- Call center transcripts for quality assurance
- Customer emails or support tickets
- Case files or documents
- Code repositories for analysis
- Images or videos for content moderation
-
-
-
-
- How Items Work
-
- Items are the foundation of Plexus's evaluation system:
-
-
-
-
1. Organization
-
- Each Item belongs to an Account and can be referenced by multiple Scorecards.
- This allows you to evaluate the same content using different criteria or scoring methods.
-
-
-
-
2. Scoring
-
- When you apply a Scorecard to an Item, Plexus creates a ScoringJob to process it.
- The results are stored as ScoreResults, which contain the scores, confidence levels,
- and any additional metadata from the scoring process.
-
-
-
-
3. Evaluation
-
- Items can be part of Evaluations, where their scoring results are compared against
- known correct answers to measure the accuracy and effectiveness of your scoring methods.
-
-
-
-
-
-
- Item Properties
-
-
-
Core Properties
-
- Name : A unique identifier for the Item
- Description : Optional details about the Item's content or purpose
- Account : The Account that owns this Item
-
-
-
-
Relationships
-
- Scorecards : Scorecards that reference this Item
- ScoringJobs : Records of scoring operations performed on this Item
- ScoreResults : Results from scoring operations
- Evaluation : Optional link to an Evaluation this Item is part of
-
-
-
-
-
-
- Best Practices
-
- Use clear, descriptive names for your Items to make them easy to identify
- Include relevant metadata in the description to provide context
- Organize Items logically within your Account structure
- Keep track of which Items are used in Evaluations for quality control
- Regularly review ScoreResults to monitor scoring effectiveness
-
-
-
-
- Next Steps
-
- Now that you understand Items, you can:
-
-
- Create Scorecards to evaluate your Items
- Set up scoring criteria using Scores
- Run Evaluations to measure scoring accuracy
- Monitor results through the dashboard
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/concepts/page.tsx b/dashboard/app/documentation/concepts/page.tsx
deleted file mode 100644
index 876f44ade..000000000
--- a/dashboard/app/documentation/concepts/page.tsx
+++ /dev/null
@@ -1,152 +0,0 @@
-import { Button as DocButton } from "@/components/ui/button"
-import Link from "next/link"
-import { Metadata } from "next"
-
-export const metadata: Metadata = {
- title: "Basics - Plexus Documentation",
- description: "Learn about the core concepts in Plexus"
-}
-
-export default function BasicsPage() {
- return (
-
-
Core Concepts
-
- Learn about the fundamental building blocks that make up Plexus.
-
-
-
-
- Core Concepts
-
-
-
Items
-
- Individual pieces of content that you want to analyze or evaluate using Plexus. Items are the core units that get scored.
-
-
-
Learn about Items
-
-
-
-
-
Sources
-
- Input data for evaluation, including text and audio content. Sources are the foundation
- of content analysis in Plexus.
-
-
-
Learn about Sources
-
-
-
-
-
Scores
-
- Individual evaluation criteria that define what to measure. Scores are the building blocks
- of scorecards and can range from simple questions to complex metrics.
-
-
-
Learn about Scores
-
-
-
-
-
Scorecards
-
- Collections of scores that form a complete evaluation framework. Scorecards organize
- related evaluation criteria into meaningful groups.
-
-
-
Learn about Scorecards
-
-
-
-
-
Evaluations
-
- The process of analyzing sources using scorecards to generate insights
- and quality metrics.
-
-
-
Understand Evaluations
-
-
-
-
-
Tasks
-
- Individual units of work in Plexus, representing operations like source processing
- and evaluations.
-
-
-
Discover Tasks
-
-
-
-
-
Reports
-
- Flexible, template-driven analyses and summaries generated from your Plexus data using reusable components.
-
-
-
Learn about Reports
-
-
-
-
-
Evaluation Metrics
-
- Specialized visualization tools that help interpret agreement and accuracy metrics, especially when dealing with imbalanced data.
-
-
-
Understand Evaluation Metrics
-
-
-
-
-
-
- How It All Works Together
-
- The Plexus workflow follows a simple pattern:
-
-
-
- Create Sources
- Upload or connect your content for analysis.
-
-
- Define Scorecards
- Set up evaluation criteria and scoring rules.
-
-
- Run Evaluations
- Process sources using your scorecards.
-
-
- Monitor Tasks & View Reports
- Track progress of evaluations and report generation, then review the results and generated reports.
-
-
-
-
-
- Next Steps
-
- Start with Sources to learn how to add content to Plexus, then explore Scorecards
- to understand how to evaluate your content effectively.
-
-
-
- Get Started with Sources
-
-
- View Step-by-Step Guides
-
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/concepts/scorecards/page.tsx b/dashboard/app/documentation/concepts/scorecards/page.tsx
deleted file mode 100644
index 23e8d8871..000000000
--- a/dashboard/app/documentation/concepts/scorecards/page.tsx
+++ /dev/null
@@ -1,203 +0,0 @@
-import { Button as DocButton } from "@/components/ui/button"
-import Link from "next/link"
-import { Metadata } from "next"
-
-export const metadata: Metadata = {
- title: "Scorecards - Plexus Documentation",
- description: "Learn about Scorecards in Plexus - the framework for evaluating content quality and performance"
-}
-
-export default function ScorecardsPage() {
- return (
-
-
Scorecards
-
- Understand how to create and manage Scorecards to evaluate your content effectively.
-
-
-
-
- What are Scorecards?
-
- Scorecards are collections of evaluation criteria that define how your content
- should be analyzed. They help ensure consistent evaluation across all your sources
- by providing a structured framework for assessment.
-
-
- Think of a scorecard as a comprehensive evaluation template that contains all the
- metrics and criteria you want to measure for a specific type of content. Scorecards
- can be tailored to different content types, business objectives, or quality standards.
-
-
-
-
- Scorecard Structure
-
-
-
Sections
-
- Scorecards are organized into logical sections that group related evaluation criteria.
- For example, a customer service scorecard might have sections for "Greeting", "Problem Resolution",
- and "Closing".
-
-
-
-
-
Scores
-
- Individual evaluation criteria that assess specific aspects of your content.
- Each score can be customized with its own evaluation logic and requirements.
- Scores are the building blocks of your evaluation framework.
-
-
- Examples of scores include:
-
-
- Grammar and spelling accuracy
- Sentiment analysis (positive/negative/neutral)
- Compliance with specific regulations
- Presence of required information
- Custom business-specific metrics
-
-
-
-
-
Sections
-
- Logical groupings of related scores within a scorecard. Sections help organize
- scores into categories for better management and understanding.
-
-
-
-
Weights
-
- Importance factors that determine how much each score contributes to the
- overall evaluation result. Weights allow you to prioritize certain criteria
- over others based on their importance to your business objectives.
-
-
-
-
Versions
-
- Score configurations are versioned, allowing you to track changes over time,
- compare different implementations, and promote specific versions to champion status.
-
-
-
-
-
-
- CLI Management
-
- The Plexus CLI provides powerful commands for managing scorecards:
-
-
-
-
Listing Scorecards
-
- {`# List all scorecards for an account
-plexus scorecards list "account-name"
-
-# List with filtering
-plexus scorecards list "account-name" --name "Scorecard Name"
-plexus scorecards list "account-name" --key "scorecard-key"
-
-# Performance options
-plexus scorecards list "account-name" --fast # Skip fetching scores for faster results
-plexus scorecards list "account-name" --hide-scores # Don't display scores in output`}
-
-
- The list command uses an optimized single GraphQL query to fetch scorecards, sections,
- and scores in one request, providing significantly faster performance.
-
-
-
-
Viewing Score Details
-
- {`# View a specific score by name, key, ID, or external ID
-plexus scorecards score "Score Name" --account "account-name"
-plexus scorecards score "score-key" --account "account-name"
-plexus scorecards score "score-id" --show-versions --show-config
-
-# Scope to a specific scorecard
-plexus scorecards score "Score Name" --scorecard "Scorecard Name"`}
-
-
-
-
Listing Scores in a Scorecard
-
- To list all scores within a scorecard, use the scores list command:
-
-
- {`# List all scores in a scorecard
-plexus scores list --scorecard "Scorecard Name"
-
-# You can also use the singular form
-plexus score list --scorecard "Scorecard Name"`}
-
-
- This command displays all scores organized by section, including their IDs, keys, and external IDs.
-
-
-
-
Version Management
-
- {`# View version history (coming soon)
-plexus scorecards history --account-key "account-key" --score-key "score-key"
-
-# Promote a version to champion (coming soon)
-plexus scorecards promote --account-key "account-key" --score-id "score-id" --version-id "version-id"
-
-# Pull latest champion versions (coming soon)
-plexus scorecards pull --account-key "account-key"
-
-# Push local changes as new versions
-plexus scorecards push --scorecard "scorecard-name" --note "Updated configuration"`}
-
-
-
-
-
-
- Best Practices
-
-
-
Scorecard Organization
-
- Group related scores into logical sections to improve clarity and maintainability.
- Use consistent naming conventions for scorecards, sections, and scores.
-
-
-
-
Version Management
-
- Add descriptive notes to new versions to document changes. Test new versions
- thoroughly before promoting them to champion status.
-
-
-
-
Performance Considerations
-
- Use the --fast option when listing many scorecards to improve performance.
- This skips fetching score details when you only need basic scorecard information.
-
-
-
-
-
-
- Coming Soon
-
- Additional scorecard features are being developed. Check back soon for:
-
-
- Advanced score configuration options
- Collaborative editing features
- Performance analytics
- Bulk operations for scorecard management
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/concepts/sources/page.tsx b/dashboard/app/documentation/concepts/sources/page.tsx
deleted file mode 100644
index 6d7efeb4e..000000000
--- a/dashboard/app/documentation/concepts/sources/page.tsx
+++ /dev/null
@@ -1,53 +0,0 @@
-export default function SourcesPage() {
- return (
-
-
Sources
-
- Learn about Sources in Plexus and how they form the foundation of your evaluation workflows.
-
-
-
-
- What are Sources?
-
- Sources are the input data that you want to evaluate using Plexus. They can be text,
- audio files, or other supported formats that you want to analyze using AI models.
-
-
-
-
- Types of Sources
-
-
-
Text Sources
-
- Plain text content that can be evaluated for various metrics like sentiment,
- quality, or compliance with specific criteria.
-
-
-
-
Audio Sources
-
- Audio recordings that can be transcribed and analyzed for content,
- quality, or specific patterns.
-
-
-
-
-
-
- Coming Soon
-
- Detailed documentation about Sources is currently being developed. Check back soon for:
-
-
- Source creation and management
- Supported formats and limitations
- Best practices for organizing sources
- Advanced source configurations
-
-
-
-
- )
-}
\ No newline at end of file
diff --git a/dashboard/app/documentation/evaluation-metrics/page.tsx b/dashboard/app/documentation/evaluation-metrics/page.tsx
deleted file mode 100644
index 6b3c7d309..000000000
--- a/dashboard/app/documentation/evaluation-metrics/page.tsx
+++ /dev/null
@@ -1,480 +0,0 @@
-import { Button as DocButton } from "@/components/ui/button"
-import Link from "next/link"
-import { Metadata } from "next"
-import { GaugeThresholdComputer } from "@/utils/gauge-thresholds"
-import EvaluationCard from '@/components/EvaluationCard'
-import { Segment } from "@/components/gauge"
-
-export const metadata: Metadata = {
- title: "Interpreting Evaluation Metrics - Plexus Documentation",
- description: "Understanding the challenges of interpreting classifier accuracy and an overview of Plexus solutions."
-}
-
-// Helper function to create sample score data for examples
-const createExampleScore = (
- id: string,
- name: string,
- ac1: number,
- accuracy: number,
- itemCount: number,
- mismatches: number,
- labelDistribution?: Record
-) => ({
- id,
- score_name: name,
- cc_question_id: `example-${id}`,
- ac1,
- item_count: itemCount,
- mismatches,
- accuracy,
- label_distribution: labelDistribution
-})
-
-// Define fixed segments for the illustrative accuracy gauges in the initial scenarios (kept for initial coin flip examples if those are retained in narrative)
-const fixedAccuracyGaugeSegments: Segment[] = [
- { start: 0, end: 50, color: 'var(--gauge-inviable)' },
- { start: 50, end: 70, color: 'var(--gauge-converging)' },
- { start: 70, end: 80, color: 'var(--gauge-almost)' },
- { start: 80, end: 90, color: 'var(--gauge-viable)' },
- { start: 90, end: 100, color: 'var(--gauge-great)' },
-];
-
-export default function EvaluationMetricsPage() {
- // Article Topic Labeler - Our consistent example through the document
- const articleTopicLabelerExampleData = {
- id: 'article-topic-labeler',
- score_name: 'Article Topic Labeler Performance',
- cc_question_id: 'example-topic-labeler',
- accuracy: 62.0,
- item_count: 100,
- mismatches: 38, // 100 - 62
- gwetAC1: 0.512, // Lower AC1 reflecting 62% accuracy
- label_distribution: {
- 'News': 40,
- 'Sports': 15,
- 'Business': 15,
- 'Technology': 15,
- 'Lifestyle': 15
- }
- };
-
- const articleTopicLabelerClassDistribution = [
- { label: "News", count: 40 },
- { label: "Sports", count: 15 },
- { label: "Business", count: 15 },
- { label: "Technology", count: 15 },
- { label: "Lifestyle", count: 15 }
- ];
-
- const articleTopicLabelerConfusionMatrix = {
- labels: ["News", "Sports", "Business", "Technology", "Lifestyle"],
- matrix: [
- { actualClassLabel: "News", predictedClassCounts: { "News": 28, "Sports": 3, "Business": 3, "Technology": 3, "Lifestyle": 3 } },
- { actualClassLabel: "Sports", predictedClassCounts: { "News": 3, "Sports": 9, "Business": 1, "Technology": 1, "Lifestyle": 1 } },
- { actualClassLabel: "Business", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 8, "Technology": 2, "Lifestyle": 1 } },
- { actualClassLabel: "Technology", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 2, "Technology": 8, "Lifestyle": 1 } },
- { actualClassLabel: "Lifestyle", predictedClassCounts: { "News": 3, "Sports": 1, "Business": 1, "Technology": 1, "Lifestyle": 9 } },
- ],
- };
-
- const articleTopicLabelerPredictedDistribution = [
- { label: "News", count: 40 },
- { label: "Sports", count: 15 },
- { label: "Business", count: 15 },
- { label: "Technology", count: 15 },
- { label: "Lifestyle", count: 15 }
- ];
-
- // Segments for the final Article Topic Labeler example (fully contextualized)
- const articleTopicLabelerFullContextSegments = GaugeThresholdComputer.createSegments(
- GaugeThresholdComputer.computeThresholds(articleTopicLabelerExampleData.label_distribution)
- );
-
- // Coin flip examples for the narrative
- const fairCoinData = createExampleScore(
- 'fair-coin',
- 'Randomly Guessing Coin Flips (50/50)',
- -0.04,
- 48.0,
- 100,
- 52,
- { 'Heads': 50, 'Tails': 50 }
- )
-
- const alwaysHeadsData = createExampleScore(
- 'always-heads',
- 'Always Guessing "Heads" (50/50)',
- 0.02,
- 51.0,
- 100,
- 49,
- { 'Heads': 51, 'Tails': 49 }
- )
-
- const fairCoinDistribution = [
- { label: "Heads", count: 51 },
- { label: "Tails", count: 49 }
- ];
-
- const predictedFairCoinData = [
- { label: "Heads", count: 50 },
- { label: "Tails", count: 50 }
- ];
-
- const predictedAlwaysHeadsData = [
- { label: "Heads", count: 100 },
- { label: "Tails", count: 0 }
- ];
-
- const fairCoinConfusionMatrix = {
- labels: ["Heads", "Tails"],
- matrix: [
- { actualClassLabel: "Heads", predictedClassCounts: { "Heads": 24, "Tails": 26 } },
- { actualClassLabel: "Tails", predictedClassCounts: { "Heads": 26, "Tails": 24 } },
- ],
- };
-
- const alwaysHeadsConfusionMatrix = {
- labels: ["Heads", "Tails"],
- matrix: [
- { actualClassLabel: "Heads", predictedClassCounts: { "Heads": 51, "Tails": 0 } },
- { actualClassLabel: "Tails", predictedClassCounts: { "Heads": 49, "Tails": 0 } },
- ],
- };
-
- // Card Suit Guessing Example Data for narrative
- const cardSuitData = createExampleScore(
- 'card-suit-guessing',
- 'Predicting a Card Suit (4 Classes, Random Guessing)',
- -0.03,
- 23.0,
- 208,
- 160,
- { '♥️': 52, '♦️': 52, '♣️': 52, '♠️': 52 }
- );
-
- const cardSuitActualDistribution = [
- { label: "♥️", count: 52 },
- { label: "♦️", count: 52 },
- { label: "♣️", count: 52 },
- { label: "♠️", count: 52 }
- ];
-
- const cardSuitConfusionMatrix = {
- labels: ["♥️", "♦️", "♣️", "♠️"],
- matrix: [
- { actualClassLabel: "♥️", predictedClassCounts: { "♥️": 12, "♦️": 13, "♣️": 13, "♠️": 14 } },
- { actualClassLabel: "♦️", predictedClassCounts: { "♥️": 13, "♦️": 12, "♣️": 14, "♠️": 13 } },
- { actualClassLabel: "♣️", predictedClassCounts: { "♥️": 13, "♦️": 14, "♣️": 12, "♠️": 13 } },
- { actualClassLabel: "♠️", predictedClassCounts: { "♥️": 14, "♦️": 13, "♣️": 13, "♠️": 12 } },
- ],
- };
-
- const cardSuitPredictedDistribution = [
- { label: "♥️", count: 12+13+13+14 },
- { label: "♦️", count: 13+12+14+13 },
- { label: "♣️", count: 13+14+12+13 },
- { label: "♠️", count: 14+13+13+12 }
- ];
-
- return (
-
-
Interpreting Evaluation Metrics: The Challenge
-
- Understanding metrics like accuracy is key to evaluating AI performance. However, raw numbers can be deceptive without proper context. This page explores common pitfalls and introduces Plexus's approach to clearer, more reliable evaluation.
-
-
-
-
- The Big Question: Is This Classifier Good?
-
- When developing an AI system, we need gauges to tell if our model is performing well. Let's consider an "Article Topic Labeler" that classifies articles into five categories: News, Sports, Business, Technology, and Lifestyle. Evaluated on 100 articles, it achieves 62% accuracy.
-
-
-
-
-
Is 62% accuracy good?
-
- This number seems mediocre. The uncontextualized gauge suggests it's just 'converging'. But is this poor performance, or is there more to the story?
-
-
- >
- }
- />
-
-
- Intuitively, 62% seems somewhat weak—nearly 4 out of 10 articles are wrong. But to judge this, we need a baseline: what accuracy would random guessing achieve?
-
-
-
-
- Pitfall 1: Ignoring the Baseline (Chance Agreement)
-
- Raw accuracy is meaningless without knowing the chance agreement rate. Consider predicting 100 coin flips:
-
-
-
-
- ~50% accuracy achieved.
-
- But is this good guessing without knowing the chance baseline?
-
-
- }
- />
-
-
- ~51% accuracy achieved.
-
- Slightly better, but still hovering around the 50% chance rate.
-
-
- }
- />
-
-
-
-
Key Insight: The Baseline Problem
-
- Both strategies hover around 50% accuracy. This is the base random-chance agreement rate for a binary task. Without understanding this baseline, raw accuracy numbers are uninterpretable. Any reported accuracy must be compared against what random chance would yield for that specific problem.
-
-
-
-
-
- Pitfall 2: The Moving Target of Multiple Classes
-
- The chance agreement rate isn't fixed; it changes with the number of classes. For example, consider guessing the suit of a randomly drawn card from a standard 4-suit deck:
-
-
-
-
-
~23% accuracy in this run.
-
- The fixed gauge makes this look terrible. Is it?
-
-
-
-
Misleading Raw View
-
- For a 4-class problem, 25% is the actual random chance baseline. The raw gauge is deceptive here.
-
-
- >
- }
- />
-
-
-
Key Insight: Number of Classes Shifts the Baseline
-
- The baseline random-chance agreement rate dropped from 50% (for 2 classes like coin flips) to 25% (for 4 classes like card suits). This is a critical concept: as the number of equally likely options increases, the accuracy you'd expect from random guessing decreases . Therefore, a 30% accuracy is much better for a 10-class problem (10% chance) than for a 2-class problem (50% chance).
-
-
-
-
-
- Pitfall 3: The Illusion of Class Imbalance
-
- The distribution of classes in your data (class balance) adds another layer of complexity. If a dataset is imbalanced, a classifier can achieve high accuracy by simply always predicting the majority class, even if it has no real skill.
-
-
-
- ~52% accuracy.
- Strategy doesn't exploit the deck's known 75/25 imbalance.
-
- }
- />
-
-
-
-
Deceptively High!
-
- This 75% is achieved by exploiting the imbalance (always guessing majority), not by skill.
-
-
- >
- }
- />
-
- A more extreme example: an email filter claims 97% accuracy at detecting prohibited content. However, if only 3% of emails actually contain such content, a filter that labels *every single email* as "safe" (catching zero violations) will achieve 97% accuracy.
-