Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,4 @@ jobs:
- name: Run on-device examples
run: |
export PATH="$HOME/.local/bin:$PATH"
source /home/github-ci/Ascend/ascend-toolkit/latest/bin/setenv.bash && ./ci.sh -p a2a3 -d 5
source /home/github-ci/Ascend/ascend-toolkit/latest/bin/setenv.bash && ./ci.sh -p a2a3 -d 4-7 --parallel
317 changes: 251 additions & 66 deletions ci.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash
set -e # Exit immediately if any command fails (non-zero exit code)

# Parse arguments
PLATFORM=""
DEVICE_ID="0"
DEVICE_RANGE=""
PARALLEL=false

while [[ $# -gt 0 ]]; do
case $1 in
Expand All @@ -12,108 +12,293 @@ while [[ $# -gt 0 ]]; do
shift 2
;;
-d|--device)
DEVICE_ID="$2"
DEVICE_RANGE="$2"
shift 2
;;
--parallel)
PARALLEL=true
shift
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
done

# Expand a device spec into the global DEVICES array and NUM_DEVICES count.
# Accepted forms: a single non-negative id ("5"), an inclusive ascending
# range ("5-8"), or an empty string (defaults to device 0).
# Globals:   DEVICES (written), NUM_DEVICES (written)
# Arguments: $1 - device spec
# Returns:   0 on success; 1 with a message on stderr for a malformed spec
#            or a descending range (which would otherwise yield zero devices
#            and break the modulo-based device assignment later on).
parse_device_range() {
  local spec="${1:-0}"
  local first last id

  if [[ "$spec" =~ ^([0-9]+)-([0-9]+)$ ]]; then
    # Force base 10 so ids with leading zeros are not parsed as octal.
    first=$((10#${BASH_REMATCH[1]}))
    last=$((10#${BASH_REMATCH[2]}))
    if (( first > last )); then
      echo "Invalid device range (start > end): $spec" >&2
      return 1
    fi
    DEVICES=()
    for ((id = first; id <= last; id++)); do
      DEVICES+=("$id")
    done
  elif [[ "$spec" =~ ^[0-9]+$ ]]; then
    DEVICES=("$spec")
  else
    echo "Invalid device spec: $spec (expected N or N-M)" >&2
    return 1
  fi

  NUM_DEVICES=${#DEVICES[@]}
}

parse_device_range "$DEVICE_RANGE" || exit 1

OS=$(uname -s)
echo "Running tests on $OS..."

# Run pytest
OVERALL_EXIT=0

# Run pytest synchronously first
if [[ -d "tests" && "$OS" == "Linux" && "$PLATFORM" != "a2a3sim" ]]; then
echo "Running pytest tests..."
pytest tests -v
if ! pytest tests -v; then
echo "PYTEST FAILED"
OVERALL_EXIT=1
fi
fi

# Discover and run all examples (recursively search for valid example directories)
# Create a private temp directory that holds the per-task log files and the
# shared results file; each task appends one line to RESULTS_FILE, and the
# summary at the end of the script reads it back.
LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/ci_parallel_$$.XXXXXX")
RESULTS_FILE="${LOG_DIR}/results.txt"
# Make sure the results file exists even if no task ever writes to it.
touch "$RESULTS_FILE"

# Signal handler for INT/TERM: stop running jobs, remove the temp directory,
# and exit with status 130.
cleanup() {
# `kill 0` signals every process in the current process group; errors are
# silenced. NOTE(review): that group includes this script itself, so the
# handler may be re-signalled while it runs — confirm this is intended.
kill 0 2>/dev/null
rm -rf "$LOG_DIR"
exit 130
}
trap cleanup INT TERM
# On every exit path, remove the temp directory (and with it all task logs).
trap 'rm -rf "$LOG_DIR"' EXIT

# ---- Discover all tasks ----
EXAMPLES_DIR="examples"
while IFS= read -r -d '' example_dir; do
# Skip the scripts directory
if [[ "$example_dir" == *"/scripts" ]]; then
continue
fi
DEVICE_TESTS_DIR="tests/device_tests"

# Check if this is a valid example (has kernels/kernel_config.py and golden.py)
declare -a HW_TASK_NAMES=()
declare -a HW_TASK_DIRS=()
declare -a SIM_TASK_NAMES=()
declare -a SIM_TASK_DIRS=()

# Discover examples
while IFS= read -r -d '' example_dir; do
[[ "$example_dir" == *"/scripts" ]] && continue
kernel_config="${example_dir}/kernels/kernel_config.py"
golden="${example_dir}/golden.py"
[[ -f "$kernel_config" && -f "$golden" ]] || continue

if [[ -f "$kernel_config" && -f "$golden" ]]; then
# Get relative path from examples directory for display
example_name="${example_dir#$EXAMPLES_DIR/}"
echo "========================================"
echo "Running example: $example_name"
echo "========================================"
example_name="${example_dir#$EXAMPLES_DIR/}"

# If platform is specified, use it
if [[ -n "$PLATFORM" ]]; then
if [[ "$PLATFORM" == "a2a3" ]]; then
python examples/scripts/run_example.py \
-k "${example_dir}/kernels" \
-g "$golden" \
-p "$PLATFORM" -d "$DEVICE_ID"
else
python examples/scripts/run_example.py \
-k "${example_dir}/kernels" \
-g "$golden" \
-p "$PLATFORM"
fi
# Otherwise, use OS-based defaults
elif [ "$OS" = "Darwin" ]; then
# Mac: only simulation
python examples/scripts/run_example.py \
-k "${example_dir}/kernels" \
-g "$golden" \
-p a2a3sim
if [[ -n "$PLATFORM" ]]; then
if [[ "$PLATFORM" == "a2a3" ]]; then
HW_TASK_NAMES+=("example:${example_name}")
HW_TASK_DIRS+=("${example_dir}")
else
# Linux: both hardware and simulation
python examples/scripts/run_example.py \
-k "${example_dir}/kernels" \
-g "$golden" \
-p a2a3 -d "$DEVICE_ID"
python examples/scripts/run_example.py \
-k "${example_dir}/kernels" \
-g "$golden" \
-p a2a3sim
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
fi
elif [[ "$OS" == "Darwin" ]]; then
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
else
HW_TASK_NAMES+=("example:${example_name}")
HW_TASK_DIRS+=("${example_dir}")
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
fi
done < <(find "$EXAMPLES_DIR" -mindepth 1 -type d -print0 | sort -z)

# Discover and run device tests (a2a3 hardware only - never run on simulator)
DEVICE_TESTS_DIR="tests/device_tests"
# Discover device tests (hardware only)
if [[ -d "$DEVICE_TESTS_DIR" ]]; then
RUN_DEVICE_TESTS=false
if [[ "$PLATFORM" == "a2a3" ]]; then
RUN_DEVICE_TESTS=true
elif [[ -z "$PLATFORM" && "$OS" == "Linux" ]]; then
RUN_DEVICE_TESTS=true
fi
[[ "$PLATFORM" == "a2a3" ]] && RUN_DEVICE_TESTS=true
[[ -z "$PLATFORM" && "$OS" == "Linux" ]] && RUN_DEVICE_TESTS=true

if [[ "$RUN_DEVICE_TESTS" == "true" ]]; then
while IFS= read -r -d '' test_dir; do
kernel_config="${test_dir}/kernels/kernel_config.py"
golden="${test_dir}/golden.py"

if [[ -f "$kernel_config" && -f "$golden" ]]; then
test_name="${test_dir#$DEVICE_TESTS_DIR/}"
echo "========================================"
echo "Running device test: $test_name"
echo "========================================"

python examples/scripts/run_example.py \
-k "${test_dir}/kernels" \
-g "$golden" \
-p a2a3 -d "$DEVICE_ID"
fi
[[ -f "$kernel_config" && -f "$golden" ]] || continue
test_name="${test_dir#$DEVICE_TESTS_DIR/}"
HW_TASK_NAMES+=("device_test:${test_name}")
HW_TASK_DIRS+=("${test_dir}")
done < <(find "$DEVICE_TESTS_DIR" -mindepth 1 -type d -print0 | sort -z)
else
echo "Skipping device tests (a2a3 hardware only)"
fi
fi

echo "Discovered ${#HW_TASK_NAMES[@]} hardware tasks, ${#SIM_TASK_NAMES[@]} simulation tasks"

MAX_RETRIES=3

#######################################
# Run a single hardware (a2a3) task, retrying on a different device after
# each failure. The first attempt uses the assigned device; every retry picks
# the first entry of DEVICES that this task has not tried yet.
# Each attempt is logged to its own file:
#   ${LOG_DIR}/<safe_name>_hw_attempt<N>.log
# Exactly one result line is appended to RESULTS_FILE:
#   <name>:a2a3|PASS|device:<id>|attempt:<N>
#   <name>:a2a3|FAIL|failed_on:<ids>|no_device_left
#   <name>:a2a3|FAIL|failed_on:<ids>|attempts:<max>
# Globals:   DEVICES, LOG_DIR, RESULTS_FILE (read); MAX_RETRIES (read,
#            defaults to 3 when unset)
# Arguments: $1 - task name, $2 - task directory,
#            $3 - initial device id (optional, defaults to DEVICES[0])
# Returns:   0 if any attempt passed, 1 otherwise
#######################################
run_hw_task_with_retry() {
  local name="$1"
  local dir="$2"
  local initial_device="${3:-${DEVICES[0]}}"
  # ':' and '/' are replaced so the task name is usable as a file name.
  local safe_name="${name//[:\/]/_}"
  local -r max_attempts="${MAX_RETRIES:-3}"
  local -a tried_devices=()
  local -a failed_devices=()
  local attempt device_id dev tried already_tried task_log rc

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # First attempt uses the assigned device, retries pick the next untried one.
    device_id=""
    if (( attempt == 1 )); then
      device_id="$initial_device"
    else
      for dev in "${DEVICES[@]}"; do
        already_tried=false
        for tried in "${tried_devices[@]}"; do
          if [[ "$tried" == "$dev" ]]; then
            already_tried=true
            break
          fi
        done
        if [[ "$already_tried" == "false" ]]; then
          device_id="$dev"
          break
        fi
      done
    fi

    # No untried device left
    if [[ -z "$device_id" ]]; then
      echo "${name}:a2a3|FAIL|failed_on:${failed_devices[*]}|no_device_left" >> "$RESULTS_FILE"
      return 1
    fi

    tried_devices+=("$device_id")
    task_log="${LOG_DIR}/${safe_name}_hw_attempt${attempt}.log"

    # Capture the status with `|| rc=$?`: the script runs under `set -e`, and
    # without this guard a failing run would terminate the shell right here,
    # before any retry happens or any FAIL line is recorded.
    rc=0
    {
      echo "========================================"
      echo "[Device $device_id] Running: $name (attempt $attempt/$max_attempts)"
      echo "========================================"
      python examples/scripts/run_example.py \
        -k "${dir}/kernels" -g "${dir}/golden.py" \
        -p a2a3 -d "$device_id"
    } > "$task_log" 2>&1 || rc=$?

    if [[ $rc -eq 0 ]]; then
      echo "${name}:a2a3|PASS|device:${device_id}|attempt:${attempt}" >> "$RESULTS_FILE"
      return 0
    fi

    failed_devices+=("$device_id")
    echo "[Retry] $name failed on device $device_id (attempt $attempt/$max_attempts)" >&2
  done

  # All retries exhausted
  echo "${name}:a2a3|FAIL|failed_on:${failed_devices[*]}|attempts:${max_attempts}" >> "$RESULTS_FILE"
  return 1
}

# ---- Sequential mode ----
# HW tasks run one after another (each with its own retry logic), followed by
# the simulation tasks; sim output goes straight to the console.
if [[ "$PARALLEL" == "false" ]]; then
  for i in "${!HW_TASK_NAMES[@]}"; do
    # The outcome is recorded in RESULTS_FILE. `|| true` keeps `set -e` from
    # aborting the whole run (and skipping the summary) when one task fails.
    run_hw_task_with_retry "${HW_TASK_NAMES[$i]}" "${HW_TASK_DIRS[$i]}" || true
  done
  for i in "${!SIM_TASK_NAMES[@]}"; do
    name="${SIM_TASK_NAMES[$i]}"
    dir="${SIM_TASK_DIRS[$i]}"
    echo "========================================"
    echo "Running: $name (a2a3sim)"
    echo "========================================"
    if python examples/scripts/run_example.py \
      -k "${dir}/kernels" -g "${dir}/golden.py" \
      -p a2a3sim; then
      echo "${name}:a2a3sim|PASS" >> "$RESULTS_FILE"
    else
      echo "${name}:a2a3sim|FAIL" >> "$RESULTS_FILE"
    fi
  done
else
  # ---- Parallel mode ----
  declare -a WORKER_PIDS=()

  # Launch sim tasks in parallel (no device constraint); each one writes its
  # output to its own log file and appends its result to RESULTS_FILE.
  for i in "${!SIM_TASK_NAMES[@]}"; do
    name="${SIM_TASK_NAMES[$i]}"
    dir="${SIM_TASK_DIRS[$i]}"
    safe_name="${name//[:\/]/_}"
    log_file="${LOG_DIR}/${safe_name}_sim.log"

    (
      echo "========================================"
      echo "Running: $name (a2a3sim)"
      echo "========================================"
      if python examples/scripts/run_example.py \
        -k "${dir}/kernels" -g "${dir}/golden.py" -p a2a3sim; then
        echo "${name}:a2a3sim|PASS" >> "$RESULTS_FILE"
      else
        echo "${name}:a2a3sim|FAIL" >> "$RESULTS_FILE"
      fi
    ) > "$log_file" 2>&1 &
    WORKER_PIDS+=($!)
  done

  # Launch HW tasks with round-robin device assignment: task i starts on
  # device slot (i % NUM_DEVICES), and one background worker per slot runs
  # its tasks in order. Each task retries on a different device on failure.
  for ((d = 0; d < NUM_DEVICES; d++)); do
    device_id="${DEVICES[$d]}"

    # Collect task indices for this device slot
    slot_indices=()
    for i in "${!HW_TASK_NAMES[@]}"; do
      if (( i % NUM_DEVICES == d )); then
        slot_indices+=("$i")
      fi
    done
    if [[ ${#slot_indices[@]} -eq 0 ]]; then
      continue
    fi

    worker_log="${LOG_DIR}/device_${device_id}_worker.log"

    (
      for idx in "${slot_indices[@]}"; do
        # `|| true`: a failed task must not terminate this worker (the
        # subshell inherits `set -e`), or the rest of its slot never runs.
        run_hw_task_with_retry "${HW_TASK_NAMES[$idx]}" "${HW_TASK_DIRS[$idx]}" "$device_id" || true
      done
    ) > "$worker_log" 2>&1 &
    WORKER_PIDS+=($!)
  done

  # Wait for all workers; their exit status is ignored on purpose because
  # pass/fail is taken from RESULTS_FILE.
  for pid in "${WORKER_PIDS[@]}"; do
    wait "$pid" 2>/dev/null || true
  done
fi

# ---- Print summary ----
#######################################
# Print the results table and, for every failed task, its log files.
# Each line of the results file has the form
#   <task>:<platform>|<PASS|FAIL>[|<extra1>|<extra2>]
# Globals:   PASS_COUNT, FAIL_COUNT (written)
# Arguments: $1 - results file, $2 - directory containing the task logs
# Outputs:   the summary on stdout
#######################################
print_ci_summary() {
  local results_file="$1"
  local log_dir="$2"
  local task_name result extra1 extra2
  local platform base_name safe_name attempt_log pass_info
  local -a candidate_logs

  PASS_COUNT=0
  FAIL_COUNT=0

  echo ""
  echo "========================================"
  echo " CI RESULTS SUMMARY"
  echo "========================================"
  printf "%-55s %s\n" "TASK" "RESULT"
  printf "%-55s %s\n" "----" "------"

  while IFS='|' read -r task_name result extra1 extra2; do
    if [[ "$result" == "FAIL" ]]; then
      printf "%-55s \033[31mFAIL\033[0m (%s)\n" "$task_name" "${extra1:+$extra1 }${extra2}"
      # Plain assignment instead of ((FAIL_COUNT++)): the post-increment
      # evaluates to 0 on the first hit, i.e. exit status 1, which would
      # abort the script under `set -e`.
      FAIL_COUNT=$((FAIL_COUNT + 1))

      # Log files are named after the task WITHOUT its ":<platform>" suffix,
      # so strip it before deriving the file name.
      platform="${task_name##*:}"
      base_name="${task_name%:*}"
      safe_name="${base_name//[:\/]/_}"
      if [[ "$platform" == "a2a3sim" ]]; then
        candidate_logs=("${log_dir}/${safe_name}_sim.log")
      else
        candidate_logs=("${log_dir}/${safe_name}_hw_attempt"*.log)
      fi

      # Print all attempt logs inline
      for attempt_log in "${candidate_logs[@]}"; do
        # Skips an unmatched glob (left as a literal pattern) or missing log.
        [[ -f "$attempt_log" ]] || continue
        echo "--- LOG: $(basename "$attempt_log") ---"
        cat "$attempt_log"
        echo "--- END ---"
        echo ""
      done
    else
      pass_info=""
      if [[ -n "$extra1" ]]; then
        pass_info=" ($extra1, $extra2)"
      fi
      printf "%-55s \033[32mPASS\033[0m%s\n" "$task_name" "$pass_info"
      PASS_COUNT=$((PASS_COUNT + 1))
    fi
  done < "$results_file"

  echo "========================================"
  echo "Total: $((PASS_COUNT + FAIL_COUNT)) Passed: $PASS_COUNT Failed: $FAIL_COUNT"
  echo "========================================"
}

print_ci_summary "$RESULTS_FILE" "$LOG_DIR"

# Fail the run if any task failed or the earlier pytest stage failed.
if [[ $FAIL_COUNT -gt 0 || ${OVERALL_EXIT:-0} -ne 0 ]]; then
  exit 1
fi
echo "All tests passed!"
Loading