forked from accel-sim/accel-sim-framework
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtraceL1
More file actions
executable file
·1732 lines (1607 loc) · 78.3 KB
/
traceL1
File metadata and controls
executable file
·1732 lines (1607 loc) · 78.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
# Accel-Sim run script: supports switching between traces and automatically
# updating CSV file names.

# Base path: the directory that contains this script.
BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# ---------------------------------------------------------------------------
# Default configuration (may be overridden by command-line options)
# ---------------------------------------------------------------------------
CONFIG_MODEL='SM80_A100'

# Warp scheduler selection
GTO_ENABLED=false
NLEVEL_ENABLED=false
NLEVEL_DEFAULT_SCHEDULER='lrr'

# Plotting
PLOT_ENABLED=false
PLOT_TMUX_ENABLED=false
PLOT_BACKEND='local'        # session|local
PLOT_BACKGROUND=false       # local backend only
PLOT_FORMAT='svg'
PLOT_SESSION=''
PLOT_ATTACH=false
PLOT_KILL_EXISTING=false
PLOT_TMUX_MONITOR=false
PLOT_TMUX_AUTO_CLOSE=false

# Ideal L1D cache (force eligible global reads to L1D HIT)
IDEAL_L1D_ENABLED=false

# GRASP prefetcher
GRASP_ENABLED=false
GRASP_DEBUG=false
GRASP_TRACE_ENABLED=false
GRASP_CHAIN_CSV=''
GRASP_DISTANCE=0
GRASP_SPECULATIVE_STRIDE=0

# Extra limits (used to reduce simulation time / output volume)
MAX_CYCLE=0
MAX_INSN=0
MAX_CTA=0
MAX_COMPLETED_CTA=0

# Trace switches
ISSUE_TRACE_ENABLED=false
L1_TRACE_ENABLED=false
L2_TRACE_ENABLED=false
L2_TRACE_PRINT_BW=false
L2_TRACE_PRINT_COMPUTE=false
HBM_TRACE_ENABLED=false
ICNT_BW_TRACE_ENABLED=false
L2_BW_TRACE_ENABLED=false

# Trace sampling periods
HBM_TRACE_PERIOD=1
ICNT_BW_TRACE_PERIOD=500
L2_BW_TRACE_PERIOD=500

EXTRA_SIM_ARGS=''           # raw args appended to gpgpusim config (--sim-args)

# Lightweight stall-reason / PC statistics (replacement for the huge issue trace)
STALL_REASON_PC_STATS_ENABLED=true
STALL_REASON_PC_STATS_TOPK=5

# On-the-fly compression of the issue trace (off by default; affects only the
# issue trace).
# Note: gpgpu-sim's issue_tracer repeatedly opens/closes the file and appends
# to it, so a FIFO plus a persistent writer end is needed to keep the
# compressor process from exiting early on EOF.
ISSUE_TRACE_COMPRESS='none'     # none|gzip|zstd
ISSUE_TRACE_COMPRESS_LEVEL=''   # gzip: 1~9; zstd: 1~19 (tool default when unset)
ISSUE_TRACE_COMPRESS_THREADS=1  # zstd only
# TRACE_BASE_DIR defaults to the script directory (BASE_DIR).
# If hw_run/traces/device-0 exists under BASE_DIR and is not empty, its first
# entry is taken as the CUDA version and TRACE_BASE_DIR points inside it.

# Print the name (without the directory part) of the first non-hidden entry of
# directory $1, in glob expansion order. Prints nothing when the directory is
# empty or unreadable. Uses a glob instead of parsing `ls` output, so entry
# names containing whitespace or newlines are returned intact.
#   $1 - directory to inspect
_first_entry_name() {
  local candidate
  for candidate in "$1"/*; do
    # An unmatched glob stays literal; skip it. -L keeps dangling symlinks,
    # which `ls` would also have listed.
    [[ -e "$candidate" || -L "$candidate" ]] || continue
    printf '%s\n' "${candidate##*/}"
    return 0
  done
  return 0
}

TRACE_BASE_DIR="$BASE_DIR"
if [[ -d "$BASE_DIR/hw_run/traces/device-0" ]]; then
  CUDA_VERSION="$(_first_entry_name "$BASE_DIR/hw_run/traces/device-0")"
  if [[ -n "$CUDA_VERSION" ]]; then
    TRACE_BASE_DIR="$BASE_DIR/hw_run/traces/device-0/$CUDA_VERSION"
  fi
fi
# Parse command-line arguments. This happens before the path definitions so
# that a CONFIG_MODEL given on the command line takes effect.

# Print the script's "option needs a parameter" error message and abort.
#   $1 - option name shown in the message
#   $2 - optional hint listing the accepted values
_missing_value() {
  echo "错误: $1 需要参数${2:+ $2}"
  exit 1
}

# Consume the given argument list: recognised options are stored in the
# global configuration variables, everything else is appended (in order) to
# the global array POSITIONAL_ARGS.
#
# Value handling:
#   * Options that already rejected an empty/missing value keep doing so with
#     their original message.
#   * All other value-taking options now require that a value is present;
#     previously a trailing option silently stored an empty string. An
#     explicitly passed empty string (e.g. --sim-args "") is still accepted.
#
# Arguments: the script's command-line arguments ("$@")
# Exits with status 1 when a required option value is missing.
_parse_cli_args() {
  POSITIONAL_ARGS=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -c|--config)
        [[ $# -ge 2 ]] || _missing_value "$1"
        CONFIG_MODEL="$2"
        shift 2
        ;;
      --ideal-l1d)
        IDEAL_L1D_ENABLED=true
        shift
        ;;

      # ---- GRASP prefetcher ----
      --grasp)
        GRASP_ENABLED=true
        shift
        ;;
      --grasp-debug)
        # Debug output implies the prefetcher itself is enabled.
        GRASP_ENABLED=true
        GRASP_DEBUG=true
        shift
        ;;
      --grasp-trace)
        # Tracing implies the prefetcher itself is enabled.
        GRASP_ENABLED=true
        GRASP_TRACE_ENABLED=true
        shift
        ;;
      --grasp-chain-csv)
        [[ $# -ge 2 ]] || _missing_value "$1"
        GRASP_CHAIN_CSV="$2"
        shift 2
        ;;
      --grasp-distance)
        [[ $# -ge 2 ]] || _missing_value "$1"
        GRASP_DISTANCE="$2"
        shift 2
        ;;
      --grasp-speculative-stride)
        [[ $# -ge 2 ]] || _missing_value "$1"
        GRASP_SPECULATIVE_STRIDE="$2"
        shift 2
        ;;
      --pair-table-scope)
        [[ $# -ge 2 ]] || _missing_value "$1"
        PAIR_TABLE_SCOPE="$2"
        shift 2
        ;;

      # ---- Warp scheduler ----
      -nl|--nlevel)
        NLEVEL_ENABLED=true
        shift
        ;;
      -gto|--gto)
        GTO_ENABLED=true
        shift
        ;;
      -nls|--nlevel-scheduler)
        [[ -n "${2:-}" ]] || _missing_value "--nlevel-scheduler" "(lrr|gto)"
        NLEVEL_DEFAULT_SCHEDULER="$2"
        shift 2
        ;;

      -h|--help)
        SHOW_HELP=true
        shift
        ;;

      # ---- Plotting ----
      # --plots and --plots-parallel are backward-compatible aliases
      # (not advertised) of --plot.
      --plot|--plots|--plots-parallel)
        PLOT_ENABLED=true
        PLOT_BACKEND="local"
        shift
        ;;
      --plot-backend)
        [[ -n "${2:-}" ]] || _missing_value "--plot-backend" "(session|local)"
        PLOT_ENABLED=true
        PLOT_BACKEND="$2"
        shift 2
        ;;
      --plot-background)
        PLOT_ENABLED=true
        PLOT_BACKEND="local"
        PLOT_BACKGROUND=true
        shift
        ;;
      # Backward-compatible alias (not advertised)
      --tmux-plots)
        PLOT_ENABLED=true
        PLOT_BACKEND="session"
        shift
        ;;
      --plot-format)
        [[ -n "${2:-}" ]] || _missing_value "--plot-format" "(svg|png|both)"
        PLOT_FORMAT="$2"
        shift 2
        ;;
      --plot-auto-close)
        PLOT_TMUX_AUTO_CLOSE=true
        shift
        ;;
      --plot-monitor)
        PLOT_TMUX_MONITOR=true
        shift
        ;;
      --plot-session)
        [[ -n "${2:-}" ]] || _missing_value "--plot-session"
        PLOT_SESSION="$2"
        shift 2
        ;;
      --plot-attach)
        PLOT_ATTACH=true
        shift
        ;;
      --plot-kill-existing)
        PLOT_KILL_EXISTING=true
        shift
        ;;

      # ---- Simulation limits ----
      --max-cycle)
        [[ $# -ge 2 ]] || _missing_value "$1"
        MAX_CYCLE="$2"
        shift 2
        ;;
      --max-insn)
        [[ $# -ge 2 ]] || _missing_value "$1"
        MAX_INSN="$2"
        shift 2
        ;;
      --max-cta)
        [[ $# -ge 2 ]] || _missing_value "$1"
        MAX_CTA="$2"
        shift 2
        ;;
      --max-completed-cta)
        [[ $# -ge 2 ]] || _missing_value "$1"
        MAX_COMPLETED_CTA="$2"
        shift 2
        ;;

      # ---- Trace switches ----
      --issue-trace)
        ISSUE_TRACE_ENABLED=true
        shift
        ;;
      --no-issue-trace)
        ISSUE_TRACE_ENABLED=false
        shift
        ;;
      --no-stall-reason-pc-stats)
        STALL_REASON_PC_STATS_ENABLED=false
        shift
        ;;
      --stall-reason-pc-stats-topk)
        [[ $# -ge 2 ]] || _missing_value "$1"
        STALL_REASON_PC_STATS_TOPK="$2"
        shift 2
        ;;
      --l1-trace)
        L1_TRACE_ENABLED=true
        shift
        ;;
      --no-l1-trace)
        L1_TRACE_ENABLED=false
        shift
        ;;
      --l2-trace)
        L2_TRACE_ENABLED=true
        shift
        ;;
      --no-l2-trace)
        L2_TRACE_ENABLED=false
        shift
        ;;
      --l2-trace-print-bw)
        L2_TRACE_PRINT_BW=true
        shift
        ;;
      --l2-trace-print-compute)
        L2_TRACE_PRINT_COMPUTE=true
        shift
        ;;
      --hbm-trace)
        HBM_TRACE_ENABLED=true
        shift
        ;;
      --no-hbm-trace)
        HBM_TRACE_ENABLED=false
        shift
        ;;
      --no-icnt-bw-trace)
        ICNT_BW_TRACE_ENABLED=false
        shift
        ;;
      --no-l2-bw-trace)
        L2_BW_TRACE_ENABLED=false
        shift
        ;;

      # ---- Trace sampling periods ----
      --hbm-trace-period)
        [[ $# -ge 2 ]] || _missing_value "$1"
        HBM_TRACE_PERIOD="$2"
        shift 2
        ;;
      --icnt-bw-trace-period)
        [[ $# -ge 2 ]] || _missing_value "$1"
        ICNT_BW_TRACE_PERIOD="$2"
        shift 2
        ;;
      --l2-bw-trace-period)
        [[ $# -ge 2 ]] || _missing_value "$1"
        L2_BW_TRACE_PERIOD="$2"
        shift 2
        ;;

      # ---- Issue trace compression ----
      --issue-trace-compress)
        [[ -n "${2:-}" ]] || _missing_value "--issue-trace-compress" "(none|gzip|zstd)"
        ISSUE_TRACE_COMPRESS="$2"
        shift 2
        ;;
      --issue-trace-compress-level)
        [[ -n "${2:-}" ]] || _missing_value "--issue-trace-compress-level" "(gzip:1~9, zstd:1~19)"
        ISSUE_TRACE_COMPRESS_LEVEL="$2"
        shift 2
        ;;
      --issue-trace-compress-threads)
        [[ -n "${2:-}" ]] || _missing_value "--issue-trace-compress-threads" "(zstd only)"
        ISSUE_TRACE_COMPRESS_THREADS="$2"
        shift 2
        ;;

      --sim-args)
        [[ $# -ge 2 ]] || _missing_value "$1"
        EXTRA_SIM_ARGS="$2"
        shift 2
        ;;

      *)
        # Anything unrecognised is kept as a positional argument.
        POSITIONAL_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

_parse_cli_args "$@"

# Restore the positional parameters (recognised options removed).
set -- "${POSITIONAL_ARGS[@]}"
# Path definitions: simulator binary and the per-model configuration files
# (selected through CONFIG_MODEL).
ACCEL_SIM_BIN="${BASE_DIR}/gpu-simulator/bin/release/accel-sim.out"
TRACE_CONFIG="${BASE_DIR}/gpu-simulator/configs/tested-cfgs/${CONFIG_MODEL}/trace.config"
GPGPU_CONFIG="${BASE_DIR}/gpu-simulator/gpgpu-sim/configs/tested-cfgs/${CONFIG_MODEL}/gpgpusim.config"

# Output locations, all below ${BASE_DIR}/result.
RESULT_LOG_DIR="${BASE_DIR}/result/log"
RESULT_L1_DIR="${BASE_DIR}/result/L1cache_trace"
RESULT_L2_DIR="${BASE_DIR}/result/L2cache_trace"
RESULT_ISSUE_DIR="${BASE_DIR}/result/issue_trace"
RESULT_STALL_REASON_PC_STATS_BASE_DIR="${RESULT_ISSUE_DIR}/stall_reason_pc_stats"
RESULT_BW_DIR="${BASE_DIR}/result/bandwidth"
RESULT_ICNT_BW_DIR="${BASE_DIR}/result/icnt_bw"
RESULT_L2_BW_DIR="${BASE_DIR}/result/l2_bw"
RESULT_GRASP_TRACE_DIR="${BASE_DIR}/result/grasp_trace"

# Create the result directories if they do not exist yet.
# RESULT_GRASP_TRACE_DIR is only defined above; it is not created here.
mkdir -p \
  "${RESULT_LOG_DIR}" \
  "${RESULT_L1_DIR}" \
  "${RESULT_L2_DIR}" \
  "${RESULT_ISSUE_DIR}" \
  "${RESULT_STALL_REASON_PC_STATS_BASE_DIR}" \
  "${RESULT_BW_DIR}" \
  "${RESULT_ICNT_BW_DIR}" \
  "${RESULT_L2_BW_DIR}"
# 定义trace词表和对应的路径
declare -A TRACE_MAP=(
#gardenia
["bfs_usa"]="bfs_linear_base/mtx___data_road_usa_0_0_0/traces/kernelslist.g"
["bfs_web"]="bfs_linear_base/mtx___data_web_Google_0_0_0/traces/kernelslist.g"
["bfs_socLive"]="bfs_linear_base/mtx___data_soc_LiveJournal1_0_0_0/traces/kernelslist.g"
["bfs_cit"]="bfs_linear_base/mtx___data_cit_Patents_0_0_0/traces/kernelslist.g"
# IMA 配置(cit-Patents, symmetrize=1, source=3569341)
["bfs_cit_ima"]="bfs_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
# IMA tiers(与 define-all-apps.yml 对齐;source sensitive)
["bfs_ima_high"]="bfs_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["bfs_ima_med"]="bfs_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bfs_ima_small"]="bfs_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
# IMA(更友善):cit-Patents, symmetrize=0(有向), source=3569341(需先生成对应 traces)
["bfs_cit_ima_dir"]="bfs_linear_base/mtx___data_cit_Patents_0_0_3569341/traces/kernelslist.g"
["bfs_roadnet"]="bfs_linear_base/mtx___data_roadNet_CA_0_0_0/traces/kernelslist.g"
["bfs_flickr"]="bfs_linear_base/mtx___data_flickr_0_0_0/traces/kernelslist.g"
["spmv_cu"]="spmv_cusparse/mtx___data_web_Google_1_0/traces/kernelslist.g"
["spmv"]="spmv_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
# IMA tiers(direction sensitive;spmv 输入必须无向/对称 => 2 tiers,无 small)
["spmv_ima_high"]="spmv_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
["spmv_ima_med"]="spmv_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
# IMA 配置(cit-Patents, symmetrize=1)
["spmv_cit_ima"]="spmv_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
# IMA tiers(与 define-all-apps.yml 对齐;source sensitive)
["sssp_ima_high"]="sssp_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["sssp_ima_med"]="sssp_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["sssp_ima_small"]="sssp_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
# IMA tiers(source & direction insensitive => 2 tiers)
["tc_ima_high"]="tc_gpu_base/__data_cit_Patents/traces/kernelslist.g"
["tc_ima_low"]="tc_gpu_base/__data_web_Google/traces/kernelslist.g"
["tc"]="tc_gpu_base/__data_web_Google/traces/kernelslist.g"
["cc"]="cc_base/mtx___data_web_Google_1_1/traces/kernelslist.g"
# IMA tiers(与 define-all-apps.yml 对齐;direction sensitive)
["cc_ima_high"]="cc_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
["cc_ima_med"]="cc_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
["cc_ima_small"]="cc_base/mtx___data_web_Google_0_0/traces/kernelslist.g"
["bc"]="bc_linear_base/mtx___data_web_Google_0/traces/kernelslist.g"
# IMA tiers(与 define-all-apps.yml 对齐;source sensitive)
["bc_ima_high"]="bc_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["bc_ima_med"]="bc_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bc_ima_small"]="bc_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
# Gardenia IMA tiers (newly traced workloads)
["pr_ima_high"]="pr_base/mtx___data_cit_Patents_1/traces/kernelslist.g"
["pr_ima_med"]="pr_base/mtx___data_web_Google_1/traces/kernelslist.g"
["pr_ima_small"]="pr_base/mtx___data_web_Google_0/traces/kernelslist.g"
["scc_ima_high"]="scc_topo/__data_cit_Patents_mtx_1/traces/kernelslist.g"
["scc_ima_med"]="scc_topo/__data_web_Google_mtx_1/traces/kernelslist.g"
["scc_ima_small"]="scc_topo/__data_flickr_mtx_1/traces/kernelslist.g"
["vc_ima_high"]="vc_linear_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
["vc_ima_med"]="vc_linear_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
["vc_ima_small"]="vc_linear_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["vc_road"]="vc_linear_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["symgs_ima_high"]="symgs_base/mtx___data_cit_Patents_1/traces/kernelslist.g"
["symgs_ima_med"]="symgs_base/mtx___data_web_Google_1/traces/kernelslist.g"
["symgs_ima_small"]="symgs_base/mtx___data_roadNet_CA_1/traces/kernelslist.g"
# ============================================================
# 4-category dataset evaluation (2026-03-31)
# Naming: {algo}_{dataset}_{dir|sym}
# dir = directed (sym=0), sym = symmetrized (sym=1)
# Existing *_ima_{high,med} keys are reused where applicable
# ============================================================
# --- cit-Patents directed (sym=0) supplements ---
["bc_cit_dir"]="bc_linear_base/mtx___data_cit_Patents_0_0_3569341/traces/kernelslist.g"
["pr_cit_dir"]="pr_base/mtx___data_cit_Patents_0/traces/kernelslist.g"
["symgs_cit_dir"]="symgs_base/mtx___data_cit_Patents_0/traces/kernelslist.g"
# --- web-Google directed (sym=0) supplement ---
["symgs_web_dir"]="symgs_base/mtx___data_web_Google_0/traces/kernelslist.g"
# --- flickr (社交网络) ---
# source-based (hub=1586)
["bfs_flickr_dir"]="bfs_linear_base/mtx___data_flickr_0_0_1586/traces/kernelslist.g"
["bfs_flickr_sym"]="bfs_linear_base/mtx___data_flickr_1_0_1586/traces/kernelslist.g"
["sssp_flickr_dir"]="sssp_linear_base/mtx___data_flickr_0_0_1586/traces/kernelslist.g"
["sssp_flickr_sym"]="sssp_linear_base/mtx___data_flickr_1_0_1586/traces/kernelslist.g"
["bc_flickr_dir"]="bc_linear_base/mtx___data_flickr_0_0_1586/traces/kernelslist.g"
["bc_flickr_sym"]="bc_linear_base/mtx___data_flickr_1_0_1586/traces/kernelslist.g"
# source-independent
["cc_flickr"]="cc_base/mtx___data_flickr_1_0/traces/kernelslist.g"
["spmv_flickr"]="spmv_base/mtx___data_flickr_1_0/traces/kernelslist.g"
["pr_flickr_dir"]="pr_base/mtx___data_flickr_0/traces/kernelslist.g"
["pr_flickr_sym"]="pr_base/mtx___data_flickr_1/traces/kernelslist.g"
# VC/SymGS: MAXCOLOR=128 crash on flickr — not available
# --- roadNet-CA (道路网络) ---
# source-based (dir hub=117563, sym hub=562818)
["bfs_road_dir"]="bfs_linear_base/mtx___data_roadNet_CA_0_0_117563/traces/kernelslist.g"
["bfs_road_sym"]="bfs_linear_base/mtx___data_roadNet_CA_1_0_562818/traces/kernelslist.g"
["sssp_road_dir"]="sssp_linear_base/mtx___data_roadNet_CA_0_0_117563/traces/kernelslist.g"
["sssp_road_sym"]="sssp_linear_base/mtx___data_roadNet_CA_1_0_562818/traces/kernelslist.g"
["bc_road_dir"]="bc_linear_base/mtx___data_roadNet_CA_0_0_117563/traces/kernelslist.g"
["bc_road_sym"]="bc_linear_base/mtx___data_roadNet_CA_1_0_562818/traces/kernelslist.g"
# source-independent
["cc_road"]="cc_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["spmv_road"]="spmv_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["pr_road_dir"]="pr_base/mtx___data_roadNet_CA_0/traces/kernelslist.g"
["pr_road_sym"]="pr_base/mtx___data_roadNet_CA_1/traces/kernelslist.g"
["symgs_road_dir"]="symgs_base/mtx___data_roadNet_CA_0/traces/kernelslist.g"
# vc_road = vc_ima_small (already defined above), symgs_road_sym = symgs_ima_small
# --- soc-LiveJournal1 (大社交扩展) ---
["bfs_socLJ_dir"]="bfs_linear_base/mtx___data_soc_LiveJournal1_0_0_10009/traces/kernelslist.g"
["bfs_socLJ_sym"]="bfs_linear_base/mtx___data_soc_LiveJournal1_1_0_10009/traces/kernelslist.g"
["spmv_socLJ"]="spmv_base/mtx___data_soc_LiveJournal1_1_0/traces/kernelslist.g"
# ============================================================
# Unified naming: {algo}_{dataset}_{dir|sym}
# These are canonical names used in benchmark_suite.md.
# Old *_ima_* keys are kept above for backward compat.
# ============================================================
# --- cit-Patents sym=1 (= ima_high) ---
["bfs_cit_sym"]="bfs_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["sssp_cit_sym"]="sssp_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["bc_cit_sym"]="bc_linear_base/mtx___data_cit_Patents_1_0_3569341/traces/kernelslist.g"
["cc_cit_sym"]="cc_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
["spmv_cit_sym"]="spmv_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
["pr_cit_sym"]="pr_base/mtx___data_cit_Patents_1/traces/kernelslist.g"
["vc_cit_sym"]="vc_linear_base/mtx___data_cit_Patents_1_0/traces/kernelslist.g"
# --- cit-Patents dir (already defined above) ---
["bfs_cit_dir"]="bfs_linear_base/mtx___data_cit_Patents_0_0_3569341/traces/kernelslist.g"
["sssp_cit_dir"]="sssp_linear_base/mtx___data_cit_Patents_0_0_3569341/traces/kernelslist.g"
# --- web-Google sym=1 (= ima_med) ---
["bfs_web_sym"]="bfs_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["sssp_web_sym"]="sssp_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bc_web_sym"]="bc_linear_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["cc_web_sym"]="cc_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
["spmv_web_sym"]="spmv_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
["pr_web_sym"]="pr_base/mtx___data_web_Google_1/traces/kernelslist.g"
["vc_web_sym"]="vc_linear_base/mtx___data_web_Google_1_0/traces/kernelslist.g"
# --- flickr sym=1 补齐 _sym 后缀 ---
["cc_flickr_sym"]="cc_base/mtx___data_flickr_1_0/traces/kernelslist.g"
["spmv_flickr_sym"]="spmv_base/mtx___data_flickr_1_0/traces/kernelslist.g"
# --- roadNet-CA sym=1 补齐 _sym 后缀 ---
["cc_road_sym"]="cc_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["spmv_road_sym"]="spmv_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
["vc_road_sym"]="vc_linear_base/mtx___data_roadNet_CA_1_0/traces/kernelslist.g"
# --- soc-LJ1 补齐 _sym ---
["spmv_socLJ_sym"]="spmv_base/mtx___data_soc_LiveJournal1_1_0/traces/kernelslist.g"
# web-Google dir aliases for BFS/SSSP/BC/PR (existing traces)
["bfs_web_dir"]="bfs_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
["sssp_web_dir"]="sssp_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
["bc_web_dir"]="bc_linear_base/mtx___data_web_Google_0_0_506742/traces/kernelslist.g"
["pr_web_dir"]="pr_base/mtx___data_web_Google_0/traces/kernelslist.g"
# --- Variant sweep: web-Google sym=1 (14 implementation variants vs baselines) ---
["bfs_topo_web_sym"]="bfs_topo_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bfs_lb_web_sym"]="bfs_linear_lb/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["sssp_lb_web_sym"]="sssp_linear_lb/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["sssp_topo_web_sym"]="sssp_topo_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bc_lb_web_sym"]="bc_linear_lb/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bc_topo_web_sym"]="bc_topo_base/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["bc_hlb_web_sym"]="bc_hybrid_lb/mtx___data_web_Google_1_0_506742/traces/kernelslist.g"
["cc_affo_web_sym"]="cc_afforest/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["cc_warp_web_sym"]="cc_warp/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["cc_part_web_sym"]="cc_partition/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["spmv_warp_web_sym"]="spmv_warp/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["spmv_tile_web_sym"]="spmv_tiling/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["vc_topo_web_sym"]="vc_topo_base/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
["vc_bits_web_sym"]="vc_linear_bitset/mtx___data_web_Google_1_0_0/traces/kernelslist.g"
# --- Variant sweep: source=0 re-traces (for topo-driven variants to validate source-CTA alignment) ---
["bfs_topo_s0_web_sym"]="bfs_topo_base/mtx___data_web_Google_1_0_0_src0/traces/kernelslist.g"
["sssp_topo_s0_web_sym"]="sssp_topo_base/mtx___data_web_Google_1_0_0_src0/traces/kernelslist.g"
# flash attention
["fa"]="fa/NO_ARGS/traces/kernelslist.g"
["fa4k"]="fa_4096seq/NO_ARGS/traces/kernelslist.g"
["fi_dec_512"]="flashinfer_decode_512/NO_ARGS/traces/kernelslist.g"
["fi_dec_4k"]="flashinfer_decode_4096/NO_ARGS/traces/kernelslist.g"
["ima_tiny_case"]="ima_tiny_case/NO_ARGS/traces/kernelslist.g"
["ima_tiny_case_current"]="ima_tiny_case_current/NO_ARGS/traces/kernelslist.g"
#cache strategy
["wb"]="/strategybench.sm80/__test_alloc___store_wb/traces/kernelslist.g"
["wb2"]="/strategybench.sm80/__test_alloc2___store_wb/traces/kernelslist.g"
["cg"]="/strategybench.sm80/__test_alloc___store_cg/traces/kernelslist.g"
# GEMM系列
["gemm128"]="gemm128/NO_ARGS/traces/kernelslist.g"
["gemm256"]="gemm256/NO_ARGS/traces/kernelslist.g"
["gemm384"]="gemm384/NO_ARGS/traces/kernelslist.g"
["gemm512"]="gemm512/NO_ARGS/traces/kernelslist.g"
["gemm768"]="gemm768/NO_ARGS/traces/kernelslist.g"
["gemm1024"]="gemm1024/NO_ARGS/traces/kernelslist.g"
["gemm1536"]="gemm1536/NO_ARGS/traces/kernelslist.g"
["gemm2048"]="gemm2048/NO_ARGS/traces/kernelslist.g"
["gemm4096"]="gemm4096/NO_ARGS/traces/kernelslist.g"
["gemm_cublas"]="gemm_cublas/NO_ARGS/traces/kernelslist.g"
# Rodinia基准测试系列
["btree"]="b+tree-rodinia-3.1/file___data_mil_txt_command___data_command_txt/traces/kernelslist.g"
["btree_s20"]="b+tree-rodinia-3.1/file___data_mil_20_txt_command___data_command_txt/traces/kernelslist.g" #随机种子是20
["btree_s123"]="b+tree-rodinia-3.1/file___data_mil_123_txt_command___data_command_txt/traces/kernelslist.g" #随机种子是123
["btree_5900"]="b+tree-rodinia-3.1/file___data_mil_txt_command___data_j5900k9900_txt/traces/kernelslist.g" #范围搜索执行5900
["btree_6100"]="b+tree-rodinia-3.1/file___data_mil_txt_command___data_j6100k10100_txt/traces/kernelslist.g" #范围搜索执行6100
["backprop"]="backprop-rodinia-3.1/65536/traces/kernelslist.g"
# ["bfs"]="bfs-rodinia-3.1/__data_graph65536_txt/traces/kernelslist.g"
["bfs"]="bfs-rodinia-3.1/__data_graph1MW_6_txt/traces/kernelslist.g"
["gaussian"]="gaussian-rodinia-3.1/_s_256/traces/kernelslist.g"
["hotspot"]="hotspot-rodinia-3.1/512_2_2___data_temp_512___data_power_512_output_out/traces/kernelslist.g"
["hotspot_7"]="hotspot-rodinia-3.1/512_7_2___data_temp_512___data_power_512_output_out/traces/kernelslist.g"
["hotspot_3"]="hotspot-rodinia-3.1/512_3_2___data_temp_512___data_power_512_output_out/traces/kernelslist.g"
["lavaMD"]="lavaMD-rodinia-3.1/_boxes1d_10/traces/kernelslist.g"
["lud"]="lud-rodinia-3.1/_s_256__v/traces/kernelslist.g"
["nw"]="nw-rodinia-3.1/2048_10/traces/kernelslist.g"
["pathfinder"]="pathfinder-rodinia-3.1/100000_100_20___result_txt/traces/kernelslist.g"
["pathfinder22"]="pathfinder-rodinia-3.1/100000_100_22___result_txt/traces/kernelslist.g" #金字塔高度是22
["pathfinder18"]="pathfinder-rodinia-3.1/100000_100_18___result_txt/traces/kernelslist.g" #金字塔高度是18
["pathfinder_s1"]="pathfinder-rodinia-3.1/100000_100_20_1___result_txt/traces/kernelslist.g" #随机种子是1
["pathfinder_s2"]="pathfinder-rodinia-3.1/100000_100_20_2___result_txt/traces/kernelslist.g" #随机种子是2
["streamcluster"]="streamcluster-rodinia-3.1/10000_20_32_2000_10_1/traces/kernelslist.g"
["srad"]="srad_v1-rodinia-3.1/100_0_5_502_458/traces/kernelslist.g"
["myocyte"]="myocyte-rodinia-3.1/100_1_0/traces/kernelslist.g"
["nn"]="nn-rodinia-3.1/__data_filelist_4__r_5__lat_30__lng_90/traces/kernelslist.g"
["particlefilter_float"]="particlefilter_float-rodinia-3.1/_x_128__y_128__z_10__np_1000/traces/kernelslist.g"
["particlefilter_naive"]="particlefilter_naive-rodinia-3.1/_x_128__y_128__z_10__np_1000/traces/kernelslist.g"
["dwt2d"]="dwt2d-rodinia-3.1/NO_ARGS/traces/kernelslist.g"
# Polybench系列
["2mm"]="polybench-2mm/NO_ARGS/traces/kernelslist.g"
["3mm"]="polybench-3mm/NO_ARGS/traces/kernelslist.g"
["gemm_poly"]="polybench-gemm/NO_ARGS/traces/kernelslist.g"
["atax"]="polybench-atax/NO_ARGS/traces/kernelslist.g"
["bicg"]="polybench-bicg/NO_ARGS/traces/kernelslist.g"
["correlation"]="polybench-correlation/NO_ARGS/traces/kernelslist.g"
["covariance"]="polybench-covariance/NO_ARGS/traces/kernelslist.g"
["fdtd2d"]="polybench-fdtd2d/NO_ARGS/traces/kernelslist.g"
["gesummv"]="polybench-gesummv/NO_ARGS/traces/kernelslist.g"
["gramschmidt"]="polybench-gramschmidt/NO_ARGS/traces/kernelslist.g"
["mvt"]="polybench-mvt/NO_ARGS/traces/kernelslist.g"
["syr2k"]="polybench-syr2k/NO_ARGS/traces/kernelslist.g"
["syrk"]="polybench-syrk/NO_ARGS/traces/kernelslist.g"
["2DConvolution"]="polybench-2DConvolution/NO_ARGS/traces/kernelslist.g"
["3DConvolution"]="polybench-3DConvolution/NO_ARGS/traces/kernelslist.g"
# 其他应用
["fw"]="fw/__data_256_16384_gr/traces/kernelslist.g"
["fw_block"]="fw_block/__data_256_16384_gr/traces/kernelslist.g"
["color_max"]="color_max/__data_ecology1_graph_1/traces/kernelslist.g"
["color_maxmin"]="color_maxmin/__data_ecology1_graph_1/traces/kernelslist.g"
["mis"]="mis/__data_ecology1_graph_1/traces/kernelslist.g"
["pagerank"]="pagerank/__data_coAuthorsDBLP_graph_1/traces/kernelslist.g"
["pagerank_spmv"]="pagerank_spmv/NO_ARGS/traces/kernelslist.g"
["l1_bw_32f"]="l1_bw_32f/NO_ARGS/traces/kernelslist.g"
# === Pannotia MIS (SNAP datasets) ===
["pann_mis_cit"]="pann_mis_cit-Patents/traces/kernelslist.g"
["pann_mis_web"]="pann_mis_web-Google/traces/kernelslist.g"
["pann_mis_flickr"]="pann_mis_flickr/traces/kernelslist.g"
["pann_mis_road"]="pann_mis_roadNet-CA/traces/kernelslist.g"
# === Pannotia Color (SNAP datasets) ===
["pann_color_cit"]="pann_color_cit-Patents/traces/kernelslist.g"
["pann_color_web"]="pann_color_web-Google/traces/kernelslist.g"
["pann_color_road"]="pann_color_roadNet-CA/traces/kernelslist.g"
# === LonestarGPU MST (SNAP datasets) ===
["ls_mst_cit"]="ls_mst_cit-Patents/traces/kernelslist.g"
["ls_mst_web"]="ls_mst_web-Google/traces/kernelslist.g"
["ls_mst_flickr"]="ls_mst_flickr/traces/kernelslist.g"
["ls_mst_road"]="ls_mst_roadNet-CA/traces/kernelslist.g"
# === LonestarGPU non-graph algorithms ===
["ls_sp"]="ls_sp_small/traces/kernelslist.g"
["ls_sp_med"]="ls_sp_med/traces/kernelslist.g"
["ls_sp_42k3"]="ls_sp_42k_3/traces/kernelslist.g"
["ls_sp_42k5"]="ls_sp_42k_5/traces/kernelslist.g"
["ls_dmr_r1m"]="ls_dmr_r1m/traces/kernelslist.g"
["ls_pta"]="ls_pta_ex/traces/kernelslist.g"
["ls_dmr"]="ls_dmr_250k/traces/kernelslist.g"
["ls_dmr_25k"]="ls_dmr_25k/traces/kernelslist.g"
["ls_dmr_25k_tiny"]="ls_dmr_25k_tiny/traces/kernelslist.g"
["ls_dmr_25k_kc1"]="ls_dmr_25k_kc1/traces/kernelslist.g"
["ls_dmr_25k_kr28"]="ls_dmr_25k_kr28/traces/kernelslist.g"
["ls_mst_flickr_k3"]="ls_mst_flickr_k3/traces/kernelslist.g"
["ls_mst_flickr_k3only"]="ls_mst_flickr_k3only/traces/kernelslist.g"
# small default datasets
["pann_mis_eco"]="pann_mis_ecology1/traces/kernelslist.g"
["pann_mis_g3"]="pann_mis_g3circuit/traces/kernelslist.g"
["pann_color_eco"]="pann_color_ecology1/traces/kernelslist.g"
["pann_color_g3"]="pann_color_g3circuit/traces/kernelslist.g"
["ls_mst_rmat12"]="ls_mst_rmat12/traces/kernelslist.g"
["ls_mst_usany"]="ls_mst_usany/traces/kernelslist.g"
["ls_mst_2d2e20"]="ls_mst_2d2e20/traces/kernelslist.g"
["ls_mst_usafla"]="ls_mst_usafla/traces/kernelslist.g"
)
# Lookup table: map key -> n_level warp-allocation CSV (path relative to the
# repository base directory). The plain trace key is used by default; when the
# n_level default scheduler is gto the key "<trace_key>gto" is looked up.
# NOTE(review): "smpv_cu" may be a transposition of "spmv_cu" — confirm
# against the TRACE_MAP keys before renaming anything.
declare -A NLEVEL_WARP_ALLOC_MAP=()
NLEVEL_WARP_ALLOC_MAP["fa"]="gpu-simulator/gpgpu-sim/n-level/output/fa_issue_warp_allocate.csv"
NLEVEL_WARP_ALLOC_MAP["fa4k"]="gpu-simulator/gpgpu-sim/n-level/output/fa4k_issue_warp_allocate.csv"
NLEVEL_WARP_ALLOC_MAP["fa4kgto"]="gpu-simulator/gpgpu-sim/n-level/output/fa4k_gto_issue_warp_allocate.csv"
NLEVEL_WARP_ALLOC_MAP["smpv_cu"]="gpu-simulator/gpgpu-sim/n-level/output/smpv_cu_issue_warp_allocate.csv"
#######################################
# Print the usage/help text to stdout.
# Globals:   TRACE_MAP (read) - its keys are listed, sorted, as the
#            available trace keys.
# Arguments: none ($0 is interpolated into the text as the script name)
# Outputs:   option reference, sorted trace keys, examples and the
#            post-processing notes.
#######################################
show_usage() {
  # Unquoted heredoc delimiters: $0 must still expand inside the text.
  cat <<USAGE_OPTIONS
使用方法: $0 [选项] <trace_key> [log_name]

选项:
 -c, --config <model> 指定GPU配置模型 (默认: SM80_A100)
 支持: SM7_QV100, SM80_A100
 --ideal-l1d 启用 Ideal L1D:强制 eligible global read 为 L1D HIT(上界实验,日志名自动加 _ideal_l1d 后缀)
 --grasp 启用 GRASP IMA prefetcher(默认 chain CSV: ima_plan/05_implementation/ima_pair_table/golden/strict_selected_chain_instances.csv)
 --grasp-debug 启用 GRASP + 详细调试日志
 --grasp-trace 启用 GRASP + 结构化 CSV 事件 trace(含 --grasp)
 --pair-table-scope <s> IMA pair table scope: warp(默认)/cta/kernel
 --grasp-chain-csv <path> 指定 GRASP chain CSV 文件路径
 -nl, --nlevel 启用 n_level 调度器(日志名自动加 _nl 后缀,与 -gto 互斥)
 -nls, --nlevel-scheduler <lrr|gto>
 选择 n_level 默认调度器并匹配 warp alloc 文件 (默认: lrr)
 -gto, --gto 启用 gto 调度器(日志名自动加 _gto 后缀,与 -nl 互斥)
 --plot 模拟结束后自动并行绘图并显示进度(推荐,一键模式)
 --plot-backend <session|local> 选择绘图后端 (默认: local)
 --plot-background local 后端并行绘图并后台 detach(写日志后立即返回)
 --plot-format <svg|png|both> 绘图输出格式 (默认: svg;png/both 可能需要 matplotlib+numpy)
 --plot-session <name> 指定 session 名称(默认自动生成)
 --plot-kill-existing 如果 session 已存在则先 kill
 --plot-attach 运行完后直接 attach 到 session
 --plot-auto-close session 后端下绘图结束后自动关闭窗口(session 保留)
 --plot-monitor session 后端下增加一个 monitor 窗口显示日志尾部(用于看进度)

 # 体量控制(强烈建议用于图负载,避免输出 TB 级 CSV)
 --issue-trace 开启 issue trace(默认关闭;最占空间/最慢)
 --no-issue-trace 关闭 issue trace(默认关闭;最占空间/最慢)
 --no-stall-reason-pc-stats 关闭轻量 stall reason->pc 统计(默认开启;替代 issue trace)
 --stall-reason-pc-stats-topk <K> 设置每个 reason 的 topK(默认 5;0=不输出 topk_other.csv)
 --l1-trace 开启 L1 trace(默认关闭)
 --no-l1-trace 关闭 L1 trace(默认关闭)
 --l2-trace 开启 L2 trace(默认关闭)
 --no-l2-trace 关闭 L2 trace(默认关闭)
 --l2-trace-print-bw 在 L2 trace 中打印 HBM 带宽/occupancy
 --l2-trace-print-compute 在 L2 trace 中打印 SM compute utilization
 --hbm-trace 开启 HBM 分区带宽 trace(默认关闭)
 --no-hbm-trace 关闭 HBM 分区带宽 trace(默认关闭)
 --no-icnt-bw-trace 关闭 ICNT 带宽 trace(默认关闭)
 --no-l2-bw-trace 关闭 L2 端口带宽 trace(默认关闭)
 --hbm-trace-period <N> HBM trace 采样周期(默认 1;建议 100/1000)
 --icnt-bw-trace-period <N> ICNT 带宽 trace 采样周期(默认 500)
 --l2-bw-trace-period <N> L2 端口带宽 trace 采样周期(默认 500)

 # Issue trace 边运行边压缩(避免生成超大 CSV;输出为 .csv.gz/.csv.zst)
 --issue-trace-compress <none|gzip|zstd> issue trace 压缩方式(默认: none)
 --issue-trace-compress-level <N> 压缩级别(gzip:1~9;zstd:1~19;未指定则用默认)
 --issue-trace-compress-threads <N> zstd 线程数(默认 1;并行跑时建议 1)
 --max-cycle <N> 提前结束:最多仿真 N 个 cycle(0=不限)
 --max-insn <N> 提前结束:最多仿真 N 条指令(0=不限)
 --max-cta <N> 提前结束:最多启动 N 个 CTA(0=不限)
 --max-completed-cta <N> 提前结束:最多完成 N 个 CTA(0=不限)

参数说明:
 trace_key - 必需,要运行的trace键值
 log_name - 可选,自定义日志文件名(不含扩展名)

可用的trace键值:
USAGE_OPTIONS

  # One key per line, sorted; the loop runs in the pipeline's subshell.
  for trace_name in "${!TRACE_MAP[@]}"; do
    printf ' %s\n' "$trace_name"
  done | sort

  cat <<USAGE_EXAMPLES

示例:
 $0 gemm128 # 运行GEMM128基准测试,使用默认配置
 $0 -c SM80_A100 gemm128 # 使用 SM80_A100 配置运行
 $0 btree my_test # 运行B+Tree基准测试,使用自定义日志名
 $0 2mm 2mm_experiment # 运行Polybench 2MM基准测试,使用自定义日志名
 $0 -gto fa # 启用 gto 调度器运行 FA
 $0 -nl fa # 启用 n_level 调度器运行 FA
 $0 -nl -nls gto fa4k # n_level 使用 gto 默认调度器并匹配对应 warp alloc 文件
 $0 -nl -nls lrr fa4k # n_level 使用 lrr 默认调度器(默认行为)
 $0 --plot sssp # 结束后自动并行绘图并显示进度
 $0 --plot --plot-format png sssp # 自动绘图输出 PNG
 $0 --plot --plot-format both sssp # 自动绘图同时输出 SVG+PNG
 $0 --plot-backend session --plot-monitor sssp # 用 session 后台跑,并在 monitor 看进度
 $0 --no-issue-trace bfs_web # 关闭 issue trace,显著提速/减小输出
 $0 --issue-trace-compress gzip bfs_roadnet bfs_roadnet_gz # issue trace 边运行边压缩(推荐用于大规模)
 $0 --max-completed-cta 200 bfs_web # 只跑前 200 个 CTA(快速看 IMA 趋势)
 $0 --hbm-trace-period 100 spmv # 带宽 trace 按 100 cycle 采样
 $0 --ideal-l1d bfs_usa bfs_usa_ideal_l1d # Ideal L1D 上界实验
 $0 --grasp bfs_ima_small bfs_grasp # GRASP prefetcher 实验
 $0 --grasp-debug --max-completed-cta 5 bfs_ima_small grasp_dbg # GRASP 调试模式

L1/ISSUE Trace 后处理 (生成带 L1 命中信息的 CSV):
 python result/issue_trace/issue_trace_l1_match.py <issue_csv> [选项]
 --l1-dir <dir> # 指定 L1 trace CSV 目录,默认 result/L1cache_trace
 --output-tag <tag> # 输出目录/文件后缀,默认 per_sm
 --output-mode <mode> # per-sm(默认) 逐 SM 输出,single 生成单个合并 CSV
 --split-l1-trace # 额外把 L1 trace 本身按 SM 拆分成独立 CSV
 匹配规则: 只处理 mask 非 0 的 GLOBAL/LOCAL LOAD/STORE;需整 warp 全 HIT 才记 HIT;
 输出列包含 l1_cycle、l1_sector_addresses、l1_lane_ids、l1_cycle_delta 及其状态,并在 cycle 差超过 100 时自动告警。
 若存在未消费的 L1 事件,会在 unmatched_l1_events.csv(或 single 模式对应文件)中列出以便调试。
 若某些 issue 在 200 cycle 内未匹配完全部 sector,会生成 matching_warnings.csv 供排查。
USAGE_EXAMPLES
}
# Argument pre-check: print the help text and stop successfully when help was
# requested or no positional argument is left.
if [[ "$SHOW_HELP" == true || $# -eq 0 ]]; then
  show_usage
  exit 0
fi

# Scheduler options are mutually exclusive: -nl and -gto cannot be combined.
if [[ "$NLEVEL_ENABLED" == true && "$GTO_ENABLED" == true ]]; then
  echo "错误: -nl 和 -gto 不能同时使用"
  echo ""
  show_usage
  exit 1
fi
# Normalise the enum-like options to lower case and reject unsupported values.
# printf '%s' is used instead of echo: echo would treat a value such as "-n"
# or "-e" as one of its own options and emit nothing, which made the
# "(当前: ...)" part of the diagnostic show an empty value.

# Plot backend: session | local
PLOT_BACKEND="$(printf '%s' "$PLOT_BACKEND" | tr 'A-Z' 'a-z')"
case "$PLOT_BACKEND" in
  session | local) ;;
  *)
    echo "错误: --plot-backend 仅支持 session|local (当前: $PLOT_BACKEND)"
    exit 1
    ;;
esac

# n_level default scheduler: lrr | gto
NLEVEL_DEFAULT_SCHEDULER="$(printf '%s' "$NLEVEL_DEFAULT_SCHEDULER" | tr 'A-Z' 'a-z')"
case "$NLEVEL_DEFAULT_SCHEDULER" in
  lrr | gto) ;;
  *)
    echo "错误: n_level 默认调度器仅支持 lrr 或 gto (当前: $NLEVEL_DEFAULT_SCHEDULER)"
    exit 1
    ;;
esac

# Plot output format: svg | png | both
PLOT_FORMAT="$(printf '%s' "$PLOT_FORMAT" | tr 'A-Z' 'a-z')"
case "$PLOT_FORMAT" in
  svg | png | both) ;;
  *)
    echo "错误: --plot-format 仅支持 svg|png|both (当前: $PLOT_FORMAT)"
    exit 1
    ;;
esac

# Issue-trace compression mode: none | gzip | zstd
ISSUE_TRACE_COMPRESS="$(printf '%s' "$ISSUE_TRACE_COMPRESS" | tr 'A-Z' 'a-z')"
case "$ISSUE_TRACE_COMPRESS" in
  none | gzip | zstd) ;;
  *)
    echo "错误: --issue-trace-compress 仅支持 none|gzip|zstd (当前: $ISSUE_TRACE_COMPRESS)"
    exit 1
    ;;
esac

# Numeric options are optional; when given they must be plain decimal digits.
if [[ -n "${ISSUE_TRACE_COMPRESS_LEVEL}" && ! "${ISSUE_TRACE_COMPRESS_LEVEL}" =~ ^[0-9]+$ ]]; then
  echo "错误: --issue-trace-compress-level 必须为整数 (当前: $ISSUE_TRACE_COMPRESS_LEVEL)"
  exit 1
fi
if [[ -n "${ISSUE_TRACE_COMPRESS_THREADS}" && ! "${ISSUE_TRACE_COMPRESS_THREADS}" =~ ^[0-9]+$ ]]; then
  echo "错误: --issue-trace-compress-threads 必须为整数 (当前: $ISSUE_TRACE_COMPRESS_THREADS)"
  exit 1
fi
if [[ -n "${STALL_REASON_PC_STATS_TOPK}" && ! "${STALL_REASON_PC_STATS_TOPK}" =~ ^[0-9]+$ ]]; then
  echo "错误: --stall-reason-pc-stats-topk 必须为非负整数 (当前: $STALL_REASON_PC_STATS_TOPK)"
  exit 1
fi
# First positional argument selects the trace.
TRACE_KEY=$1

# Reject keys that have no (non-empty) entry in TRACE_MAP.
if [[ -z "${TRACE_MAP[$TRACE_KEY]}" ]]; then
  echo "错误: 未知的trace键值 '$TRACE_KEY'"
  echo ""
  show_usage
  exit 1
fi

# Log name: the trace key by default, overridden by an optional second
# positional argument.
LOG_NAME=$TRACE_KEY
if (( $# > 1 )); then
  LOG_NAME=$2
fi

# n_level runs get an "_nl" suffix ("_nl_gto" when the n_level default
# scheduler is gto); a suffix that is already present is not repeated.
if [[ "$NLEVEL_ENABLED" == true ]]; then
  if [[ "$NLEVEL_DEFAULT_SCHEDULER" == "gto" ]]; then
    case "$LOG_NAME" in
      *_nl_gto) ;;
      *_nl) LOG_NAME="${LOG_NAME}_gto" ;;
      *) LOG_NAME="${LOG_NAME}_nl_gto" ;;
    esac
  else
    case "$LOG_NAME" in
      *_nl) ;;
      *) LOG_NAME="${LOG_NAME}_nl" ;;
    esac
  fi
fi

# Ideal-L1D runs are tagged with "_ideal_l1d" unless the name already ends so.
if [[ "$IDEAL_L1D_ENABLED" == true && "$LOG_NAME" != *_ideal_l1d ]]; then
  LOG_NAME="${LOG_NAME}_ideal_l1d"
fi

# gto runs are tagged with "_gto" unless the name already ends so.
if [[ "$GTO_ENABLED" == true && "$LOG_NAME" != *_gto ]]; then
  LOG_NAME="${LOG_NAME}_gto"
fi
# Full path of the kernelslist file for the selected trace.
TRACE_PATH="${TRACE_BASE_DIR}/${TRACE_MAP[$TRACE_KEY]}"

# The trace file must exist before anything else is prepared.
if [[ ! -f "$TRACE_PATH" ]]; then
  echo "错误: Trace文件不存在: $TRACE_PATH"
  exit 1
fi

# n_level needs a warp-allocation CSV: keyed by "<trace_key>gto" when the
# default scheduler is gto, by the plain trace key otherwise.
if [[ "$NLEVEL_ENABLED" == true ]]; then
  case "$NLEVEL_DEFAULT_SCHEDULER" in
    gto)
      NLEVEL_WARP_ALLOC_REL="${NLEVEL_WARP_ALLOC_MAP[${TRACE_KEY}gto]}"
      if [[ -z "$NLEVEL_WARP_ALLOC_REL" ]]; then
        echo "错误: 未配置 trace_key=$TRACE_KEY 的 n_level gto warp alloc 文件"
        echo "请在脚本中补充 NLEVEL_WARP_ALLOC_MAP 的 ${TRACE_KEY}gto 映射,或改用 --nlevel-scheduler lrr"
        exit 1
      fi
      ;;
    *)
      NLEVEL_WARP_ALLOC_REL="${NLEVEL_WARP_ALLOC_MAP[$TRACE_KEY]}"
      if [[ -z "$NLEVEL_WARP_ALLOC_REL" ]]; then
        echo "错误: 未配置 trace_key=$TRACE_KEY 的 n_level warp alloc 文件"
        echo "请在脚本中补充 NLEVEL_WARP_ALLOC_MAP 映射"
        exit 1
      fi
      ;;
  esac

  # Map entries are relative to BASE_DIR; the resolved file must exist.
  NLEVEL_WARP_ALLOC_FILE="${BASE_DIR}/${NLEVEL_WARP_ALLOC_REL}"
  if [[ ! -f "$NLEVEL_WARP_ALLOC_FILE" ]]; then
    echo "错误: n_level warp alloc 文件不存在: $NLEVEL_WARP_ALLOC_FILE"
    exit 1
  fi
fi
# Output file names; everything is keyed by LOG_NAME.
LOG_FILE="${RESULT_LOG_DIR}/${LOG_NAME}.log"
CSV_FILE="${RESULT_L1_DIR}/${LOG_NAME}_l1.csv"
L2_CSV_FILE="${RESULT_L2_DIR}/${LOG_NAME}_l2.csv"

# Issue trace: raw CSV plus the names used by the gzip/zstd variants.
ISSUE_FILE_RAW="${RESULT_ISSUE_DIR}/${LOG_NAME}_issue.csv"
ISSUE_FILE_GZ="${ISSUE_FILE_RAW}.gz"
ISSUE_FILE_ZST="${ISSUE_FILE_RAW}.zst"
ISSUE_FILE="${ISSUE_FILE_RAW}"
ISSUE_COMPRESS_LOG="${RESULT_LOG_DIR}/${LOG_NAME}_issue_trace_compress.log"

# Bandwidth traces.
BANDWIDTH_FILE="${RESULT_BW_DIR}/${LOG_NAME}_hbm_partition.csv"
ICNT_BW_FILE="${RESULT_ICNT_BW_DIR}/${LOG_NAME}_icnt_bw.csv"
L2_BW_FILE="${RESULT_L2_BW_DIR}/${LOG_NAME}_l2_bw.csv"

# Paths written with a "result/..." prefix instead of an absolute directory.
REL_L1_FILE="result/L1cache_trace/${LOG_NAME}_l1.csv"
REL_L2_FILE="result/L2cache_trace/${LOG_NAME}_l2.csv"
REL_ISSUE_FILE="result/issue_trace/${LOG_NAME}_issue.csv"
REL_STALL_REASON_PC_STATS_DIR="result/issue_trace/stall_reason_pc_stats/${LOG_NAME}"
REL_BW_FILE="result/bandwidth/${LOG_NAME}_hbm_partition.csv"
REL_ICNT_BW_FILE="result/icnt_bw/${LOG_NAME}_icnt_bw.csv"
REL_L2_BW_FILE="result/l2_bw/${LOG_NAME}_l2_bw.csv"
REL_GRASP_TRACE_FILE="result/grasp_trace/${LOG_NAME}_grasp.csv"

# FIFO name for the issue trace (relative, and resolved against BASE_DIR).
ISSUE_FIFO_REL="${REL_ISSUE_FILE}.fifo"
ISSUE_FIFO_ABS="${BASE_DIR}/${ISSUE_FIFO_REL}"

# Per-run stall reason/pc stats directory, so that concurrent runs or
# different algorithms do not overwrite each other's output.
RESULT_STALL_REASON_PC_STATS_DIR="${RESULT_STALL_REASON_PC_STATS_BASE_DIR}/${LOG_NAME}"
mkdir -p "$RESULT_STALL_REASON_PC_STATS_DIR"
mkdir -p "$RESULT_GRASP_TRACE_DIR"

STALL_REASON_PC_STATS_BREAKDOWN="${RESULT_STALL_REASON_PC_STATS_DIR}/stall_reason_breakdown.csv"
STALL_REASON_PC_STATS_HIST="${RESULT_STALL_REASON_PC_STATS_DIR}/stall_reason_pc_hist.csv"
STALL_REASON_PC_STATS_TOPK_FILE="${RESULT_STALL_REASON_PC_STATS_DIR}/stall_reason_pc_topk_other.csv"

# To tell the files apart in an IDE, the stats CSVs are renamed to carry the
# log name as a prefix (only the prefixed copy is kept).
STALL_REASON_PC_STATS_BREAKDOWN_PREFIXED="${RESULT_STALL_REASON_PC_STATS_DIR}/${LOG_NAME}_stall_reason_breakdown.csv"
STALL_REASON_PC_STATS_HIST_PREFIXED="${RESULT_STALL_REASON_PC_STATS_DIR}/${LOG_NAME}_stall_reason_pc_hist.csv"
STALL_REASON_PC_STATS_TOPK_PREFIXED="${RESULT_STALL_REASON_PC_STATS_DIR}/${LOG_NAME}_stall_reason_pc_topk_other.csv"
# Create the temporary config file that receives the rewritten trace paths.
# A failed or empty mktemp result is fatal: otherwise the later
# `sed ... > "$TEMP_GPGPU_CONFIG"` would redirect to an empty path and the
# run would continue without a usable config.
if ! TEMP_GPGPU_CONFIG="$(mktemp)" || [[ -z "$TEMP_GPGPU_CONFIG" ]]; then
  echo "错误: 无法创建临时配置文件 (mktemp 失败)"
  exit 1
fi
# State consumed by cleanup(): compressor PID and the FIFO keep-open fd;
# both start out empty.
ISSUE_TRACE_COMPRESS_PID=""
ISSUE_TRACE_KEEPFD=""
cleanup() {
# 关闭 FIFO 常驻写端(如存在),让压缩器收到 EOF 并退出
if [ -n "${ISSUE_TRACE_KEEPFD:-}" ]; then
exec {ISSUE_TRACE_KEEPFD}>&- 2>/dev/null || true
ISSUE_TRACE_KEEPFD=""
fi
# 如果压缩器仍在运行,尽量等待其退出;若仍未退出则尝试终止
if [ -n "${ISSUE_TRACE_COMPRESS_PID:-}" ]; then
if kill -0 "$ISSUE_TRACE_COMPRESS_PID" 2>/dev/null; then
wait "$ISSUE_TRACE_COMPRESS_PID" 2>/dev/null || true
fi
ISSUE_TRACE_COMPRESS_PID=""
fi
# 清理 FIFO
if [ -n "${ISSUE_FIFO_ABS:-}" ] && [ -p "${ISSUE_FIFO_ABS}" ]; then
rm -f "$ISSUE_FIFO_ABS" 2>/dev/null || true
fi
rm -f "$TEMP_GPGPU_CONFIG" 2>/dev/null || true
}
# Run cleanup on every exit path.
trap cleanup EXIT

# Streaming compression of the issue trace only applies when the issue trace
# itself is enabled; otherwise the effective mode is "none".
if [[ "$ISSUE_TRACE_ENABLED" == true ]]; then
  ISSUE_TRACE_COMPRESS_EFFECTIVE="$ISSUE_TRACE_COMPRESS"
else
  ISSUE_TRACE_COMPRESS_EFFECTIVE="none"
fi

# With gzip/zstd, -issue_trace_path is pointed at the FIFO and the final
# output path switches to the compressed file name.
case "$ISSUE_TRACE_COMPRESS_EFFECTIVE" in
  gzip)
    REL_ISSUE_FILE="$ISSUE_FIFO_REL"
    ISSUE_FILE="$ISSUE_FILE_GZ"
    ;;
  zstd)
    REL_ISSUE_FILE="$ISSUE_FIFO_REL"
    ISSUE_FILE="$ISSUE_FILE_ZST"
    ;;
esac

# Copy the original config into the temp file while rewriting the trace
# output paths. This pass also sets the HBM partition trace to enabled with
# period 1. (Per the original note, icnt/l2_bw are written/enabled by later
# switch blocks.)
sed "
s|-l1_trace_path .*|-l1_trace_path \"$REL_L1_FILE\"|g
s|-l2_trace_path .*|-l2_trace_path \"$REL_L2_FILE\"|g
s|-issue_trace_path .*|-issue_trace_path \"$REL_ISSUE_FILE\"|g
s|-hbm_partition_trace_path .*|-hbm_partition_trace_path \"$REL_BW_FILE\"|g
s|-hbm_partition_trace_enable .*|-hbm_partition_trace_enable 1|g
s|-hbm_partition_trace_period .*|-hbm_partition_trace_period 1|g
" "$GPGPU_CONFIG" > "$TEMP_GPGPU_CONFIG"

# If the original config has no L2 trace entries, append them, so the L2
# output path is script-controlled in the same way as the L1/issue ones.
grep -q '^-l2_trace_enable[[:space:]]' "$TEMP_GPGPU_CONFIG" \
  || echo "-l2_trace_enable 1" >> "$TEMP_GPGPU_CONFIG"
grep -q '^-l2_trace_path[[:space:]]' "$TEMP_GPGPU_CONFIG" \
  || echo "-l2_trace_path \"$REL_L2_FILE\"" >> "$TEMP_GPGPU_CONFIG"