Add ML.ENERGY Benchmark paper (NeurIPS'25 D&B) and reindent TetriServe entry

jaywonchung · jaywonchung · commit 2683f203a9c9 · 2025-10-16T14:03:41.000-04:00
diff --git a/source/_data/SymbioticLab.bib b/source/_data/SymbioticLab.bib
@@ -2125,19 +2125,35 @@ @PhDThesis{amberljc:dissertation
 }
 
 @Article{tetriserve:arxiv25,
-author          = {Runyu Lu and Shiqi He and Wenxuan Tan and Shenggui Li and Ruofan Wu and Jeff J. Ma and Ang Chen and Mosharaf Chowdhury},
-title           = {{TetriServe}: Efficient {DiT} Serving for Heterogeneous Image Generation},
-year            = {2025},
-month           = {Oct},
-volume          = {abs/2510.01565},
-archivePrefix   = {arXiv},
-eprint          = {2510.01565},
-url             = {https://arxiv.org/abs/2510.01565},
-publist_confkey = {arXiv:2510.01565},
-publist_link    = {paper || https://arxiv.org/abs/2510.01565},
-publist_topic   = {Systems + AI},
-publist_abstract = {
-Diffusion Transformer (DiT) models excel at generating high-quality images through iterative denoising steps, but serving them under strict Service Level Objectives (SLOs) is challenging due to their high computational cost, particularly at large resolutions. Existing serving systems use fixed degree sequence parallelism, which is inefficient for heterogeneous workloads with mixed resolutions and deadlines, leading to poor GPU utilization and low SLO attainment. In this paper, we propose step-level sequence parallelism to dynamically adjust the parallel degree of individual requests according to their deadlines. We present TetriServe, a DiT serving system that implements this strategy for highly efficient image generation. Specifically, TetriServe introduces a novel round-based scheduling mechanism that improves SLO attainment: (1) discretizing time into fixed rounds to make deadline-aware scheduling tractable, (2) adapting parallelism at the step level and minimize GPU hour consumption, and (3) jointly packing requests to minimize late completions. Extensive evaluation on state-of-the-art DiT models shows that TetriServe achieves up to 32% higher SLO attainment compared to existing solutions without degrading image quality.
-}
+  author          = {Runyu Lu and Shiqi He and Wenxuan Tan and Shenggui Li and Ruofan Wu and Jeff J. Ma and Ang Chen and Mosharaf Chowdhury},
+  title           = {{TetriServe}: Efficient {DiT} Serving for Heterogeneous Image Generation},
+  year            = {2025},
+  month           = {Oct},
+  volume          = {abs/2510.01565},
+  archivePrefix   = {arXiv},
+  eprint          = {2510.01565},
+  url             = {https://arxiv.org/abs/2510.01565},
+  publist_confkey = {arXiv:2510.01565},
+  publist_link    = {paper || https://arxiv.org/abs/2510.01565},
+  publist_topic   = {Systems + AI},
+  publist_abstract = {
+  Diffusion Transformer (DiT) models excel at generating high-quality images through iterative denoising steps, but serving them under strict Service Level Objectives (SLOs) is challenging due to their high computational cost, particularly at large resolutions. Existing serving systems use fixed degree sequence parallelism, which is inefficient for heterogeneous workloads with mixed resolutions and deadlines, leading to poor GPU utilization and low SLO attainment. In this paper, we propose step-level sequence parallelism to dynamically adjust the parallel degree of individual requests according to their deadlines. We present TetriServe, a DiT serving system that implements this strategy for highly efficient image generation. Specifically, TetriServe introduces a novel round-based scheduling mechanism that improves SLO attainment: (1) discretizing time into fixed rounds to make deadline-aware scheduling tractable, (2) adapting parallelism at the step level and minimize GPU hour consumption, and (3) jointly packing requests to minimize late completions. Extensive evaluation on state-of-the-art DiT models shows that TetriServe achieves up to 32% higher SLO attainment compared to existing solutions without degrading image quality.
+  }
 }
 
+@InProceedings{mlenergy-benchmark:neuripsdb25,
+  author          = {Jae-Won Chung and Jiachen Liu and Jeff J. Ma and Ruofan Wu and Oh Jun Kweon and Yuxuan Xia and Zhiyu Wu and Mosharaf Chowdhury},
+  title           = {The {ML.ENERGY} Benchmark: Toward Automated Inference Energy Measurement and Optimization},
+  year            = {2025},
+  month           = {Dec},
+  booktitle       = {NeurIPS D\&B},
+  publist_confkey = {NeurIPS'25 D&B},
+  publist_link    = {paper || mlenergy-benchmark-neuripsdb25.pdf},
+  publist_link    = {code || https://github.com/ml-energy/benchmark},
+  publist_topic   = {Systems + AI},
+  publist_topic   = {Energy-Efficient Systems},
+  publist_badge   = {Spotlight Paper (acceptance rate: 2.81%)},
+  publist_abstract = {
+    As the adoption of Generative AI in real-world services grow explosively, energy has emerged as a critical bottleneck resource. However, energy remains a metric that is often overlooked, under-explored, or poorly understood in the context of building ML systems. We present the ML.ENERGY Benchmark, a benchmark suite and tool for measuring inference energy consumption under realistic service environments, and the corresponding ML.ENERGY Leaderboard, which have served as a valuable resource for those hoping to understand and optimize the energy consumption of their generative AI services. In this paper, we explain four key design principles for benchmarking ML energy we have acquired over time, and then describe how they are implemented in the ML.ENERGY Benchmark. We then highlight results from the early 2025 iteration of the benchmark, including energy measurements of 40 widely used model architectures across 6 different tasks, case studies of how ML design choices impact energy consumption, and how automated optimization recommendations can lead to significant (sometimes more than 40\%) energy savings without changing what is being computed by the model. The ML.ENERGY Benchmark is open-source and can be easily extended to various customized models and application scenarios.
+  }
+}
diff --git a/source/publications/files/mlenergy-benchmark:neuripsdb25/mlenergy-benchmark-neuripsdb25.pdf b/source/publications/files/mlenergy-benchmark:neuripsdb25/mlenergy-benchmark-neuripsdb25.pdf
diff --git a/source/publications/index.md b/source/publications/index.md
@@ -446,11 +446,19 @@ venues:
         date: 2024-12-09
         url: https://neurips.cc/Conferences/2024
         acceptance: 25.8%
+  'NeurIPS D&B':  # venue name; the new bib entry's booktitle is {NeurIPS D\&B} (escaped ampersand) -- NOTE(review): confirm the two still match after unescaping
+    category: Conferences
+    occurrences:
+      - key: NeurIPS'25 D&B  # same string as publist_confkey of mlenergy-benchmark:neuripsdb25 in SymbioticLab.bib
+        name: The Thirty-ninth Conference on Neural Information Processing Systems Track on Datasets and Benchmarks
+        date: 2025-12-02
+        url: https://neurips.cc/Conferences/2025
+        acceptance: 24.91%  # NOTE(review): not the 2.81% in that entry's publist_badge, which is labeled as the Spotlight rate
 {% endpublist %}
 
 ---
 
 {% note default %}
 #### Copyright notice
 The documents listed above have been provided by the contributing authors as a means to ensure timely dissemination of scholarly and technical work on a noncommercial basis. Copyright and all rights therein are maintained by the authors or by other copyright holders, notwithstanding that they have offered their works here electronically. It is understood that all persons copying this information will adhere to the terms and constraints invoked by each author’s copyright. These works may not be reposted without the explicit permission of the copyright holder.
-{% endnote %}
+{% endnote %}