2022 Publications
DietCode: Automatic Optimization for Dynamic Tensor Programs
Bojian Zheng, Ziheng Jiang, Cody Hao Yu, Haichen Shen, Josh Fromm, Yizhi Liu, Yida Wang, Luis Ceze, Tianqi Chen, Gennady Pekhimenko
MLSys, August 2022 (To Appear)

Roller: Fast and Efficient Tensor Compilation for Deep Learning
Hongyu Zhu, Gennady Pekhimenko et al.
OSDI, July 2022 (To Appear)

IOS: Inter-Operator Scheduler for CNN Acceleration BibTeX
Yaoyao Ding
MASc Thesis, March 2022

% Master's thesis, dated March 2022 (the PDF name also carries 2022).
% NOTE(review): school is inferred from the cs.toronto.edu URL host -- confirm.
@mastersthesis{UofTEcoSystem_Yaoyao_Ding_MASc_Thesis,
  author = {Yaoyao Ding},
  title  = {{IOS}: Inter-Operator Scheduler for {CNN} Acceleration},
  school = {University of Toronto},
  year   = {2022},
  month  = mar,
  url    = {https://www.cs.toronto.edu/ecosystem/papers/Theses/Yaoyao_Ding-MASc_2022.pdf}
}
Benchmarking, Profiling and White-Box Performance Modeling for DNN Training BibTeX
Hongyu Zhu
PhD Thesis, February 2022

% PhD thesis, dated February 2022.
% NOTE(review): school is inferred from the cs.toronto.edu URL host -- confirm.
@phdthesis{UofTEcoSystem_Hongyu_Zhu_PhD_Thesis,
  author = {Hongyu Zhu},
  title  = {Benchmarking, Profiling and White-Box Performance Modeling for {DNN} Training},
  school = {University of Toronto},
  year   = {2022},
  month  = feb,
  url    = {https://www.cs.toronto.edu/ecosystem/papers/Theses/Hongyu_Zhu-PhD_2022.pdf}
}
Automatic Horizontal Fusion for GPU Kernels BibTeX
Ao Li, Bojian Zheng, Gennady Pekhimenko, Fan Long
CGO, February 2022

@article{UofTEcoSystem_Horizontal_Fusion,
  title      = {Automatic Horizontal Fusion for {GPU} Kernels},
  author     = {Ao Li and Bojian Zheng and Gennady Pekhimenko and Fan Long},
  journal    = {CoRR},
  volume     = {abs/2007.01277},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2007.01277},
  url        = {https://arxiv.org/abs/2007.01277},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-01277.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 06 Jul 2020 15:26:01 +0200}
}
2021 Publications
Enabling Privacy-Preserving Model Personalization via On-Device Incremental Training BibTeX
Jiacheng Yang
MASc Thesis, December 2021

% Master's thesis, dated December 2021.
% NOTE(review): school is inferred from the cs.toronto.edu URL host -- confirm.
@mastersthesis{UofTEcoSystem_Jiacheng_Yang_MASc_Thesis,
  author = {Jiacheng Yang},
  title  = {Enabling Privacy-Preserving Model Personalization via On-Device Incremental Training},
  school = {University of Toronto},
  year   = {2021},
  month  = dec,
  url    = {https://www.cs.toronto.edu/ecosystem/papers/Theses/Jiacheng_Yang-MASc_2021.pdf}
}
Moshpit SGD: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices BibTeX
Max Ryabinin, Eduard Gorbunov, Vsevolod Plokhotnyuk, Gennady Pekhimenko
NeurIPS, December 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_Moshpit_SGD,
  title     = {{Moshpit SGD}: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices},
  author    = {Max Ryabinin and Eduard Gorbunov and Vsevolod Plokhotnyuk and Gennady Pekhimenko},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {A. Beygelzimer and Y. Dauphin and P. Liang and J. Wortman Vaughan},
  year      = {2021},
  url       = {https://openreview.net/forum?id=cwWfDHYpb1z}
}
Distributed Deep Learning In Open Collaborations BibTeX
Michael Diskin, Alexey Bukhtiyarov, Max Ryabinin, Lucile Saulnier, Quentin Lhoest, Anton Sinitsin, Dmitriy Popov, Dmitry Pyrkin, Maxim Kashirin, Alexander Borzunov, Albert Villanova del Moral, Denis Mazur, Ilia Kobelev, Yacine Jernite, Thomas Wolf, Gennady Pekhimenko
NeurIPS, December 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
% The compound surname "Villanova del Moral" is braced and given in
% "Last, First" form; unbraced, the lowercase "del" makes BibTeX treat only
% "Moral" as the last name.
@inproceedings{UofTEcoSystem_Distrib_Training_in_Open_Collab,
  title     = {Distributed Deep Learning In Open Collaborations},
  author    = {Michael Diskin and
               Alexey Bukhtiyarov and
               Max Ryabinin and
               Lucile Saulnier and
               Quentin Lhoest and
               Anton Sinitsin and
               Dmitry Popov and
               Dmitriy Pyrkin and
               Maxim Kashirin and
               Alexander Borzunov and
               {Villanova del Moral}, Albert and
               Denis Mazur and
               Ilia Kobelev and
               Yacine Jernite and
               Thomas Wolf and
               Gennady Pekhimenko},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {A. Beygelzimer and Y. Dauphin and P. Liang and J. Wortman Vaughan},
  year      = {2021},
  url       = {https://openreview.net/forum?id=FYHktcK-7v}
}
FPRaker: A Processing Element For Accelerating Neural Network Training BibTeX
Omar Mohamed Awad, Mostafa Mahmoud, Isak Edo, Ali Hadi Zadeh, Ciaran Bannon, Anand Jayarajan, Gennady Pekhimenko, Andreas Moshovos
MICRO, October 2021

@inproceedings{UofTEcoSystem_FPRaker,
  title     = {{FPRaker}: A Processing Element For Accelerating Neural Network Training},
  author    = {Omar Mohamed Awad and Mostafa Mahmoud and Isak Edo and
               Ali Hadi Zadeh and Ciaran Bannon and Anand Jayarajan and
               Gennady Pekhimenko and Andreas Moshovos},
  booktitle = {{MICRO} '21: 54th Annual {IEEE/ACM} International Symposium
               on Microarchitecture, Virtual Event, Greece, October 18-22, 2021},
  publisher = {{ACM}},
  pages     = {857--869},
  year      = {2021},
  doi       = {10.1145/3466752.3480106},
  url       = {https://doi.org/10.1145/3466752.3480106},
  biburl    = {https://dblp.org/rec/conf/micro/AwadMEZBJPM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 19 Oct 2021 16:42:06 +0200}
}
MedPerf: Open Benchmarking Platform for Medical Artificial Intelligence using Federated Evaluation BibTeX
Alexandros Karargyris, Renato Umeton, Micah J. Sheller, Alejandro Aristizabal, Johnu George, Srini Bala, Daniel J. Beutel, Victor Bittorf, Akshay Chaudhari, Alexander Chowdhury, Cody Coleman, Bala Desinghu, Gregory Diamos, Debo Dutta, Diane Feddema, Grigori Fursin, Junyi Guo, Xinyuan Huang, David Kanter, Satyananda Kashyap, Nicholas Lane, Indranil Mallick, Pietro Mascagni, Virendra Mehta, Vivek Natarajan, Nikola Nikolov, Nicolas Padoy, Gennady Pekhimenko, Vijay Janapa Reddi, G Anthony Reina, Pablo Ribalta, Jacob Rosenthal, Abhishek Singh, Jayaraman J. Thiagarajan, Anna Wuest, Maria Xenochristou, Daguang Xu, Poonam Yadav, Michael Rosenthal, Massimo Loda, Jason M. Johnson, Peter Mattson
ArXiv, September 2021

@article{UofTEcoSystem_MedPerf,
  title      = {{MedPerf}: Open Benchmarking Platform for Medical Artificial
                Intelligence using Federated Evaluation},
  author     = {Alexandros Karargyris and Renato Umeton and Micah J. Sheller and
                Alejandro Aristizabal and Johnu George and Srini Bala and
                Daniel J. Beutel and Victor Bittorf and Akshay Chaudhari and
                Alexander Chowdhury and Cody Coleman and Bala Desinghu and
                Gregory F. Diamos and Debo Dutta and Diane Feddema and
                Grigori Fursin and Junyi Guo and Xinyuan Huang and
                David Kanter and Satyananda Kashyap and Nicholas D. Lane and
                Indranil Mallick and Pietro Mascagni and Virendra Mehta and
                Vivek Natarajan and Nikola Nikolov and Nicolas Padoy and
                Gennady Pekhimenko and Vijay Janapa Reddi and G. Anthony Reina and
                Pablo Ribalta and Jacob Rosenthal and Abhishek Singh and
                Jayaraman J. Thiagarajan and Anna Wuest and Maria Xenochristou and
                Daguang Xu and Poonam Yadav and Michael Rosenthal and
                Massimo Loda and Jason M. Johnson and Peter Mattson},
  journal    = {CoRR},
  volume     = {abs/2110.01406},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2110.01406},
  url        = {https://arxiv.org/abs/2110.01406},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-01406.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 08 Oct 2021 15:47:55 +0200}
}
Habitat: A Runtime-Based Computational Performance Predictor for Deep Neural Network Training BibTeX
Geoffrey X. Yu, Yubo Gao, Pavel Golikov, Gennady Pekhimenko
USENIX ATC, July 2021

@inproceedings{UofTEcoSystem_Habitat,
  title     = {{Habitat}: A Runtime-Based Computational Performance Predictor
               for Deep Neural Network Training},
  author    = {Geoffrey X. Yu and Yubo Gao and Pavel Golikov and Gennady Pekhimenko},
  editor    = {Irina Calciu and Geoff Kuenning},
  booktitle = {2021 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2021,
               July 14-16, 2021},
  publisher = {{USENIX} Association},
  pages     = {503--521},
  year      = {2021},
  url       = {https://www.usenix.org/conference/atc21/presentation/yu},
  biburl    = {https://dblp.org/rec/conf/usenix/YuGGP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 12 Aug 2021 18:08:26 +0200}
}
NVOverlay: Enabling Efficient and Scalable High-Frequency Snapshotting to NVM BibTeX
Ziqi Wang, Chul-Hwan Choo, Michael A. Kozuch, Todd C. Mowry, Gennady Pekhimenko, Vivek Seshadri, Dimitrios Skarlatos
ISCA, June 2021

% Conference paper: the record has a booktitle and no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_NVOverlay,
  author    = {Ziqi Wang and
               Chul{-}Hwan Choo and
               Michael A. Kozuch and
               Todd C. Mowry and
               Gennady Pekhimenko and
               Vivek Seshadri and
               Dimitrios Skarlatos},
  title     = {{NVOverlay}: Enabling Efficient and Scalable High-Frequency Snapshotting
               to {NVM}},
  booktitle = {48th {ACM/IEEE} Annual International Symposium on Computer Architecture,
               {ISCA} 2021, Valencia, Spain, June 14-18, 2021},
  pages     = {498--511},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/ISCA52012.2021.00046},
  doi       = {10.1109/ISCA52012.2021.00046},
  timestamp = {Thu, 14 Oct 2021 10:17:42 +0200},
  biburl    = {https://dblp.org/rec/conf/isca/WangCKMPS021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Horizontally Fused Training Array: An Effective Hardware Utilization Squeezer for Training Novel Deep Learning Models BibTeX
Shang Wang, Peiming Yang, Yuxuan Zheng, Xin Li, Gennady Pekhimenko
MLSys, April 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_HFTA,
  author    = {Shang Wang and
               Peiming Yang and
               Yuxuan Zheng and
               Xin Li and
               Gennady Pekhimenko},
  title     = {Horizontally Fused Training Array: An Effective Hardware Utilization Squeezer for Training Novel Deep Learning Models},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {599--623},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/a97da629b098b75c294dffdc3e463904-Paper.pdf}
}
RL-Scope: Cross-Stack Profiling for Deep Reinforcement Learning Workloads BibTeX
James Gleeson, Srivatsan Krishnan, Moshe Gabel, Vijay Janapa Reddi, Eyal de Lara, Gennady Pekhimenko
MLSys, April 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_RLScope,
  author    = {James Gleeson and
               Srivatsan Krishnan and
               Moshe Gabel and
               Vijay Janapa Reddi and
               Eyal de Lara and
               Gennady Pekhimenko},
  title     = {{RL-Scope}: Cross-stack Profiling for Deep Reinforcement Learning Workloads},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {783--799},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/d1fe173d08e959397adf34b1d77e88d7-Paper.pdf}
}
IOS: Inter-Operator Scheduler for CNN Acceleration BibTeX
Yaoyao Ding, Ligeng Zhu, Zhihao Jia, Gennady Pekhimenko, Song Han
MLSys, April 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_IOS,
  author    = {Yaoyao Ding and
               Ligeng Zhu and
               Zhihao Jia and
               Gennady Pekhimenko and
               Song Han},
  title     = {{IOS}: Inter-Operator Scheduler for {CNN} Acceleration},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {167--180},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/38b3eff8baf56627478ec76a704e9b52-Paper.pdf}
}
Boveda: Building an On-Chip Deep Learning Memory Hierarchy Brick by Brick
Isak Edo Vivancos, Sayeh Sharify, Milos Nikolic, Ciaran Bannon, Mostafa Mahmoud, Alberto Delmas Lascorz, Gennady Pekhimenko, Andreas Moshovos
MLSys, April 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
@inproceedings{UofTEcoSystem_Boveda,
  author    = {Edo Vivancos, Isak and
               Sharify, Sayeh and
               Ly-Ma, Daniel and
               Abdelhadi, Ameer and
               Bannon, Ciaran and
               Nikolic, Milos and
               Mahmoud, Mostafa and
               Delmas Lascorz, Alberto and
               Pekhimenko, Gennady and
               Moshovos, Andreas},
  title     = {{Boveda}: Building an On-Chip Deep Learning Memory Hierarchy Brick by Brick},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {1--20},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/013d407166ec4fa56eb1e1f8cbe183b9-Paper.pdf}
}
LifeStream: A High-performance Stream Processing Engine for Waveform Data BibTeX
Anand Jayarajan, Kimberly Hau, Andrew Goodwin, Gennady Pekhimenko
ASPLOS, April 2021

% Conference paper: the record has a booktitle and editors but no journal,
% so it is typed as a proceedings contribution.
% The title is stored in Title Case; styles can downcase it but cannot
% restore capitals that were never entered.
@inproceedings{UofTEcoSystem_LifeStream,
  author    = {Anand Jayarajan and
               Kimberly Hau and
               Andrew Goodwin and
               Gennady Pekhimenko},
  editor    = {Tim Sherwood and
               Emery D. Berger and
               Christos Kozyrakis},
  title     = {{LifeStream}: A High-Performance Stream Processing Engine for Periodic
               Streams},
  booktitle = {{ASPLOS} '21: 26th {ACM} International Conference on Architectural
               Support for Programming Languages and Operating Systems, Virtual Event,
               USA, April 19-23, 2021},
  pages     = {107--122},
  publisher = {{ACM}},
  year      = {2021},
  url       = {https://doi.org/10.1145/3445814.3446725},
  doi       = {10.1145/3445814.3446725},
  timestamp = {Sat, 08 Jan 2022 02:24:44 +0100},
  biburl    = {https://dblp.org/rec/conf/asplos/JayarajanHGP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Gretch: A Hardware Prefetcher for Graph Analytics BibTeX
Anirudh Mohan Kaushik, Gennady Pekhimenko, Hiren Patel
TACO, February 2021

% Journal article. The journal field holds the full journal name, and the
% month uses the standard lowercase three-letter macro.
@article{UofTEcoSystem_Gretch,
  author     = {Anirudh Mohan Kaushik and
                Gennady Pekhimenko and
                Hiren Patel},
  title      = {{Gretch}: A Hardware Prefetcher for Graph Analytics},
  journal    = {{ACM} Transactions on Architecture and Code Optimization},
  volume     = {18},
  number     = {2},
  year       = {2021},
  month      = feb,
  issue_date = {February 2021},
  articleno  = {18},
  numpages   = {25},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {1544-3566},
  doi        = {10.1145/3439803},
  url        = {https://doi.org/10.1145/3439803},
  keywords   = {data-dependent memory accesses, Hardware prefetching, graph analytics}
}
2020 Publications
TensorDash: Exploiting Sparsity to Accelerate Deep Neural Network Training BibTeX
Mostafa Mahmoud, Isak Edo Vivancos, Ali Hadi Zadeh, Omar Mohamed Awad, Gennady Pekhimenko, Jorge Albericio, Andreas Moshovos
MICRO, October 2020

@inproceedings{UofTEcoSystem_TensorDash,
  title     = {{TensorDash}: Exploiting Sparsity to Accelerate Deep Neural
               Network Training},
  author    = {Mostafa Mahmoud and Isak Edo and Ali Hadi Zadeh and
               Omar Mohamed Awad and Gennady Pekhimenko and Jorge Albericio and
               Andreas Moshovos},
  booktitle = {53rd Annual {IEEE/ACM} International Symposium on
               Microarchitecture, {MICRO} 2020, Athens, Greece, October 17-21, 2020},
  publisher = {{IEEE}},
  pages     = {781--795},
  year      = {2020},
  doi       = {10.1109/MICRO50266.2020.00069},
  url       = {https://doi.org/10.1109/MICRO50266.2020.00069},
  biburl    = {https://dblp.org/rec/conf/micro/MahmoudEZAPAM20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 17 Nov 2020 13:33:12 +0100}
}
Skyline: Interactive In-Editor Computational Performance Profiling for Deep Neural Network Training BibTeX
Geoffrey Yu, Tovi Grossman, Gennady Pekhimenko
UIST, October 2020

@inproceedings{UofTEcoSystem_Skyline,
  title     = {{Skyline}: Interactive In-Editor Computational Performance
               Profiling for Deep Neural Network Training},
  author    = {Geoffrey X. Yu and Tovi Grossman and Gennady Pekhimenko},
  editor    = {Shamsi T. Iqbal and Karon E. MacLean and Fanny Chevalier and
               Stefanie Mueller},
  booktitle = {{UIST} '20: The 33rd Annual {ACM} Symposium on User Interface
               Software and Technology, Virtual Event, USA, October 20-23, 2020},
  publisher = {{ACM}},
  pages     = {126--139},
  year      = {2020},
  doi       = {10.1145/3379337.3415890},
  url       = {https://doi.org/10.1145/3379337.3415890},
  biburl    = {https://dblp.org/rec/conf/uist/YuGP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Sun, 18 Oct 2020 18:35:39 +0200}
}
Multi-Node Bert-pretraining: Cost-efficient Approach BibTeX
Jiahuang Lin, Xin Li, Gennady Pekhimenko
arXiv, August 2020

@misc{UofTEcoSystem_Multi_Node_BERT_Pretraining,
  author        = {Jiahuang Lin and
                   Xin Li and
                   Gennady Pekhimenko},
  title         = {Multi-Node {BERT}-Pretraining: Cost-efficient Approach},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2008.00177},
  primaryClass  = {cs.LG}
}
Daydream: Accurately Estimating the Efficacy of Optimizations for DNN Training BibTeX
Hongyu Zhu, Amar Phanishayee, Gennady Pekhimenko
USENIX ATC, July 2020

@inproceedings{UofTEcoSystem_Daydream,
  title     = {{Daydream}: Accurately Estimating the Efficacy of Optimizations
               for {DNN} Training},
  author    = {Hongyu Zhu and Amar Phanishayee and Gennady Pekhimenko},
  editor    = {Ada Gavrilovska and Erez Zadok},
  booktitle = {2020 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2020,
               July 15-17, 2020},
  publisher = {{USENIX} Association},
  pages     = {337--352},
  year      = {2020},
  url       = {https://www.usenix.org/conference/atc20/presentation/zhu-hongyu},
  biburl    = {https://dblp.org/rec/conf/usenix/ZhuPP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Mon, 01 Feb 2021 08:43:50 +0100}
}
Echo: Compiler-based GPU Memory Footprint Reduction for LSTM RNN Training BibTeX
Bojian Zheng, Abhishek Tiwari, Nandita Vijaykumar, Gennady Pekhimenko
ISCA, June 2020

@inproceedings{UofTEcoSystem_Echo,
  title     = {{Echo}: Compiler-based {GPU} Memory Footprint Reduction for
               {LSTM} {RNN} Training},
  author    = {Bojian Zheng and Nandita Vijaykumar and Gennady Pekhimenko},
  booktitle = {47th {ACM/IEEE} Annual International Symposium on Computer
               Architecture, {ISCA} 2020, Valencia, Spain, May 30 - June 3, 2020},
  publisher = {{IEEE}},
  pages     = {1089--1102},
  year      = {2020},
  doi       = {10.1109/ISCA45697.2020.00092},
  url       = {https://doi.org/10.1109/ISCA45697.2020.00092},
  biburl    = {https://dblp.org/rec/conf/isca/ZhengVP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 22 Jul 2020 15:50:37 +0200}
}
MLPerf Inference Benchmark BibTeX
Vijay Janapa Reddi, Christine Cheng, David Kanter, Peter Mattson, Guenther Schmuelling, Carole-Jean Wu, Brian Anderson, Maximilien Breughe, Mark Charlebois, William Chou, Ramesh Chukka, Cody Coleman, Sam Davis, Pan Deng, Greg Diamos, Jared Duke, Dave Fick, J. Scott Gardner, Itay Hubara, Sachin Idgunji, Thomas B. Jablin, Jeff Jiao, Tom St. John, Pankaj Kanwar, David Lee, Jeffery Liao, Anton Lokhmotov, Francisco Massa, Peng Meng, Paulius Micikevicius, Colin Osborne, Gennady Pekhimenko, Arun Tejusve Raghunath Rajan, Dilip Sequeira, Ashish Sirasao, Fei Sun, Hanlin Tang, Michael Thomson, Frank Wei, Ephrem Wu, Lingjie Xu, Koichi Yamada, Bing Yu, George Yuan, Aaron Zhong, Peizhao Zhang, Yuchen Zhou
ISCA, June 2020
MICRO Top Picks

% "MLPerf" is brace-protected in the title so that styles which apply
% sentence casing do not lowercase its internal capitals.
@inproceedings{UofTEcoSystem_MLPerf_Inference,
  author    = {Vijay Janapa Reddi and
               Christine Cheng and
               David Kanter and
               Peter Mattson and
               Guenther Schmuelling and
               Carole{-}Jean Wu and
               Brian Anderson and
               Maximilien Breughe and
               Mark Charlebois and
               William Chou and
               Ramesh Chukka and
               Cody Coleman and
               Sam Davis and
               Pan Deng and
               Greg Diamos and
               Jared Duke and
               Dave Fick and
               J. Scott Gardner and
               Itay Hubara and
               Sachin Idgunji and
               Thomas B. Jablin and
               Jeff Jiao and
               Tom St. John and
               Pankaj Kanwar and
               David Lee and
               Jeffery Liao and
               Anton Lokhmotov and
               Francisco Massa and
               Peng Meng and
               Paulius Micikevicius and
               Colin Osborne and
               Gennady Pekhimenko and
               Arun Tejusve Raghunath Rajan and
               Dilip Sequeira and
               Ashish Sirasao and
               Fei Sun and
               Hanlin Tang and
               Michael Thomson and
               Frank Wei and
               Ephrem Wu and
               Lingjie Xu and
               Koichi Yamada and
               Bing Yu and
               George Yuan and
               Aaron Zhong and
               Peizhao Zhang and
               Yuchen Zhou},
  title     = {{MLPerf} Inference Benchmark},
  booktitle = {47th {ACM/IEEE} Annual International Symposium on Computer Architecture,
               {ISCA} 2020, Valencia, Spain, May 30 - June 3, 2020},
  pages     = {446--459},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/ISCA45697.2020.00045},
  doi       = {10.1109/ISCA45697.2020.00045},
  timestamp = {Wed, 22 Jul 2020 15:50:37 +0200},
  biburl    = {https://dblp.org/rec/conf/isca/ReddiCKMSWABCCC20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
BPPSA: Scaling Back-propagation by Parallel Scan Algorithm BibTeX
Shang Wang, Yifan Bai, Gennady Pekhimenko
MLSys, March 2020

@inproceedings{UofTEcoSystem_BPPSA,
  title     = {{BPPSA}: Scaling Back-propagation by Parallel Scan Algorithm},
  author    = {Shang Wang and Yifan Bai and Gennady Pekhimenko},
  editor    = {Inderjit S. Dhillon and Dimitris S. Papailiopoulos and Vivienne Sze},
  booktitle = {Proceedings of Machine Learning and Systems 2020, MLSys 2020,
               Austin, TX, USA, March 2-4, 2020},
  publisher = {mlsys.org},
  year      = {2020},
  url       = {https://proceedings.mlsys.org/book/317.pdf},
  biburl    = {https://dblp.org/rec/conf/mlsys/0002BP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 18 Jun 2020 15:48:04 +0200}
}
MLPerf Training Benchmark BibTeX
Peter Mattson, Christine Cheng, Cody Coleman, Greg Diamos, Paulius Micikevicius, David Patterson, Hanlin Tang, Gu-Yeon Wei, Peter Bailis, Victor Bittorf, David Brooks, Dehao Chen, Debojyoti Dutta, Udit Gupta, Kim Hazelwood, Andrew Hock, Xinyuan Huang, Bill Jia, Daniel Kang, David Kanter, Naveen Kumar, Jeffery Liao, Deepak Narayanan, Tayo Oguntebi, Gennady Pekhimenko, Lillian Pentecost, Vijay Janapa Reddi, Taylor Robie, Tom St. John, Carole-Jean Wu, Lingjie Xu, Cliff Young, Matei Zaharia
MLSys, March 2020

% "MLPerf" is brace-protected in the title so that styles which apply
% sentence casing do not lowercase its internal capitals.
@inproceedings{UofTEcoSystem_MLPerf_Training,
  author    = {Peter Mattson and
               Christine Cheng and
               Gregory F. Diamos and
               Cody Coleman and
               Paulius Micikevicius and
               David A. Patterson and
               Hanlin Tang and
               Gu{-}Yeon Wei and
               Peter Bailis and
               Victor Bittorf and
               David Brooks and
               Dehao Chen and
               Debo Dutta and
               Udit Gupta and
               Kim M. Hazelwood and
               Andy Hock and
               Xinyuan Huang and
               Daniel Kang and
               David Kanter and
               Naveen Kumar and
               Jeffery Liao and
               Deepak Narayanan and
               Tayo Oguntebi and
               Gennady Pekhimenko and
               Lillian Pentecost and
               Vijay Janapa Reddi and
               Taylor Robie and
               Tom St. John and
               Carole{-}Jean Wu and
               Lingjie Xu and
               Cliff Young and
               Matei Zaharia},
  editor    = {Inderjit S. Dhillon and
               Dimitris S. Papailiopoulos and
               Vivienne Sze},
  title     = {{MLPerf} Training Benchmark},
  booktitle = {Proceedings of Machine Learning and Systems 2020, MLSys 2020, Austin,
               TX, USA, March 2-4, 2020},
  publisher = {mlsys.org},
  year      = {2020},
  url       = {https://proceedings.mlsys.org/book/309.pdf},
  timestamp = {Thu, 18 Jun 2020 15:48:04 +0200},
  biburl    = {https://dblp.org/rec/conf/mlsys/MattsonCDCMPTWB20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Towards Making the Most of BERT in Neural Machine Translation BibTeX
Jiacheng Yang, Mingxuan Wang, Hao Zhou, Chengqi Zhao, Weinan Zhang, Yong Yu, Lei Li
AAAI, February 2020

@inproceedings{UofTEcoSystem_BERT_for_NMT,
  title     = {Towards Making the Most of {BERT} in Neural Machine Translation},
  author    = {Jiacheng Yang and Mingxuan Wang and Hao Zhou and
               Chengqi Zhao and Weinan Zhang and Yong Yu and Lei Li},
  booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence,
               {AAAI} 2020, The Thirty-Second Innovative Applications of
               Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI}
               Symposium on Educational Advances in Artificial Intelligence,
               {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  publisher = {{AAAI} Press},
  pages     = {9378--9385},
  year      = {2020},
  url       = {https://aaai.org/ojs/index.php/AAAI/article/view/6479},
  biburl    = {https://dblp.org/rec/conf/aaai/YangW0Z00020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 02 Feb 2021 08:00:21 +0100}
}
2019 Publications
Janus: Optimizing Memory and Storage Support for Non-Volatile Memory Systems BibTeX
Sihang Liu, Korakit Seemakhupt, Gennady Pekhimenko, Aasheesh Kolli, Samira Khan
ISCA, June 2019
MICRO Top Picks Honorable Mention

% The title is stored in Title Case; styles can downcase it but cannot
% restore capitals that were never entered.
@inproceedings{UofTEcoSystem_Janus,
  author    = {Sihang Liu and
               Korakit Seemakhupt and
               Gennady Pekhimenko and
               Aasheesh Kolli and
               Samira Manabi Khan},
  editor    = {Srilatha Bobbie Manne and
               Hillery C. Hunter and
               Erik R. Altman},
  title     = {{Janus}: Optimizing Memory and Storage Support for Non-Volatile Memory
               Systems},
  booktitle = {Proceedings of the 46th International Symposium on Computer Architecture,
               {ISCA} 2019, Phoenix, AZ, USA, June 22-26, 2019},
  pages     = {143--156},
  publisher = {{ACM}},
  year      = {2019},
  url       = {https://doi.org/10.1145/3307650.3322206},
  doi       = {10.1145/3307650.3322206},
  timestamp = {Tue, 29 Dec 2020 09:53:49 +0100},
  biburl    = {https://dblp.org/rec/conf/isca/0001SPKK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Priority-based Parameter Propagation for Distributed DNN Training BibTeX
Anand Jayarajan, Jinliang Wei, Garth Gibson, Alexandra Fedorova, Gennady Pekhimenko
SysML, April 2019

@inproceedings{UofTEcoSystem_P3,
  title     = {Priority-based Parameter Propagation for Distributed {DNN} Training},
  author    = {Anand Jayarajan and Jinliang Wei and Garth Gibson and
               Alexandra Fedorova and Gennady Pekhimenko},
  editor    = {Ameet Talwalkar and Virginia Smith and Matei Zaharia},
  booktitle = {Proceedings of Machine Learning and Systems 2019, MLSys 2019,
               Stanford, CA, USA, March 31 - April 2, 2019},
  publisher = {mlsys.org},
  year      = {2019},
  url       = {https://proceedings.mlsys.org/book/283.pdf},
  biburl    = {https://dblp.org/rec/conf/mlsys/JayarajanWGFP19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 18 Jun 2020 15:48:01 +0200}
}
StreamBox-HBM: Stream Analytics on High Bandwidth Hybrid Memory BibTeX
Hongyu Miao, Myeongjae Jeon, Gennady Pekhimenko, Kathryn S. McKinley, Felix Xiaozhu Lin
ASPLOS, April 2019

@inproceedings{UofTEcoSystem_StreamBox,
  title     = {{StreamBox-HBM}: Stream Analytics on High Bandwidth Hybrid Memory},
  author    = {Hongyu Miao and Myeongjae Jeon and Gennady Pekhimenko and
               Kathryn S. McKinley and Felix Xiaozhu Lin},
  editor    = {Iris Bahar and Maurice Herlihy and Emmett Witchel and
               Alvin R. Lebeck},
  booktitle = {Proceedings of the Twenty-Fourth International Conference on
               Architectural Support for Programming Languages and Operating
               Systems, {ASPLOS} 2019, Providence, RI, USA, April 13-17, 2019},
  publisher = {{ACM}},
  pages     = {167--181},
  year      = {2019},
  doi       = {10.1145/3297858.3304031},
  url       = {https://doi.org/10.1145/3297858.3304031},
  biburl    = {https://dblp.org/rec/conf/asplos/MiaoJPML19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 09 Feb 2021 13:47:47 +0100}
}
2018 Publications
EcoRNN: Efficient Computing of LSTM RNN on GPUs BibTeX
Bojian Zheng, Gennady Pekhimenko
MICRO ACM Student Research Competition, October 2018
Third Place in ACM Student Research Competition

% The acronyms in the title are brace-protected so that styles which apply
% sentence casing do not lowercase them.
@online{UofTEcoSystem_EcoRNN,
  author = {Bojian Zheng and
            Gennady Pekhimenko},
  title  = {{EcoRNN}: Efficient Computing of {LSTM} {RNN} on {GPUs}},
  year   = {2018},
  url    = {https://www.microarch.org/micro51/SRC/posters/20_zheng.pdf}
}
Benchmarking and Analyzing Deep Neural Network Training BibTeX
Hongyu Zhu, Mohamed Akrout, Bojian Zheng, Andrew Pelegris, Amar Phanishayee, Bianca Schroeder, Gennady Pekhimenko
IISWC, July 2018

@inproceedings{UofTEcoSystem_TBD_IISWC,
  title     = {Benchmarking and Analyzing Deep Neural Network Training},
  author    = {Hongyu Zhu and Mohamed Akrout and Bojian Zheng and
               Andrew Pelegris and Anand Jayarajan and Amar Phanishayee and
               Bianca Schroeder and Gennady Pekhimenko},
  booktitle = {2018 {IEEE} International Symposium on Workload
               Characterization, {IISWC} 2018, Raleigh, NC, USA,
               September 30 - October 2, 2018},
  publisher = {{IEEE} Computer Society},
  year      = 2018,
  pages     = {88--100},
  doi       = {10.1109/IISWC.2018.8573476},
  url       = {https://doi.org/10.1109/IISWC.2018.8573476},
  timestamp = {Wed, 16 Oct 2019 14:14:56 +0200},
  biburl    = {https://dblp.org/rec/conf/iiswc/ZhuAZPJPSP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
TerseCades: Efficient Data Compression in Stream Processing BibTeX
Gennady Pekhimenko, Chuanxiong Guo, Myeongjae Jeon, Ryan Huang, Lidong Zhou
USENIX Annual Technical Conference, July 2018

@inproceedings{UofTEcoSystem_TerseCades,
  title     = {{TerseCades}: Efficient Data Compression in Stream Processing},
  author    = {Gennady Pekhimenko and Chuanxiong Guo and Myeongjae Jeon and
               Peng Huang and Lidong Zhou},
  booktitle = {2018 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2018,
               Boston, MA, USA, July 11-13, 2018},
  editor    = {Haryadi S. Gunawi and Benjamin Reed},
  publisher = {{USENIX} Association},
  year      = 2018,
  pages     = {307--320},
  url       = {https://www.usenix.org/conference/atc18/presentation/pekhimenko},
  timestamp = {Mon, 01 Feb 2021 08:43:54 +0100},
  biburl    = {https://dblp.org/rec/conf/usenix/PekhimenkoGJHZ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
Gist: Efficient Data Encoding for Deep Neural Network Training BibTeX
Animesh Jain, Amar Phanishayee, Jason Mars, Lingjia Tang, Gennady Pekhimenko
ISCA, June 2018

@inproceedings{UofTEcoSystem_Gist,
  title     = {{Gist}: Efficient Data Encoding for Deep Neural Network Training},
  author    = {Animesh Jain and Amar Phanishayee and Jason Mars and
               Lingjia Tang and Gennady Pekhimenko},
  booktitle = {45th {ACM/IEEE} Annual International Symposium on Computer
               Architecture, {ISCA} 2018, Los Angeles, CA, USA, June 1-6, 2018},
  editor    = {Murali Annavaram and Timothy Mark Pinkston and Babak Falsafi},
  publisher = {{IEEE} Computer Society},
  year      = 2018,
  pages     = {776--789},
  doi       = {10.1109/ISCA.2018.00070},
  url       = {https://doi.org/10.1109/ISCA.2018.00070},
  timestamp = {Sun, 10 May 2020 21:12:50 +0200},
  biburl    = {https://dblp.org/rec/conf/isca/JainPMTP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
A Case for Richer Cross-Layer Abstractions: Bridging the Semantic Gap with Expressive Memory BibTeX
Nandita Vijaykumar, Abhilasha Jain, Diptesh Majumdar, Kevin Hsieh, Gennady Pekhimenko, Eiman Ebrahimi, Nastaran Hajinazar, Phillip B. Gibbons, Onur Mutlu
ISCA, June 2018

@inproceedings{UofTEcoSystem_XMem,
  title     = {A Case for Richer Cross-Layer Abstractions: Bridging the
               Semantic Gap with Expressive Memory},
  author    = {Nandita Vijaykumar and Abhilasha Jain and Diptesh Majumdar and
               Kevin Hsieh and Gennady Pekhimenko and Eiman Ebrahimi and
               Nastaran Hajinazar and Phillip B. Gibbons and Onur Mutlu},
  booktitle = {45th {ACM/IEEE} Annual International Symposium on Computer
               Architecture, {ISCA} 2018, Los Angeles, CA, USA, June 1-6, 2018},
  editor    = {Murali Annavaram and Timothy Mark Pinkston and Babak Falsafi},
  publisher = {{IEEE} Computer Society},
  year      = 2018,
  pages     = {207--220},
  doi       = {10.1109/ISCA.2018.00027},
  url       = {https://doi.org/10.1109/ISCA.2018.00027},
  timestamp = {Sun, 10 May 2020 21:12:50 +0200},
  biburl    = {https://dblp.org/rec/conf/isca/VijaykumarJMHPE18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
TBD: Benchmarking and Analyzing Deep Neural Network Training BibTeX
Hongyu Zhu, Mohamed Akrout, Bojian Zheng, Andrew Pelegris, Amar Phanishayee, Bianca Schroeder, Gennady Pekhimenko
arXiv, March 2018

@article{UofTEcoSystem_TBD_arXiv,
  author    = {Hongyu Zhu and
               Mohamed Akrout and
               Bojian Zheng and
               Andrew Pelegris and
               Amar Phanishayee and
               Bianca Schroeder and
               Gennady Pekhimenko},
  title     = {{TBD}: Benchmarking and Analyzing Deep Neural Network Training},
  journal   = {CoRR},
  volume    = {abs/1803.06905},
  year      = {2018},
  url       = {https://arxiv.org/abs/1803.06905},
  archivePrefix = {arXiv},
  eprint    = {1803.06905},
  timestamp = {Mon, 13 Aug 2018 16:46:27 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1803-06905.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
DNN-Train: Benchmarking and Analyzing DNN Training BibTeX
Hongyu Zhu, Bojian Zheng, Amar Phanishayee, Bianca Schroeder, Gennady Pekhimenko
SysML Conference, February 2018

@online{UofTEcoSystem_TBD_MLSys,
  author = {Hongyu Zhu and
            Bojian Zheng and
            Amar Phanishayee and
            Bianca Schroeder and
            Gennady Pekhimenko},
  title  = {{DNN-Train}: Benchmarking and Analyzing {DNN} Training},
  year   = {2018},
  url    = {https://mlsys.org/Conferences/doc/2018/167.pdf}
}
2017 Publications
StreamBox: Modern Stream Processing on a Multicore Machine BibTeX
Hongyu Miao, Heejin Park, Myeongjae Jeon, Gennady Pekhimenko, Kathryn S. McKinley, Felix Xiaozhu Lin
USENIX Annual Technical Conference, July 2017

@inproceedings{UofTEcoSystem_StreamBox_ATC,
  ids       = {UofTEcoSystem_Streambox},
  author    = {Hongyu Miao and
               Heejin Park and
               Myeongjae Jeon and
               Gennady Pekhimenko and
               Kathryn S. McKinley and
               Felix Xiaozhu Lin},
  editor    = {Da Silva, Dilma and
               Ford, Bryan},
  title     = {{StreamBox}: Modern Stream Processing on a Multicore Machine},
  booktitle = {2017 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2017, Santa
               Clara, CA, USA, July 12-14, 2017},
  pages     = {617--629},
  publisher = {{USENIX} Association},
  year      = {2017},
  url       = {https://www.usenix.org/conference/atc17/technical-sessions/presentation/miao},
  timestamp = {Tue, 09 Feb 2021 13:47:51 +0100},
  biburl    = {https://dblp.org/rec/conf/usenix/MiaoPJPML17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms BibTeX
Donghyuk Lee, Samira Khan, Lavanya Subramanian, Saugata Ghose, Rachata Ausavarungnirun, Gennady Pekhimenko, Vivek Seshadri, Onur Mutlu
SIGMETRICS, June 2017

@article{UofTEcoSystem_DIVA,
  title     = {Design-Induced Latency Variation in Modern {DRAM} Chips:
               Characterization, Analysis, and Latency Reduction Mechanisms},
  author    = {Donghyuk Lee and Samira Manabi Khan and Lavanya Subramanian and
               Saugata Ghose and Rachata Ausavarungnirun and
               Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu},
  journal   = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
  year      = 2017,
  volume    = 1,
  number    = 1,
  pages     = {26:1--26:36},
  doi       = {10.1145/3084464},
  url       = {https://doi.org/10.1145/3084464},
  timestamp = {Thu, 09 Jul 2020 22:58:52 +0200},
  biburl    = {https://dblp.org/rec/journals/pomacs/LeeKSGAPSM17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
SoftMC: A Flexible and Practical Open-Source Infrastructure for Enabling Experimental DRAM Studies BibTeX
Hasan Hassan, Nandita Vijaykumar, Samira Khan, Saugata Ghose, Kevin Chang, Gennady Pekhimenko, Donghyuk Lee, Oguz Ergin, Onur Mutlu
HPCA-23, February 2017

@inproceedings{UofTEcoSystem_SoftMC,
  author    = {Hasan Hassan and
               Nandita Vijaykumar and
               Samira Manabi Khan and
               Saugata Ghose and
               Kevin K. Chang and
               Gennady Pekhimenko and
               Donghyuk Lee and
               Oguz Ergin and
               Onur Mutlu},
  title     = {{SoftMC}: A Flexible and Practical Open-Source Infrastructure for
               Enabling Experimental {DRAM} Studies},
  booktitle = {2017 {IEEE} International Symposium on High Performance Computer
               Architecture, {HPCA} 2017, Austin, TX, USA, February 4-8, 2017},
  pages     = {241--252},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/HPCA.2017.62},
  doi       = {10.1109/HPCA.2017.62}
}