% Conference paper introducing LCP, a low-communication parallelization method
% for DNN inference on IoT devices (CSCE 2023 proceedings).
% The citation key is kept unchanged so existing citations continue to resolve.
% Author names are stored in "Last, First" form; multi-word given names
% (e.g. "Da Eun") need no extra bracing in that form.
@inproceedings{e7df518281174f569b37e4d564785981,
  author    = {Hadidi, Ramyad and Asgari, Bahar and Cao, Jiashen and Bae, Younmin and Shim, Da Eun and Kim, Hyojong and Lim, Sung Kyu and Ryoo, Michael S. and Kim, Hyesoon},
  title     = {{LCP}: A Low-Communication Parallelization Method for Fast Neural Network Inference for {IoT}},
  booktitle = {Proceedings - 2023 Congress in Computer Science, Computer Engineering, and Applied Computing, {CSCE} 2023},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2023},
  pages     = {1670--1677},
  doi       = {10.1109/CSCE60160.2023.00274},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 2023 Congress in Computer Science, Computer Engineering, and Applied Computing, CSCE 2023 ; Conference date: 24-07-2023 Through 27-07-2023},
  language  = {English},
  keywords  = {Distributed, DNN, FPGA, Inference, IoT, Parallel},
  abstract  = {Deep neural networks (DNNs) have stimulated research in diverse edge applications including robotics and Internet-of-Things (IoT) devices. However, IoT-based DNN inference poses significant challenges due to resource constraints. Further, as communication is costly, taking advantage of other available IoT devices by using data- or model-parallelism methods is not an effective solution. We introduce a low-communication parallelization (LCP) method to minimize communication overhead in distributed IoT systems. LCP models consist of multiple, largely-independent, narrow branches, providing enhanced distribution and parallelization opportunities while reducing memory and computational requirements. Implemented on AWS instances, Raspberry Pis, and PYNQ boards, as well as a customized 16mW 0.107mm$^2$ ASIC @7nm chip, LCP models yield maximum and average speedups of 56x and 7x, compared to original models, which could be improved by incorporating common optimizations such as pruning and quantization.},
}