1.
Antonescu, Mihai; Maliţa, Mihaela; Ştefan, Gheorghe M.
Latency Hiding of Log-Depth Scan and Reduce Networks in Heterogenous Embedded Systems Proceedings Article
In: 2023 IEEE 29th International Symposium for Design and Technology in Electronic Packaging (SIITME), pp. 81-86, 2023, ISSN: 2642-7036.
Abstract | Links | BibTeX | Tags: Program processors;Embedded systems;Power demand;Electron accelerators;Computer architecture;Pipeline processing;Electronics packaging;Latency avoidance;Map-Scan accelerator;MapReduce accelerator
% Conference paper in the SIITME 2023 proceedings. The citation key is kept
% unchanged (it equals the trailing component of the DOI) so existing
% citations keep resolving. Both year and date are retained: year for classic
% BibTeX styles, date for biblatex. pubstate/tppubtype look like export
% metadata from the publication plugin -- left untouched (confirm before removing).
@inproceedings{10430611,
  author     = {Antonescu, Mihai and Maliţa, Mihaela and Ştefan, Gheorghe M.},
  title      = {Latency Hiding of Log-Depth Scan and Reduce Networks in Heterogenous Embedded Systems},
  booktitle  = {2023 {IEEE} 29th International Symposium for Design and Technology in Electronic Packaging ({SIITME})},
  pages      = {81--86},
  year       = {2023},
  date       = {2023-10-01},
  issn       = {2642-7036},
  doi        = {10.1109/SIITME59799.2023.10430611},
  keywords   = {Program processors, Embedded systems, Power demand, Electron accelerators, Computer architecture, Pipeline processing, Electronics packaging, Latency avoidance, Map-Scan accelerator, MapReduce accelerator},
  abstract   = {This paper discusses methods, algorithmic examples and general principles regarding latency reduction methods for single chip Map-Reduce and Map-Scan many-core architectures. Processors designed for embedded systems suffer performance limitations (both performance and power consumption) when running intense instead of complex computations. A common solution is to add accelerators to the host processor in order to offload parts of the intense computations. We consider a Map-Scan-Reduce many-core architecture to be highly effective as a general-purpose accelerator and in this paper, we discuss the latencies introduced by the Scan and Reduce networks and ways in which to hide them based on practical applications and the solutions we have employed. Proper usage of pipelining technique and algorithmic improvements helps us obtain in simulations supralinear accelerations in relation to the number of processing cores used for the algorithms presented: matrix-vector/matrix multiplication, FFT, pooling.},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
This paper discusses methods, algorithmic examples and general principles regarding latency reduction methods for single chip Map-Reduce and Map-Scan many-core architectures. Processors designed for embedded systems suffer performance limitations (both performance and power consumption) when running intense instead of complex computations. A common solution is to add accelerators to the host processor in order to offload parts of the intense computations. We consider a Map-Scan-Reduce many-core architecture to be highly effective as a general-purpose accelerator and in this paper, we discuss the latencies introduced by the Scan and Reduce networks and ways in which to hide them based on practical applications and the solutions we have employed. Proper usage of pipelining technique and algorithmic improvements helps us obtain in simulations supralinear accelerations in relation to the number of processing cores used for the algorithms presented: matrix-vector/matrix multiplication, FFT, pooling.