@comment{Conference paper: Koelbel, Mehrotra, Saltz, Berryman -- "Parallel loops on distributed machines", DMCC 1990. Editor names are stored as plain "Last, First" (no escaped braces) so styles can parse and format them.}
@inproceedings{116990a6fa0848469868d467babbaa7f,
  author    = {Koelbel, Charles and Mehrotra, Piyush and Saltz, Joel and Berryman, Harry},
  title     = {Parallel loops on distributed machines},
  editor    = {Walker, David W. and Stout, Quentin F.},
  booktitle = {Architectures, Software Tools and Other General Issues},
  series    = {Proceedings of the 5th Distributed Memory Computing Conference, DMCC 1990},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {1097--1104},
  year      = {1990},
  doi       = {10.1109/DMCC.1990.556322},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 1990 IEEE.; 5th Distributed Memory Computing Conference, DMCC 1990 ; Conference date: 08-04-1990 Through 12-04-1990},
  abstract  = {Any programming environment for distributed memory machines that allows the user to specify parallel do loops over globally defined data structures requires optimizations that go beyond the specification of appropriate data and workload partitionings. In this paper, we consider optimizations that are required for efficient execution of a code segment that consists of parallel loops over distributed data structures. On distributed memory machines it is typically very expensive to fetch individual data elements. Instead, before a parallel loop executes, it is desirable to prefetch all off-processor data required in the loop. We specify a scheme for storing copies of fetched data along with a scheme for accessing copies of off-processor data during the computation of the loop. The performance of such optimizations on the iPSC/2 and the NCUBE is also presented.},
}