Hauptinhalt

LS12 Publications on WCET-aware Compilation

2015

Timon Kelter.
WCET Analysis and Optimization for Multi-Core Real-Time Systems.
PhD Thesis, March 2015
[BibTeX][PDF]

@phdthesis { kelter:2015:phdthesis,
  title = {{WCET} Analysis and Optimization for Multi-Core Real-Time Systems},
  author = {Kelter, Timon},
  school = {TU Dortmund, Department of Computer Science},
  year = {2015},
  month = mar,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/kelter-phd.pdf},
  confidential = {n},
  adviser = {Prof. Dr. Peter Marwedel},
}

2014

	Timon Kelter and Peter Marwedel. Parallelism Analysis: Precise WCET Values for Complex Multi-Core Systems. In Third International Workshop on Formal Techniques for Safety-Critical Systems Luxembourg, November 2014 [BibTeX][PDF][Link]
@inproceedings { kelter:2014:ftscs,
  author = {Kelter, Timon and Marwedel, Peter},
  title = {Parallelism Analysis: Precise {WCET} Values for Complex Multi-Core Systems},
  booktitle = {Third International Workshop on Formal Techniques for Safety-Critical Systems},
  year = {2014},
  editor = {Artho, Cyrille and {\"O}lveczky, Peter},
  series = {FTSCS},
  address = {Luxembourg},
  month = nov,
  publisher = {Springer},
  url = {http://www.ftscs.org/index.php?n=Main.Home},
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2014-kelter-ftscs.pdf},
  confidential = {n},
}
	Chen-Wei Huang, Timon Kelter, Bjoern Boenninghoff, Jan Kleinsorge, Michael Engel, Peter Marwedel and Shiao-Li Tsao. Static WCET Analysis of the H.264/AVC Decoder Exploiting Coding Information. In International Conference on Embedded and Real-Time Computing Systems and Applications Chongqing, China, August 2014 [BibTeX]
@inproceedings { huang:2014:rtcsa,
  author = {Huang, Chen-Wei and Kelter, Timon and Boenninghoff, Bjoern and Kleinsorge, Jan and Engel, Michael and Marwedel, Peter and Tsao, Shiao-Li},
  title = {Static {WCET} Analysis of the {H.264/AVC} Decoder Exploiting Coding Information},
  booktitle = {International Conference on Embedded and Real-Time Computing Systems and Applications},
  year = {2014},
  address = {Chongqing, China},
  month = aug,
  organization = {IEEE},
  keywords = {wcet},
  confidential = {n},
}
	Sudipta Chattopadhyay, Lee Kee Chong, Abhik Roychoudhury, Timon Kelter, Peter Marwedel and Heiko Falk. A Unified WCET Analysis Framework for Multi-core Platforms. ACM Transactions on Embedded Computing Systems (TECS) 13 4s July 2014 [BibTeX][Link][Abstract]
@article { kelter:2014:tecs,
  author = {Chattopadhyay, Sudipta and Chong, Lee Kee and Roychoudhury, Abhik and Kelter, Timon and Marwedel, Peter and Falk, Heiko},
  title = {A Unified {WCET} Analysis Framework for Multi-core Platforms},
  journal = {ACM Transactions on Embedded Computing Systems (TECS)},
  year = {2014},
  volume = {13},
  number = {4s},
  month = jul,
  url = {http://dl.acm.org/citation.cfm?id=2584654},
  keywords = {wcet},
  confidential = {n},
  abstract = {With the advent of multi-core architectures, worst case execution time (WCET) analysis has become an increasingly difficult problem. In this paper, we propose a unified WCET analysis framework for multi-core processors featuring both shared cache and shared bus. Compared to other previous works, our work differs by modeling the interaction of shared cache and shared bus with other basic micro-architectural components (e.g. pipeline and branch predictor). In addition, our framework does not assume a timing anomaly free multi-core architecture for computing the WCET. A detailed experiment methodology suggests that we can obtain reasonably tight WCET estimates in a wide range of benchmark programs.},
}
With the advent of multi-core architectures, worst case execution time (WCET) analysis has become an increasingly difficult problem. In this paper, we propose a unified WCET analysis framework for multi-core processors featuring both shared cache and shared bus. Compared to other previous works, our work differs by modeling the interaction of shared cache and shared bus with other basic micro-architectural components (e.g. pipeline and branch predictor).
In addition, our framework does not assume a timing anomaly free multi-core architecture for computing the WCET. A detailed experiment methodology suggests that we can obtain reasonably tight WCET estimates in a wide range of benchmark programs.
	Timon Kelter, Peter Marwedel and Hendrik Borghorst. WCET-aware Scheduling Optimizations for Multi-Core Real-Time Systems. In International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS), pages 67-74 Samos, Greece, July 2014 [BibTeX][PDF]
@inproceedings { kelter:2014:samos,
  author = {Kelter, Timon and Marwedel, Peter and Borghorst, Hendrik},
  title = {{WCET}-aware Scheduling Optimizations for Multi-Core Real-Time Systems},
  booktitle = {International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)},
  year = {2014},
  pages = {67--74},
  address = {Samos, Greece},
  month = jul,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2014-samos.pdf},
  confidential = {n},
}
	Timon Kelter, Heiko Falk, Peter Marwedel, Sudipta Chattopadhyay and Abhik Roychoudhury. Static Analysis of Multi-Core TDMA Resource Arbitration Delays. Real-Time Systems 50 2, pages 185-229 March 2014 [BibTeX][Link]
@article { kelter:2014:rts,
  author = {Kelter, Timon and Falk, Heiko and Marwedel, Peter and Chattopadhyay, Sudipta and Roychoudhury, Abhik},
  title = {Static Analysis of Multi-Core {TDMA} Resource Arbitration Delays},
  journal = {Real-Time Systems},
  year = {2014},
  volume = {50},
  number = {2},
  pages = {185--229},
  month = mar,
  doi = {10.1007/s11241-013-9189-x},
  url = {http://link.springer.com/article/10.1007%2Fs11241-013-9189-x},
  keywords = {wcet},
  confidential = {n},
}

2013

	Tim Harde. Vergleichende Studie von Arbitrierungsverfahren für Kommunikationsstrukturen in eingebetteten Multicoresystemen. Bachelor Thesis, 2013 [BibTeX][PDF]
@bachelorthesis { harde:2013,
  author = {Harde, Tim},
  title = {Vergleichende Studie von Arbitrierungsverfahren f\"ur Kommunikationsstrukturen in eingebetteten Multicoresystemen},
  school = {TU Dortmund},
  year = {2013},
  adviser = {Timon Kelter},
  keywords = {wcet simulation},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/harde.pdf},
  confidential = {n},
}
	Christian Günter. Unterstützung modularer WCET-Analyse durch annotierte Binärobjekte. Bachelor Thesis, 2013 [BibTeX][PDF]
@bachelorthesis { guenter:2013,
  title = {Unterst{\"u}tzung modularer WCET-Analyse durch annotierte Bin{\"a}robjekte},
  author = {G{\"u}nter, Christian},
  school = {TU Dortmund},
  year = {2013},
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/guenter.pdf},
  confidential = {n},
  adviser = {Timon Kelter},
}
	Hendrik Borghorst. Schedulingverfahren zur WCET-Reduktion in eingebetteten Multicore-Systemen. Master's Thesis, 2013 [BibTeX][PDF]
@mastersthesis { borghorst:2013,
  author = {Borghorst, Hendrik},
  title = {Schedulingverfahren zur WCET-Reduktion in eingebetteten Multicore-Systemen},
  school = {TU Dortmund},
  year = {2013},
  adviser = {Timon Kelter},
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/borghorst-ma.pdf},
  confidential = {n},
}
	Jan Kleinsorge, Heiko Falk and Peter Marwedel. Simple Analysis of Partial Worst-case Execution Paths on General Control Flow Graphs. In Proceedings of the International Conference on Embedded Software (EMSOFT 2013) Montreal, Canada, October 2013 [BibTeX][Link]
@inproceedings { Kleinsorge:2013:EMSOFT,
  author = {Kleinsorge, Jan and Falk, Heiko and Marwedel, Peter},
  title = {Simple Analysis of Partial Worst-case Execution Paths on General Control Flow Graphs},
  booktitle = {Proceedings of the International Conference on Embedded Software (EMSOFT 2013)},
  year = {2013},
  series = {EMSOFT 2013},
  address = {Montreal, Canada},
  month = oct,
  url = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2013_emsoft.pdf},
  keywords = {wcet; Worst-case Execution Time; Path Analysis; Static Analysis},
  confidential = {n},
}
	Timon Kelter, Tim Harde, Peter Marwedel and Heiko Falk. Evaluation of resource arbitration methods for multi-core real-time systems. In Proceedings of the 13th International Workshop on Worst-Case Execution Time Analysis (WCET) Paris, France, July 2013 [BibTeX][PDF][Link][Abstract] @inproceedings { kelter:2013:wcet, author = {Kelter, Timon and Harde, Tim and Marwedel, Peter and Falk, Heiko}, title = {Evaluation of resource arbitration methods for multi-core real-time systems}, booktitle = {Proceedings of the 13th International Workshop on Worst-Case Execution Time Analysis (WCET)}, year = {2013}, editor = {Claire Maiza}, address = {Paris, France}, month = {July}, url = {http://wcet2013.imag.fr/}, keywords = {wcet}, file = {http://drops.dagstuhl.de/opus/volltexte/2013/4117/pdf/2.pdf}, confidential = {n}, abstract = {Multi-core systems have become prevalent in the last years, because of their favorable properties in terms of energy consumption, computing power and design complexity. First attempts have been made to devise WCET analyses for multi-core processors, which have to deal with the problem that the cores may experience interferences during accesses to shared resources. To limit these interferences, the vast amount of previous work is proposing a strict TDMA (time division multiple access) schedule for arbitrating shared resources. Though this type of arbitration yields a high predictability, this advantage is paid for with a poor resource utilization. In this work, we compare different arbitration methods with respect to their predictability and average case performance. We show how known WCET analysis techniques can be extended to work with the presented arbitration strategies and perform an evaluation of the resulting ACETs and WCETs on an extensive set of realworld benchmarks. 
Results show that there are cases when TDMA is not the best strategy, especially when predictability and performance are equally important.}, } Multi-core systems have become prevalent in the last years, because of their favorable properties in terms of energy consumption, computing power and design complexity. First attempts have been made to devise WCET analyses for multi-core processors, which have to deal with the problem that the cores may experience interferences during accesses to shared resources. To limit these interferences, the vast amount of previous work is proposing a strict TDMA (time division multiple access) schedule for arbitrating shared resources. Though this type of arbitration yields a high predictability, this advantage is paid for with a poor resource utilization. In this work, we compare different arbitration methods with respect to their predictability and average case performance. We show how known WCET analysis techniques can be extended to work with the presented arbitration strategies and perform an evaluation of the resulting ACETs and WCETs on an extensive set of realworld benchmarks. Results show that there are cases when TDMA is not the best strategy, especially when predictability and performance are equally important.

2012

	Sascha Plazar. Memory-based Optimization Techniques for Real-Time Systems. PhD Thesis, Dortmund, Germany, July 2012 [BibTeX][PDF][Link][Abstract] @phdthesis { plazar:12:diss, title = {Memory-based Optimization Techniques for Real-Time Systems}, author = {Plazar, Sascha}, school = {TU Dortmund, Department of Computer Science}, year = {2012}, type = {Dissertation}, address = {Dortmund, Germany}, month = {jul}, url = {https://eldorado.tu-dortmund.de/handle/2003/29500}, keywords = {wcet}, file = {https://eldorado.tu-dortmund.de/bitstream/2003/29500/1/Dissertation.pdf}, confidential = {n}, abstract = {Embedded/Cyber-physical systems have become popular in a wide range of application scenarios. Such systems are called real-time systems if they underlie strict timing constraints. To verify if such systems can meet their deadlines, the knowledge of an upper bound for a program's execution time is mandatory. This upper bound is also called worst-case execution time (WCET) and is estimated by static timing analyzers. Established optimizing compilers are not aware of the WCET as objective since they focus on the minimization of the average-case execution time (ACET). To overcome this obstacle, this thesis presents memory-based optimization techniques which focus on the reduction of the WCET of programs. All presented optimizations are integrated into the WCET-aware C Compiler (WCC) framework. Since the memory interface of a system often turns out to be a bottleneck which limits the performance of a system, the presented optimizations are applied to different levels of the memory hierarchy of a system. Starting within a CPU core, the instruction fetch buffer is the most tightly coupled memory which tries to provide the next few instructions to be executed. Optimization techniques are presented improving the efficiency of this buffer w.r.t. the WCET of a system. 
Instruction caches placed between the CPU core and the main memory try to speed up accesses to the main memory by storing local copies in fast small cache memories. In order to improve the efficiency of this part of the memory hierarchy, a memory content selection approach is introduced which improves the WCET of a program by improving the cache performance. Due to the fact that multi-task systems are employed in almost all domains, this thesis presents elaborate extensions to a compiler supporting the compilation and WCET-aware optimization of multi-task systems. These extensions are exploited to develop a number of novel optimizations for systems running multiple tasks. As first optimization, a WCET-driven software-based cache partitioning demonstrates the effectiveness of considering the WCET for the optimization of a set of tasks. Furthermore, many embedded systems integrate so-called scratchpad memories (SPM) as tightly coupled memories. An optimization approach for SPM allocation in a multi-task scenario is proposed. Besides, a holistic view of memory architecture compilation considers a number of memory-based WCET optimizations and presents approaches for a combined application. Existing compiler frameworks which are able to consider the WCET during optimization are limited to a particular hardware platform. In order to support multiple platforms, this thesis presents techniques to extend an existing WCET-aware compiler framework. Based on these extensions, a novel static cache locking optimization selects memory blocks which are statically locked into the instruction cache driven by WCET reductions. Applying these optimizations, the WCET of real-time applications can be reduced by about 35% to 48%. These results underline the need for specialized WCET-driven optimization techniques integrated into a sophisticated compiler framework. 
Otherwise, immense optimization potential would remain unused resulting in oversized and thus costly Embedded/Cyber-physical systems. }, } Embedded/Cyber-physical systems have become popular in a wide range of application scenarios. Such systems are called real-time systems if they underlie strict timing constraints. To verify if such systems can meet their deadlines, the knowledge of an upper bound for a program's execution time is mandatory. This upper bound is also called worst-case execution time (WCET) and is estimated by static timing analyzers. Established optimizing compilers are not aware of the WCET as objective since they focus on the minimization of the average-case execution time (ACET). To overcome this obstacle, this thesis presents memory-based optimization techniques which focus on the reduction of the WCET of programs. All presented optimizations are integrated into the WCET-aware C Compiler (WCC) framework. Since the memory interface of a system often turns out to be a bottleneck which limits the performance of a system, the presented optimizations are applied to different levels of the memory hierarchy of a system. Starting within a CPU core, the instruction fetch buffer is the most tightly coupled memory which tries to provide the next few instructions to be executed. Optimization techniques are presented improving the efficiency of this buffer w.r.t. the WCET of a system. Instruction caches placed between the CPU core and the main memory try to speed up accesses to the main memory by storing local copies in fast small cache memories. In order to improve the efficiency of this part of the memory hierarchy, a memory content selection approach is introduced which improves the WCET of a program by improving the cache performance. Due to the fact that multi-task systems are employed in almost all domains, this thesis presents elaborate extensions to a compiler supporting the compilation and WCET-aware optimization of multi-task systems. 
These extensions are exploited to develop a number of novel optimizations for systems running multiple tasks. As first optimization, a WCET-driven software-based cache partitioning demonstrates the effectiveness of considering the WCET for the optimization of a set of tasks. Furthermore, many embedded systems integrate so-called scratchpad memories (SPM) as tightly coupled memories. An optimization approach for SPM allocation in a multi-task scenario is proposed. Besides, a holistic view of memory architecture compilation considers a number of memory-based WCET optimizations and presents approaches for a combined application. Existing compiler frameworks which are able to consider the WCET during optimization are limited to a particular hardware platform. In order to support multiple platforms, this thesis presents techniques to extend an existing WCET-aware compiler framework. Based on these extensions, a novel static cache locking optimization selects memory blocks which are statically locked into the instruction cache driven by WCET reductions. Applying these optimizations, the WCET of real-time applications can be reduced by about 35% to 48%. These results underline the need for specialized WCET-driven optimization techniques integrated into a sophisticated compiler framework. Otherwise, immense optimization potential would remain unused resulting in oversized and thus costly Embedded/Cyber-physical systems.
	Sascha Plazar, Jan Kleinsorge, Heiko Falk and Peter Marwedel. WCET-aware Static Locking of Instruction Caches. In Proceedings of the International Symposium on Code Generation and Optimization (CGO), pages 44-52 San Jose, CA, USA, April 2012 [BibTeX][Link][Abstract] @inproceedings { plazar:2012:cgo, author = {Plazar, Sascha and Kleinsorge, Jan and Falk, Heiko and Marwedel, Peter}, title = {WCET-aware Static Locking of Instruction Caches}, booktitle = {Proceedings of the International Symposium on Code Generation and Optimization (CGO)}, year = {2012}, pages = {44-52}, address = {San Jose, CA, USA}, month = {apr}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.050/profile/profil_hfalk/publications/20120402-cgo-plazar.pdf}, keywords = {wcet}, confidential = {n}, abstract = {In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches. This step was necessary in order to bridge the increasingly growing gap between processor and memory system performance. Static analysis techniques had to be developed to allow the estimation of the cache behavior and an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a WCET-aware optimization technique for static I-cache locking which improves a program’s performance and predictability. To select the memory blocks to lock into the cache and avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes the influence of locked cache contents into account. 
By modeling the effect of locked memory blocks on the runtime of basic blocks, the overall WCET of a program can be minimized. We show that our optimization is able to reduce the WCET of real-life benchmarks by up to 40.8%. At the same time, our proposed approach is able to outperform a regular cache by up to 23.8% in terms of WCET.}, } In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches. This step was necessary in order to bridge the increasingly growing gap between processor and memory system performance. Static analysis techniques had to be developed to allow the estimation of the cache behavior and an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a WCET-aware optimization technique for static I-cache locking which improves a program’s performance and predictability. To select the memory blocks to lock into the cache and avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes the influence of locked cache contents into account. By modeling the effect of locked memory blocks on the runtime of basic blocks, the overall WCET of a program can be minimized. We show that our optimization is able to reduce the WCET of real-life benchmarks by up to 40.8%. At the same time, our proposed approach is able to outperform a regular cache by up to 23.8% in terms of WCET.
	Sudipta Chattopadhyay, Lee Kee Chong, Abhik Roychoudhury, Timon Kelter, Heiko Falk and Peter Marwedel. A Unified WCET Analysis Framework for Multi-core Platforms. In IEEE Real-Time and Embedded Technology and Applications Symposium (RTAS), pages 99-108 Beijing, China, April 2012 [BibTeX][PDF][Link][Abstract]
@inproceedings { kelter:2012:rtas,
  author = {Chattopadhyay, Sudipta and Chong, Lee Kee and Roychoudhury, Abhik and Kelter, Timon and Falk, Heiko and Marwedel, Peter},
  title = {A Unified {WCET} Analysis Framework for Multi-core Platforms},
  booktitle = {IEEE Real-Time and Embedded Technology and Applications Symposium (RTAS)},
  year = {2012},
  pages = {99--108},
  address = {Beijing, China},
  month = apr,
  url = {http://www.rtas.org/12-home.htm},
  keywords = {wcet},
  file = {http://www.comp.nus.edu.sg/~sudiptac/papers/mxtiming.pdf},
  confidential = {n},
  abstract = {With the advent of multi-core architectures, worst case execution time (WCET) analysis has become an increasingly difficult problem. In this paper, we propose a unified WCET analysis framework for multi-core processors featuring both shared cache and shared bus. Compared to other previous works, our work differs by modeling the interaction of shared cache and shared bus with other basic micro-architectural components (e.g. pipeline and branch predictor). In addition, our framework does not assume a timing anomaly free multicore architecture for computing the WCET. A detailed experiment methodology suggests that we can obtain reasonably tight WCET estimates in a wide range of benchmark programs.},
}
With the advent of multi-core architectures, worst case execution time (WCET) analysis has become an increasingly difficult problem. In this paper, we propose a unified WCET analysis framework for multi-core processors featuring both shared cache and shared bus.
Compared to other previous works, our work differs by modeling the interaction of shared cache and shared bus with other basic micro-architectural components (e.g. pipeline and branch predictor). In addition, our framework does not assume a timing anomaly free multicore architecture for computing the WCET. A detailed experiment methodology suggests that we can obtain reasonably tight WCET estimates in a wide range of benchmark programs.
	Heiko Falk and Jan C. Kleinsorge. Reconciling Compilers and Timing Analysis for Safety-Critical Real-Time Systems — the WCET-aware C Compiler WCC (Tutorial). March 2012 [BibTeX]
@misc { falk:2012:cgo,
  author = {Falk, Heiko and Kleinsorge, Jan C.},
  title = {Reconciling Compilers and Timing Analysis for Safety-Critical Real-Time Systems --- the {WCET}-aware {C} Compiler {WCC} (Tutorial)},
  howpublished = {International Symposium on Code Generation and Optimization (CGO), San Jose / United States},
  month = mar,
  year = {2012},
  keywords = {wcet},
  confidential = {n},
}
	Heiko Falk, Peter Marwedel and Paul Lokuciejewski. Reconciling Compilation and Timing Analysis. March 2012 [BibTeX][Link][Abstract]
@incollection { falk:2012:arts,
  author = {Falk, Heiko and Marwedel, Peter and Lokuciejewski, Paul},
  title = {Reconciling Compilation and Timing Analysis},
  booktitle = {Advances in Real-Time Systems},
  editor = {Chakraborty, Samarjit and Ebersp{\"a}cher, J{\"o}rg},
  chapter = {7},
  pages = {145--170},
  publisher = {Springer},
  year = {2012},
  month = mar,
  url = {http://www.springer.com/engineering/signals/book/978-3-642-24348-6},
  keywords = {wcet},
  confidential = {n},
  abstract = {Timing constraints must be respected for embedded real-time applications. Traditionally, compilers are unable to use precise estimates of execution times for optimization, and timing properties of code are derived after compilation. A number of design iterations are required if timing constraints are not met. We propose to reconcile compilers and timing analysis and to create a worst-case execution time (WCET) aware compiler in this way. Such WCET-aware compilers can exploit precise WCET information during compilation. This way, they are able to improve the code quality. Also, we may be able to avoid some of the design iterations. In this chapter, we describe the integration of a compiler and a WCET analyzer, yielding our WCET-aware compiler WCC. We are then reconsidering standard compiler optimizations for their potential to reduce the WCET, assuming that the WCET is now used as the cost function. Considered optimizations include function inlining, loop unrolling, loop unswitching, register allocation, scratchpad memory allocation, and cache partitioning. For a set of benchmarks, average WCET reductions of up to 40\% were observed. The results indicate that this new area of research has the potential of achieving worthwhile execution time reductions for real-time code.},
}
Timing constraints must be respected for embedded real-time applications.
Traditionally, compilers are unable to use precise estimates of execution times for optimization, and timing properties of code are derived after compilation. A number of design iterations are required if timing constraints are not met. We propose to reconcile compilers and timing analysis and to create a worst-case execution time (WCET) aware compiler in this way. Such WCET-aware compilers can exploit precise WCET information during compilation. This way, they are able to improve the code quality. Also, we may be able to avoid some of the design iterations. In this chapter, we describe the integration of a compiler and a WCET analyzer, yielding our WCET-aware compiler WCC.We are then reconsidering standard compiler optimizations for their potential to reduce the WCET, assuming that the WCET is now used as the cost function. Considered optimizations include function inlining, loop unrolling, loop unswitching, register allocation, scratchpad memory allocation, and cache partitioning. For a set of benchmarks, average WCET reductions of up to 40% were observed. The results indicate that this new area of research has the potential of achieving worthwhile execution time reductions for real-time code.

2011

	Hendrik Borghorst. WCET bewusste Scratchpad-Speicherallokation von Code und Daten für Multi-Task Systeme. Bachelor Thesis, 2011 [BibTeX][PDF]
@bachelorthesis { Borghorst:2011,
  author = {Borghorst, Hendrik},
  title = {WCET bewusste Scratchpad-Speicherallokation von Code und Daten f\"ur Multi-Task Systeme},
  school = {TU Dortmund},
  year = {2011},
  adviser = {Sascha Plazar},
  keywords = {wcet optimizations},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/borghorst.pdf},
  confidential = {n},
}
	Heiko Falk and Helena Kotthaus. WCET-driven Cache-aware Code Positioning. In Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES), pages 145-154 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract]
@inproceedings { falk:11:cases,
  author = {Falk, Heiko and Kotthaus, Helena},
  title = {{WCET}-driven Cache-aware Code Positioning},
  booktitle = {Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES)},
  year = {2011},
  pages = {145--154},
  address = {Taipei, Taiwan},
  month = oct,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-cases_1.pdf},
  confidential = {n},
  abstract = {Code positioning is a well-known compiler optimization aiming at the improvement of the instruction cache behavior. A contiguous mapping of code fragments in memory avoids overlapping of cache sets and thus decreases the number of cache conflict misses. We present a novel cache-aware code positioning optimization driven by worst-case execution time (WCET) information. For this purpose, we introduce a formal cache model based on a conflict graph which is able to capture a broad class of cache architectures. This cache model is combined with a formal WCET timing model, resulting in a cache conflict graph weighted with WCET data. This conflict graph is then exploited by heuristics for code positioning of both basic blocks and entire functions. Code positioning is able to decrease the accumulated cache misses for a total of 18 real-life benchmarks by 15.5\% on average for an automotive processor featuring a 2-way set-associative cache. These cache miss reductions translate to average WCET reductions by 6.1\%. For direct-mapped caches, even larger savings of 18.8\% (cache misses) and 9.0\% (WCET) were achieved.},
}
Code positioning is a well-known compiler optimization aiming at the improvement of the instruction cache behavior.
A contiguous mapping of code fragments in memory avoids overlapping of cache sets and thus decreases the number of cache conflict misses. We present a novel cache-aware code positioning optimization driven by worst-case execution time (WCET) information. For this purpose, we introduce a formal cache model based on a conflict graph which is able to capture a broad class of cache architectures. This cache model is combined with a formal WCET timing model, resulting in a cache conflict graph weighted with WCET data. This conflict graph is then exploited by heuristics for code positioning of both basic blocks and entire functions. Code positioning is able to decrease the accumulated cache misses for a total of 18 real-life benchmarks by 15.5% on average for an automotive processor featuring a 2-way set-associative cache. These cache miss reductions translate to average WCET reductions by 6.1%. For direct-mapped caches, even larger savings of 18.8% (cache misses) and 9.0% (WCET) were achieved.
	Sascha Plazar, Jan C. Kleinsorge, Heiko Falk and Peter Marwedel. WCET-driven Branch Prediction aware Code Positioning. In Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES), pages 165-174 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract] @inproceedings { plazar:11:cases, author = {Plazar, Sascha and Kleinsorge, Jan C. and Falk, Heiko and Marwedel, Peter}, title = {WCET-driven Branch Prediction aware Code Positioning}, booktitle = {Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES)}, year = {2011}, pages = {165--174}, address = {Taipei, Taiwan}, month = oct, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-cases_2.pdf}, confidential = {n}, abstract = {In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches, branch prediction units and speculative execution. This step was necessary in order to fulfill increasing requirements on computational power. Static analysis techniques considering such speculative units had to be developed to allow the estimation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a WCET-driven branch prediction aware optimization which reorders basic blocks of a function in order to reduce the amount of jump instructions and mispredicted branches. We employed a genetic algorithm which rearranges basic blocks in order to decrease the WCET of a program. This enables a first estimation of the possible optimization potential at the cost of high optimization runtimes. 
To avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes branch prediction effects into account. This algorithm enables short optimization runtimes at slightly decreased optimization results. In a case study, the genetic algorithm is able to reduce the benchmarks' WCET by up to 24.7% whereas our ILP-based approach is able to decrease the WCET by up to 20.0%. }, } In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches, branch prediction units and speculative execution. This step was necessary in order to fulfill increasing requirements on computational power. Static analysis techniques considering such speculative units had to be developed to allow the estimation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a WCET-driven branch prediction aware optimization which reorders basic blocks of a function in order to reduce the amount of jump instructions and mispredicted branches. We employed a genetic algorithm which rearranges basic blocks in order to decrease the WCET of a program. This enables a first estimation of the possible optimization potential at the cost of high optimization runtimes. To avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes branch prediction effects into account. This algorithm enables short optimization runtimes at slightly decreased optimization results. 
In a case study, the genetic algorithm is able to reduce the benchmarks’ WCET by up to 24.7% whereas our ILP-based approach is able to decrease the WCET by up to 20.0%.
	Jan C. Kleinsorge, Heiko Falk and Peter Marwedel. A Synergetic Approach To Accurate Analysis Of Cache-Related Preemption Delay. In Proceedings of the International Conference on Embedded Software (EMSOFT), pages 329-338 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract] @inproceedings { kleinsorge:11:emsoft, author = {Kleinsorge, Jan C. and Falk, Heiko and Marwedel, Peter}, title = {A Synergetic Approach To Accurate Analysis Of Cache-Related Preemption Delay}, booktitle = {Proceedings of the International Conference on Embedded Software (EMSOFT)}, year = {2011}, pages = {329--338}, address = {Taipei, Taiwan}, month = oct, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-emsoft.pdf}, confidential = {n}, abstract = {The worst-case execution time (WCET) of a task denotes the largest possible execution time for all possible inputs and thus, hardware states. For non-preemptive multitask scheduling, techniques for the static estimation of safe upper bounds have been subject to industrial practice for years. For preemptive scheduling however, the isolated analysis of tasks becomes imprecise as interferences among tasks cannot be considered with sufficient precision. For such scenarios, the cache-related preemption delay (CRPD) denotes a key metric as it reflects the effects of preemptions on the execution behavior of a single task. Until recently, proposals for CRPD analyses were often limited to direct mapped caches or comparably imprecise for k-way set-associative caches. In this paper, we propose how the current best techniques for CRPD analysis, which have only been proposed separately and for different aspects of the analysis can be brought together to construct an efficient CRPD analysis with unique properties. Moreover, along the construction, we propose several different enhancements to the methods employed. 
We also exploit that in a complete approach, analysis steps are synergetic and can be combined into a single analysis pass solving all formerly separate steps at once. In addition, we argue that it is often sufficient to carry out the combined analysis on basic block bounds, which further lowers the overall complexity. The result is a proposal for a fast CRPD analysis of very high accuracy. }, } The worst-case execution time (WCET) of a task denotes the largest possible execution time for all possible inputs and thus, hardware states. For non-preemptive multitask scheduling, techniques for the static estimation of safe upper bounds have been subject to industrial practice for years. For preemptive scheduling however, the isolated analysis of tasks becomes imprecise as interferences among tasks cannot be considered with sufficient precision. For such scenarios, the cache-related preemption delay (CRPD) denotes a key metric as it reflects the effects of preemptions on the execution behavior of a single task. Until recently, proposals for CRPD analyses were often limited to direct mapped caches or comparably imprecise for k-way set-associative caches. In this paper, we propose how the current best techniques for CRPD analysis, which have only been proposed separately and for different aspects of the analysis can be brought together to construct an efficient CRPD analysis with unique properties. Moreover, along the construction, we propose several different enhancements to the methods employed. We also exploit that in a complete approach, analysis steps are synergetic and can be combined into a single analysis pass solving all formerly separate steps at once. In addition, we argue that it is often sufficient to carry out the combined analysis on basic block bounds, which further lowers the overall complexity. The result is a proposal for a fast CRPD analysis of very high accuracy.
	Samarjit Chakraborty, Marco Di Natale, Heiko Falk, Martin Lukasiewycz and Frank Slomka. Timing and Schedulability Analysis for Distributed Automotive Control Applications. In Tutorial at the International Conference on Embedded Software (EMSOFT), pages 349-350 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract] @inproceedings { falk:11:emsoft_tutorial, author = {Chakraborty, Samarjit and Di Natale, Marco and Falk, Heiko and Lukasiewycz, Martin and Slomka, Frank}, title = {Timing and Schedulability Analysis for Distributed Automotive Control Applications}, booktitle = {Tutorial at the International Conference on Embedded Software (EMSOFT)}, year = {2011}, pages = {349--350}, address = {Taipei, Taiwan}, month = oct, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-emsoft_tutorial.pdf}, confidential = {n}, abstract = {High-end cars today consist of more than 100 electronic control units (ECUs) that are connected to a set of sensors and actuators and run multiple distributed control applications. The design flow of such architectures consists of specifying control applications as Simulink/State flow models, followed by generating code from them and finally mapping such code onto multiple ECUs. In addition, the scheduling policies and parameters on both the ECUs and the communication buses over which they communicate also need to be specified. These policies and parameters are computed from high-level timing and control performance constraints. The proposed tutorial will cover different aspects of this design flow, with a focus on timing and schedulability problems. After reviewing the basic concepts of worst-case execution time analysis and schedulability analysis, we will discuss the differences between meeting timing constraints (as in classical real-time systems) and meeting control performance constraints (e.g., stability, steady and transient state performance). 
We will then describe various control performance related schedulability analysis techniques and how they may be tied to model-based software development. Finally, we will discuss various schedule synthesis techniques, both for ECUs as well as for communication protocols like FlexRay, so that control performance constraints specified at the model-level may be satisfied. Throughout the tutorial different commercial as well as academic tools will be discussed and demonstrated. }, } High-end cars today consist of more than 100 electronic control units (ECUs) that are connected to a set of sensors and actuators and run multiple distributed control applications. The design flow of such architectures consists of specifying control applications as Simulink/State flow models, followed by generating code from them and finally mapping such code onto multiple ECUs. In addition, the scheduling policies and parameters on both the ECUs and the communication buses over which they communicate also need to be specified. These policies and parameters are computed from high-level timing and control performance constraints. The proposed tutorial will cover different aspects of this design flow, with a focus on timing and schedulability problems. After reviewing the basic concepts of worst-case execution time analysis and schedulability analysis, we will discuss the differences between meeting timing constraints (as in classical real-time systems) and meeting control performance constraints (e.g., stability, steady and transient state performance). We will then describe various control performance related schedulability analysis techniques and how they may be tied to model-based software development. Finally, we will discuss various schedule synthesis techniques, both for ECUs as well as for communication protocols like FlexRay, so that control performance constraints specified at the model-level may be satisfied. 
Throughout the tutorial different commercial as well as academic tools will be discussed and demonstrated.
	Jens Möllmer. WCET Optimierung unter Beachtung der Speicherhierarchie. Bachelor Thesis, August 2011 [BibTeX][PDF] @mastersthesis { Moellmer2011, title = {WCET Optimierung unter Beachtung der Speicherhierarchie}, author = {M\"ollmer, Jens}, school = {Technische Universit\"at Dortmund}, type = {Bachelor Thesis}, year = {2011}, month = aug, keywords = {wcet optimizations}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/moellmer.pdf}, confidential = {n}, adviser = {Sascha Plazar}, }
	Heiko Falk, Norman Schmitz and Florian Schmoll. WCET-aware Register Allocation based on Integer-Linear Programming. In Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS), pages 13-22 Porto / Portugal, July 2011 [BibTeX][PDF][Abstract] @inproceedings { falk:11:ecrts, author = {Falk, Heiko and Schmitz, Norman and Schmoll, Florian}, title = {WCET-aware Register Allocation based on Integer-Linear Programming}, booktitle = {Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS)}, year = {2011}, pages = {13--22}, address = {Porto / Portugal}, month = jul, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-ecrts_2.pdf}, confidential = {n}, abstract = {Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's runtime. This paper presents an integer-linear programming \textit{(ILP)} based register allocator that uses precise worst-case execution time \textit{(WCET)} models. Using this WCET timing data, the compiler avoids spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware ILP-based register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 55 realistic benchmarks, we reduced WCETs by 20.2\% on average and ACETs by 14\%, compared to a standard graph coloring allocator. 
Furthermore, our ILP-based register allocator outperforms a WCET-aware graph coloring allocator by more than a factor of two for the considered benchmarks, while requiring less runtime.}, } Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's runtime. This paper presents an integer-linear programming (ILP) based register allocator that uses precise worst-case execution time (WCET) models. Using this WCET timing data, the compiler avoids spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware ILP-based register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 55 realistic benchmarks, we reduced WCETs by 20.2% on average and ACETs by 14%, compared to a standard graph coloring allocator. Furthermore, our ILP-based register allocator outperforms a WCET-aware graph coloring allocator by more than a factor of two for the considered benchmarks, while requiring less runtime.
	Timon Kelter, Heiko Falk, Peter Marwedel, Sudipta Chattopadhyay and Abhik Roychoudhury. Bus-Aware Multicore WCET Analysis through TDMA Offset Bounds. In Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS), pages 3-12 Porto / Portugal, July 2011 [BibTeX][PDF][Abstract] @inproceedings { kelter:11:ecrts, author = {Kelter, Timon and Falk, Heiko and Marwedel, Peter and Chattopadhyay, Sudipta and Roychoudhury, Abhik}, title = {Bus-Aware Multicore WCET Analysis through TDMA Offset Bounds}, booktitle = {Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS)}, year = {2011}, pages = {3--12}, address = {Porto / Portugal}, month = jul, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-ecrts_1.pdf}, confidential = {n}, abstract = {In the domain of real-time systems, the analysis of the timing behavior of programs is crucial for guaranteeing the schedulability and thus the safeness of a system. Static analyses of the \textit{WCET} (Worst-Case Execution Time) have proven to be a key element for timing analysis, as they provide safe upper bounds on a program's execution time. For single-core systems, industrial-strength WCET analyzers are already available, but up to now, only first proposals have been made to analyze the WCET in multicore systems, where the different cores may interfere during the access to shared resources. An important example for this are shared buses which connect the cores to a shared main memory. The time to gain access to the shared bus may vary significantly, depending on the used bus arbitration protocol and the access timings. In this paper, we propose a new technique for analyzing the duration of accesses to shared buses. We implemented a prototype tool which uses the new analysis and tested it on a set of real-world benchmarks. 
Results demonstrate that our analysis achieves the same precision as the best existing approach while drastically outperforming it in matters of analysis time.}, } In the domain of real-time systems, the analysis of the timing behavior of programs is crucial for guaranteeing the schedulability and thus the safeness of a system. Static analyses of the WCET (Worst-Case Execution Time) have proven to be a key element for timing analysis, as they provide safe upper bounds on a program's execution time. For single-core systems, industrial-strength WCET analyzers are already available, but up to now, only first proposals have been made to analyze the WCET in multicore systems, where the different cores may interfere during the access to shared resources. An important example for this are shared buses which connect the cores to a shared main memory. The time to gain access to the shared bus may vary significantly, depending on the used bus arbitration protocol and the access timings. In this paper, we propose a new technique for analyzing the duration of accesses to shared buses. We implemented a prototype tool which uses the new analysis and tested it on a set of real-world benchmarks. Results demonstrate that our analysis achieves the same precision as the best existing approach while drastically outperforming it in matters of analysis time.
	Paul Lokuciejewski, Sascha Plazar, Heiko Falk, Peter Marwedel and Lothar Thiele. Approximating Pareto optimal compiler optimization sequences---a trade-off between WCET, ACET and code size. Software: Practice and Experience May 2011, DOI 10.1002/spe.1079 [BibTeX][PDF][Abstract] @article { lokuciejewski:11:spe, author = {Lokuciejewski, Paul and Plazar, Sascha and Falk, Heiko and Marwedel, Peter and Thiele, Lothar}, title = {Approximating Pareto optimal compiler optimization sequences---a trade-off between WCET, ACET and code size}, journal = {Software: Practice and Experience}, year = {2011}, month = may, note = {DOI 10.1002/spe.1079}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-spe.pdf}, confidential = {n}, abstract = {With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w.\,r.\,t.~different objective functions, e.\,g., average-case execution time \textit{(ACET)} or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e.\,g., \textit{O3} or \textit{Os}. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. Besides the objective functions ACET and code size, we consider the worst-case execution time \textit{(WCET)} which is a crucial parameter for real-time systems. 
To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions for the objectives $\langle$WCET, ACET$\rangle$ and $\langle$WCET, code size$\rangle$ are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.}, } With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w. r. t. different objective functions, e. g., average-case execution time (ACET) or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e. g., O3 or Os. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. Besides the objective functions ACET and code size, we consider the worst-case execution time (WCET) which is a crucial parameter for real-time systems. To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions for the objectives <WCET, ACET> and <WCET, code size> are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. 
The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.
	Arthur Pyka. Multikriterielle Exploration von Compileroptimierungen und Cacheparametern. Master's Thesis, February 2011 [BibTeX][PDF] @mastersthesis { Pyka2011, title = {Multikriterielle Exploration von Compileroptimierungen und Cacheparametern}, author = {Pyka, Arthur}, school = {Technische Universit\"at Dortmund}, year = {2011}, month = feb, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/pyka.pdf}, confidential = {n}, adviser = {Sascha Plazar}, }
	Kyrill Risto. Scratchpad-Allokation zur Reduktion der größtmöglichen Laufzeit von Multitask-Systemen. Master's Thesis, January 2011 [BibTeX] @mastersthesis { Risto2011, title = {Scratchpad-Allokation zur Reduktion der gr\"o{\ss}tm\"oglichen Laufzeit von Multitask-Systemen}, author = {Risto, Kyrill}, school = {Technische Universit\"at Dortmund}, year = {2011}, month = jan, keywords = {wcet}, confidential = {n}, adviser = {Heiko Falk}, }
	Helena Kotthaus. Cache-bewusste Code-Positionierung zur Reduktion der maximalen Programmlaufzeit (WCET). Master's Thesis, January 2011 [BibTeX] @mastersthesis { Kotthaus2011, title = {Cache-bewusste Code-Positionierung zur Reduktion der maximalen Programmlaufzeit (WCET)}, author = {Kotthaus, Helena}, school = {Technische Universit\"at Dortmund}, year = {2011}, month = jan, keywords = {wcet}, confidential = {n}, adviser = {Heiko Falk}, }

2010

	Paul Lokuciejewski and Peter Marwedel. Worst-Case Execution Time Aware Compilation Techniques for Real-Time Systems. Springer November 2010 [BibTeX][Abstract] @book { lokuciejewski:10:springer, author = {Lokuciejewski, Paul and Marwedel, Peter}, title = {Worst-Case Execution Time Aware Compilation Techniques for Real-Time Systems}, publisher = {Springer}, year = {2010}, month = nov, keywords = {wcet}, confidential = {n}, abstract = {For real-time systems, the worst-case execution time (WCET) is the key objective to be considered. Traditionally, code for real-time systems is generated without taking this objective into account and the WCET is computed only after code generation. \textit{Worst-Case Execution Time Aware Compilation Techniques for Real-Time Systems} presents the first comprehensive approach integrating WCET considerations into the code generation process. Based on the proposed reconciliation between a compiler and a timing analyzer, a wide range of novel optimization techniques is provided. Among others, the techniques cover source code and assembly level optimizations, exploit machine learning techniques and address the design of modern systems that have to meet multiple objectives. Using these optimizations, the WCET of real-time applications can be reduced by about 30\% to 45\% on the average. This opens opportunities for decreasing clock speeds, costs and energy consumption of embedded processors. The proposed techniques can be used for all types real-time systems, including automotive and avionics IT systems.}, } For real-time systems, the worst-case execution time (WCET) is the key objective to be considered. Traditionally, code for real-time systems is generated without taking this objective into account and the WCET is computed only after code generation. Worst-Case Execution Time Aware Compilation Techniques for Real-Time Systems presents the first comprehensive approach integrating WCET considerations into the code generation process. 
Based on the proposed reconciliation between a compiler and a timing analyzer, a wide range of novel optimization techniques is provided. Among others, the techniques cover source code and assembly level optimizations, exploit machine learning techniques and address the design of modern systems that have to meet multiple objectives. Using these optimizations, the WCET of real-time applications can be reduced by about 30% to 45% on the average. This opens opportunities for decreasing clock speeds, costs and energy consumption of embedded processors. The proposed techniques can be used for all types real-time systems, including automotive and avionics IT systems.
	Heiko Falk and Paul Lokuciejewski. A compiler framework for the reduction of worst-case execution times. Journal on Real-Time Systems 46 2, pages 251-300 October 2010, DOI 10.1007/s11241-010-9101-x [BibTeX][PDF][Abstract] @article { falk:10:springer-rts, author = {Falk, Heiko and Lokuciejewski, Paul}, title = {A compiler framework for the reduction of worst-case execution times}, journal = {Journal on Real-Time Systems}, year = {2010}, volume = {46}, number = {2}, pages = {251--300}, month = oct, note = {DOI 10.1007/s11241-010-9101-x}, keywords = {wcet}, file = {http://vg09.met.vgwort.de/na/1fbe4260e3244c11b33e4c6d0ffa10e3?l=http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-rts.pdf}, confidential = {n}, abstract = {The current practice to design software for real-time systems is tedious. There is almost no tool support that assists the designer in automatically deriving safe bounds of the \textit{worst-case execution time (WCET)} of a system during code generation and in systematically optimizing code to reduce WCET. This article presents concepts and infrastructures for WCET-aware code generation and optimization techniques for WCET reduction. All together, they help to obtain code explicitly optimized for its worst-case timing, to automate large parts of the real-time software design flow, and to reduce costs of a real-time system by allowing to use tailored hardware.}, } The current practice to design software for real-time systems is tedious. There is almost no tool support that assists the designer in automatically deriving safe bounds of the worst-case execution time (WCET) of a system during code generation and in systematically optimizing code to reduce WCET. This article presents concepts and infrastructures for WCET-aware code generation and optimization techniques for WCET reduction. 
All together, they help to obtain code explicitly optimized for its worst-case timing, to automate large parts of the real-time software design flow, and to reduce costs of a real-time system by allowing to use tailored hardware.
	Lutz Krumme. Dynamische Scratchpad-Allokation von Code und Daten zur WCET-Minimierung. Master's Thesis, August 2010 [BibTeX][PDF] @mastersthesis { Krumme2010, title = {Dynamische Scratchpad-Allokation von Code und Daten zur WCET-Minimierung}, author = {Krumme, Lutz}, school = {Technische Universit\"at Dortmund}, year = {2010}, month = aug, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/krumme.pdf}, confidential = {n}, adviser = {Heiko Falk}, }
	Norman Schmitz. ILP-basierte Registerallokation zur Worst-Case Execution Time Minimierung. Master's Thesis, June 2010 [BibTeX][PDF] @mastersthesis { Schmitz2010, title = {ILP-basierte Registerallokation zur Worst-Case Execution Time Minimierung}, author = {Schmitz, Norman}, school = {Technische Universit\"at Dortmund}, year = {2010}, month = jun, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/schmitz.pdf}, confidential = {n}, adviser = {Heiko Falk}, }
	Paul Lokuciejewski, Timon Kelter and Peter Marwedel. Superblock-Based Source Code Optimizations for WCET Reduction. In Proceedings of the 7th International Conference on Embedded Software and Systems (ICESS), pages 1918-1925 Bradford / UK, June 2010 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:10:icess, author = {Lokuciejewski, Paul and Kelter, Timon and Marwedel, Peter}, title = {Superblock-Based Source Code Optimizations for WCET Reduction}, booktitle = {Proceedings of the 7th International Conference on Embedded Software and Systems (ICESS)}, year = {2010}, pages = {1918--1925}, address = {Bradford / UK}, month = jun, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-icess.pdf}, confidential = {n}, abstract = {Superblocks represent regions in a program code that consist of multiple basic blocks. Compilers benefit from this structure since it enables optimization across block boundaries. This increased optimization potential was thoroughly studied in the past for average-case execution time (ACET) reduction at assembly level. In this paper, the concept of superblocks is exploited for the optimization of embedded real-time systems that have to meet stringent timing constraints specified by the worst-case execution time (WCET). To achieve this goal, our superblock formation is based on a novel trace selection algorithm which is driven by WCET data. Moreover, we translate superblocks for the first time from assembly to source code level. This approach enables an early code restructuring in the optimizer, providing more optimization opportunities for both subsequent source code and assembly level transformations. An adaption of the traditional optimizations common subexpression and dead code elimination to our WCET-aware superblocks allows an effective WCET reduction. 
Using our techniques, we significantly outperform standard optimizations and achieve an average WCET reduction of up to 10.2\% for a total of 55 real-life benchmarks.}, } Superblocks represent regions in a program code that consist of multiple basic blocks. Compilers benefit from this structure since it enables optimization across block boundaries. This increased optimization potential was thoroughly studied in the past for average-case execution time (ACET) reduction at assembly level. In this paper, the concept of superblocks is exploited for the optimization of embedded real-time systems that have to meet stringent timing constraints specified by the worst-case execution time (WCET). To achieve this goal, our superblock formation is based on a novel trace selection algorithm which is driven by WCET data. Moreover, we translate superblocks for the first time from assembly to source code level. This approach enables an early code restructuring in the optimizer, providing more optimization opportunities for both subsequent source code and assembly level transformations. An adaption of the traditional optimizations common subexpression and dead code elimination to our WCET-aware superblocks allows an effective WCET reduction. Using our techniques, we significantly outperform standard optimizations and achieve an average WCET reduction of up to 10.2% for a total of 55 real-life benchmarks.
	Andre Smolarczyk. Instruction Scheduling-Verfahren zur Minimierung der WCET. Master's Thesis, May 2010 [BibTeX][PDF] @mastersthesis { Smolarczyk2010, title = {Instruction Scheduling-Verfahren zur Minimierung der WCET}, author = {Smolarczyk, Andre}, school = {Technische Universit\"at Dortmund}, year = {2010}, month = may, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/smolarczyk.pdf}, confidential = {n}, adviser = {Paul Lokuciejewski}, }
	Sascha Plazar, Paul Lokuciejewski and Peter Marwedel. WCET-driven Cache-aware Memory Content Selection. In Proceedings of the 13th IEEE International Symposium on Object/Component/Service-oriented Real-time Distributed Computing (ISORC), pages 107-114 Carmona / Spain, May 2010 [BibTeX][PDF][Abstract] @inproceedings { plazar:10:isorc, author = {Plazar, Sascha and Lokuciejewski, Paul and Marwedel, Peter}, title = {WCET-driven Cache-aware Memory Content Selection}, booktitle = {Proceedings of the 13th IEEE International Symposium on Object/Component/Service-oriented Real-time Distributed Computing (ISORC)}, year = {2010}, pages = {107--114}, address = {Carmona / Spain}, month = may, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-isorc.pdf}, confidential = {n}, abstract = {Caches are widely used to bridge the increasingly growing gap between processor and memory performance. They store copies of frequently used parts of the slow main memory for faster access. Static analysis techniques allow the estimation of the worst-case cache behavior and enable the computation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify if hard real-time systems satisfy their timing constraints and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a new WCET-driven cache-aware memory content selection algorithm, which allocates functions whose WCET highly benefits from a cached execution to cached memory areas. Vice versa, rarely used functions which do not benefit from a cached execution are allocated to non-cached memory areas. As a result of this, unfavorable functions w.\,r.\,t. a program's WCET can not evict beneficial functions from the cache. This can lead to a reduced cache miss ratio and a decreased WCET. 
The effectiveness of our approach is demonstrated by results achieved on real-life benchmarks. In a case study, our greedy algorithm is able to reduce the benchmarks' WCET by up to 20\%.}, } Caches are widely used to bridge the increasingly growing gap between processor and memory performance. They store copies of frequently used parts of the slow main memory for faster access. Static analysis techniques allow the estimation of the worst-case cache behavior and enable the computation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify if hard real-time systems satisfy their timing constraints and the WCET is a key parameter for the design of embedded systems. In this paper, we propose a new WCET-driven cache-aware memory content selection algorithm, which allocates functions whose WCET highly benefits from a cached execution to cached memory areas. Vice versa, rarely used functions which do not benefit from a cached execution are allocated to non-cached memory areas. As a result of this, unfavorable functions w. r. t. a program's WCET can not evict beneficial functions from the cache. This can lead to a reduced cache miss ratio and a decreased WCET. The effectiveness of our approach is demonstrated by results achieved on real-life benchmarks. In a case study, our greedy algorithm is able to reduce the benchmarks' WCET by up to 20%.
	Paul Lokuciejewski, Sascha Plazar, Heiko Falk, Peter Marwedel and Lothar Thiele. Multi-Objective Exploration of Compiler Optimizations for Real-Time Systems. In Proceedings of the 13th International Symposium on Object/Component/Service-oriented Real-time Distributed Computing (ISORC), pages 115-122 Carmona / Spain, May 2010 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:10:isorc, author = {Lokuciejewski, Paul and Plazar, Sascha and Falk, Heiko and Marwedel, Peter and Thiele, Lothar}, title = {Multi-Objective Exploration of Compiler Optimizations for Real-Time Systems}, booktitle = {Proceedings of the 13th International Symposium on Object/Component/Service-oriented Real-time Distributed Computing (ISORC)}, year = {2010}, pages = {115-122}, address = {Carmona / Spain}, month = {may}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-isorc_2.pdf}, confidential = {n}, abstract = {With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w.r.t. different objective functions, e.g., average-case execution time \textit{(ACET)} or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e.g., \textit{O3} or \textit{Os}. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. 
Besides the objective functions ACET and code size, we consider the worst-case execution time \textit{(WCET)} which is a crucial parameter for real-time systems. To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.}, } With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w.r.t. different objective functions, e.g., average-case execution time (ACET) or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e.g., O3 or Os. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. Besides the objective functions ACET and code size, we consider the worst-case execution time (WCET) which is a crucial parameter for real-time systems. To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. 
The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.
	Peter Marwedel and Heiko Falk. Reconciling compilers and timing analysis (Invited Talk). April 2010 [BibTeX][PDF][Abstract] @misc { marw-falk:10:cpsweek, author = {Marwedel, Peter and Falk, Heiko}, title = {Reconciling compilers and timing analysis (Invited Talk)}, month = {apr}, year = {2010}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-cpsweek-industrialworkshop.pdf}, confidential = {n}, abstract = {Most embedded/cyber-physical systems have to respect timing constraints. Ensuring meeting such constraints is currently typically based on a trial-and-error procedure involving many time-consuming software generation attempts. In this talk, we will demonstrate how the integration of timing analysis into a compiler for an automotive processor can provide a systematic path toward optimized worst-case execution times and can cut down costs.}, } Most embedded/cyber-physical systems have to respect timing constraints. Ensuring meeting such constraints is currently typically based on a trial-and-error procedure involving many time-consuming software generation attempts. In this talk, we will demonstrate how the integration of timing analysis into a compiler for an automotive processor can provide a systematic path toward optimized worst-case execution times and can cut down costs.
	Igor Ionov. Design und Realisierung von Konzepten für retargierbare, multikriterielle Optimierungen im WCET-fähigen Compiler. Master's Thesis, March 2010 [BibTeX][PDF] @mastersthesis { Ionov2010, title = {Design und Realisierung von Konzepten f\"ur retargierbare, multikriterielle Optimierungen im WCET-f\"ahigen Compiler}, author = {Ionov, Igor}, school = {Technische Universit\"at Dortmund}, year = {2010}, month = {March}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/ionov.pdf}, confidential = {n}, adviser = {Sascha Plazar}, }
	Paul Lokuciejewski, Marco Stolpe, Katharina Morik and Peter Marwedel. Automatic Selection of Machine Learning Models for WCET-aware Compiler Heuristic Generation. In Proceedings of the 4th Workshop on Statistical and Machine Learning Approaches to Architectures and Compilation (SMART), pages 3-17 Pisa / Italy, January 2010 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:10:smart, author = {Lokuciejewski, Paul and Stolpe, Marco and Morik, Katharina and Marwedel, Peter}, title = {Automatic Selection of Machine Learning Models for WCET-aware Compiler Heuristic Generation}, booktitle = {Proceedings of the 4th Workshop on Statistical and Machine Learning Approaches to Architectures and Compilation (SMART)}, year = {2010}, pages = {3-17}, address = {Pisa / Italy}, month = {jan}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2010-smart.pdf}, confidential = {n}, abstract = {Machine learning has shown its capabilities for an automatic generation of heuristics used by optimizing compilers. The advantages of these heuristics are that they can be easily adopted to a new environment and in some cases outperform hand-crafted compiler optimizations. However, this approach shifts the effort from manual heuristic tuning to the model selection problem of machine learning - i.e., selecting learning algorithms and their respective parameters - which is a tedious task in its own right. In this paper, we tackle the model selection problem in a systematic way. As our experiments show, the right choice of a learning algorithm and its parameters can significantly affect the quality of the generated heuristics. We present a generic framework integrating machine learning into a compiler to enable an automatic search for the best learning algorithm. To find good settings for the learner parameters within the large search space, optimizations based on evolutionary algorithms are applied. 
In contrast to the majority of other approaches aiming at a reduction of the average-case execution time (ACET), our goal is the minimization of the worst-case execution time (WCET) which is a key parameter for embedded systems acting as real-time systems. A careful case study on the heuristic generation for the well-known optimization loop invariant code motion shows the challenges and benefits of our methods.}, } Machine learning has shown its capabilities for an automatic generation of heuristics used by optimizing compilers. The advantages of these heuristics are that they can be easily adopted to a new environment and in some cases outperform hand-crafted compiler optimizations. However, this approach shifts the effort from manual heuristic tuning to the model selection problem of machine learning - i.e., selecting learning algorithms and their respective parameters - which is a tedious task in its own right. In this paper, we tackle the model selection problem in a systematic way. As our experiments show, the right choice of a learning algorithm and its parameters can significantly affect the quality of the generated heuristics. We present a generic framework integrating machine learning into a compiler to enable an automatic search for the best learning algorithm. To find good settings for the learner parameters within the large search space, optimizations based on evolutionary algorithms are applied. In contrast to the majority of other approaches aiming at a reduction of the average-case execution time (ACET), our goal is the minimization of the worst-case execution time (WCET) which is a key parameter for embedded systems acting as real-time systems. A careful case study on the heuristic generation for the well-known optimization loop invariant code motion shows the challenges and benefits of our methods.

2009

	Timon Kelter. Superblock-Based High-Level WCET Optimizations: Concepts and Applications (in German). VDM Verlag Dr. Müller October 2009 [BibTeX] @book { kelter:2010:vdm, author = {Kelter, Timon}, title = {Superblock-Based High-Level WCET Optimizations: Concepts and Applications (in German)}, publisher = {VDM Verlag Dr. M\"uller}, year = {2009}, month = {oct}, keywords = {wcet}, confidential = {n}, }
	Timon Kelter. Superblock-basierte High-Level WCET-Optimierungen. Master's Thesis, September 2009 [BibTeX][PDF] @mastersthesis { Kelter2009, title = {Superblock-basierte High-Level WCET-Optimierungen}, author = {Kelter, Timon}, school = {Technische Universit\"at Dortmund}, year = {2009}, month = {September}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/kelter.pdf}, confidential = {n}, adviser = {Paul Lokuciejewski}, }
	Heiko Falk. WCET-aware Register Allocation based on Graph Coloring. In The 46th Design Automation Conference (DAC), pages 726-731 San Francisco / USA, July 2009 [BibTeX][PDF][Abstract] @inproceedings { falk:09:dac1, author = {Falk, Heiko}, title = {WCET-aware Register Allocation based on Graph Coloring}, booktitle = {The 46th Design Automation Conference (DAC)}, year = {2009}, pages = {726-731}, address = {San Francisco / USA}, month = {jul}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-dac_1.pdf}, confidential = {n}, abstract = {Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's run time. This paper extends a graph coloring register allocator such that it uses precise worst-case execution time \textit{(WCET)} models. Using this WCET timing data, the compiler tries to avoid spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 46 realistic benchmarks, we reduced WCETs by 31.2\% on average. Additionally, the runtimes of our WCET-aware register allocator still remain acceptable.}, } Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. 
Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's run time. This paper extends a graph coloring register allocator such that it uses precise worst-case execution time (WCET) models. Using this WCET timing data, the compiler tries to avoid spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 46 realistic benchmarks, we reduced WCETs by 31.2% on average. Additionally, the runtimes of our WCET-aware register allocator still remain acceptable.
	Heiko Falk and Jan C. Kleinsorge. Optimal Static WCET-aware Scratchpad Allocation of Program Code. In The 46th Design Automation Conference (DAC), pages 732-737 San Francisco / USA, July 2009 [BibTeX][PDF][Abstract] @inproceedings { falk:09:dac2, author = {Falk, Heiko and Kleinsorge, Jan C.}, title = {Optimal Static WCET-aware Scratchpad Allocation of Program Code}, booktitle = {The 46th Design Automation Conference (DAC)}, year = {2009}, pages = {732-737}, address = {San Francisco / USA}, month = {jul}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-dac_2.pdf}, confidential = {n}, abstract = {Caches are notorious for their unpredictability. It is difficult or even impossible to predict if a memory access will result in a definite cache hit or miss. This unpredictability is highly undesired especially when designing real-time systems where the \textit{worst-case execution time (WCET)} is one of the key metrics. \textit{Scratchpad memories (SPMs)} have proven to be a fully predictable alternative to caches. In contrast to caches, however, SPMs require dedicated compiler support. This paper presents an optimal static SPM allocation algorithm for program code. It minimizes WCETs by placing the most beneficial parts of a program's code in an SPM. Our results underline the effectiveness of the proposed techniques. For a total of 73 realistic benchmarks, we reduced WCETs on average by 7.4\% up to 40\%. Additionally, the run times of our ILP-based SPM allocator are negligible.}, } Caches are notorious for their unpredictability. It is difficult or even impossible to predict if a memory access will result in a definite cache hit or miss. This unpredictability is highly undesired especially when designing real-time systems where the worst-case execution time (WCET) is one of the key metrics. Scratchpad memories (SPMs) have proven to be a fully predictable alternative to caches. 
In contrast to caches, however, SPMs require dedicated compiler support. This paper presents an optimal static SPM allocation algorithm for program code. It minimizes WCETs by placing the most beneficial parts of a program's code in an SPM. Our results underline the effectiveness of the proposed techniques. For a total of 73 realistic benchmarks, we reduced WCETs on average by 7.4% up to 40%. Additionally, the run times of our ILP-based SPM allocator are negligible.
	Paul Lokuciejewski and Peter Marwedel. Combining Worst-Case Timing Models, Loop Unrolling, and Static Loop Analysis for WCET Minimization. In The 21st Euromicro Conference on Real-Time Systems (ECRTS), pages 35-44 Dublin / Ireland, July 2009 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:09:ecrts, author = {Lokuciejewski, Paul and Marwedel, Peter}, title = {Combining Worst-Case Timing Models, Loop Unrolling, and Static Loop Analysis for WCET Minimization}, booktitle = {The 21st Euromicro Conference on Real-Time Systems (ECRTS)}, year = {2009}, pages = {35-44}, address = {Dublin / Ireland}, month = {jul}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-ecrts.pdf}, confidential = {n}, abstract = {Program loops are notorious for their optimization potential on modern high-performance architectures. Compilers aim at their aggressive transformation to achieve large improvements of the program performance. In particular, the optimization loop unrolling has shown in the past decades to be highly effective achieving significant increases of the average-case performance. In this paper, we present loop unrolling that is tailored towards real-time systems. Our novel optimization is driven by worst-case execution time (WCET) information to effectively minimize the program's worst-case behavior. To exploit maximal optimization potential, the determination of a suitable unrolling factor is based on precise loop iteration counts provided by a static loop analysis. In addition, our heuristics avoid adverse effects of unrolling which result from instruction cache overflows and the generation of additional spill code. Results on 45 real-life benchmarks demonstrate that aggressive loop unrolling can yield WCET reductions of up to 13.7\% over simple, naive approaches employed by many production compilers.}, } Program loops are notorious for their optimization potential on modern high-performance architectures. 
Compilers aim at their aggressive transformation to achieve large improvements of the program performance. In particular, the optimization loop unrolling has shown in the past decades to be highly effective achieving significant increases of the average-case performance. In this paper, we present loop unrolling that is tailored towards real-time systems. Our novel optimization is driven by worst-case execution time (WCET) information to effectively minimize the program's worst-case behavior. To exploit maximal optimization potential, the determination of a suitable unrolling factor is based on precise loop iteration counts provided by a static loop analysis. In addition, our heuristics avoid adverse effects of unrolling which result from instruction cache overflows and the generation of additional spill code. Results on 45 real-life benchmarks demonstrate that aggressive loop unrolling can yield WCET reductions of up to 13.7% over simple, naive approaches employed by many production compilers.
	Thomas Pucyk. Lokale und Globale Instruction Scheduling-Verfahren für den TriCore Prozessor. Master's Thesis, June 2009 [BibTeX][PDF] @mastersthesis { Pucyk2009, title = {Lokale und Globale Instruction Scheduling-Verfahren f\"ur den TriCore Prozessor}, author = {Pucyk, Thomas}, school = {Technische Universit\"at Dortmund}, year = {2009}, month = {June}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/pucyk.pdf}, confidential = {n}, adviser = {Paul Lokuciejewski}, }
	Sascha Plazar, Paul Lokuciejewski and Peter Marwedel. WCET-aware Software Based Cache Partitioning for Multi-Task Real-Time Systems. In The 9th International Workshop on Worst-Case Execution Time Analysis (WCET), pages 78-88 Dublin / Ireland, June 2009 [BibTeX][PDF][Abstract] @inproceedings { plazar:09:wcet, author = {Plazar, Sascha and Lokuciejewski, Paul and Marwedel, Peter}, title = {WCET-aware Software Based Cache Partitioning for Multi-Task Real-Time Systems}, booktitle = {The 9th International Workshop on Worst-Case Execution Time Analysis (WCET)}, year = {2009}, pages = {78-88}, address = {Dublin / Ireland}, month = {jun}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-wcet.pdf}, confidential = {n}, abstract = {Caches are a source of unpredictability since it is very difficult to predict if a memory access results in a cache hit or miss. In systems running multiple tasks steered by a preempting scheduler, it is even impossible to determine the cache behavior since interrupt-driven schedulers lead to unknown points of time for context switches. Partitioned caches are already used in multi-task environments to increase the cache hit ratio by avoiding mutual eviction of tasks from the cache. For real-time systems, the upper bound of the execution time is one of the most important metrics, called the Worst-Case Execution Time (WCET). In this paper, we use partitioning of instruction caches as a technique to achieve tighter WCET estimations since tasks can not be evicted from their partition by other tasks. We propose a novel WCET-aware algorithm, which determines the optimal partition size for each task with focus on decreasing the system's WCET for a given set of possible partition sizes. Employing this algorithm, we are able to decrease the WCET depending on the number of tasks in a set by up to 34\%. 
On average, reductions between 12\% and 19\% can be achieved.}, } Caches are a source of unpredictability since it is very difficult to predict if a memory access results in a cache hit or miss. In systems running multiple tasks steered by a preempting scheduler, it is even impossible to determine the cache behavior since interrupt-driven schedulers lead to unknown points of time for context switches. Partitioned caches are already used in multi-task environments to increase the cache hit ratio by avoiding mutual eviction of tasks from the cache. For real-time systems, the upper bound of the execution time is one of the most important metrics, called the Worst-Case Execution Time (WCET). In this paper, we use partitioning of instruction caches as a technique to achieve tighter WCET estimations since tasks can not be evicted from their partition by other tasks. We propose a novel WCET-aware algorithm, which determines the optimal partition size for each task with focus on decreasing the system's WCET for a given set of possible partition sizes. Employing this algorithm, we are able to decrease the WCET depending on the number of tasks in a set by up to 34%. On average, reductions between 12% and 19% can be achieved.
	Paul Lokuciejewski, Fatih Gedikli and Peter Marwedel. Accelerating WCET-driven Optimizations by the Invariant Path Paradigm - a Case Study of Loop Unswitching. In The 12th International Workshop on Software & Compilers for Embedded Systems (SCOPES), pages 11-20 Nice / France, April 2009 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:09:scopes, author = {Lokuciejewski, Paul and Gedikli, Fatih and Marwedel, Peter}, title = {Accelerating WCET-driven Optimizations by the Invariant Path Paradigm - a Case Study of Loop Unswitching}, booktitle = {The 12th International Workshop on Software \& Compilers for Embedded Systems (SCOPES)}, year = {2009}, pages = {11-20}, address = {Nice / France}, month = {apr}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-scopes.pdf}, confidential = {n}, abstract = {The worst-case execution time (WCET) being the upper bound of the maximum execution time corresponds to the longest path through the program's control flow graph. Its reduction is the objective of a WCET optimization. Unlike average-case execution time compiler optimizations which consider a static (most frequently executed) path, the longest path is variable since its optimization might result in another path becoming the effective longest path. To keep path information valid, WCET optimizations typically perform a time-consuming static WCET analysis after each code modification to ensure that subsequent optimization steps operate on the critical path. However, a code modification does not always lead to a path switch, making many WCET analyses superfluous. To cope with this problem, we propose a new paradigm called Invariant Path which eliminates the pessimism by indicating whether a path update is mandatory. To demonstrate the paradigm's practical use, we developed a novel optimization called WCET-driven Loop Unswitching which exploits the Invariant Path information. 
In a case study, our optimization reduced the WCET of real-world benchmarks by up to 18.3\%, while exploiting the Invariant Path paradigm led to a reduction of the optimization time by 57.5\% on average.}, } The worst-case execution time (WCET) being the upper bound of the maximum execution time corresponds to the longest path through the program's control flow graph. Its reduction is the objective of a WCET optimization. Unlike average-case execution time compiler optimizations which consider a static (most frequently executed) path, the longest path is variable since its optimization might result in another path becoming the effective longest path. To keep path information valid, WCET optimizations typically perform a time-consuming static WCET analysis after each code modification to ensure that subsequent optimization steps operate on the critical path. However, a code modification does not always lead to a path switch, making many WCET analyses superfluous. To cope with this problem, we propose a new paradigm called Invariant Path which eliminates the pessimism by indicating whether a path update is mandatory. To demonstrate the paradigm's practical use, we developed a novel optimization called WCET-driven Loop Unswitching which exploits the Invariant Path information. In a case study, our optimization reduced the WCET of real-world benchmarks by up to 18.3%, while exploiting the Invariant Path paradigm led to a reduction of the optimization time by 57.5% on average.
	Paul Lokuciejewski, Daniel Cordes, Heiko Falk and Peter Marwedel. A Fast and Precise Static Loop Analysis based on Abstract Interpretation, Program Slicing and Polytope Models. In International Symposium on Code Generation and Optimization (CGO), pages 136-146 Seattle / USA, March 2009 [BibTeX][PDF][Abstract] @inproceedings { lokuciejewski:09:cgo, author = {Lokuciejewski, Paul and Cordes, Daniel and Falk, Heiko and Marwedel, Peter}, title = {A Fast and Precise Static Loop Analysis based on Abstract Interpretation, Program Slicing and Polytope Models}, booktitle = {International Symposium on Code Generation and Optimization (CGO)}, year = {2009}, pages = {136-146}, address = {Seattle / USA}, month = {mar}, keywords = {wcet}, file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-cgo.pdf}, confidential = {n}, abstract = {A static loop analysis is a program analysis computing loop iteration counts. This information is crucial for different fields of applications. In the domain of compilers, the knowledge about loop iterations can be exploited for aggressive loop optimizations like Loop Unrolling. A loop analyzer also provides static information about code execution frequencies which can assist feedback-directed optimizations. Another prominent application is the static worst-case execution time (WCET) analysis which relies on a safe approximation of loop iteration counts. In this paper, we propose a framework for a static loop analysis based on Abstract Interpretation, a theory of a sound approximation of program semantics. To accelerate the analysis, we preprocess the analyzed code using Program Slicing, a technique that removes statements irrelevant for the loop analysis. In addition, we introduce a novel polytope-based loop evaluation that further significantly reduces the analysis time. The efficiency of our loop analyzer is evaluated on a large number of benchmarks. 
Results show that 99\% of the considered loops could be successfully analyzed in an acceptable amount of time. This study points out that our methodology is best suited for real-world problems.}, } A static loop analysis is a program analysis computing loop iteration counts. This information is crucial for different fields of applications. In the domain of compilers, the knowledge about loop iterations can be exploited for aggressive loop optimizations like Loop Unrolling. A loop analyzer also provides static information about code execution frequencies which can assist feedback-directed optimizations. Another prominent application is the static worst-case execution time (WCET) analysis which relies on a safe approximation of loop iteration counts. In this paper, we propose a framework for a static loop analysis based on Abstract Interpretation, a theory of a sound approximation of program semantics. To accelerate the analysis, we preprocess the analyzed code using Program Slicing, a technique that removes statements irrelevant for the loop analysis. In addition, we introduce a novel polytope-based loop evaluation that further significantly reduces the analysis time. The efficiency of our loop analyzer is evaluated on a large number of benchmarks. Results show that 99% of the considered loops could be successfully analyzed in an acceptable amount of time. This study points out that our methodology is best suited for real-world problems.
	Paul Lokuciejewski, Fatih Gedikli, Peter Marwedel and Katharina Morik. Automatic WCET Reduction by Machine Learning Based Heuristics for Function Inlining. In Proceedings of the 3rd Workshop on Statistical and Machine Learning Approaches to Architectures and Compilation (SMART), pages 1-15 Paphos / Cyprus, January 2009 [BibTeX][PDF][Abstract]
	@inproceedings { lokuciejewski:09:smart,
	  author = {Lokuciejewski, Paul and Gedikli, Fatih and Marwedel, Peter and Morik, Katharina},
	  title = {Automatic {WCET} Reduction by Machine Learning Based Heuristics for Function Inlining},
	  booktitle = {Proceedings of the 3rd Workshop on Statistical and Machine Learning Approaches to Architectures and Compilation (SMART)},
	  year = {2009},
	  pages = {1--15},
	  address = {Paphos / Cyprus},
	  month = jan,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2009-smart.pdf},
	  confidential = {n},
	  abstract = {The application of machine learning techniques in compiler frameworks has become a challenging research area. Learning algorithms are exploited for an automatic generation of optimization heuristics which often outperform hand-crafted models. Moreover, these automatic approaches can effectively tune the compilers' heuristics after larger changes in the optimization sequence or they can be leveraged to tailor heuristics towards a particular architectural model. Previous works focussed on a reduction of the average-case performance. In this paper, learning approaches are studied in the context of an automatic minimization of the worst-case execution time (WCET) which is the upper bound of the program's maximum execution time. We show that explicitly taking the new timing model into account allows the construction of compiler heuristics that effectively reduce the WCET. This is demonstrated for the well-known optimization function inlining. Our WCET-driven inlining heuristics based on a fast classifier called random forests outperform standard heuristics by up to 9.1\% on average in terms of the WCET reduction. Moreover, we point out that our classifier is highly accurate with a prediction rate for inlining candidates of 84.0\%.},
	}
	The application of machine learning techniques in compiler frameworks has become a challenging research area. Learning algorithms are exploited for an automatic generation of optimization heuristics which often outperform hand-crafted models. Moreover, these automatic approaches can effectively tune the compilers' heuristics after larger changes in the optimization sequence or they can be leveraged to tailor heuristics towards a particular architectural model. Previous works focussed on a reduction of the average-case performance. In this paper, learning approaches are studied in the context of an automatic minimization of the worst-case execution time (WCET) which is the upper bound of the program's maximum execution time. We show that explicitly taking the new timing model into account allows the construction of compiler heuristics that effectively reduce the WCET. This is demonstrated for the well-known optimization function inlining. Our WCET-driven inlining heuristics based on a fast classifier called random forests outperform standard heuristics by up to 9.1% on average in terms of the WCET reduction. Moreover, we point out that our classifier is highly accurate with a prediction rate for inlining candidates of 84.0%.

2008

	Peter Marwedel and Heiko Falk (presentation). Memory-architecture aware compilation. In The ARTIST2 Summer School 2008 in Europe Autrans / France, 2008 [BibTeX][PDF]
	@inproceedings { marwedel:08:artist2,
	  author = {Marwedel, Peter and Falk, Heiko},
	  title = {Memory-architecture aware compilation},
	  booktitle = {The ARTIST2 Summer School 2008 in Europe},
	  year = {2008},
	  address = {Autrans / France},
	  note = {Presented by Heiko Falk},
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2008-artist2summerschool.pdf},
	  confidential = {n},
	}
	Sascha Plazar, Paul Lokuciejewski and Peter Marwedel. A Retargetable Framework for Multi-objective WCET-aware High-level Compiler Optimizations. In Proceedings of The 29th IEEE Real-Time Systems Symposium (RTSS) WiP, pages 49-52 Barcelona / Spain, December 2008 [BibTeX][PDF][Abstract]
	@inproceedings { plazar:08:rtss,
	  author = {Plazar, Sascha and Lokuciejewski, Paul and Marwedel, Peter},
	  title = {A Retargetable Framework for Multi-objective {WCET}-aware High-level Compiler Optimizations},
	  booktitle = {Proceedings of The 29th IEEE Real-Time Systems Symposium (RTSS) WiP},
	  year = {2008},
	  pages = {49--52},
	  address = {Barcelona / Spain},
	  month = dec,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2008-rtss.pdf},
	  confidential = {n},
	  abstract = {The worst-case execution time (WCET) is a key parameter in the domain of real-time systems and its automatic compiler-based minimization becomes a challenging research area. Although today's embedded system applications are written in a high-level language, most published works consider low-level optimizations which complicate their portability to other processors. In this work, we present a framework for the development of novel WCET-driven high-level optimizations. Our WCET-aware compiler framework provides a multi-target support as well as an integration of different non-functional objectives. It enables multi-objective optimizations, thus opens avenues to a state-of-the-art design of predictable and efficient systems. In addition, the multi-target support provides the opportunity to efficiently evaluate the impact of different compiler optimizations on various processors.},
	}
	The worst-case execution time (WCET) is a key parameter in the domain of real-time systems and its automatic compiler-based minimization becomes a challenging research area.
Although today's embedded system applications are written in a high-level language, most published works consider low-level optimizations which complicate their portability to other processors. In this work, we present a framework for the development of novel WCET-driven high-level optimizations. Our WCET-aware compiler framework provides a multi-target support as well as an integration of different non-functional objectives. It enables multi-objective optimizations, thus opens avenues to a state-of-the-art design of predictable and efficient systems. In addition, the multi-target support provides the opportunity to efficiently evaluate the impact of different compiler optimizations on various processors.
	Fatih Gedikli. Transformation und Ausnutzung von WCET-Informationen für High-Level Optimierungen. Master's Thesis, September 2008 [BibTeX][PDF]
	@mastersthesis { Gedikli2008,
	  title = {Transformation und Ausnutzung von {WCET}-Informationen f{\"u}r High-Level Optimierungen},
	  author = {Gedikli, Fatih},
	  school = {Technische Universit{\"a}t Dortmund},
	  year = {2008},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/gedikli.pdf},
	  confidential = {n},
	  adviser = {Paul Lokuciejewski},
	}
	Florian Schmoll. ILP-basierte Registerallokation unter Ausnutzung von WCET-Daten. Master's Thesis, September 2008 [BibTeX][PDF]
	@mastersthesis { Schmoll2008,
	  title = {{ILP}-basierte Registerallokation unter Ausnutzung von {WCET}-Daten},
	  author = {Schmoll, Florian},
	  school = {Technische Universit{\"a}t Dortmund},
	  year = {2008},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/schmoll.pdf},
	  confidential = {n},
	  adviser = {Heiko Falk},
	}
	Jan Christopher Kleinsorge. WCET-centric code allocation for scratchpad memories. Master's Thesis, September 2008 [BibTeX][PDF]
	@mastersthesis { Kleinsorge2008,
	  title = {{WCET}-centric code allocation for scratchpad memories},
	  author = {Kleinsorge, Jan Christopher},
	  school = {Technische Universit{\"a}t Dortmund},
	  year = {2008},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/kleinsorge.pdf},
	  confidential = {n},
	  adviser = {Heiko Falk},
	}
	Niklas Holsti, Jan Gustafsson, Guillem Bernat, Clément Ballabriga, Armelle Bonenfant, Roman Bourgade, Hugues Cassé, Daniel Cordes, Albrecht Kadlec, Raimund Kirner, Jens Knoop, Paul Lokuciejewski, Nicholas Merriam et al. WCET Tool Challenge 2008: Report. In International Workshop on Worst-Case Execution Time Analysis (WCET) Prague / Czech Republic, September 2008 [BibTeX][PDF][Abstract]
	@inproceedings { holsti:08:wcet,
	  author = {Holsti, Niklas and Gustafsson, Jan and Bernat, Guillem and Ballabriga, Cl{\'e}ment and Bonenfant, Armelle and Bourgade, Roman and Cass{\'e}, Hugues and Cordes, Daniel and Kadlec, Albrecht and Kirner, Raimund and Knoop, Jens and Lokuciejewski, Paul and Merriam, Nicholas and others},
	  title = {{WCET} Tool Challenge 2008: Report},
	  booktitle = {International Workshop on Worst-Case Execution Time Analysis (WCET)},
	  year = {2008},
	  address = {Prague / Czech Republic},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2008-wcet.pdf},
	  confidential = {n},
	  abstract = {Following the successful WCET Tool Challenge in 2006, the second event in this series was organized in 2008, again with support from the ARTIST2 Network of Excellence. The WCET Tool Challenge 2008 (WCC'08) provides benchmark programs and poses a number of ``analysis problems'' about the dynamic, runtime properties of these programs. The participants are challenged to solve these problems with their program-analysis tools. Two kinds of problems are defined: WCET problems, which ask for bounds on the execution time of chosen parts (subprograms) of the benchmarks, under given constraints on input data; and flow-analysis problems, which ask for bounds on the number of times certain parts of the benchmark can be executed, again under some constraints. We describe the organization of WCC'08, the benchmark programs, the participating tools, and the general results, successes, and failures. Most participants found WCC'08 to be a useful test of their tools. Unlike the 2006 Challenge, the WCC'08 participants include several tools for the same target (ARM7, LPC2138), and tools that combine measurements and static analysis, as well as pure static-analysis tools.},
	}
	Following the successful WCET Tool Challenge in 2006, the second event in this series was organized in 2008, again with support from the ARTIST2 Network of Excellence. The WCET Tool Challenge 2008 (WCC'08) provides benchmark programs and poses a number of "analysis problems" about the dynamic, runtime properties of these programs. The participants are challenged to solve these problems with their program-analysis tools. Two kinds of problems are defined: WCET problems, which ask for bounds on the execution time of chosen parts (subprograms) of the benchmarks, under given constraints on input data; and flow-analysis problems, which ask for bounds on the number of times certain parts of the benchmark can be executed, again under some constraints. We describe the organization of WCC'08, the benchmark programs, the participating tools, and the general results, successes, and failures. Most participants found WCC'08 to be a useful test of their tools. Unlike the 2006 Challenge, the WCC'08 participants include several tools for the same target (ARM7, LPC2138), and tools that combine measurements and static analysis, as well as pure static-analysis tools.
	Felix Rotthowe. Scratchpad-Allokation von Daten zur Worst-Case Execution Time Minimierung. Master's Thesis, August 2008 [BibTeX][PDF]
	@mastersthesis { Rotthowe2008,
	  title = {Scratchpad-Allokation von Daten zur Worst-Case Execution Time Minimierung},
	  author = {Rotthowe, Felix},
	  school = {Technische Universit{\"a}t Dortmund},
	  year = {2008},
	  month = aug,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/rotthowe.pdf},
	  confidential = {n},
	  adviser = {Heiko Falk},
	}
	Paul Lokuciejewski, Heiko Falk and Peter Marwedel. WCET-driven Cache-based Procedure Positioning Optimizations. In The 20th Euromicro Conference on Real-Time Systems (ECRTS), pages 321-330 Prague / Czech Republic, July 2008 [BibTeX][PDF][Abstract]
	@inproceedings { loku:08:ecrts,
	  author = {Lokuciejewski, Paul and Falk, Heiko and Marwedel, Peter},
	  title = {{WCET}-driven Cache-based Procedure Positioning Optimizations},
	  booktitle = {The 20th Euromicro Conference on Real-Time Systems (ECRTS)},
	  year = {2008},
	  pages = {321--330},
	  address = {Prague / Czech Republic},
	  month = jul,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2008-ecrts.pdf},
	  confidential = {n},
	  abstract = {Procedure Positioning is a well known compiler optimization aiming at the improvement of the instruction cache behavior. A contiguous mapping of procedures calling each other frequently in the memory avoids overlapping of cache lines and thus decreases the number of cache conflict misses. In standard literature, these positioning techniques are guided by execution profile data and focus on an improved average-case performance. We present two novel positioning optimizations driven by worst-case execution time (WCET) information to effectively minimize the program's worst-case behavior. WCET reductions by 10\% on average are achieved. Moreover, a combination of positioning and the WCET-driven Procedure Cloning optimization is presented improving the WCET analysis by 36\% on average.},
	}
	Procedure Positioning is a well known compiler optimization aiming at the improvement of the instruction cache behavior. A contiguous mapping of procedures calling each other frequently in the memory avoids overlapping of cache lines and thus decreases the number of cache conflict misses. In standard literature, these positioning techniques are guided by execution profile data and focus on an improved average-case performance.
We present two novel positioning optimizations driven by worst-case execution time (WCET) information to effectively minimize the program's worst-case behavior. WCET reductions by 10% on average are achieved. Moreover, a combination of positioning and the WCET-driven Procedure Cloning optimization is presented improving the WCET analysis by 36% on average.
	Daniel Cordes. Schleifenanalyse für einen WCET-optimierenden Compiler basierend auf Abstrakter Interpretation und Polylib. Master's Thesis, April 2008 [BibTeX][PDF]
	@mastersthesis { Cordes2008,
	  title = {Schleifenanalyse f{\"u}r einen {WCET}-optimierenden Compiler basierend auf Abstrakter Interpretation und Polylib},
	  author = {Cordes, Daniel},
	  school = {Technische Universit{\"a}t Dortmund},
	  year = {2008},
	  month = apr,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/cordes.pdf},
	  confidential = {n},
	  adviser = {Paul Lokuciejewski},
	}
	Paul Lokuciejewski, Heiko Falk, Peter Marwedel and Henrik Theiling. WCET-Driven, Code-Size Critical Procedure Cloning. In The 11th International Workshop on Software & Compilers for Embedded Systems (SCOPES), pages 21-30 Munich / Germany, March 2008 [BibTeX][PDF][Abstract]
	@inproceedings { loku:08:scopes,
	  author = {Lokuciejewski, Paul and Falk, Heiko and Marwedel, Peter and Theiling, Henrik},
	  title = {{WCET}-Driven, Code-Size Critical Procedure Cloning},
	  booktitle = {The 11th International Workshop on Software \& Compilers for Embedded Systems (SCOPES)},
	  year = {2008},
	  pages = {21--30},
	  address = {Munich / Germany},
	  month = mar,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2008-scopes.pdf},
	  confidential = {n},
	  abstract = {In the domain of the worst-case execution time (WCET) analysis, loops are an inherent source of unpredictability and loss of precision since the determination of tight and safe information on the number of loop iterations is a difficult task. In particular, data-dependent loops whose iteration counts depend on function parameters can not be precisely handled by a timing analysis. Procedure Cloning can be exploited to make these loops explicit within the source code allowing a highly precise WCET analysis. In this paper we extend the standard Procedure Cloning optimization by WCET-aware concepts with the objective to improve the tightness of the WCET estimation. Our novel approach is driven by WCET information which successively eliminates code structures leading to overestimated timing results, thus making the code more suitable for the analysis. In addition, the code size increase during the optimization is monitored and large increases are avoided. The effectiveness of our optimization is shown by tests on real-world benchmarks. After performing our optimization, the estimated WCET is reduced by up to 64.2\% while the employed code transformations yield an additional code size increase of 22.6\% on average. In contrast, the average-case performance being the original objective of Procedure Cloning showed a slight decrease.},
	}
	In the domain of the worst-case execution time (WCET) analysis, loops are an inherent source of unpredictability and loss of precision since the determination of tight and safe information on the number of loop iterations is a difficult task. In particular, data-dependent loops whose iteration counts depend on function parameters can not be precisely handled by a timing analysis. Procedure Cloning can be exploited to make these loops explicit within the source code allowing a highly precise WCET analysis. In this paper we extend the standard Procedure Cloning optimization by WCET-aware concepts with the objective to improve the tightness of the WCET estimation. Our novel approach is driven by WCET information which successively eliminates code structures leading to overestimated timing results, thus making the code more suitable for the analysis. In addition, the code size increase during the optimization is monitored and large increases are avoided. The effectiveness of our optimization is shown by tests on real-world benchmarks. After performing our optimization, the estimated WCET is reduced by up to 64.2% while the employed code transformations yield an additional code size increase of 22.6% on average. In contrast, the average-case performance being the original objective of Procedure Cloning showed a slight decrease.

2007

	Paul Lokuciejewski. A WCET-Aware Compiler. Design, Concepts and Realization. VDM Verlag 2007 [BibTeX][Abstract]
	@book { loku:07,
	  author = {Lokuciejewski, Paul},
	  title = {A {WCET}-Aware Compiler. Design, Concepts and Realization},
	  publisher = {VDM Verlag},
	  year = {2007},
	  keywords = {wcet},
	  confidential = {n},
	  abstract = {In contrast to general-purpose systems, the correctness of real-time systems not only depends on the logical results of the computation but also on its temporal behavior specified by the worst-case execution time (WCET). Nowadays, software for embedded systems acting as real-time systems is written in high-level languages requiring the presence of a compiler. Modern compiler optimizations aim at reducing the program's average-case execution time completely ignoring the WCET. Tuning an application with respect to its worst-case execution time must be performed manually. To avoid this tedious and error-prone approach, an automation by the compiler is highly desired. This book faces this issue and describes the integration of a timing analyzer into a compiler infrastructure. It presents flexible concepts describing the design and realization of a novel WCET-aware C compiler. Due to the combination of the extensive compiler knowledge on the program and the timing information, this compiler framework is best suited for the development of WCET-aware compiler optimizations. This book is intended for students but also for any reader interested in the construction of real-time compilers.},
	}
	In contrast to general-purpose systems, the correctness of real-time systems not only depends on the logical results of the computation but also on its temporal behavior specified by the worst-case execution time (WCET). Nowadays, software for embedded systems acting as real-time systems is written in high-level languages requiring the presence of a compiler.
Modern compiler optimizations aim at reducing the program's average-case execution time completely ignoring the WCET. Tuning an application with respect to its worst-case execution time must be performed manually. To avoid this tedious and error-prone approach, an automation by the compiler is highly desired. This book faces this issue and describes the integration of a timing analyzer into a compiler infrastructure. It presents flexible concepts describing the design and realization of a novel WCET-aware C compiler. Due to the combination of the extensive compiler knowledge on the program and the timing information, this compiler framework is best suited for the development of WCET-aware compiler optimizations. This book is intended for students but also for any reader interested in the construction of real-time compilers.
	Daniel Schulte. Flow Facts für WCET-optimierende Compiler - Modellierung und Transformation. VDM Verlag November 2007 [BibTeX][Abstract]
	@book { schulte:07,
	  author = {Schulte, Daniel},
	  title = {Flow Facts f{\"u}r {WCET}-optimierende Compiler - Modellierung und Transformation},
	  publisher = {VDM Verlag},
	  year = {2007},
	  month = nov,
	  keywords = {wcet},
	  confidential = {n},
	  abstract = {Die Korrektheit von Echtzeitsystemen h\"angt nicht nur von logisch richtigen Ergebnissen sondern auch vom Zeitpunkt ihrer Berechnung ab. Analysatoren zur Ermittlung oberer Schranken von Programmlaufzeiten (WCET) sind verf\"ugbar, ben\"otigen aber Flow Facts, die vom Programmierer mit Bezug auf sein optimiertes ausf\"uhrbares Programm formuliert werden m\"ussen, w\"ahrend er selbst jedoch in einer Hochsprache wie C arbeitet. Notwendig wird daher eine manuelle \"Ubersetzung dieser Flow Facts, bei der jede Modifizierung des Programms z.B. durch Optimierungen im Compiler Anpassungen notwendig machen kann. Um diese fehleranf\"allige und aufw\"andige Arbeit zu vermeiden, modelliert der Autor Flow Facts beispielhaft f\"ur einen WCET-optimierenden Compiler und stellt verschiedene Techniken zu deren automatischen Transformation in diesem vor. Der Programmierer wird somit in die Lage versetzt, lediglich Flow Facts auf Ebene seiner Hochsprache ermitteln zu m\"ussen, w\"ahrend der Compiler diese automatisch f\"ur eine WCET-Analyse verf\"ugbar macht. Dieses Buch richtet sich an Entwickler und Forscher im Bereich (sicherheitskritischer) eingebetteter Systeme und an Entwickler von Compilern f\"ur diese Systeme.},
	}
	Die Korrektheit von Echtzeitsystemen hängt nicht nur von logisch richtigen Ergebnissen sondern auch vom Zeitpunkt ihrer Berechnung ab.
Analysatoren zur Ermittlung oberer Schranken von Programmlaufzeiten (WCET) sind verfügbar, benötigen aber Flow Facts, die vom Programmierer mit Bezug auf sein optimiertes ausführbares Programm formuliert werden müssen, während er selbst jedoch in einer Hochsprache wie C arbeitet. Notwendig wird daher eine manuelle Übersetzung dieser Flow Facts, bei der jede Modifizierung des Programms z.B. durch Optimierungen im Compiler Anpassungen notwendig machen kann. Um diese fehleranfällige und aufwändige Arbeit zu vermeiden, modelliert der Autor Flow Facts beispielhaft für einen WCET-optimierenden Compiler und stellt verschiedene Techniken zu deren automatischen Transformation in diesem vor. Der Programmierer wird somit in die Lage versetzt, lediglich Flow Facts auf Ebene seiner Hochsprache ermitteln zu müssen, während der Compiler diese automatisch für eine WCET-Analyse verfügbar macht. Dieses Buch richtet sich an Entwickler und Forscher im Bereich (sicherheitskritischer) eingebetteter Systeme und an Entwickler von Compilern für diese Systeme.
	Heiko Falk, Sascha Plazar and Henrik Theiling. Compile Time Decided Instruction Cache Locking Using Worst-Case Execution Paths. In International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS), pages 143-148 Salzburg/Austria, September 2007 [BibTeX][PDF][Abstract]
	@inproceedings { falk:07:codes_isss,
	  author = {Falk, Heiko and Plazar, Sascha and Theiling, Henrik},
	  title = {Compile Time Decided Instruction Cache Locking Using Worst-Case Execution Paths},
	  booktitle = {International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS)},
	  year = {2007},
	  pages = {143--148},
	  address = {Salzburg/Austria},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2007-codes+isss_1.pdf},
	  confidential = {n},
	  abstract = {Caches are notorious for their unpredictability. It is difficult or even impossible to predict if a memory access results in a definite cache hit or miss. This unpredictability is highly undesired for real-time systems. The Worst-Case Execution Time \emph{(WCET)} of a software running on an embedded processor is one of the most important metrics during real-time system design. The WCET depends to a large extent on the total amount of time spent for memory accesses. In the presence of caches, WCET analysis must always assume a memory access to be a cache miss if it can not be guaranteed that it is a hit. Hence, WCETs for cached systems are imprecise due to the overestimation caused by the caches. Modern caches can be controlled by software. The software can load parts of its code or of its data into the cache and lock the cache afterwards. Cache locking prevents the cache's contents from being flushed by deactivating the replacement. A locked cache is highly predictable and leads to very precise WCET estimates, because the uncertainty caused by the replacement strategy is eliminated completely. This paper presents techniques exploring the lockdown of instruction caches at compile-time to minimize WCETs. In contrast to the current state of the art in the area of cache locking, our techniques explicitly take the worst-case execution path into account during each step of the optimization procedure. This way, we can make sure that always those parts of the code are locked in the I-cache that lead to the highest WCET reduction. The results demonstrate that WCET reductions from 54\% up to 73\% can be achieved with an acceptable amount of CPU seconds required for the optimization and WCET analyses themselves.},
	}
	Caches are notorious for their unpredictability. It is difficult or even impossible to predict if a memory access results in a definite cache hit or miss. This unpredictability is highly undesired for real-time systems. The Worst-Case Execution Time (WCET) of a software running on an embedded processor is one of the most important metrics during real-time system design. The WCET depends to a large extent on the total amount of time spent for memory accesses. In the presence of caches, WCET analysis must always assume a memory access to be a cache miss if it can not be guaranteed that it is a hit. Hence, WCETs for cached systems are imprecise due to the overestimation caused by the caches. Modern caches can be controlled by software. The software can load parts of its code or of its data into the cache and lock the cache afterwards. Cache locking prevents the cache's contents from being flushed by deactivating the replacement. A locked cache is highly predictable and leads to very precise WCET estimates, because the uncertainty caused by the replacement strategy is eliminated completely. This paper presents techniques exploring the lockdown of instruction caches at compile-time to minimize WCETs.
In contrast to the current state of the art in the area of cache locking, our techniques explicitly take the worst-case execution path into account during each step of the optimization procedure. This way, we can make sure that always those parts of the code are locked in the I-cache that lead to the highest WCET reduction. The results demonstrate that WCET reductions from 54% up to 73% can be achieved with an acceptable amount of CPU seconds required for the optimization and WCET analyses themselves.
	Paul Lokuciejewski, Heiko Falk, Martin Schwarzer, Peter Marwedel and Henrik Theiling. Influence of Procedure Cloning on WCET Prediction. In International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS), pages 137-142 Salzburg/Austria, September 2007 [BibTeX][PDF][Abstract]
	@inproceedings { loku:07:codes_isss,
	  author = {Lokuciejewski, Paul and Falk, Heiko and Schwarzer, Martin and Marwedel, Peter and Theiling, Henrik},
	  title = {Influence of Procedure Cloning on {WCET} Prediction},
	  booktitle = {International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS)},
	  year = {2007},
	  pages = {137--142},
	  address = {Salzburg/Austria},
	  month = sep,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2007-codes+isss_2.pdf},
	  confidential = {n},
	  abstract = {For the worst-case execution time \emph{(WCET)} analysis, especially loops are an inherent source of unpredictability and loss of precision. This is caused by the difficulty to obtain safe and tight information on the number of iterations executed by a loop in the worst case. In particular, data-dependent loops whose iteration counts depend on function parameters are extremely difficult to analyze precisely. Procedure cloning helps by making such data-dependent loops explicit within the source code, thus making them accessible for high-precision WCET analyses. This paper presents the effect of procedure cloning applied at the source-code level on worst-case execution time. The optimization generates specialized versions of functions being called with constant values as arguments. In standard literature, it is used to enable further optimizations like constant propagation within functions and to reduce calling overhead. We show that procedure cloning for WCET minimization leads to significant improvements. Reductions of the WCET from 12\% up to 95\% were measured for real-life benchmarks. These results demonstrate that procedure cloning improves analyzability and predictability of real-time applications dramatically. In contrast, average-case performance as the criterion procedure cloning was developed for is reduced by only 3\% at most. Our results also show that these WCET reductions only implied small overhead during WCET analysis.},
	}
	For the worst-case execution time (WCET) analysis, especially loops are an inherent source of unpredictability and loss of precision. This is caused by the difficulty to obtain safe and tight information on the number of iterations executed by a loop in the worst case. In particular, data-dependent loops whose iteration counts depend on function parameters are extremely difficult to analyze precisely. Procedure cloning helps by making such data-dependent loops explicit within the source code, thus making them accessible for high-precision WCET analyses. This paper presents the effect of procedure cloning applied at the source-code level on worst-case execution time. The optimization generates specialized versions of functions being called with constant values as arguments. In standard literature, it is used to enable further optimizations like constant propagation within functions and to reduce calling overhead. We show that procedure cloning for WCET minimization leads to significant improvements. Reductions of the WCET from 12% up to 95% were measured for real-life benchmarks. These results demonstrate that procedure cloning improves analyzability and predictability of real-time applications dramatically. In contrast, average-case performance as the criterion procedure cloning was developed for is reduced by only 3% at most. Our results also show that these WCET reductions only implied small overhead during WCET analysis.
	Peter Marwedel, Heiko Falk, Sascha Plazar, Robert Pyka and Lars Wehmeyer. Automatic mapping to tightly-coupled memories and cache locking. In Proceedings of 4th HiPEAC Industrial Workshop on Compilers and Architectures Cambridge, UK, August 2007 [BibTeX][PDF][Link]
	@inproceedings { marwedel:07:hipeac,
	  author = {Marwedel, Peter and Falk, Heiko and Plazar, Sascha and Pyka, Robert and Wehmeyer, Lars},
	  title = {Automatic mapping to tightly-coupled memories and cache locking},
	  booktitle = {Proceedings of 4th HiPEAC Industrial Workshop on Compilers and Architectures},
	  year = {2007},
	  address = {Cambridge, UK},
	  month = aug,
	  url = {http://www.hipeac.net/industry_workshop4},
	  keywords = {wcet},
	  file = {http://www.hipeac.net/system/files?file=session1_3.ppt},
	  confidential = {n},
	}
	Paul Lokuciejewski, Heiko Falk, Martin Schwarzer and Peter Marwedel. Tighter WCET Estimates by Procedure Cloning. In 7th International Workshop on Worst-Case Execution Time Analysis (WCET), pages 27-32 Pisa/Italy, July 2007 [BibTeX][PDF][Abstract]
	@inproceedings { loku:07:wcet,
	  author = {Lokuciejewski, Paul and Falk, Heiko and Schwarzer, Martin and Marwedel, Peter},
	  title = {Tighter {WCET} Estimates by Procedure Cloning},
	  booktitle = {7th International Workshop on Worst-Case Execution Time Analysis (WCET)},
	  year = {2007},
	  pages = {27--32},
	  address = {Pisa/Italy},
	  month = jul,
	  keywords = {wcet},
	  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2007-wcet.pdf},
	  confidential = {n},
	  abstract = {Embedded software spends most of its execution time in loops. To allow a precise static WCET analysis, each loop iteration should, in theory, be represented by an individual calling context. However, due to the enormous analysis times of real-world applications, this approach is not feasible and requires a reduction of the analysis complexity by limiting the number of considered contexts. This restricted timing analysis results in imprecise WCET estimates. In particular, data-dependent loops with iteration counts depending on function parameters cannot be precisely analyzed. In order to reduce the number of contexts that must be implicitly considered, causing an increase in analysis time, we apply the standard compiler optimization \emph{procedure cloning} which improves the program's predictability by making loops explicit and thus allowing a precise annotation of loop bounds. The result is a tight WCET estimation within a reduced analysis time. Our results indicate that reductions of the WCET between 12\% and 95\% were achieved for real-world benchmarks. In contrast, the reduction of the simulated program execution time remained marginal with only 3\%. As will be also shown, this optimization only produces a small overhead for the WCET analysis.},
	}
	Embedded software spends most of its execution time in loops. To allow a precise static WCET analysis, each loop iteration should, in theory, be represented by an individual calling context. However, due to the enormous analysis times of real-world applications, this approach is not feasible and requires a reduction of the analysis complexity by limiting the number of considered contexts. This restricted timing analysis results in imprecise WCET estimates. In particular, data-dependent loops with iteration counts depending on function parameters cannot be precisely analyzed. In order to reduce the number of contexts that must be implicitly considered, causing an increase in analysis time, we apply the standard compiler optimization procedure cloning which improves the program's predictability by making loops explicit and thus allowing a precise annotation of loop bounds. The result is a tight WCET estimation within a reduced analysis time. Our results indicate that reductions of the WCET between 12% and 95% were achieved for real-world benchmarks. In contrast, the reduction of the simulated program execution time remained marginal with only 3%. As will be also shown, this optimization only produces a small overhead for the WCET analysis.
	Sascha Plazar. Algorithmen zur WCET Optimierung - Einfluss von statischem Cache-Locking auf Worst-Case Execution Times. VDM Verlag June 2007 [BibTeX][Abstract]

@book { plazar:07,
  author = {Plazar, Sascha},
  title = {Algorithmen zur {WCET} Optimierung - Einfluss von statischem Cache-Locking auf Worst-Case Execution Times},
  publisher = {VDM Verlag},
  year = {2007},
  month = jun,
  keywords = {wcet},
  confidential = {n},
  abstract = {F\"ur die Entwicklung von Realzeit-Anwendungen ist es notwendig, maximale Ausf\"uhrungszeiten von Programmen zu garantieren. Daher versucht man durch unterschiedliche Ans\"atze, die Worst-Case Laufzeit zu verk\"urzen. Caches sind in Computersystemen inzwischen sehr beliebt, um die durchschnittliche Geschwindigkeit von Programmen zu erh\"ohen. In Realzeit-Systemen mit harten Zeitschranken ist der Einsatz von Caches bisher eher hinderlich, da die Geschwindigkeit nur schwierig und in Multitasking-Systemen z.T. nicht vorhersagbar ist. Der Autor stellt unterschiedliche Algorithmen vor, um die Worst-Case Laufzeit von Programmen mit Hilfe neuerer Cache-Architekturen zu verk\"urzen. Die Algorithmen laden dazu die Teile eines Programms in den Cache, die besonders gro{\ss}e Einsparungen in der Laufzeit erm\"oglichen. Der Inhalt des Caches wird durch Lockdown vor Verdr\"angung gesch\"utzt. Dadurch ist es m\"oglich, die Zugriffsgeschwindigkeit auf Befehle und Daten, die in den Cache gelockt werden, sicher vorherzusagen und somit verbindliche Aussagen \"uber die Worst-Case Laufzeit von Programmen zu treffen.},
}

Für die Entwicklung von Realzeit-Anwendungen ist es notwendig, maximale Ausführungszeiten von Programmen zu garantieren. Daher versucht man durch unterschiedliche Ansätze, die Worst-Case Laufzeit zu verkürzen. Caches sind in Computersystemen inzwischen sehr beliebt, um die durchschnittliche Geschwindigkeit von Programmen zu erhöhen. In Realzeit-Systemen mit harten Zeitschranken ist der Einsatz von Caches bisher eher hinderlich, da die Geschwindigkeit nur schwierig und in Multitasking-Systemen z.T. nicht vorhersagbar ist. Der Autor stellt unterschiedliche Algorithmen vor, um die Worst-Case Laufzeit von Programmen mit Hilfe neuerer Cache-Architekturen zu verkürzen. Die Algorithmen laden dazu die Teile eines Programms in den Cache, die besonders große Einsparungen in der Laufzeit ermöglichen. Der Inhalt des Caches wird durch Lockdown vor Verdrängung geschützt. Dadurch ist es möglich, die Zugriffsgeschwindigkeit auf Befehle und Daten, die in den Cache gelockt werden, sicher vorherzusagen und somit verbindliche Aussagen über die Worst-Case Laufzeit von Programmen zu treffen.
	Daniel Höcker. Effiziente Darstellung und Nutzung von WCET Pfad Analysen. Master's Thesis, May 2007 [BibTeX]

@mastersthesis { Hoecker2007,
  title = {Effiziente Darstellung und Nutzung von {WCET} Pfad Analysen},
  author = {H{\"o}cker, Daniel},
  school = {Technische Universit{\"a}t Dortmund},
  year = {2007},
  month = may,
  keywords = {wcet},
  confidential = {n},
  adviser = {Heiko Falk},
}
	Daniel Schulte. Modellierung und Transformation von Flow Facts in einem WCET-optimierenden Compiler. Master's Thesis, May 2007 [BibTeX][PDF]

@mastersthesis { Schulte2007,
  title = {Modellierung und Transformation von Flow Facts in einem {WCET}-optimierenden Compiler},
  author = {Schulte, Daniel},
  school = {Technische Universit{\"a}t Dortmund},
  year = {2007},
  month = may,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/schulte.pdf},
  confidential = {n},
  adviser = {Heiko Falk},
}
	Sascha Plazar. Einfluss von statischem Cache Locking auf Worst Case Execution Times. Master's Thesis, January 2007 [BibTeX][PDF]

@mastersthesis { Plazar2007,
  title = {Einfluss von statischem Cache Locking auf Worst Case Execution Times},
  author = {Plazar, Sascha},
  school = {Technische Universit{\"a}t Dortmund},
  year = {2007},
  month = jan,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/plazar.pdf},
  confidential = {n},
  adviser = {Heiko Falk},
}
	Martin Schwarzer. Untersuchung des Einflusses von Compiler-Optimierungen auf die Maximale Programm-Laufzeit (WCET). Master's Thesis, January 2007 [BibTeX][PDF]

@mastersthesis { Schwarzer2007,
  title = {Untersuchung des Einflusses von Compiler-Optimierungen auf die Maximale Programm-Laufzeit ({WCET})},
  author = {Schwarzer, Martin},
  school = {Technische Universit{\"a}t Dortmund},
  year = {2007},
  month = jan,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/schwarzer.pdf},
  confidential = {n},
  adviser = {Heiko Falk},
}

2006

	Lars Wehmeyer and Peter Marwedel. Fast, Efficient and Predictable Memory Accesses - Optimization Algorithms for Memory Architecture Aware Compilation. Springer-Verlag 2006 [BibTeX][Link][Abstract]

@book { wehmeyer:06,
  author = {Wehmeyer, Lars and Marwedel, Peter},
  title = {Fast, Efficient and Predictable Memory Accesses - Optimization Algorithms for Memory Architecture Aware Compilation},
  publisher = {Springer-Verlag},
  year = {2006},
  url = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2006-springer.pdf},
  keywords = {wcet},
  confidential = {n},
  abstract = {The memory system is increasingly turning into a bottleneck in the design of embedded systems. The speed improvements of memory systems are lower than the speed improvements of processors, eventually leading to embedded systems whose performance is limited by the memory. This problem is known as the \emph{memory wall} problem. Furthermore, memory systems may consume the largest share of the system's energy budget and may be the source of unpredictable timing behaviour. Hence, the design of the memory system deserves an increasing amount of attention. Fast, Efficient and Predictable Memory Accesses presents techniques for designing fast, energy-efficient and timing predictable memory systems. By using a careful combination of compiler optimizations and architectural improvements, we can achieve more than what would be feasible at one of the levels in isolation. The described optimization algorithms achieve the goals of high performance and low energy consumption. In addition to these benefits, the use of scratchpad memories significantly improves the timing predictability of the entire system, leading to tighter worst case execution time bounds (WCET). The WCET is a relevant design parameter for all timing critical systems. In addition, the book covers algorithms to exploit the power down modes of main memories in SDRAM technology, as well as the execute-in-place feature of Flash memories. The final chapter considers the impact of the register file, which is also part of the memory hierarchy.},
}

The memory system is increasingly turning into a bottleneck in the design of embedded systems. The speed improvements of memory systems are lower than the speed improvements of processors, eventually leading to embedded systems whose performance is limited by the memory. This problem is known as the memory wall problem. Furthermore, memory systems may consume the largest share of the system's energy budget and may be the source of unpredictable timing behaviour. Hence, the design of the memory system deserves an increasing amount of attention. Fast, Efficient and Predictable Memory Accesses presents techniques for designing fast, energy-efficient and timing predictable memory systems. By using a careful combination of compiler optimizations and architectural improvements, we can achieve more than what would be feasible at one of the levels in isolation. The described optimization algorithms achieve the goals of high performance and low energy consumption. In addition to these benefits, the use of scratchpad memories significantly improves the timing predictability of the entire system, leading to tighter worst case execution time bounds (WCET). The WCET is a relevant design parameter for all timing critical systems. In addition, the book covers algorithms to exploit the power down modes of main memories in SDRAM technology, as well as the execute-in-place feature of Flash memories. The final chapter considers the impact of the register file, which is also part of the memory hierarchy.
	Heiko Falk and Martin Schwarzer. Loop Nest Splitting for WCET-Optimization and Predictability Improvement. In 4th IEEE Workshop on Embedded Systems for Real-Time Multimedia (ESTIMedia), pages 115-120 Seoul/Korea, October 2006 [BibTeX][PDF][Abstract]

@inproceedings { falk:06:estimedia2,
  author = {Falk, Heiko and Schwarzer, Martin},
  title = {Loop Nest Splitting for {WCET}-Optimization and Predictability Improvement},
  booktitle = {4th IEEE Workshop on Embedded Systems for Real-Time Multimedia (ESTIMedia)},
  year = {2006},
  pages = {115--120},
  address = {Seoul/Korea},
  month = oct,
  keywords = {sco, wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2006-estimedia_2.pdf},
  confidential = {n},
  abstract = {This paper presents the effect of the loop nest splitting source code optimization on worst-case execution time \emph{(WCET)}. Loop nest splitting minimizes the number of executed if-statements in loop nests of multimedia applications. It identifies iterations where all if-statements are satisfied and splits the loop nest such that if-statements are not executed at all for large parts of the loop nest's iteration space. Especially loops and if-statements are an inherent source of unpredictability and loss of precision for WCET analysis. This is caused by the difficulty to obtain safe and tight worst-case estimates of an application's high-level control flow. In addition, assembly-level control flow redirections reduce predictability even more due to complex processor pipelines and branch prediction units. Loop nest splitting bases on precise mathematical models combined with genetic algorithms. On the one hand, these techniques achieve a significantly more homogeneous control flow structure. On the other hand, the precision of our analyses enables to generate very accurate high-level flow facts for loops and if-statements. The application of our implemented algorithms to three real-life benchmarks leads to average speed-ups by 25.0\% - 30.1\%, while WCET is reduced by 34.0\% - 36.3\%.},
}

This paper presents the effect of the loop nest splitting source code optimization on worst-case execution time (WCET). Loop nest splitting minimizes the number of executed if-statements in loop nests of multimedia applications. It identifies iterations where all if-statements are satisfied and splits the loop nest such that if-statements are not executed at all for large parts of the loop nest's iteration space. Especially loops and if-statements are an inherent source of unpredictability and loss of precision for WCET analysis. This is caused by the difficulty to obtain safe and tight worst-case estimates of an application's high-level control flow. In addition, assembly-level control flow redirections reduce predictability even more due to complex processor pipelines and branch prediction units. Loop nest splitting bases on precise mathematical models combined with genetic algorithms. On the one hand, these techniques achieve a significantly more homogeneous control flow structure. On the other hand, the precision of our analyses enables to generate very accurate high-level flow facts for loops and if-statements. The application of our implemented algorithms to three real-life benchmarks leads to average speed-ups by 25.0% - 30.1%, while WCET is reduced by 34.0% - 36.3%.
	Heiko Falk, Paul Lokuciejewski and Henrik Theiling. Design of a WCET-Aware C Compiler. In 4th IEEE Workshop on Embedded Systems for Real-Time Multimedia (ESTIMedia), pages 121-126 Seoul/Korea, October 2006 [BibTeX][PDF][Abstract]

@inproceedings { falk:06:estimedia3,
  author = {Falk, Heiko and Lokuciejewski, Paul and Theiling, Henrik},
  title = {Design of a {WCET}-Aware {C} Compiler},
  booktitle = {4th IEEE Workshop on Embedded Systems for Real-Time Multimedia (ESTIMedia)},
  year = {2006},
  pages = {121--126},
  address = {Seoul/Korea},
  month = oct,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2006-estimedia_3.pdf},
  confidential = {n},
  abstract = {This paper presents techniques to integrate worst-case execution time \emph{(WCET)} data into a compiler. Currently, a tight integration of WCET into compilers is strongly desired, but only some ad-hoc approaches were reported currently. Previous work mainly used self-written WCET estimators with limited functionality and preciseness during compilation. A very tight integration of a high quality WCET analyzer into a compiler was not yet achieved. This work is the first to present such a tight coupling between a compiler and the WCET analyzer aiT. This is done by automatically translating the assembly-like contents of the compiler's low-level format \emph{(LLIR)} to aiT's exchange format CRL2. Additionally, the results produced by aiT are automatically collected and re-imported into the compiler infrastructure. The work described in this paper is smoothly integrated into a C compiler for the Infineon TriCore processor. It opens up new possibilities for the design of WCET-aware optimizations in the future. The concepts for extending the compiler structure are kept very general so that they are not limited to WCET information. Rather, it is possible to use our concepts also for multi-objective optimization of e.g. best-case execution time \emph{(BCET)} or energy dissipation.},
}

This paper presents techniques to integrate worst-case execution time (WCET) data into a compiler. Currently, a tight integration of WCET into compilers is strongly desired, but only some ad-hoc approaches were reported currently. Previous work mainly used self-written WCET estimators with limited functionality and preciseness during compilation. A very tight integration of a high quality WCET analyzer into a compiler was not yet achieved. This work is the first to present such a tight coupling between a compiler and the WCET analyzer aiT. This is done by automatically translating the assembly-like contents of the compiler's low-level format (LLIR) to aiT's exchange format CRL2. Additionally, the results produced by aiT are automatically collected and re-imported into the compiler infrastructure. The work described in this paper is smoothly integrated into a C compiler for the Infineon TriCore processor. It opens up new possibilities for the design of WCET-aware optimizations in the future. The concepts for extending the compiler structure are kept very general so that they are not limited to WCET information. Rather, it is possible to use our concepts also for multi-objective optimization of e.g. best-case execution time (BCET) or energy dissipation.
	Heiko Falk, Paul Lokuciejewski and Henrik Theiling. Design of a WCET-Aware C Compiler. In 6th International Workshop on Worst-Case Execution Time Analysis (WCET) Dresden/Germany, July 2006 [BibTeX][PDF][Abstract]

@inproceedings { falk:06:wcet,
  author = {Falk, Heiko and Lokuciejewski, Paul and Theiling, Henrik},
  title = {Design of a {WCET}-Aware {C} Compiler},
  booktitle = {6th International Workshop on Worst-Case Execution Time Analysis (WCET)},
  year = {2006},
  address = {Dresden/Germany},
  month = jul,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2006-wcet_1.pdf},
  confidential = {n},
  abstract = {This paper presents techniques to tightly integrate worst-case execution time \emph{(WCET)} information into a compiler framework. Currently, a tight integration of WCET information into the compilation process is strongly desired, but only some ad-hoc approaches have been reported currently. Previous publications mainly used self-written WCET estimators with very limited functionality and preciseness during compilation. A very tight integration of a high quality industry-relevant WCET analyzer into a compiler was not yet achieved up to now. This work is the first to present techniques capable of achieving such a tight coupling between a compiler and the WCET analyzer aiT. This is done by automatically translating the assembly-like contents of the compiler's low-level intermediate representation \emph{(LLIR)} to aiT's exchange format CRL2. Additionally, the results produced by the WCET analyzer are automatically collected and re-imported into the compiler infrastructure. The work described in this paper is smoothly integrated into a C compiler environment for the Infineon TriCore processor. It opens up new possibilities for the design of WCET-aware optimizations in the future. The concepts for extending the compiler infrastructure are kept very general so that they are not limited to WCET information. Rather, it is possible to use our structures also for multi-objective optimization of e.g. best-case execution time \emph{(BCET)} or energy dissipation.},
}

This paper presents techniques to tightly integrate worst-case execution time (WCET) information into a compiler framework. Currently, a tight integration of WCET information into the compilation process is strongly desired, but only some ad-hoc approaches have been reported currently. Previous publications mainly used self-written WCET estimators with very limited functionality and preciseness during compilation. A very tight integration of a high quality industry-relevant WCET analyzer into a compiler was not yet achieved up to now. This work is the first to present techniques capable of achieving such a tight coupling between a compiler and the WCET analyzer aiT. This is done by automatically translating the assembly-like contents of the compiler's low-level intermediate representation (LLIR) to aiT's exchange format CRL2. Additionally, the results produced by the WCET analyzer are automatically collected and re-imported into the compiler infrastructure. The work described in this paper is smoothly integrated into a C compiler environment for the Infineon TriCore processor. It opens up new possibilities for the design of WCET-aware optimizations in the future. The concepts for extending the compiler infrastructure are kept very general so that they are not limited to WCET information. Rather, it is possible to use our structures also for multi-objective optimization of e.g. best-case execution time (BCET) or energy dissipation.
	Heiko Falk and Martin Schwarzer. Loop Nest Splitting for WCET-Optimization and Predictability Improvement. In 6th International Workshop on Worst-Case Execution Time Analysis (WCET) Dresden/Germany, July 2006 [BibTeX][PDF][Abstract]

@inproceedings { falk:06:wcet2,
  author = {Falk, Heiko and Schwarzer, Martin},
  title = {Loop Nest Splitting for {WCET}-Optimization and Predictability Improvement},
  booktitle = {6th International Workshop on Worst-Case Execution Time Analysis (WCET)},
  year = {2006},
  address = {Dresden/Germany},
  month = jul,
  keywords = {sco, wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2006-wcet_2.pdf},
  confidential = {n},
  abstract = {This paper presents the influence of the loop nest splitting source code optimization on the worst-case execution time \emph{(WCET)}. Loop nest splitting minimizes the number of executed if-statements in loop nests of embedded multimedia applications. It identifies iterations of a loop nest where all if-statements are satisfied and splits the loop nest such that if-statements are not executed at all for large parts of the loop nest's iteration space. Especially loops and if-statements of high-level languages are an inherent source of unpredictability and loss of precision for WCET analysis. This is caused by the fact that it is difficult to obtain safe and tight worst-case estimates of an application's flow of control through these high-level constructs. In addition, the corresponding control flow redirections expressed at the assembly level reduce predictability even more due to the complex pipeline and branch prediction behavior of modern embedded processors. The analysis techniques for loop nest splitting are based on precise mathematical models combined with genetic algorithms. On the one hand, these techniques achieve a significantly more homogeneous structure of the control flow. On the other hand, the precision of our analyses leads to the generation of very accurate high-level flow facts for loops and if-statements. The application of our implemented algorithms to three real-life multimedia benchmarks leads to average speed-ups by 25.0\% - 30.1\%, while WCET is reduced between 34.0\% and 36.3\%.},
}

This paper presents the influence of the loop nest splitting source code optimization on the worst-case execution time (WCET). Loop nest splitting minimizes the number of executed if-statements in loop nests of embedded multimedia applications. It identifies iterations of a loop nest where all if-statements are satisfied and splits the loop nest such that if-statements are not executed at all for large parts of the loop nest's iteration space. Especially loops and if-statements of high-level languages are an inherent source of unpredictability and loss of precision for WCET analysis. This is caused by the fact that it is difficult to obtain safe and tight worst-case estimates of an application's flow of control through these high-level constructs. In addition, the corresponding control flow redirections expressed at the assembly level reduce predictability even more due to the complex pipeline and branch prediction behavior of modern embedded processors. The analysis techniques for loop nest splitting are based on precise mathematical models combined with genetic algorithms. On the one hand, these techniques achieve a significantly more homogeneous structure of the control flow. On the other hand, the precision of our analyses leads to the generation of very accurate high-level flow facts for loops and if-statements. The application of our implemented algorithms to three real-life multimedia benchmarks leads to average speed-ups by 25.0% - 30.1%, while WCET is reduced between 34.0% and 36.3%.

2005

Paul Lokuciejewski.
Design and Realization of Concepts for WCET Compiler Optimization.
Master's Thesis, December 2005
[BibTeX]

@mastersthesis { Lokuciejewski2005,
  title = {Design and Realization of Concepts for {WCET} Compiler Optimization},
  author = {Lokuciejewski, Paul},
  school = {Technische Universit{\"a}t Dortmund},
  year = {2005},
  month = dec,
  keywords = {wcet},
  confidential = {n},
  adviser = {Heiko Falk},
}

Lars Wehmeyer and Peter Marwedel.
Influence of Memory Hierarchies on Predictability for Time Constrained Embedded Software.
In Design Automation and Test in Europe (DATE)
Munich, Germany, March 2005
[BibTeX][PDF][Abstract]

@inproceedings { wehm:05:date,
  author = {Wehmeyer, Lars and Marwedel, Peter},
  title = {Influence of Memory Hierarchies on Predictability for Time Constrained Embedded Software},
  booktitle = {Design Automation and Test in Europe (DATE)},
  year = {2005},
  address = {Munich, Germany},
  month = mar,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2005-date.pdf},
  confidential = {n},
  abstract = {Safety-critical embedded systems having to meet real-time constraints are expected to be highly predictable in order to guarantee at design time that certain timing deadlines will always be met. This requirement usually prevents designers from utilizing caches due to their highly dynamic, thus hardly predictable behavior. The integration of scratchpad memories represents an alternative approach which allows the system to benefit from a performance gain comparable to that of caches while at the same time maintaining predictability. In this work, we compare the impact of scratchpad memories and caches on worst case execution time (WCET) analysis results. We show that caches, despite requiring complex techniques, can have a negative impact on the predicted WCET, while the estimated WCET for scratchpad memories scales with the achieved performance gain at no extra analysis cost.},
}

2004

Lars Wehmeyer and Peter Marwedel.
Influence of Onchip Scratchpad Memories on WCET prediction.
In Proceedings of the 4th International Workshop on Worst-Case Execution Time (WCET) Analysis
Catania, Sicily, Italy, June 2004
[BibTeX][PDF][Abstract]

@inproceedings { wehm:04:wcet,
  author = {Wehmeyer, Lars and Marwedel, Peter},
  title = {Influence of Onchip Scratchpad Memories on {WCET} prediction},
  booktitle = {Proceedings of the 4th International Workshop on Worst-Case Execution Time (WCET) Analysis},
  year = {2004},
  address = {Catania, Sicily, Italy},
  month = jun,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2004-WCET.pdf},
  confidential = {n},
  abstract = {In contrast to standard PCs and many high-performance computer systems, systems that have to meet real-time requirements usually do not feature caches, since caches primarily improve the average case performance, whereas their impact on WCET is generally hard to predict. Especially in embedded systems, scratchpad memories have become popular. Since these small, fast memories can be controlled by the programmer or the compiler, their behavior is perfectly predictable. In this paper, we study for the first time the impact of scratchpad memories on worst case execution time (WCET) prediction. Our results indicate that scratchpads can significantly improve WCET at no extra analysis cost.},
}

Peter Marwedel, Lars Wehmeyer, Manish Verma, Stefan Steinke and Urs Helmig.
Fast, predictable and low energy memory references through architecture-aware compilation.
In ASPDAC, pages 4-11
January 2004
[BibTeX][PDF][Abstract]

@inproceedings { marw:04:aspdac,
  author = {Marwedel, Peter and Wehmeyer, Lars and Verma, Manish and Steinke, Stefan and Helmig, Urs},
  title = {Fast, predictable and low energy memory references through architecture-aware compilation},
  booktitle = {ASPDAC},
  year = {2004},
  pages = {4--11},
  month = jan,
  keywords = {wcet},
  file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2004-aspdac-spm.pdf},
  confidential = {n},
  abstract = {The design of future high-performance embedded systems is hampered by two problems: First, the required hardware needs more energy than is available from batteries. Second, current cache-based approaches for bridging the increasing speed gap between processors and memories cannot guarantee predictable real-time behavior. A contribution to solving both problems is made in this paper which describes a comprehensive set of algorithms that can be applied at design time in order to maximally exploit scratch pad memories (SPMs). We show that both the energy consumption as well as the computed worst case execution time (WCET) can be reduced by up to 80\% and 48\%, respectively, by establishing a strong link between the memory architecture and the compiler.},
}

Sprungmarken

Servicenavigation

Search >

Hauptnavigation

You are here:

Bereichsnavigation

Hauptinhalt

LS12 Publications on WCET-aware Compilation