| Hendrik Borghorst. WCET bewusste Scratchpad-Speicherallokation von Code und Daten für Multi-Task Systeme. Bachelor Thesis, 2011 [BibTeX][PDF]@bachelorthesis { Borghorst:2011,
title = {WCET bewusste Scratchpad-Speicherallokation von Code und Daten f\"ur Multi-Task Systeme},
author = {Borghorst, Hendrik},
school = {TU Dortmund},
year = {2011},
keywords = {wcet optimizations},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/borghorst.pdf},
confidential = {n},
adviser = {Sascha Plazar},
} |
| Heiko Falk and Helena Kotthaus. WCET-driven Cache-aware Code Positioning. In Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES), pages 145-154 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract]@inproceedings { falk:11:cases,
author = {Falk, Heiko and Kotthaus, Helena},
title = {WCET-driven Cache-aware Code Positioning},
booktitle = {Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES)},
year = {2011},
pages = {145-154},
address = {Taipei, Taiwan},
month = {oct},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-cases_1.pdf},
confidential = {n},
abstract = {Code positioning is a well-known compiler optimization aiming at the improvement of the instruction cache behavior. A contiguous mapping of code fragments in memory avoids overlapping of cache sets and thus decreases the number of cache conflict misses.
We present a novel cache-aware code positioning optimization driven by worst-case execution time (WCET) information. For this purpose, we introduce a formal cache model based on a conflict graph which is able to capture a broad class of cache architectures. This cache model is combined with a formal WCET timing model, resulting in a cache conflict graph weighted with WCET data. This conflict graph is then exploited by heuristics for code positioning of both basic blocks and entire functions.
Code positioning is able to decrease the accumulated cache misses for a total of 18 real-life benchmarks by 15.5% on average for an automotive processor featuring a 2-way set-associative cache. These cache miss reductions translate to average WCET reductions by 6.1%. For direct-mapped caches, even larger savings of 18.8% (cache misses) and 9.0% (WCET) were achieved.
},
} Code positioning is a well-known compiler optimization aiming at the improvement of the instruction cache behavior. A contiguous mapping of code fragments in memory avoids overlapping of cache sets and thus decreases the number of cache conflict misses.
We present a novel cache-aware code positioning optimization driven by worst-case execution time (WCET) information. For this purpose, we introduce a formal cache model based on a conflict graph which is able to capture a broad class of cache architectures. This cache model is combined with a formal WCET timing model, resulting in a cache conflict graph weighted with WCET data. This conflict graph is then exploited by heuristics for code positioning of both basic blocks and entire functions.
Code positioning is able to decrease the accumulated cache misses for a total of 18 real-life benchmarks by 15.5% on average for an automotive processor featuring a 2-way set-associative cache. These cache miss reductions translate to average WCET reductions by 6.1%. For direct-mapped caches, even larger savings of 18.8% (cache misses) and 9.0% (WCET) were achieved.
|
| Sascha Plazar, Jan C. Kleinsorge, Heiko Falk and Peter Marwedel. WCET-driven Branch Prediction aware Code Positioning. In Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES), pages 165-174 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract]@inproceedings { plazar:11:cases,
author = {Plazar, Sascha and Kleinsorge, Jan C. and Falk, Heiko and Marwedel, Peter},
title = {WCET-driven Branch Prediction aware Code Positioning},
booktitle = {Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems (CASES)},
year = {2011},
pages = {165-174},
address = {Taipei, Taiwan},
month = {oct},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-cases_2.pdf},
confidential = {n},
abstract = {In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches, branch prediction units and speculative execution. This step was necessary in order to fulfill increasing requirements on computational power. Static analysis techniques considering such speculative units had to be developed to allow the estimation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems.
In this paper, we propose a WCET-driven branch prediction aware optimization which reorders basic blocks of a function in order to reduce the amount of jump instructions and mispredicted branches. We employed a genetic algorithm which rearranges basic blocks in order to decrease the WCET of a program. This enables a first estimation of the possible optimization potential at the cost of high optimization runtimes. To avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes branch prediction effects into account. This algorithm enables short optimization runtimes at slightly decreased optimization results. In a case study, the genetic algorithm is able to reduce the benchmarks’ WCET by up to 24.7% whereas our ILP-based approach is able to decrease the WCET by up to 20.0%.
},
} In the past decades, embedded system designers moved from simple, predictable system designs towards complex systems equipped with caches, branch prediction units and speculative execution. This step was necessary in order to fulfill increasing requirements on computational power. Static analysis techniques considering such speculative units had to be developed to allow the estimation of an upper bound of the execution time of a program. This bound is called worst-case execution time (WCET). Its knowledge is crucial to verify whether hard real-time systems satisfy their timing constraints, and the WCET is a key parameter for the design of embedded systems.
In this paper, we propose a WCET-driven branch prediction aware optimization which reorders basic blocks of a function in order to reduce the amount of jump instructions and mispredicted branches. We employed a genetic algorithm which rearranges basic blocks in order to decrease the WCET of a program. This enables a first estimation of the possible optimization potential at the cost of high optimization runtimes. To avoid time consuming repetitive WCET analyses, we developed a new algorithm employing integer-linear programming (ILP). The ILP models the worst-case execution path (WCEP) of a program and takes branch prediction effects into account. This algorithm enables short optimization runtimes at slightly decreased optimization results. In a case study, the genetic algorithm is able to reduce the benchmarks’ WCET by up to 24.7% whereas our ILP-based approach is able to decrease the WCET by up to 20.0%.
|
| Jan C. Kleinsorge, Heiko Falk and Peter Marwedel. A Synergetic Approach To Accurate Analysis Of Cache-Related Preemption Delay. In Proceedings of the International Conference on Embedded Software (EMSOFT), pages 329-338 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract]@inproceedings { kleinsorge:11:emsoft,
author = {Kleinsorge, Jan C. and Falk, Heiko and Marwedel, Peter},
title = {A Synergetic Approach To Accurate Analysis Of Cache-Related Preemption Delay},
booktitle = {Proceedings of the International Conference on Embedded Software (EMSOFT)},
year = {2011},
pages = {329-338},
address = {Taipei, Taiwan},
month = {oct},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-emsoft.pdf},
confidential = {n},
abstract = {The worst-case execution time (WCET) of a task denotes the largest possible execution time for all possible inputs and thus, hardware states. For non-preemptive multitask scheduling, techniques for the static estimation of safe upper bounds have been subject to industrial practice for years. For preemptive scheduling however, the isolated analysis of tasks becomes imprecise as interferences among tasks cannot be considered with sufficient precision. For such scenarios, the cache-related preemption delay (CRPD) denotes a key metric as it reflects the eects of preemptions on the execution behavior of a single task. Until recently, proposals for CRPD analyses were often limited to direct mapped caches or comparably imprecise for k-way set-associative caches.
In this paper, we propose how the current best techniques for CRPD analysis, which have only been proposed separately and for dierent aspects of the analysis can be brought together to construct an efficient CRPD analysis with unique properties. Moreover, along the construction, we propose several different enhancements to the methods employed. We also exploit that in a complete approach, analysis steps are synergetic and can be combined into a single analysis pass solving all formerly separate steps at once. In addition, we argue that it is often sufficient to carry out the combined analysis on basic block bounds, which further lowers the overall complexity. The result is a proposal for a fast CRPD analysis of very high accuracy.
},
} The worst-case execution time (WCET) of a task denotes the largest possible execution time for all possible inputs and thus, hardware states. For non-preemptive multitask scheduling, techniques for the static estimation of safe upper bounds have been subject to industrial practice for years. For preemptive scheduling however, the isolated analysis of tasks becomes imprecise as interferences among tasks cannot be considered with sufficient precision. For such scenarios, the cache-related preemption delay (CRPD) denotes a key metric as it reflects the eects of preemptions on the execution behavior of a single task. Until recently, proposals for CRPD analyses were often limited to direct mapped caches or comparably imprecise for k-way set-associative caches.
In this paper, we propose how the current best techniques for CRPD analysis, which have only been proposed separately and for dierent aspects of the analysis can be brought together to construct an efficient CRPD analysis with unique properties. Moreover, along the construction, we propose several different enhancements to the methods employed. We also exploit that in a complete approach, analysis steps are synergetic and can be combined into a single analysis pass solving all formerly separate steps at once. In addition, we argue that it is often sufficient to carry out the combined analysis on basic block bounds, which further lowers the overall complexity. The result is a proposal for a fast CRPD analysis of very high accuracy.
|
| Samarjit Chakraborty, Marco Di Natale, Heiko Falk, Martin Lukasiewyzc and Frank Slomka. Timing and Schedulability Analysis for Distributed Automotive Control Applications. In Tutorial at the International Conference on Embedded Software (EMSOFT), pages 349-350 Taipei, Taiwan, October 2011 [BibTeX][PDF][Abstract]@inproceedings { falk:11:emsoft_tutorial,
author = {Chakraborty, Samarjit and Di Natale, Marco and Falk, Heiko and Lukasiewyzc, Martin and Slomka, Frank},
title = {Timing and Schedulability Analysis for Distributed Automotive Control Applications},
booktitle = {Tutorial at the International Conference on Embedded Software (EMSOFT)},
year = {2011},
pages = {349-350},
address = {Taipei, Taiwan},
month = {oct},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-emsoft_tutorial.pdf},
confidential = {n},
abstract = {High-end cars today consist of more than 100 electronic control units (ECUs) that are connected to a set of sensors and actuators and run multiple distributed control applications. The design
ow of such architectures consists of specifying control applications as Simulink/State
flow models, followed by generating code from them and finally mapping such code onto multiple ECUs. In addition, the scheduling policies and parameters on both the ECUs and the communication buses over which they communicate also need to be specified. These policies and parameters are computed from high-level timing and control performance constraints. The proposed tutorial will cover different aspects of this design
flow, with a focus on timing and schedulability problems. After reviewing the basic concepts of worst-case execution time analysis and schedulability analysis, we will discuss the differences between meeting timing constraints (as in classical real-time systems) and meeting control performance constraints (e.g., stability, steady and transient state performance). We will then describe various control performance related schedulability analysis techniques and how they may be tied to model-based software development. Finally, we will discuss various schedule synthesis techniques, both for ECUs as well as for communication protocols like FlexRay, so that control performance constraints specified at the model-level may be satisfied. Throughout the tutorial different commercial as well as academic tools will be discussed and demonstrated.
},
} High-end cars today consist of more than 100 electronic control units (ECUs) that are connected to a set of sensors and actuators and run multiple distributed control applications. The design
ow of such architectures consists of specifying control applications as Simulink/State
flow models, followed by generating code from them and finally mapping such code onto multiple ECUs. In addition, the scheduling policies and parameters on both the ECUs and the communication buses over which they communicate also need to be specified. These policies and parameters are computed from high-level timing and control performance constraints. The proposed tutorial will cover different aspects of this design
flow, with a focus on timing and schedulability problems. After reviewing the basic concepts of worst-case execution time analysis and schedulability analysis, we will discuss the differences between meeting timing constraints (as in classical real-time systems) and meeting control performance constraints (e.g., stability, steady and transient state performance). We will then describe various control performance related schedulability analysis techniques and how they may be tied to model-based software development. Finally, we will discuss various schedule synthesis techniques, both for ECUs as well as for communication protocols like FlexRay, so that control performance constraints specified at the model-level may be satisfied. Throughout the tutorial different commercial as well as academic tools will be discussed and demonstrated.
|
| Jens Möllmer. WCET Optimierung unter Beachtung der Speicherhierarchie. Bachelor Thesis, August 2011 [BibTeX][PDF]@bachelorthesis { Moellmer2011,
title = {WCET Optimierung unter Beachtung der Speicherhierarchie},
author = {M\"ollmer, Jens},
school = {Technische Universit\"at Dortmund},
year = {2011},
month = {August},
keywords = {wcet optimizations},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/moellmer.pdf},
confidential = {n},
adviser = {Sascha Plazar},
} |
| Heiko Falk, Norman Schmitz and Florian Schmoll. WCET-aware Register Allocation based on Integer-Linear Programming. In Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS), pages 13-22 Porto / Portugal, July 2011 [BibTeX][PDF][Abstract]@inproceedings { falk:11:ecrts,
author = {Falk, Heiko and Schmitz, Norman and Schmoll, Florian},
title = {WCET-aware Register Allocation based on Integer-Linear Programming},
booktitle = {Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS)},
year = {2011},
pages = {13-22},
address = {Porto / Portugal},
month = {jul},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-ecrts_2.pdf},
confidential = {n},
abstract = {Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's runtime. This paper presents an integer-linear programming \textit{(ILP)} based register allocator that uses precise worst-case execution time \textit{(WCET)} models. Using this WCET timing data, the compiler avoids spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware ILP-based register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 55 realistic benchmarks, we reduced WCETs by 20.2\% on average and ACETs by 14\%, compared to a standard graph coloring allocator. Furthermore, our ILP-based register allocator outperforms a WCET-aware graph coloring allocator by more than a factor of two for the considered benchmarks, while requiring less runtime.},
} Current compilers lack precise timing models guiding their built-in optimizations. Hence, compilers apply ad-hoc heuristics during optimization to improve code quality. One of the most important optimizations is register allocation. Many compilers heuristically decide when and where to spill a register to memory, without having a clear understanding of the impact of such spill code on a program's runtime. This paper presents an integer-linear programming (ILP) based register allocator that uses precise worst-case execution time (WCET) models. Using this WCET timing data, the compiler avoids spill code generation along the critical path defining a program's WCET. To the best of our knowledge, this paper is the first one to present a WCET-aware ILP-based register allocator. Our results underline the effectiveness of the proposed techniques. For a total of 55 realistic benchmarks, we reduced WCETs by 20.2% on average and ACETs by 14%, compared to a standard graph coloring allocator. Furthermore, our ILP-based register allocator outperforms a WCET-aware graph coloring allocator by more than a factor of two for the considered benchmarks, while requiring less runtime.
|
| Timon Kelter, Heiko Falk, Peter Marwedel, Sudipta Chattopadhyay and Abhik Roychoudhury. Bus-Aware Multicore WCET Analysis through TDMA Offset Bounds. In Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS), pages 3-12 Porto / Portugal, July 2011 [BibTeX][PDF][Abstract]@inproceedings { kelter:11:ecrts,
author = {Kelter, Timon and Falk, Heiko and Marwedel, Peter and Chattopadhyay, Sudipta and Roychoudhury, Abhik},
title = {Bus-Aware Multicore WCET Analysis through TDMA Offset Bounds},
booktitle = {Proceedings of the 23rd Euromicro Conference on Real-Time Systems (ECRTS)},
year = {2011},
pages = {3-12},
address = {Porto / Portugal},
month = {jul},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-ecrts_1.pdf},
confidential = {n},
abstract = {In the domain of real-time systems, the analysis of the timing behavior of programs is crucial for guaranteeing the schedulability and thus the safeness of a system. Static analyses of the \textit{WCET} (Worst-Case Execution Time) have proven to be a key element for timing analysis, as they provide safe upper bounds on a program's execution time. For single-core systems, industrial-strength WCET analyzers are already available, but up to now, only first proposals have been made to analyze the WCET in multicore systems, where the different cores may interfere during the access to shared resources. An important example for this are shared buses which connect the cores to a shared main memory. The time to gain access to the shared bus may vary significantly, depending on the used bus arbitration protocol and the access timings. In this paper, we propose a new technique for analyzing the duration of accesses to shared buses. We implemented a prototype tool which uses the new analysis and tested it on a set of realworld benchmarks. Results demonstrate that our analysis achieves the same precision as the best existing approach while drastically outperforming it in matters of analysis time.},
} In the domain of real-time systems, the analysis of the timing behavior of programs is crucial for guaranteeing the schedulability and thus the safeness of a system. Static analyses of the WCET (Worst-Case Execution Time) have proven to be a key element for timing analysis, as they provide safe upper bounds on a program's execution time. For single-core systems, industrial-strength WCET analyzers are already available, but up to now, only first proposals have been made to analyze the WCET in multicore systems, where the different cores may interfere during the access to shared resources. An important example for this are shared buses which connect the cores to a shared main memory. The time to gain access to the shared bus may vary significantly, depending on the used bus arbitration protocol and the access timings. In this paper, we propose a new technique for analyzing the duration of accesses to shared buses. We implemented a prototype tool which uses the new analysis and tested it on a set of realworld benchmarks. Results demonstrate that our analysis achieves the same precision as the best existing approach while drastically outperforming it in matters of analysis time.
|
| Paul Lokuciejewski, Sascha Plazar, Heiko Falk, Peter Marwedel and Lothar Thiele. Approximating Pareto optimal compiler optimization sequences---a trade-off between WCET, ACET and code size. Software: Practice and Experience May 2011, DOI 10.1002/spe.1079 [BibTeX][PDF][Abstract]@article { lokuciejewski:11:spe,
author = {Lokuciejewski, Paul and Plazar, Sascha and Falk, Heiko and Marwedel, Peter and Thiele, Lothar},
title = {Approximating Pareto optimal compiler optimization sequences---a trade-off between WCET, ACET and code size},
journal = {Software: Practice and Experience},
year = {2011},
month = {may},
note = {DOI 10.1002/spe.1079},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2011-spe.pdf},
confidential = {n},
abstract = {With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w.\,r.\,t.~different objective functions, e.\,g., average-case execution time \textit{(ACET)} or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e.\,g., \textit{O3} or \textit{Os}. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. Besides the objective functions ACET and code size, we consider the worst-case execution time \textit{(WCET)} which is a crucial parameter for real-time systems. To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions for the objectives $\langle$WCET, ACET$\rangle$ and $\langle$WCET, code size$\rangle$ are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.},
} With the growing complexity of embedded systems software, high code quality can only be achieved using a compiler. Sophisticated compilers provide a vast spectrum of various optimizations to improve code aggressively w. r. t. different objective functions, e. g., average-case execution time (ACET) or code size. Due to the complex interactions between the optimizations, the choice for a promising sequence of code transformations is not trivial. Compiler developers address this problem by proposing standard optimization levels, e. g., O3 or Os. However, previous studies have shown that these standard levels often miss optimization potential or might even result in performance degradation. In this paper, we propose the first adaptive WCET-aware compiler framework for an automatic search of compiler optimization sequences which yield highly optimized code. Besides the objective functions ACET and code size, we consider the worst-case execution time (WCET) which is a crucial parameter for real-time systems. To find suitable trade-offs between these objectives, stochastic evolutionary multi-objective algorithms identifying Pareto optimal solutions for the objectives <WCET, ACET> and <WCET, code size> are exploited. A comparison based on statistical performance assessments is performed which helps to determine the most suitable multi-objective optimizer. The effectiveness of our approach is demonstrated on real-life benchmarks showing that standard optimization levels can be significantly outperformed.
|
| Arthur Pyka. Multikriterielle Exploration von Compileroptimierungen und Cacheparametern. Master's Thesis, February 2011 [BibTeX][PDF]@mastersthesis { Pyka2011,
title = {Multikriterielle Exploration von Compileroptimierungen und Cacheparametern},
author = {Pyka, Arthur},
school = {Technische Universtit\"at Dortmund},
year = {2011},
month = {February},
keywords = {wcet},
file = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/pyka.pdf},
confidential = {n},
adviser = {Sascha Plazar},
} |
| Kyrill Risto. Scratchpad-Allokation zur Reduktion der größtmöglichen Laufzeit von Multitask-Systemen. Master's Thesis, January 2011 [BibTeX]@mastersthesis { Risto2011,
title = {Scratchpad-Allokation zur Reduktion der gr\"o\"stm\"oglichen Laufzeit von Multitask-Systemen},
author = {Risto, Kyrill},
school = {Technische Universtit\"at Dortmund},
year = {2011},
month = {January},
keywords = {wcet},
confidential = {n},
adviser = {Heiko Falk},
} |
| Helena Kotthaus. Cache-bewusste Code-Positionierung zur Reduktion der maximalen Programmlaufzeit (WCET). Master's Thesis, January 2011 [BibTeX]@mastersthesis { Kotthaus2011,
title = {Cache-bewusste Code-Positionierung zur Reduktion der maximalen Programmlaufzeit (WCET)},
author = {Kotthaus, Helena},
school = {Technische Universtit\"at Dortmund},
year = {2011},
month = {January},
keywords = {wcet},
confidential = {n},
adviser = {Heiko Falk},
} |