@article{DBLP:journals/rts/ChenC13, author = {Jian-Jia Chen and Samarjit Chakraborty}, title = {Resource augmentation for uniprocessor and multiprocessor partitioned scheduling of sporadic real-time tasks}, journal = {Real-Time Systems}, volume = {49}, number = {4}, year = {2013}, pages = {475--516}, url = {http://dx.doi.org/10.1007/s11241-013-9181-5}, abstract = {Although the earliest-deadline-first (EDF) policy is known to be optimal for preemptive real-time task scheduling in uniprocessor systems, the schedulability analysis problem has recently been shown to be coNP-hard. Therefore, approximation algorithms, and in particular, approximations based on resource augmentation have attracted a lot of attention for both uniprocessor and multiprocessor systems. Resource augmentation based approximations assume a certain speedup of the processor(s). Using the notion of approximate demand bound function (dbf), in this paper we show that for uniprocessor systems the resource augmentation factor is at most $\frac{2e-1}{e}\approx 1.6322$, where e is the Euler number. We approximate the dbf using a linear approximation when the analysis interval length of interest is larger than the relative deadline of the task. For identical multiprocessor systems with M processors and constrained-deadline task sets, we show that the deadline-monotonic partitioning (that has been proposed by Baruah and Fisher) with the approximate dbf leads to an approximation factor of $\frac{3e-1}{e}-\frac{1}{M}\approx 2.6322-\frac{1}{M}$ with respect to resource augmentation. We also show that the corresponding factor is $3-\frac{1}{M}$ for arbitrary-deadline task sets. The best known results so far were $3-\frac{1}{M}$ for constrained-deadline tasks and $4-\frac{2}{M}$ for arbitrary-deadline ones. 
Our tighter analysis exploits the structure of the approximate dbf directly and uses the processor utilization violations (which were ignored in all previous analysis) for analyzing resource augmentation factors. We also provide concrete input instances to show that the lower bound on the resource augmentation factor for uniprocessor systems—using the above approximate dbf is 1.5, and the corresponding bound is 2.5 for identical multiprocessor systems with an arbitrary order of fitting and a large number of processors. Further, we also provide a polynomial-time approximation scheme (PTAS) to derive near-optimal solutions under the assumption that the ratio of the maximum relative deadline to the minimum relative deadline of tasks is a constant, which is a more relaxed assumption compared to the assumptions required for deriving such a PTAS in the past.} } @article{DBLP:journals/rts/YangBRCT13, author = {Hoeseok Yang and Iuliana Bacivarov and Devendra Rai and Jian-Jia Chen and Lothar Thiele}, title = {Real-time worst-case temperature analysis with temperature-dependent parameters}, journal = {Real-Time Systems}, volume = {49}, number = {6}, year = {2013}, pages = {730-762}, url = {http://dx.doi.org/10.1007/s11241-013-9188-y}, abstract = {With the evolution of today’s semiconductor technology, chip temperature increases rapidly mainly due to the growth in power density. Therefore, for modern embedded real-time systems it is crucial to estimate maximal temperatures early in the design in order to avoid burnout and to guarantee that the system can meet its real-time constraints. This paper provides answers to a fundamental question: What is the worst-case peak temperature of a real-time embedded system under all feasible scenarios of task arrivals? A novel thermal-aware analytic framework is proposed that combines a general event/resource model based on network and real-time calculus with system thermal equations. 
This analysis framework has the capability to handle a broad range of uncertainties in terms of task execution times, task invocation periods, jitter in task arrivals, and resource availability. The considered model takes both dynamic and leakage power as well as thermal dependent conductivity into consideration. Thorough simulation experiments validate the theoretical results.} } @article{DBLP:journals/tsmc/LiSCL13, author = {Jianjun Li and LihChyun Shu and Jian-Jia Chen and Guohui Li}, title = {Energy-Efficient Scheduling in Nonpreemptive Systems With Real-Time Constraints}, journal = {IEEE T. Systems, Man, and Cybernetics: Systems}, volume = {43}, number = {2}, year = {2013}, pages = {332-344}, url = {http://dx.doi.org/10.1109/TSMCA.2012.2199305}, abstract = {In the past decade, the development of mobile and embedded systems has demanded energy efficiency for improving the lifetime of embedded devices. To avoid preemption overhead or ease timing verification, nonpreemptive scheduling has been deemed useful or necessary in meeting system timing requirements for certain applications built on embedded devices. In this paper, our aim is to design nonpreemptive scheduling algorithms that ensure timing correctness and optimize energy consumption on a processor with variable speeds. We propose a representative algorithm, ISA, which can produce lower speeds for a variety of nonpreemptive task sets than other comparable methods, and hence resulting in significant energy savings. When combined with a selective frequency-inheritance policy we design to efficiently determine if processor speedup can be disabled without jeopardizing any task deadlines, ISA can achieve even larger gains, up to 30\% reduction in energy consumption. 
Finally, we propose a dynamic slack reclamation policy built on ISA, namely ISA-DR, which can result in additional energy savings when a task consumes less than its worst-case execution time.} } @inproceedings{DBLP:conf/aspdac/ChangCKF13, author = {Che-Wei Chang and Jian-Jia Chen and Tei-Wei Kuo and Heiko Falk}, title = {Real-time partitioned scheduling on multi-core systems with local and global memories}, booktitle = {18th Asia and South Pacific Design Automation Conference, (ASP-DAC) 2013, Yokohama, Japan, January 22-25, 2013}, year = {2013}, pages = {467-472}, url = {http://dx.doi.org/10.1109/ASPDAC.2013.6509640}, abstract = {Real-time task scheduling becomes even more challenging with the emerging of island-based multi-core architecture, where the local memory module of an island offers shorter access time than the global memory module does. With such a popular architecture design in mind, this paper exploits real-time task scheduling over island-based homogeneous cores with local and global memory pools. Joint considerations of real-time scheduling and memory allocation are presented to efficiently use the computing and memory resources. A polynomial-time algorithm with an asymptotic 4-approximation bound is proposed to minimize the number of needed islands to successfully schedule tasks. 
To evaluate the performance of the proposed algorithm, 82 benchmarks from the MRTC, MediaBench, UTDSP, NetBench, and DSPstone benchmark suites were profiled by a worst-case-execution-time analyzer aiT and included in the experiments.} } @inproceedings{DBLP:conf/dac/JahnPKCH13, author = {Janmartin Jahn and Santiago Pagani and Sebastian Kobbe and Jian-Jia Chen and J{\"o}rg Henkel}, title = {Optimizations for configuring and mapping software pipelines in many core systems}, booktitle = {The 50th Annual Design Automation Conference 2013, (DAC), Austin, TX, USA, May 29 - June 07, 2013}, year = {2013}, pages = {130}, url = {http://doi.acm.org/10.1145/2463209.2488894}, abstract = {Efficiently utilizing the computational resources of many core systems is one of the most prominent challenges. The problem worsens when resource requirements vary unpredictably and applications may be started/stopped at any time. To address this challenge, we propose two schemes that calculate and adapt task mappings at runtime: a centralized, optimal mapping scheme and a distributed, hierarchical mapping scheme that trades optimality for a high degree of scalability. Experiments on Intel's 48-core Single-Chip Cloud Computer and in a many core simulator show that a significant improvement in system performance can be achieved over current state-of-the-art.} } @inproceedings{DBLP:conf/date/RehmanSAKCH13, author = {Semeen Rehman and Muhammad Shafique and Pau Vilimelis Aceituno and Florian Kriebel and Jian-Jia Chen and J{\"o}rg Henkel}, title = {Leveraging variable function resilience for selective software reliability on unreliable hardware}, booktitle = {Design, Automation and Test in Europe, (DATE) 13, Grenoble, France, March 18-22, 2013}, year = {2013}, pages = {1759--1764}, url = {http://dl.acm.org/citation.cfm?id=2485704}, abstract = {State-of-the-art reliability optimizing schemes deploy spatial or temporal redundancy for the complete functionality. 
This introduces significant performance/area overhead which is often prohibitive within the stringent design constraints of embedded systems. This paper presents a novel scheme for selective software reliability optimization constraint under user-provided tolerable performance overhead constraint. To enable this scheme, statistical models for quantifying software resilience and error masking properties at function and instruction level are proposed. These models leverage a whole new range of reliability optimization. Given a tolerable performance overhead, our scheme selectively protects the reliability-wise most important instructions based on their masking probability, vulnerability, and redundancy overhead. Compared to state-of-the-art [7], our scheme provides a 4.84X improved reliability at 50\% tolerable performance overhead constraint.} } @inproceedings{DBLP:conf/ecrts/TomaC13, author = {Anas Toma and Jian-Jia Chen}, title = {Computation Offloading for Frame-Based Real-Time Tasks with Resource Reservation Servers}, booktitle = {25th Euromicro Conference on Real-Time Systems, (ECRTS) 2013, Paris, France, July 9-12, 2013}, year = {2013}, pages = {103--112}, url = {http://dx.doi.org/10.1109/ECRTS.2013.21}, abstract = {Computation offloading concept has been recently adopted to improve the performance of embedded systems by moving some computation-intensive tasks (partially or wholly) to a powerful remote server. In this paper, we consider a computation offloading problem for frame-based real-time tasks, in which all the tasks have the same arrival time and the same relative deadline/period, by adopting the total bandwidth server (TBS) as resource reservations in the server side (remote execution unit). We prove that the problem is NP-complete and propose two algorithms in this paper. The first algorithm is a greedy algorithm with low complexity and provides a quick heuristic approach to decide which tasks to be offloaded and how the tasks are scheduled. 
The maximum finishing time of the solution derived from the greedy algorithm is at most twice of the finishing time (make span, maximal on the client and on the server) of any schedule. The second algorithm is a dynamic programming approach, which builds a three-dimensional table and requires pseudo-polynomial time complexity, to make an optimal decision for computation offloading. The algorithms are evaluated with a case study of a surveillance system and synthesized benchmarks.} } @inproceedings{DBLP:conf/estimedia/TomaC13, author = {Anas Toma and Jian-Jia Chen}, title = {Server resource reservations for computation offloading in real-time embedded systems}, booktitle = {The 11th IEEE Symposium on Embedded Systems for Real-time Multimedia (ESTIMedia), Montreal, QC, Canada, October 3-4, 2013}, year = {2013}, pages = {31-39}, url = {http://dx.doi.org/10.1109/ESTIMedia.2013.6704500}, abstract = {Mobile devices have become very popular nowadays. They are used nearly everywhere. They run complex applications where the multimedia data are heavily processed. For example, ubiquitous applications in smart phones and different surveillance tasks on mobile robots. However, most of these applications have real-time constraints, and the resources of the mobile devices are limited. So, it is challenging to finish such complex applications on these resource-constrained devices without violating the real-time constraints. One solution is to adopt the Computation Offloading concept by moving some computation-intensive tasks to a powerful server. In this paper, we use the total bandwidth server (TBS) as resource reservations in the server side, and propose two algorithms based on the computation offloading to decide which tasks to be offloaded and how they are scheduled, such that the utilization (i.e., bandwidth) required from the server is minimized. We consider frame-based real-time tasks, in which all the tasks have the same arrival time, relative deadline and period. 
The first algorithm is a greedy algorithm with low complexity based on a fast heuristic. The second one is a pseudo-polynomial-time algorithm based on dynamic programming. Finally, the algorithms are evaluated with a case study for surveillance system and synthesized benchmarks.} } @inproceedings{DBLP:conf/iccad/JahnPCH13, author = {Janmartin Jahn and Santiago Pagani and Jian-Jia Chen and J{\"o}rg Henkel}, title = {MOMA: mapping of memory-intensive software-pipelined applications for systems with multiple memory controllers}, booktitle = {The IEEE/ACM International Conference on Computer-Aided Design, (ICCAD), San Jose, CA, USA, November 18-21, 2013}, year = {2013}, pages = {508-515}, url = {http://dl.acm.org/citation.cfm?id=2561928}, abstract = {In many-core systems, the efficient deployment of computational and other resources is key in order to achieve a high throughput. Current state-of-the-art task mapping schemes balance the computational load among cores while avoiding congestions within the communication links. The problem is that a large number of cores running many memory-intensive tasks may congest memory controllers because their number and bandwidth is constrained. To avoid a high throughput degradation that could result from congested memory controllers, the mapping of tasks must be sensitized to the limited bandwidth of off-chip memory. Designing efficient and effective algorithms to optimize the throughput by jointly considering the load of memory controllers, computation, and communication is very challenging. In this paper, we address this problem by distributing cores among applications and then heuristically map tasks such that the load of the memory controllers is sufficiently balanced. Our heuristic also minimizes the effect of decreased throughput resulting from mapping communicating tasks to cores that belong to different controllers. 
Our experiments encourage us in that we can reduce the saturation of memory controllers and significantly increase the system throughput compared to employing several state-of-the-art task mapping schemes.} } @inproceedings{DBLP:conf/patmos/MunawarC13, author = {Waqaas Munawar and Jian-Jia Chen}, title = {Peak power demand analysis and reduction by using battery buffers for monotonic controllers}, booktitle = {23rd International Workshop on Power and Timing Modeling, Optimization and Simulation (PATMOS), Karlsruhe, Germany, September 9-11, 2013}, year = {2013}, pages = {255-258}, url = {http://dx.doi.org/10.1109/PATMOS.2013.6662185}, abstract = {Demand of electricity varies on hourly basis whereas the production is quite inelastic. This results in fluctuating prices. Data centers and industrial consumers of electricity are penalized for the peak power demand of their loads. To shave the peak power demand, a battery buffer can be adopted. The battery is charged during low load and is discharged during peaks. One essential question is to analyze the reduction of the peak power demand by adopting battery buffers. The power loads are modeled in this paper by adopting the concept of arrival curves in Network Calculus. We analyze monotonic controllers, which have these two properties: (1) comparing one given trace of power loads and two initial battery statuses, if we start from higher battery status, the resulting battery status in the future will not become lower; (2) to increase the power demand at time slot t, the power loads released before t should be as close as possible to t. We present a simple and effective monotonic controller and also provide analyses for the peak power demand to the power grid. 
Our analysis mechanism can help determine the appropriate battery size for a given load arrival curve to reduce the peak.} } @inproceedings{DBLP:conf/rtas/RehmanTKSCH13, author = {Semeen Rehman and Anas Toma and Florian Kriebel and Muhammad Shafique and Jian-Jia Chen and J{\"o}rg Henkel}, title = {Reliable code generation and execution on unreliable hardware under joint functional and timing reliability considerations}, booktitle = {19th IEEE Real-Time and Embedded Technology and Applications Symposium, (RTAS), Philadelphia, PA, USA, April 9-11, 2013}, year = {2013}, pages = {273-282}, url = {http://doi.ieeecomputersociety.org/10.1109/RTAS.2013.6531099}, abstract = {To enable reliable embedded systems, it is imperative to leverage the compiler and system software for joint optimization of functional correctness, i.e., vulnerability indexes, and timing correctness, i.e., the deadline misses. This paper considers the optimization of the Reliability-Timing (RT) penalty, defined as a linear combination of the vulnerability indexes (reliability penalties) and the deadline misses. We propose a multi-layer approach to achieve reliable code generation and execution at compilation and system software layers for embedded systems. This is enabled by the concept of generating multiple versions, for given application functions, with diverse performance and reliability tradeoffs, by exploiting different reliability-guided compilation options. Based on the reliability and execution time profiling of these versions, our reliability-driven system software employs dynamic version selections to dynamically select a suitable version of a function according to the execution behavior of the previous functions. Specifically, our scheme builds a schedule table offline to optimize the RT penalty, and uses this table at run time to select suitable versions for the subsequent functions properly. 
A complex real-world application of “secure video and audio processing” composed of various functions is evaluated for reliable code generation and execution. The reliability analysis and evaluation is performed on a reliability-aware processor simulator.} } @inproceedings{DBLP:conf/rtcsa/PaganiC13, author = {Santiago Pagani and Jian-Jia Chen}, title = {Energy efficiency analysis for the Single Frequency Approximation (SFA) scheme}, booktitle = {IEEE 19th International Conference on Embedded and Real-Time Computing Systems and Applications, (RTCSA), Taipei, Taiwan, August 19-21, 2013}, year = {2013}, pages = {82-91}, note = {Best Paper Award}, url = {http://dx.doi.org/10.1109/RTCSA.2013.6732206}, abstract = {Energy-efficient designs are important issues in computing systems. This paper studies the energy efficiency of a simple and linear-time strategy, called Single Frequency Approximation (SFA) scheme, for periodic real-time tasks on multi-core systems with a shared supply voltage in a voltage island. The strategy executes all the cores at a single frequency to just meet the timing constraints. SFA has been adopted in the literature after task partitioning, but the worst-case performance of SFA, in terms of energy consumption, is an open problem. We provide comprehensive analysis for SFA to derive the cycle utilization distribution for its worst-case behaviour for energy minimization. Our analysis shows that the energy consumption by using SFA for task execution is at most 1.53 (1.74, 2.10, 2.69, respectively), compared to the energy consumption of the optimal voltage/frequency scaling, when the dynamic power consumption is a cubic function of the frequency and the voltage island has up to 4 (8, 16, 32, respectively) cores. The analysis shows that SFA is indeed an effective scheme under practical settings, even though it is not optimal. 
Furthermore, since all the cores run at a single frequency and no frequency alignment for Dynamic Voltage and Frequency Scaling (DVFS) between cores is needed, any uni-core dynamic power management technique for reducing the energy consumption for idling can be easily incorporated individually on each core in the voltage island. This paper also provides the analysis of energy consumption for SFA, combined with the procrastination for Dynamic Power Management (DPM). Furthermore, we also extend our analysis for deriving the approximation factor of SFA for a multi-core system with multiple voltage islands.} } @inproceedings{DBLP:conf/rtss/PaganiC13, author = {Santiago Pagani and Jian-Jia Chen}, title = {Energy Efficient Task Partitioning Based on the Single Frequency Approximation Scheme}, booktitle = { IEEE 34th Real-Time Systems Symposium, (RTSS), Vancouver, BC, Canada, December 3-6, 2013}, year = {2013}, pages = {308-318}, url = {http://dx.doi.org/10.1109/RTSS.2013.38}, abstract = {Energy-efficiency is a major concern in modern computing systems. For such systems, the presence of multiple voltage islands, where the voltage of each island can change independently and all cores in an island share the same supply voltage at any given time, is an expected compromise between global and per-core Dynamic Voltage and Frequency Scaling (DVFS). This paper focuses on energy minimization for a set of periodic tasks assigned on a voltage island. We present a simple and practical solution, that assigns the tasks onto cores in the island and then applies a DVFS schedule, particularly the Single Frequency Approximation (SFA) scheme. Furthermore, we provide thorough theoretical analysis of our solution, in terms of energy efficiency, against the optimal task partitioning and optimal DVFS schedule, especially for the state-of-the-art designs, that have a few number of cores per voltage island. 
The analysis shows that, our task partitioning scheme combined with SFA is a good and practical solution for energy efficiency. Particularly, when the number of cores in each voltage island is limited, the approximation factor is at most 2.01 (2.29, 2.55, 2.80, respectively) when the dynamic power consumption is a cubic function of the frequency and the islands have up to 4 (8, 16, 32, respectively) cores. Moreover, with non-negligible overhead for sleeping, further combination with any uni-core procrastination algorithm that consumes no more energy than keeping a core idle when it has no workload in its ready queue, increases the approximation factor by at most 1.} } @inproceedings{DBLP:conf/rtss/Chen13, author = {Jian-Jia Chen}, title = {Task Set Synthesis with Cost Minimization for Sporadic Real-Time Tasks}, booktitle = {IEEE 34th Real-Time Systems Symposium, (RTSS), Vancouver, BC, Canada, December 3-6, 2013}, year = {2013}, pages = {350-359}, url = {http://dx.doi.org/10.1109/RTSS.2013.42}, abstract = {By allowing users to specify multiple execution versions of a task with different amounts of worst-case execution time and costs, this paper explores how to minimize of the overall system cost under the timing constraints for sporadic real-time tasks. One specific application is to minimize the requirement scratchpad memory size (system cost) to meet the timing constraint, while the worst-case execution time of a task depends on its allocated scratchpad memory size. This paper shows that the problem is NP-hard for approximation, if speed augmentation is not allowed. The algorithms proposed in this paper are analyzed based on $(\alpha, \beta)$-approximation, in which a $\beta$ speed augmentation factor is allowed and the system cost is at most α times of the optimal solution. 
For tasks with constrained deadlines, an efficient ($1, 2/(1-\eta)$)-approximation algorithm based on dynamic programming is proposed for deadline-monotonic (DM) scheduling, where $0 < \eta < 1$ is a user-defined parameter for the rounding precision in dynamic programming. This is further extended to a ($1, 1.6322/(1-\eta)$)-approximation algorithm for earliest-deadline-first (EDF) scheduling. A polynomial-time ($1 + \epsilon, 1 + \eta$)-approximation scheme is also developed for EDF scheduling by considering $0 < \epsilon, 0 < \eta < 1$ when the ratio of the maximum relative deadline to the minimum relative deadline of tasks is a constant. This paper is concluded by considering the dual problem to maximize the system profit by selecting execution versions with different amounts of worst-case execution time.} } @inproceedings{DBLP:conf/sac/TomaC13, author = {Anas Toma and Jian-Jia Chen}, title = {Computation offloading for real-time systems}, booktitle = {Proceedings of the 28th Annual ACM Symposium on Applied Computing, SAC '13, Coimbra, Portugal, March 18-22, 2013}, year = {2013}, pages = {1650--1651}, url = {http://doi.acm.org/10.1145/2480362.2480670}, abstract = {Computation offloading has been adopted to improve the performance of embedded systems by offloading the computation of some tasks, especially computation-intensive tasks, to servers or clouds. This paper explores computation offloading for real-time embedded systems to decide which tasks should be offloaded to get the results in time. Such a problem is NP-complete even for frame-based real-time tasks with the same period and relative deadline. 
We develop a pseudo-polynomial-time algorithm for deriving feasible schedules, if they exist.} } @inproceedings{DBLP:conf/sies/FreierC13, author = {Matthias Freier and Jian-Jia Chen}, title = {Prioritization for real-time embedded systems on dual-core platforms by exploiting the typical- and worst-case execution times}, booktitle = {8th IEEE International Symposium on Industrial Embedded Systems, SIES 2013, Porto, Portugal, June 19-21, 2013}, year = {2013}, pages = {21-29}, url = {http://dx.doi.org/10.1109/SIES.2013.6601467}, crossref = {DBLP:conf/sies/2013}, abstract = {Adopting multicore platforms for real-time systems has recently been an active topic for both academia and industry. For hard real-time systems, the static worst-case execution time (WCET) analysis is usually needed for analyzing the schedulability. However, as the execution of a job depends on its input data, its internal state, and the architectural state, the worst-case execution time may be much larger than the typical execution time. Even though the jobs have unpredictable execution behavior, the practice now is to consider the feasibility and schedulability of the system based on their worst-case execution times. Therefore, the utilization of the system may be very low in most cases. This paper exploits a new scheme by adopting one core for executing tasks under their typical execution behavior, while the gap of the execution time between the typical case and the worst case is served on the other core. The objective is to use only one core most of time, whereas the second core is activated when it is necessary. We analyze the ineffectiveness when adopting the end-to-end deadline approaches in distributed systems for our studied problem. We show the connection of this problem to the well-known two-stage flowshop scheduling problem when considering frame-based real-time tasks, in which all the tasks have the same relative deadline and period. 
For general cases, in which tasks have different relative deadlines or periods, we provide a prioritization heuristic algorithm and its schedulability analysis. We also evaluate our algorithms to show the effectiveness.} } @inproceedings{DBLP:conf/smartgreens/WangMLC13, author = {Shengquan Wang and Waqaas Munawar and Xue Liu and Jian-Jia Chen}, title = {Power-saving Design in Server Farms for Multi-tier Applications under Response Time Constraint}, booktitle = {SMARTGREENS 2013 - Proceedings of the 2nd International Conference on Smart Grids and Green IT Systems, Aachen, Germany, 9-10 May, 2013}, year = {2013}, pages = {137-148}, url = {http://dx.doi.org/10.5220/0004357201370148}, abstract = {Server farms suffer from an increasing power consumption nowadays. Power saving has become a prominent design issue in server farms. This paper presents a power-saving design in server farms under the constraint of the response time. In particular, we target on multi-tier applications, which are very typical on the web in modern days. We propose an efficient power-saving design strategy, called PowerTier. This strategy exploits two major techniques by using Dynamic Power management (DPM) to activate/deactivate servers and using Dynamic Voltage Scaling (DVS) to adjust the processor speed for each activated server. In addition, PowerTier considers two different application models: the open-queueing model and the closed-queueing model for session-less and session-based web applications respectively. With PowerTier, we are able to choose the number of activated servers at each tier and the processor speed for each server to minimize the overall power consumption in server farms while meeting a given mean response time guarantee for multi-tier applications. 
Our comprehensive simulation confirms the effectiveness and efficiency of PowerTier.} } @article{DBLP:journals/jise/ChenHT12, author = {Jian-Jia Chen and Kai Huang and Lothar Thiele}, title = {Dynamic Frequency Scaling Schemes for Heterogeneous Clusters under Quality of Service Requirements}, journal = {J. Inf. Sci. Eng.}, volume = {28}, number = {6}, year = {2012}, pages = {1073--1090}, url = {http://www.iis.sinica.edu.tw/page/jise/2012/201211_06.html}, abstract = {Nowadays, both the performance and power consumption for modern server clusters and data centers must be considered to reduce the maintenance cost for quality of service guarantees, as power dissipation affects the cost of both the power delivery subsystems and cooling facility. Considering the popularity of heterogeneous clusters, this paper proposes efficient and effective power management schemes for large scale server farms. Distinct from existing heuristic approaches, we propose dynamic frequency scaling schemes with approximation factor guarantees, compared to the optimal power management. By considering systems with discrete frequency levels on every server, our schemes can be applied for different power consumption models. Our greedy power management schemes have 1.5 or 2 approximation guarantees depending on the complexity. Our dynamic-programming approach can trade the quality, in terms of power consumption, of the resulting solutions with the time/space complexity. We provide extensive simulation results to show that the proposed schemes are effective for the minimization of the power consumption for large scale clusters. 
} } @inproceedings{DBLP:conf/date/MasrurGCCAB12, author = {Alejandro Masrur and Dip Goswami and Samarjit Chakraborty and Jian-Jia Chen and Anuradha Annaswamy and Ansuman Banerjee}, title = {Timing analysis of cyber-physical applications for hybrid communication protocols}, booktitle = {2012 Design, Automation {\&} Test in Europe Conference {\&} Exhibition, DATE 2012, Dresden, Germany, March 12-16, 2012}, year = {2012}, pages = {1233-1238}, url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=6176681}, abstract = {Many cyber-physical systems consist of a collection of control loops implemented on multiple electronic control units (ECUs) communicating via buses such as FlexRay. Such buses support hybrid communication protocols consisting of a mix of time- and event-triggered slots. The time-triggered slots may be perfectly synchronized to the ECUs and hence result in zero communication delay, while the event-triggered slots are arbitrated using a priority-based policy and hence messages mapped onto them can suffer non-negligible delays. In this paper, we study a switching scheme where control messages are dynamically scheduled between the time-triggered and the event-triggered slots. This allows more efficient use of time-triggered slots which are often scarce and therefore should be used sparingly. Our focus is to perform a schedulability analysis for this setup, i.e., in the event of an external disturbance, can a message be switched from an event-triggered to a time-triggered slot within a specified deadline? 
We show that this analysis can check whether desired control performance objectives may be satisfied, with a limited number of time-triggered slots being used.} } @inproceedings{DBLP:conf/ecrts/ChenC12, author = {Jian-Jia Chen and Samarjit Chakraborty}, title = {Partitioned Packing and Scheduling for Sporadic Real-Time Tasks in Identical Multiprocessor Systems}, booktitle = {24th Euromicro Conference on Real-Time Systems, ECRTS 2012, Pisa, Italy, July 11-13, 2012}, year = {2012}, pages = {24-33}, url = {http://doi.ieeecomputersociety.org/10.1109/ECRTS.2012.43}, abstract = {Multiprocessor platforms have been widely adopted to accommodate the increasing computation requirement of modern applications. Partitioned scheduling (or packing) has been widely exploited by partitioning real-time tasks onto processors to meet the timing constraints, which has been shown to be NP-complete in the strong sense. This paper studies the approximation of partitioned scheduling by exploiting resource augmentation with (1) speeding up or (2) allocating more processors. When adopting speeding up to meet timing constraints, we provide a polynomial-time approximation scheme (PTAS) to derive near-optimal solutions only with the assumption that the ratio of the maximum relative deadline to the minimum relative deadline is a constant. The previously known PTAS for this problem imposes additional restrictions on the periods and the execution times of tasks. By removing these additional constraints, our scheme can be adopted for wider task sets. 
When considering the resource augmentation by allocating more processors, we show that there does not exist any asymptotic polynomial-time approximation scheme (APTAS) unless P=NP.} } % math format problem @inproceedings{DBLP:conf/emsoft/ChangCMKF12, author = {Che-Wei Chang and Jian-Jia Chen and Waqaas Munawar and Tei-Wei Kuo and Heiko Falk}, title = {Partitioned scheduling for real-time tasks on multiprocessor embedded systems with programmable shared srams}, booktitle = {Proceedings of the 12th International Conference on Embedded Software, EMSOFT 2012, part of the Eighth Embedded Systems Week, ESWeek 2012, Tampere, Finland, October 7-12, 2012}, year = {2012}, pages = {153-162}, url = {http://doi.acm.org/10.1145/2380356.2380384}, abstract = {This work is motivated by the advance of multiprocessor system architecture, in which the allocation of tasks over heterogeneous memory modules has a significant impact on the task execution. By considering two different types of memory modules with different access latencies, this paper explores joint considerations of memory allocation and real-time task scheduling to minimize the maximum utilization of processors of the system. For implicit-deadline sporadic tasks, a two-phase algorithm is developed, where the first phase determines memory allocation to derive a lower bound of the maximum utilization, and the second phase adopts worst-fit partitioning to assign tasks. It is shown that the proposed algorithm leads to a tight ($2-\frac{2}{M+1}$)-approximation bound where M is the number of processors. 
The proposed algorithm is then evaluated with 82 realistic benchmarks from MRTC, MediaBench, UTDSP, NetBench and DSPstone, and extensive simulations are further conducted to analyze the proposed algorithm.} } @inproceedings{DBLP:conf/iccad/ChenYC12, author = {Yi-Jung Chen and Chia-Lin Yang and Jian-Jia Chen}, title = {Distributed memory interface synthesis for Network-on-Chips with 3D-stacked DRAMs}, booktitle = {2012 IEEE/ACM International Conference on Computer-Aided Design, ICCAD 2012, San Jose, CA, USA, November 5-8, 2012}, year = {2012}, pages = {458-465}, url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=6386707}, abstract = {Stacking DRAMs on processing cores by Through-Silicon Vias (TSVs) provides abundant bandwidth and enables a distributed memory interface design. To achieve the best balance in performance and cost in an application-specific system, the distributed memory interface should be tailored for the target applications. In this paper, we propose the first distributed memory interface synthesis framework for application-specific Network-on-Chips (NoCs) with 3D-stacked DRAMs. To maximize the performance of a selected hardware configuration, the proposed framework co-synthesizes the hardware configuration of the distributed memory interface, and the software configuration, e.g. task mapping and data assignment. Since TSVs have adverse impact on chip costs and yields, the goal of the framework is minimizing the number of TSVs provided that the user-defined performance constraint is met.} } @inproceedings{DBLP:conf/isaac/KaoCRW12, author = {Mong-Jen Kao and Jian-Jia Chen and Ignaz Rutter and Dorothea Wagner}, title = {Competitive Design and Analysis for Machine-Minimizing Job Scheduling Problem}, booktitle = {Algorithms and Computation - 23rd International Symposium, ISAAC 2012, Taipei, Taiwan, December 19-21, 2012. 
Proceedings}, year = {2012}, pages = {75-84}, url = {http://dx.doi.org/10.1007/978-3-642-35261-4_11}, abstract = {We explore the machine-minimizing job scheduling problem, which has a rich history in the line of research, under an online setting. We consider systems with arbitrary job arrival times, arbitrary job deadlines, and unit job execution time. For this problem, we present a lower bound 2.09 on the competitive factor of any online algorithms, followed by designing a 5.2-competitive online algorithm. We would also like to point out a false claim made in an existing paper of Shi and Ye regarding a further restricted case of the considered problem. To the best of our knowledge, what we present is the first concrete result concerning online machine-minimizing job scheduling with arbitrary job arrival times and deadlines.} } @inproceedings{DBLP:conf/marc/JahnKPCH12, author = {Janmartin Jahn and Sebastian Kobbe and Santiago Pagani and Jian-Jia Chen and J{\"o}rg Henkel}, title = {Work in Progress: Malleable Software Pipelines for Efficient Many-core System Utilization}, booktitle = {6th Many-core Applications Research Community (MARC) Symposium. Proceedings of the 6th MARC Symposium, 19-20 July 2012, Toulouse, France}, year = {2012}, pages = {30-33}, url = {http://hal.archives-ouvertes.fr/hal-00719027}, abstract = {This paper details our current research project on the efficient utilization of many-core systems by utilizing applications based on a novel kind of software pipelines. These pipelines form malleable applications that can change their degree of parallelism at runtime. This allows not only for a well-balanced load, but also for an efficient distribution of the cores to the individual, competing applications to maximize the overall system performance. 
We are convinced that malleable software pipelines will significantly outperform existing mapping and scheduling solutions.} } @inproceedings{DBLP:conf/rtas/WangMLCL12, author = {Shengquan Wang and Waqaas Munawar and Jun Liu and Jian-Jia Chen and Xue Liu}, title = {Power-Saving Design for Server Farms with Response Time Percentile Guarantees}, booktitle = {2012 IEEE 18th Real Time and Embedded Technology and Applications Symposium, Beijing, China, April 16-19, 2012}, year = {2012}, pages = {273-284}, url = {http://doi.ieeecomputersociety.org/10.1109/RTAS.2012.35}, abstract = {The expense of power cost in server farms has driven the recent power-aware development in both industry and academia. At the same time, a Service Level Agreement (SLA) of service performance between a customer and a service provider is demanded to meet the customer satisfaction. This paper investigates the queueing-theoretical power-saving design strategy for server farms under a given SLA, which in particular is measured in a certain level percentile of the job response time. We consider server farms with servers that are equipped with the capabilities of Dynamic Voltage Scaling (DVFS) and Dynamic Power Management (DPM). We adopt an M/G/1/PS server model, where the job service time distribution is assumed heavy-tailed, as discovered and validated by previous research. We propose a design strategy called Power Tail to minimize the power consumption under the given SLA. Our data confirms that the proposed Power Tail strategy indeed provides statistical guarantee in comparison with existing dynamic DVFS approaches and significantly outperforms the intuitive load-balancing strategy.} }