| Ruoheng Ma. A RISCV Emulation Board for Non-Volatile Memory. 2020 [BibTeX][PDF][Abstract]
@misc{RMaMA,
  author       = {Ma, Ruoheng},
  title        = {A RISCV Emulation Board for Non-Volatile Memory},
  howpublished = {KIT},
  year         = {2020},
  keywords     = {nvm-oma, kit},
  file         = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/riscv.pdf},
  confidential = {n},
  abstract     = {As DRAM is facing some problems such as limited scalability and huge power
consumption, researchers are turning their focus on the emerging Non-Volatile Memory
(NVM) technologies, like Phase-Change RAM (PRAM), Spin-Transfer Torque RAM (STT-RAM)
and Resistive RAM (ReRAM), to get rid of these problems. Compared to DRAM, NVM is non
volatile and has higher density. Moreover, in contrast to flash memory, NVM are byte
addressable. Therefore, they can be considered as a candidate for replacing traditional
DRAM. In order to explore the characteristics of different NVM technologies, in this thesis,
an emulation board with parameterizable memory access latency is built upon a platform
called FU500, which utilizes the open-source instruction set architecture RISCV, to ensure
flexibility and extensibility for further research. Moreover, a benchmark suite comprising
MiBench and Tinymembench is also built for benchmarking this board.},
}
As DRAM is facing some problems such as limited scalability and huge power
consumption, researchers are turning their focus on the emerging Non-Volatile Memory
(NVM) technologies, like Phase-Change RAM (PRAM), Spin-Transfer Torque RAM (STT-RAM)
and Resistive RAM (ReRAM), to get rid of these problems. Compared to DRAM, NVM is non
volatile and has higher density. Moreover, in contrast to flash memory, NVM are byte
addressable. Therefore, they can be considered as a candidate for replacing traditional
DRAM. In order to explore the characteristics of different NVM technologies, in this thesis,
an emulation board with parameterizable memory access latency is built upon a platform
called FU500, which utilizes the open-source instruction set architecture RISCV, to ensure
flexibility and extensibility for further research. Moreover, a benchmark suite comprising
MiBench and Tinymembench is also built for benchmarking this board.
|
| Manuel Killinger. Implementation of a Memory Access Trace Unit for a RISC-V SoC. 2020 [BibTeX][PDF][Abstract]
@misc{BAmkTrace,
  author       = {Killinger, Manuel},
  title        = {Implementation of a Memory Access Trace Unit for a RISC-V SoC},
  year         = {2020},
  keywords     = {soc, tracing, fpga, risc-v, memory, wear-leveling, kit, nvm-oma},
  file         = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/killinger.pdf},
  confidential = {n},
  abstract     = {One of the most important aspects of testing and analyzing embedded systems is the system monitoring itself. Newly developed System on a Chip can be verified and tested using simulation software, with the drawback of often being computationally expensive or slow. Especially of interest is the read and write behavior of SoCs. This work presents the implementation of a Trace Unit, developed for the Freedom U500 SoC. It is a non-invasive component that can monitor all memory accesses flowing to and from the main memory. The Trace Unit is integrated into U500's memory subsystem to expose the access behavior during the execution of various benchmarks, and the results are collected and presented.},
}
One of the most important aspects of testing and analyzing embedded systems is the system monitoring itself. Newly developed System on a Chip can be verified and tested using simulation software, with the drawback of often being computationally expensive or slow. Especially of interest is the read and write behavior of SoCs. This work presents the implementation of a Trace Unit, developed for the Freedom U500 SoC. It is a non-invasive component that can monitor all memory accesses flowing to and from the main memory. The Trace Unit is integrated into U500's memory subsystem to expose the access behavior during the execution of various benchmarks, and the results are collected and presented.

@techreport{scheidtSA,
  author       = {Scheidt, David},
  title        = {Dynamic Memory Placement of Applications on Extreme Resource Constrained Hardware},
  institution  = {TU Dortmund},
  year         = {2020},
  keywords     = {nvm-oma,thesis},
  file         = {https://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/2020-scheidt.pdf},
  confidential = {n},
}
|
| Christian Hakert, Kuan-Hsun Chen, Mikail Yayla, Georg von der Brüggen, Sebastian Bloemeke and Jian-Jia Chen. Software-Based Memory Analysis Environments for In-Memory Wear-Leveling. In 25th Asia and South Pacific Design Automation Conference ASP-DAC 2020, Invited Paper Beijing, China, 2020 [BibTeX][PDF][Abstract]
@inproceedings{nvmsimulator,
  author       = {Hakert, Christian and Chen, Kuan-Hsun and Yayla, Mikail and von der Br{\"u}ggen, Georg and Bloemeke, Sebastian and Chen, Jian-Jia},
  title        = {Software-Based Memory Analysis Environments for In-Memory Wear-Leveling},
  booktitle    = {25th Asia and South Pacific Design Automation Conference ASP-DAC 2020, Invited Paper},
  year         = {2020},
  address      = {Beijing, China},
  keywords     = {kuan, nvm-oma, georg},
  file         = {https://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2020-aspdac-nvm.pdf},
  confidential = {n},
  abstract     = {Emerging non-volatile memory (NVM) architectures are considered as a replacement for DRAM and storage in the near future, since NVMs provide low power consumption, fast access speed, and low unit cost. Due to the lower write-endurance of NVMs, several in-memory wear-leveling techniques have been studied over the last years. Since most approaches propose or rely on specialized hardware, the techniques are often evaluated based on assumptions and in-house simulations rather than on real systems. To address this issue, we develop a setup consisting of a gem5 instance and an NVMain2.0 instance, which simulates an entire system (CPU, peripherals, etc.) together with an NVM plugged into the system. Taking a recorded memory access pattern from a low-level simulation into consideration to design and optimize wear-leveling techniques as operating system services allows a cross-layer design of wear-leveling techniques. With the insights gathered by analyzing the recorded memory access patterns, we develop a software-only wear-leveling solution, which does not require special hardware at all. This algorithm is evaluated afterwards by the full system simulation.},
}
Emerging non-volatile memory (NVM) architectures are considered as a replacement for DRAM and storage in the near future, since NVMs provide low power consumption, fast access speed, and low unit cost. Due to the lower write-endurance of NVMs, several in-memory wear-leveling techniques have been studied over the last years. Since most approaches propose or rely on specialized hardware, the techniques are often evaluated based on assumptions and in-house simulations rather than on real systems. To address this issue, we develop a setup consisting of a gem5 instance and an NVMain2.0 instance, which simulates an entire system (CPU, peripherals, etc.) together with an NVM plugged into the system. Taking a recorded memory access pattern from a low-level simulation into consideration to design and optimize wear-leveling techniques as operating system services allows a cross-layer design of wear-leveling techniques. With the insights gathered by analyzing the recorded memory access patterns, we develop a software-only wear-leveling solution, which does not require special hardware at all. This algorithm is evaluated afterwards by the full system simulation.
|
| Wei-Chun Cheng, Shuo-Han Chen, Yuan-Hao Chang, Kuan-Hsun Chen, Jian-Jia Chen, Tseng-Yi Chen, Ming-Chang Yang and Wei-Kuan Shih. NS-FTL: Alleviating the Uneven Bit-Level Wearing of NVRAM-based FTL via NAND-SPIN. In 9th Non-Volatile Memory Systems and Applications Symposium (NVMSA) Virtual Conference, 2020 [BibTeX][PDF][Abstract]
@inproceedings{most2020nvmsa,
  author       = {Cheng, Wei-Chun and Chen, Shuo-Han and Chang, Yuan-Hao and Chen, Kuan-Hsun and Chen, Jian-Jia and Chen, Tseng-Yi and Yang, Ming-Chang and Shih, Wei-Kuan},
  title        = {NS-FTL: Alleviating the Uneven Bit-Level Wearing of NVRAM-based FTL via NAND-SPIN},
  booktitle    = {9th Non-Volatile Memory Systems and Applications Symposium (NVMSA)},
  year         = {2020},
  address      = {Virtual Conference},
  keywords     = {kuan, nvm-oma},
  file         = {http://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2020nvmsa-ftl.pdf},
  confidential = {n},
  abstract     = {Non-Volatile random access memory (NVRAM) has been regarded as a promising DRAM alternative with its non volatility, near-zero idle power consumption, and byte addressability. In particular, some NVRAM devices, such as Spin Torque Transfer (STT) RAM, can provide the same or better access performance and lower power consumption when compared with dynamic random access memory (DRAM). These nice features make NVRAM become an attractive DRAM replacement on NAND flash storage for resolving the management overhead of the flash translation layer (FTL). For instance, when adopting NVRAM for storing the mapping entries of FTL, the overheads of loading and storing the mapping entries between the non-volatile NAND flash and the volatile DRAM can be eliminated. Nevertheless, due to the limited lifetime constraint of NVRAM, the bit-level update behavior of FTL may lead to the issue of uneven bit-level wearing and the lifetime capacity of those less-worn NVRAM cells could be underutilized. Such an observation motivates this study to utilize the emerging NAND-like Spin Torque Transfer memory (NAND-SPIN) for alleviating the uneven bit-level wearing of NVRAM-based FTL and making the best of the lifetime capacity of each NAND-SPIN cell. The experimental results show that the proposed design can effectively avoid the uneven bit-level wearing, when compared with page-based FTL on NAND-SPIN.},
}
Non-Volatile random access memory (NVRAM) has been regarded as a promising DRAM alternative with its non volatility, near-zero idle power consumption, and byte addressability. In particular, some NVRAM devices, such as Spin Torque Transfer (STT) RAM, can provide the same or better access performance and lower power consumption when compared with dynamic random access memory (DRAM). These nice features make NVRAM become an attractive DRAM replacement on NAND flash storage for resolving the management overhead of the flash translation layer (FTL). For instance, when adopting NVRAM for storing the mapping entries of FTL, the overheads of loading and storing the mapping entries between the non-volatile NAND flash and the volatile DRAM can be eliminated. Nevertheless, due to the limited lifetime constraint of NVRAM, the bit-level update behavior of FTL may lead to the issue of uneven bit-level wearing and the lifetime capacity of those less-worn NVRAM cells could be underutilized. Such an observation motivates this study to utilize the emerging NAND-like Spin Torque Transfer memory (NAND-SPIN) for alleviating the uneven bit-level wearing of NVRAM-based FTL and making the best of the lifetime capacity of each NAND-SPIN cell. The experimental results show that the proposed design can effectively avoid the uneven bit-level wearing, when compared with page-based FTL on NAND-SPIN.
|
| Christian Hakert, Kuan-Hsun Chen, Simon Kuenzer, Sharan Santhanam, Shuo-Han Chen, Yuan-Hao Chang, Felipe Huici and Jian-Jia Chen. Split’n Trace NVM: Leveraging Library OSes for Semantic Memory Tracing. In 9th Non-Volatile Memory Systems and Applications Symposium (NVMSA) Virtual Conference, 2020 [BibTeX][PDF][Abstract]
@inproceedings{hakert2020nvmsa,
  author       = {Hakert, Christian and Chen, Kuan-Hsun and Kuenzer, Simon and Santhanam, Sharan and Chen, Shuo-Han and Chang, Yuan-Hao and Huici, Felipe and Chen, Jian-Jia},
  title        = {Split'n Trace NVM: Leveraging Library OSes for Semantic Memory Tracing},
  booktitle    = {9th Non-Volatile Memory Systems and Applications Symposium (NVMSA)},
  year         = {2020},
  address      = {Virtual Conference},
  keywords     = {kuan, nvm-oma},
  file         = {https://ls12-www.cs.tu-dortmund.de/daes/media/documents/publications/downloads/2020-nvmsa-hakert.pdf},
  confidential = {n},
  abstract     = {With the rise of non-volatile memory (NVM) as a replacement for traditional main memories (e.g. DRAM), memory access analysis is becoming an increasingly important topic. NVMs suffer from technical shortcomings such as reduced cell endurance which call for precise memory access analysis in order to design maintenance strategies that can extend the memory's lifetime. While existing memory access analyzers trace memory accesses at various levels, from the application level with code instrumentation, down to the hardware level where software is executed on special analysis hardware, they usually interpret main memory as a consecutive area, without investigating the application semantics of different memory regions.
In contrast, this paper presents a memory access simulator, which splits the main memory into semantic regions and enriches the simulation result with semantics from the analyzed application. We leverage a library-based operating system called Unikraft by ascribing memory regions of the simulation to the relevant OS libraries. This novel approach allows us to derive a detailed analysis of which libraries (and thus functionalities) are responsible for which memory access patterns. Through offline profiling with our simulator, we provide a fine-granularity analysis of memory access patterns that provide insights for the design of efficient NVM maintenance strategies.},
}
With the rise of non-volatile memory (NVM) as a replacement for traditional main memories (e.g. DRAM), memory access analysis is becoming an increasingly important topic. NVMs suffer from technical shortcomings such as reduced cell endurance which call for precise memory access analysis in order to design maintenance strategies that can extend the memory’s lifetime. While existing memory access analyzers trace memory accesses at various levels, from the application level with code instrumentation, down to the hardware level where software is executed on special analysis hardware, they usually interpret main memory as a consecutive area, without investigating the application semantics of different memory regions.
In contrast, this paper presents a memory access simulator, which splits the main memory into semantic regions and enriches the simulation result with semantics from the analyzed application. We leverage a library-based operating system called Unikraft by ascribing memory regions of the simulation to the relevant OS libraries. This novel approach allows us to derive a detailed analysis of which libraries (and thus functionalities) are responsible for which memory access patterns. Through offline profiling with our simulator, we provide a fine-granularity analysis of memory access patterns that provide insights for the design of efficient NVM maintenance strategies.
|
| Dennis Morczinek. Configurable FPGA-based Access Latency Emulation for Non-Volatile Main Memory. Bachelor Thesis, 2020 [BibTeX][PDF][Abstract]
@bachelorthesis{morczinek2020ba,
  author       = {Morczinek, Dennis},
  title        = {Configurable FPGA-based Access Latency Emulation for Non-Volatile Main Memory},
  school       = {TU Dortmund},
  adviser      = {Christian Hakert},
  year         = {2020},
  keywords     = {nvm-oma},
  file         = {https://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/2020-morczinek.pdf},
  confidential = {n},
  abstract     = {Since the drawbacks of using non-volatile memory (NVM) technologies as main memory
are being addressed by researchers, its use as an energy efficient alternative to traditional
DRAM is more interesting than ever. The impact of the greater memory access latencies
of NVM compared to DRAM on a system can be investigated in systems that utilize at
least one type of NVM as main memory. In many cases those systems do not exist yet,
so the research has to be conducted on non-volatile main memory (NVMM) emulators or
simulators. Yu Omori et al. developed such an emulator on an SoC-FPGA. Their emulator
injects additional read and write delays to memory accesses that are configurable by the
user. However, the configurations are applied to the whole memory of the emulator, so
emulating hybrid systems with more than one NVMM type is not possible.
This thesis extends the FPGA emulator design of Yu Omori et al. to allow the emulation
of more than one NVMM type by making it possible to define areas in the main memory
with different access latencies. The underlying emulator architecture is adapted so that
the corresponding latency for each area can be stored and the delay injection logic receives
the appropriate value when a memory access is performed. The count of definable areas
and the utilization of the FPGA are kept in balance to allow for future modifications of
the emulator design.
},
}
Since the drawbacks of using non-volatile memory (NVM) technologies as main memory
are being addressed by researchers, its use as an energy efficient alternative to traditional
DRAM is more interesting than ever. The impact of the greater memory access latencies
of NVM compared to DRAM on a system can be investigated in systems that utilize at
least one type of NVM as main memory. In many cases those systems do not exist yet,
so the research has to be conducted on non-volatile main memory (NVMM) emulators or
simulators. Yu Omori et al. developed such an emulator on an SoC-FPGA. Their emulator
injects additional read and write delays to memory accesses that are configurable by the
user. However, the configurations are applied to the whole memory of the emulator, so
emulating hybrid systems with more than one NVMM type is not possible.
This thesis extends the FPGA emulator design of Yu Omori et al. to allow the emulation
of more than one NVMM type by making it possible to define areas in the main memory
with different access latencies. The underlying emulator architecture is adapted so that
the corresponding latency for each area can be stored and the delay injection logic receives
the appropriate value when a memory access is performed. The count of definable areas
and the utilization of the FPGA are kept in balance to allow for future modifications of
the emulator design.
|
| Junior Delrich Kamtchogom Namtchueng. Extendable Hardware-Based Main Memory Access Snooping for Non-volatile Memory Simulations and Analysis. Bachelor Thesis, 2020 [BibTeX][PDF][Abstract]
@bachelorthesis{kamtchogom2020ba,
  author       = {Namtchueng, Junior Delrich Kamtchogom},
  title        = {Extendable Hardware-Based Main Memory Access Snooping for Non-volatile Memory Simulations and Analysis},
  school       = {TU Dortmund},
  adviser      = {Christian Hakert},
  year         = {2020},
  keywords     = {nvm-oma},
  file         = {https://ls12-www.cs.tu-dortmund.de/daes/media/documents/theses/kamtchogom-ba.pdf},
  confidential = {n},
  abstract     = {In order to determine the internal functioning of a computer memory, a simulation is
necessary. Simulate a memory means, make read/write requests to this memory and
observe the path that the requests are going through. Knowledges about the internal
functioning is for example important to be able to give some guarantees about its latency
or its response time. Make this simulation implicitly means the need of a simulator.
This thesis is about designing the simulator by using an extension hardware interface of
the processor and then analysing the output values of the simulator.
The methodology followed here is, first the presentation of the hardware interface, then
the presentation of the dataflow during the different requests, then the presentation of the
design used for the simulation and finally the analysis of the simulation result.
The analysis of the simulation result showed that both part of the memory (registers and
BRAM) simulated here work perfectly. In addition, during the access to the BRAM, it
results a delay between the moment when it receives the request and the moment when
it returns a response. Beyond that, the registers and the BRAM have in common that
they have a small addressable space. To have an access to a larger space another means
(DRAM access) is slightly described.
},
}
In order to determine the internal functioning of a computer memory, a simulation is
necessary. Simulate a memory means, make read/write requests to this memory and
observe the path that the requests are going through. Knowledges about the internal
functioning is for example important to be able to give some guarantees about its latency
or its response time. Make this simulation implicitly means the need of a simulator.
This thesis is about designing the simulator by using an extension hardware interface of
the processor and then analysing the output values of the simulator.
The methodology followed here is, first the presentation of the hardware interface, then
the presentation of the dataflow during the different requests, then the presentation of the
design used for the simulation and finally the analysis of the simulation result.
The analysis of the simulation result showed that both part of the memory (registers and
BRAM) simulated here work perfectly. In addition, during the access to the BRAM, it
results a delay between the moment when it receives the request and the moment when
it returns a response. Beyond that, the registers and the BRAM have in common that
they have a small addressable space. To have an access to a larger space another means
(DRAM access) is slightly described.
|
| Christian Hakert, Kuan-Hsun Chen, Paul R. Genssler, Georg von der Brüggen, Lars Bauer, Hussam Amrouch, Jian-Jia Chen and Jörg Henkel. SoftWear: Software-Only In-Memory Wear-Leveling for Non-Volatile Main Memory. CoRR abs/2004.03244 2020 [BibTeX][Link][Abstract]
@article{hakert2020softwear,
  author        = {Hakert, Christian and Chen, Kuan-Hsun and Genssler, Paul R. and von der Br{\"u}ggen, Georg and Bauer, Lars and Amrouch, Hussam and Chen, Jian-Jia and Henkel, J{\"o}rg},
  title         = {SoftWear: Software-Only In-Memory Wear-Leveling for Non-Volatile Main Memory},
  journal       = {CoRR},
  year          = {2020},
  volume        = {abs/2004.03244},
  eprint        = {2004.03244},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/pdf/2004.03244.pdf},
  keywords      = {kuan, nvm-oma, georg},
  confidential  = {n},
  abstract      = {Several emerging technologies for byte-addressable non-volatile memory (NVM) have been considered to replace DRAM as the main memory in computer systems during the last years. The disadvantage of a lower write endurance, compared to DRAM, of NVM technologies like Phase-Change Memory (PCM) or Ferroelectric RAM (FeRAM) has been addressed in the literature. As a solution, in-memory wear-leveling techniques have been proposed, which aim to balance the wear-level over all memory cells to achieve an increased memory lifetime. Generally, to apply such advanced aging-aware wear-leveling techniques proposed in the literature, additional special hardware is introduced into the memory system to provide the necessary information about the cell age and thus enable aging-aware wear-leveling decisions.
This paper proposes software-only aging-aware wear-leveling based on common CPU features and does not rely on any additional hardware support from the memory subsystem. Specifically, we exploit the memory management unit (MMU), performance counters, and interrupts to approximate the memory write counts as an aging indicator. Although the software-only approach may lead to slightly worse wear-leveling, it is applicable on commonly available hardware. We achieve page-level coarse-grained wear-leveling by approximating the current cell age through statistical sampling and performing physical memory remapping through the MMU. This method results in non-uniform memory usage patterns within a memory page. Hence, we further propose a fine-grained wear-leveling in the stack region of C / C++ compiled software.
By applying both wear-leveling techniques, we achieve up to 78.43% of the ideal memory lifetime, which is a lifetime improvement of more than a factor of 900 compared to the lifetime without any wear-leveling. },
}
Several emerging technologies for byte-addressable non-volatile memory (NVM) have been considered to replace DRAM as the main memory in computer systems during the last years. The disadvantage of a lower write endurance, compared to DRAM, of NVM technologies like Phase-Change Memory (PCM) or Ferroelectric RAM (FeRAM) has been addressed in the literature. As a solution, in-memory wear-leveling techniques have been proposed, which aim to balance the wear-level over all memory cells to achieve an increased memory lifetime. Generally, to apply such advanced aging-aware wear-leveling techniques proposed in the literature, additional special hardware is introduced into the memory system to provide the necessary information about the cell age and thus enable aging-aware wear-leveling decisions.
This paper proposes software-only aging-aware wear-leveling based on common CPU features and does not rely on any additional hardware support from the memory subsystem. Specifically, we exploit the memory management unit (MMU), performance counters, and interrupts to approximate the memory write counts as an aging indicator. Although the software-only approach may lead to slightly worse wear-leveling, it is applicable on commonly available hardware. We achieve page-level coarse-grained wear-leveling by approximating the current cell age through statistical sampling and performing physical memory remapping through the MMU. This method results in non-uniform memory usage patterns within a memory page. Hence, we further propose a fine-grained wear-leveling in the stack region of C / C++ compiled software.
By applying both wear-leveling techniques, we achieve up to 78.43% of the ideal memory lifetime, which is a lifetime improvement of more than a factor of 900 compared to the lifetime without any wear-leveling.
|
| Sebastian Buschjäger, Jian-Jia Chen, Kuan-Hsun Chen, Mario Günzel, Christian Hakert, Katharina Morik, Rodion Novkin, Lukas Pfahler and Mikail Yayla. Towards Explainable Bit Error Tolerance of Resistive RAM-Based Binarized Neural Networks. CoRR abs/2002.00909 2020 [BibTeX][Link][Abstract]
@article{buschjger2020explainable,
  author        = {Buschj{\"a}ger, Sebastian and Chen, Jian-Jia and Chen, Kuan-Hsun and G{\"u}nzel, Mario and Hakert, Christian and Morik, Katharina and Novkin, Rodion and Pfahler, Lukas and Yayla, Mikail},
  title         = {Towards Explainable Bit Error Tolerance of Resistive RAM-Based Binarized Neural Networks},
  journal       = {CoRR},
  year          = {2020},
  volume        = {abs/2002.00909},
  eprint        = {2002.00909},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/pdf/2002.00909.pdf},
  keywords      = {kuan, nvm-oma, mario},
  confidential  = {n},
  abstract      = {Non-volatile memory, such as resistive RAM (RRAM), is an emerging energy-efficient storage, especially for low-power machine learning models on the edge. It is reported, however, that the bit error rate of RRAMs can be up to 3.3% in the ultra low-power setting, which might be crucial for many use cases. Binary neural networks (BNNs), a resource efficient variant of neural networks (NNs), can tolerate a certain percentage of errors without a loss in accuracy and demand lower resources in computation and storage. The bit error tolerance (BET) in BNNs can be achieved by flipping the weight signs during training, as proposed by Hirtzlin et al., but their method has a significant drawback, especially for fully connected neural networks (FCNN): The FCNNs overfit to the error rate used in training, which leads to low accuracy under lower error rates. In addition, the underlying principles of BET are not investigated. In this work, we improve the training for BET of BNNs and aim to explain this property. We propose straight-through gradient approximation to improve the weight-sign-flip training, by which BNNs adapt less to the bit error rates. To explain the achieved robustness, we define a metric that aims to measure BET without fault injection. We evaluate the metric and find that it correlates with accuracy over error rate for all FCNNs tested. Finally, we explore the influence of a novel regularizer that optimizes with respect to this metric, with the aim of providing a configurable trade-off in accuracy and BET.},
}
Non-volatile memory, such as resistive RAM (RRAM), is an emerging energy-efficient storage, especially for low-power machine learning models on the edge. It is reported, however, that the bit error rate of RRAMs can be up to 3.3% in the ultra low-power setting, which might be crucial for many use cases. Binary neural networks (BNNs), a resource efficient variant of neural networks (NNs), can tolerate a certain percentage of errors without a loss in accuracy and demand lower resources in computation and storage. The bit error tolerance (BET) in BNNs can be achieved by flipping the weight signs during training, as proposed by Hirtzlin et al., but their method has a significant drawback, especially for fully connected neural networks (FCNN): The FCNNs overfit to the error rate used in training, which leads to low accuracy under lower error rates. In addition, the underlying principles of BET are not investigated. In this work, we improve the training for BET of BNNs and aim to explain this property. We propose straight-through gradient approximation to improve the weight-sign-flip training, by which BNNs adapt less to the bit error rates. To explain the achieved robustness, we define a metric that aims to measure BET without fault injection. We evaluate the metric and find that it correlates with accuracy over error rate for all FCNNs tested. Finally, we explore the influence of a novel regularizer that optimizes with respect to this metric, with the aim of providing a configurable trade-off in accuracy and BET.
|