1 // Copyright 2006 Google Inc. All Rights Reserved. 2 // Author: nsanders, menderico 3 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 8 // http://www.apache.org/licenses/LICENSE-2.0 9 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef STRESSAPPTEST_OS_H_ // NOLINT 17 #define STRESSAPPTEST_OS_H_ 18 19 #include <dirent.h> 20 #include <unistd.h> 21 #include <sys/syscall.h> 22 #include <stdint.h> 23 24 #include <string> 25 #include <list> 26 #include <map> 27 #include <vector> 28 29 // This file must work with autoconf on its public version, 30 // so these includes are correct. 31 #include "adler32memcpy.h" // NOLINT 32 #include "sattypes.h" // NOLINT 33 #include "clock.h" // NOLINT 34 35 const char kPagemapPath[] = "/proc/self/pagemap"; 36 37 struct PCIDevice { 38 int32 domain; 39 uint16 bus; 40 uint8 dev; 41 uint8 func; 42 uint16 vendor_id; 43 uint16 device_id; 44 uint64 base_addr[6]; 45 uint64 size[6]; 46 }; 47 48 typedef vector<PCIDevice*> PCIDevices; 49 50 class ErrorDiag; 51 52 class Clock; 53 54 // This class implements OS/Platform specific funtions. 55 class OsLayer { 56 public: 57 OsLayer(); 58 virtual ~OsLayer(); 59 60 // Set the minimum amount of hugepages that should be available for testing. 61 // Must be set before Initialize(). SetMinimumHugepagesSize(int64 min_bytes)62 void SetMinimumHugepagesSize(int64 min_bytes) { 63 min_hugepages_bytes_ = min_bytes; 64 } 65 66 // Set the minium amount of memory that should not be allocated. This only 67 // has any affect if hugepages are not used. 68 // Must be set before Initialize(). SetReserveSize(int64 reserve_mb)69 void SetReserveSize(int64 reserve_mb) { 70 reserve_mb_ = reserve_mb; 71 } 72 73 // Set parameters needed to translate physical address to memory module. SetDramMappingParams(uintptr_t channel_hash,int channel_width,vector<vector<string>> * channels)74 void SetDramMappingParams(uintptr_t channel_hash, int channel_width, 75 vector< vector<string> > *channels) { 76 channel_hash_ = channel_hash; 77 channel_width_ = channel_width; 78 channels_ = channels; 79 } 80 81 // Initializes data strctures and open files. 82 // Returns false on error. 83 virtual bool Initialize(); 84 85 // Virtual to physical. This implementation is optional for 86 // subclasses to implement. 87 // Takes a pointer, and returns the corresponding bus address. 88 virtual uint64 VirtualToPhysical(void *vaddr); 89 90 // Prints failed dimm. This implementation is optional for 91 // subclasses to implement. 92 // Takes a bus address and string, and prints the DIMM name 93 // into the string. Returns the DIMM number that corresponds to the 94 // address given, or -1 if unable to identify the DIMM number. 95 // Note that subclass implementations of FindDimm() MUST fill 96 // buf with at LEAST one non-whitespace character (provided len > 0). 97 virtual int FindDimm(uint64 addr, char *buf, int len); 98 99 // Classifies addresses according to "regions" 100 // This may mean different things on different platforms. 101 virtual int32 FindRegion(uint64 paddr); 102 // Find cpu cores associated with a region. Either NUMA or arbitrary. 103 virtual cpu_set_t *FindCoreMask(int32 region); 104 // Return cpu cores associated with a region in a hex string. 105 virtual string FindCoreMaskFormat(int32 region); 106 107 // Returns the HD device that contains this file. 108 virtual string FindFileDevice(string filename); 109 110 // Returns a list of paths coresponding to HD devices found on this machine. 111 virtual list<string> FindFileDevices(); 112 113 // Polls for errors. This implementation is optional. 114 // This will poll once for errors and return zero iff no errors were found. 115 virtual int ErrorPoll(); 116 117 // Delay an appropriate amount of time between polling. 118 virtual void ErrorWait(); 119 120 // Report errors. This implementation is mandatory. 121 // This will output a machine readable line regarding the error. 122 virtual bool ErrorReport(const char *part, const char *symptom, int count); 123 124 // Flushes page cache. Used to circumvent the page cache when doing disk 125 // I/O. This will be a NOP until ActivateFlushPageCache() is called, which 126 // is typically done when opening a file with O_DIRECT fails. 127 // Returns false on error, true on success or NOP. 128 // Subclasses may implement this in machine specific ways.. 129 virtual bool FlushPageCache(void); 130 // Enable FlushPageCache() to actually do the flush instead of being a NOP. 131 virtual void ActivateFlushPageCache(void); 132 133 // Flushes cacheline. Used to distinguish read or write errors. 134 // Subclasses may implement this in machine specific ways.. 135 // Takes a pointer, and flushed the cacheline containing that pointer. 136 virtual void Flush(void *vaddr); 137 138 // Fast flush, for use in performance critical code. 139 // This is bound at compile time, and will not pick up 140 // any runtime machine configuration info. FastFlush(void * vaddr)141 inline static void FastFlush(void *vaddr) { 142 #ifdef STRESSAPPTEST_CPU_PPC 143 asm volatile("dcbf 0,%0; sync" : : "r" (vaddr)); 144 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 145 // Put mfence before and after clflush to make sure: 146 // 1. The write before the clflush is committed to memory bus; 147 // 2. The read after the clflush is hitting the memory bus. 148 // 149 // From Intel manual: 150 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 151 // to be ordered by any other fencing, serializing or other CLFLUSH 152 // instruction. For example, software can use an MFENCE instruction to 153 // insure that previous stores are included in the write-back. 154 asm volatile("mfence"); 155 asm volatile("clflush (%0)" : : "r" (vaddr)); 156 asm volatile("mfence"); 157 #elif defined(STRESSAPPTEST_CPU_MIPS) 158 syscall(__NR_cacheflush, vaddr, 32, 0); 159 #elif defined(STRESSAPPTEST_CPU_ARMV7A) 160 // ARMv7a cachelines are 8 words (32 bytes). 161 syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0); 162 #elif defined(STRESSAPPTEST_CPU_AARCH64) 163 asm volatile("dc cvau, %0" : : "r" (vaddr)); 164 asm volatile("dsb ish"); 165 asm volatile("ic ivau, %0" : : "r" (vaddr)); 166 asm volatile("dsb ish"); 167 asm volatile("isb"); 168 #else 169 #warning "Unsupported CPU type: Unable to force cache flushes." 170 #endif 171 } 172 173 // Fast flush, for use in performance critical code. 174 // This is bound at compile time, and will not pick up 175 // any runtime machine configuration info. Takes a NULL-terminated 176 // array of addresses to flush. FastFlushList(void ** vaddrs)177 inline static void FastFlushList(void **vaddrs) { 178 #ifdef STRESSAPPTEST_CPU_PPC 179 while (*vaddrs) { 180 asm volatile("dcbf 0,%0" : : "r" (*vaddrs++)); 181 } 182 asm volatile("sync"); 183 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 184 // Put mfence before and after clflush to make sure: 185 // 1. The write before the clflush is committed to memory bus; 186 // 2. The read after the clflush is hitting the memory bus. 187 // 188 // From Intel manual: 189 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 190 // to be ordered by any other fencing, serializing or other CLFLUSH 191 // instruction. For example, software can use an MFENCE instruction to 192 // insure that previous stores are included in the write-back. 193 asm volatile("mfence"); 194 while (*vaddrs) { 195 asm volatile("clflush (%0)" : : "r" (*vaddrs++)); 196 } 197 asm volatile("mfence"); 198 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 199 while (*vaddrs) { 200 FastFlush(*vaddrs++); 201 } 202 #else 203 #warning "Unsupported CPU type: Unable to force cache flushes." 204 #endif 205 } 206 207 // Fast flush hint, for use in performance critical code. 208 // This is bound at compile time, and will not pick up 209 // any runtime machine configuration info. Note that this 210 // will not guarantee that a flush happens, but will at least 211 // hint that it should. This is useful for speeding up 212 // parallel march algorithms. FastFlushHint(void * vaddr)213 inline static void FastFlushHint(void *vaddr) { 214 #ifdef STRESSAPPTEST_CPU_PPC 215 asm volatile("dcbf 0,%0" : : "r" (vaddr)); 216 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 217 // From Intel manual: 218 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 219 // to be ordered by any other fencing, serializing or other CLFLUSH 220 // instruction. For example, software can use an MFENCE instruction to 221 // insure that previous stores are included in the write-back. 222 asm volatile("clflush (%0)" : : "r" (vaddr)); 223 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 224 FastFlush(vaddr); 225 #else 226 #warning "Unsupported CPU type: Unable to force cache flushes." 227 #endif 228 } 229 230 // Fast flush, for use in performance critical code. 231 // This is bound at compile time, and will not pick up 232 // any runtime machine configuration info. Sync's any 233 // transactions for ordering FastFlushHints. FastFlushSync()234 inline static void FastFlushSync() { 235 #ifdef STRESSAPPTEST_CPU_PPC 236 asm volatile("sync"); 237 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 238 // Put mfence before and after clflush to make sure: 239 // 1. The write before the clflush is committed to memory bus; 240 // 2. The read after the clflush is hitting the memory bus. 241 // 242 // From Intel manual: 243 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 244 // to be ordered by any other fencing, serializing or other CLFLUSH 245 // instruction. For example, software can use an MFENCE instruction to 246 // insure that previous stores are included in the write-back. 247 asm volatile("mfence"); 248 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 249 // This is a NOP, FastFlushHint() always does a full flush, so there's 250 // nothing to do for FastFlushSync(). 251 #else 252 #warning "Unsupported CPU type: Unable to force cache flushes." 253 #endif 254 } 255 256 // Get time in cpu timer ticks. Useful for matching MCEs with software 257 // actions. GetTimestamp(void)258 inline static uint64 GetTimestamp(void) { 259 uint64 tsc; 260 #ifdef STRESSAPPTEST_CPU_PPC 261 uint32 tbl, tbu, temp; 262 __asm __volatile( 263 "1:\n" 264 "mftbu %2\n" 265 "mftb %0\n" 266 "mftbu %1\n" 267 "cmpw %2,%1\n" 268 "bne 1b\n" 269 : "=r"(tbl), "=r"(tbu), "=r"(temp) 270 : 271 : "cc"); 272 273 tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl); 274 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 275 datacast_t data; 276 __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); 277 tsc = data.l64; 278 #elif defined(STRESSAPPTEST_CPU_MIPS) 279 __asm __volatile("rdhwr %0, $2\n" : "=r" (tsc)); 280 #elif defined(STRESSAPPTEST_CPU_ARMV7A) 281 #warning "Unsupported CPU type ARMV7A: your timer may not function correctly" 282 tsc = 0; 283 #elif defined(STRESSAPPTEST_CPU_AARCH64) 284 __asm __volatile("mrs %0, CNTVCT_EL0" : "=r" (tsc) : : ); 285 #else 286 #warning "Unsupported CPU type: your timer may not function correctly" 287 tsc = 0; 288 #endif 289 return (tsc); 290 } 291 292 // Find the free memory on the machine. 293 virtual int64 FindFreeMemSize(); 294 295 // Allocates test memory of length bytes. 296 // Subclasses must implement this. 297 // Call PepareTestMem to get a pointer. 298 virtual int64 AllocateAllMem(); // Returns length. 299 // Returns success. 300 virtual bool AllocateTestMem(int64 length, uint64 paddr_base); 301 virtual void FreeTestMem(); 302 303 // Prepares the memory for use. You must call this 304 // before using test memory, and after you are done. 305 virtual void *PrepareTestMem(uint64 offset, uint64 length); 306 virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length); 307 308 // Machine type detected. Can we implement all these functions correctly? 309 // Returns true if machine type is detected and implemented. 310 virtual bool IsSupported(); 311 312 // Returns 32 for 32-bit, 64 for 64-bit. 313 virtual int AddressMode(); 314 // Update OsLayer state regarding cpu support for various features. 315 virtual void GetFeatures(); 316 317 // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file. 318 virtual int PciOpen(int bus, int device, int function); 319 virtual void PciWrite(int fd, uint32 offset, uint32 value, int width); 320 virtual uint32 PciRead(int fd, uint32 offset, int width); 321 322 // Read MSRs 323 virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data); 324 virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data); 325 326 // Extract bits [n+len-1, n] from a 32 bit word. 327 // so GetBitField(0x0f00, 8, 4) == 0xf. 328 virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len); 329 330 // Platform and CPU specific CPU-stressing function. 331 // Returns true on success, false otherwise. 332 virtual bool CpuStressWorkload(); 333 334 // Causes false errors for unittesting. 335 // Setting to "true" causes errors to be injected. set_error_injection(bool errors)336 void set_error_injection(bool errors) { error_injection_ = errors; } error_injection()337 bool error_injection() const { return error_injection_; } 338 339 // Is SAT using normal malloc'd memory, or exotic mmap'd memory. normal_mem()340 bool normal_mem() const { return normal_mem_; } 341 342 // Get numa config, if available.. num_nodes()343 int num_nodes() const { return num_nodes_; } num_cpus()344 int num_cpus() const { return num_cpus_; } 345 346 // Handle to platform-specific error diagnoser. 347 ErrorDiag *error_diagnoser_; 348 349 // Disambiguate between different "warm" memcopies. 350 virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, 351 unsigned int size_in_bytes, 352 AdlerChecksum *checksum); 353 354 // Store a callback to use to print 355 // app-specific info about the last error location. 356 // This call back is called with a physical address, and the app can fill in 357 // the most recent transaction that occurred at that address. 358 typedef bool (*ErrCallback)(uint64 paddr, string *buf); set_err_log_callback(ErrCallback err_log_callback)359 void set_err_log_callback( 360 ErrCallback err_log_callback) { 361 err_log_callback_ = err_log_callback; 362 } get_err_log_callback()363 ErrCallback get_err_log_callback() { return err_log_callback_; } 364 365 // Set a clock object that can be overridden for use with unit tests. SetClock(Clock * clock)366 void SetClock(Clock *clock) { 367 if (clock_) { 368 delete clock_; 369 } 370 clock_ = clock; 371 time_initialized_ = clock_->Now(); 372 } 373 374 protected: 375 void *testmem_; // Location of test memory. 376 uint64 testmemsize_; // Size of test memory. 377 int64 totalmemsize_; // Size of available memory. 378 int64 min_hugepages_bytes_; // Minimum hugepages size. 379 int64 reserve_mb_; // Minimum amount of memory to reserve in MB. 380 bool error_injection_; // Do error injection? 381 bool normal_mem_; // Memory DMA capable? 382 bool use_hugepages_; // Use hugepage shmem? 383 bool use_posix_shm_; // Use 4k page shmem? 384 bool dynamic_mapped_shmem_; // Conserve virtual address space. 385 bool mmapped_allocation_; // Was memory allocated using mmap()? 386 int shmid_; // Handle to shmem 387 vector< vector<string> > *channels_; // Memory module names per channel. 388 uint64 channel_hash_; // Mask of address bits XORed for channel. 389 int channel_width_; // Channel width in bits. 390 391 int64 regionsize_; // Size of memory "regions" 392 int regioncount_; // Number of memory "regions" 393 int num_cpus_; // Number of cpus in the system. 394 int num_nodes_; // Number of nodes in the system. 395 int num_cpus_per_node_; // Number of cpus per node in the system. 396 int address_mode_; // Are we running 32 or 64 bit? 397 bool has_vector_; // Do we have sse2/neon instructions? 398 bool has_clflush_; // Do we have clflush instructions? 399 bool use_flush_page_cache_; // Do we need to flush the page cache? 400 401 402 time_t time_initialized_; // Start time of test. 403 404 vector<cpu_set_t> cpu_sets_; // Cache for cpu masks. 405 vector<bool> cpu_sets_valid_; // If the cpu mask cache is valid. 406 407 // Get file descriptor for dev msr. 408 virtual int OpenMSR(uint32 core, uint32 address); 409 410 // Look up how many hugepages there are. 411 virtual int64 FindHugePages(); 412 413 // Link to find last transaction at an error location. 414 ErrCallback err_log_callback_; 415 416 // Object to wrap the time function. 417 Clock *clock_; 418 419 private: 420 DISALLOW_COPY_AND_ASSIGN(OsLayer); 421 }; 422 423 // Selects and returns the proper OS and hardware interface. Does not call 424 // OsLayer::Initialize() on the new object. 425 OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options); 426 427 #endif // STRESSAPPTEST_OS_H_ NOLINT 428