1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef STRESSAPPTEST_OS_H_  // NOLINT
17 #define STRESSAPPTEST_OS_H_
18 
19 #include <dirent.h>
20 #include <unistd.h>
21 #include <sys/syscall.h>
22 #include <stdint.h>
23 
24 #include <string>
25 #include <list>
26 #include <map>
27 #include <vector>
28 
29 // This file must work with autoconf on its public version,
30 // so these includes are correct.
31 #include "adler32memcpy.h"  // NOLINT
32 #include "sattypes.h"       // NOLINT
33 #include "clock.h"          // NOLINT
34 
// Path of the calling process's pagemap file under /proc.
// A namespace-scope const object already has internal linkage in C++;
// 'static' only spells that out.
static const char kPagemapPath[] = "/proc/self/pagemap";
36 
// Plain record describing one PCI device.
// NOTE(review): the per-field meanings below are inferred from the field
// names only; confirm against the code that fills this struct in.
struct PCIDevice {
  int32 domain;         // Presumably the PCI domain (segment) number.
  uint16 bus;           // Presumably the bus number.
  uint8 dev;            // Presumably the device (slot) number.
  uint8 func;           // Presumably the function number.
  uint16 vendor_id;     // Presumably the PCI vendor ID.
  uint16 device_id;     // Presumably the PCI device ID.
  uint64 base_addr[6];  // Six base addresses, index-matched with size[].
  uint64 size[6];       // Six sizes, index-matched with base_addr[].
};
47 
48 typedef vector<PCIDevice*> PCIDevices;
49 
// Forward declaration: this header only stores a pointer to an ErrorDiag
// (OsLayer::error_diagnoser_), so the full definition is not needed here.
class ErrorDiag;

// Forward declaration of the clock wrapper passed to OsLayer::SetClock().
// NOTE(review): "clock.h" is already included above, so this is presumably
// redundant; confirm that header defines Clock.
class Clock;
53 
54 // This class implements OS/Platform specific funtions.
55 class OsLayer {
56  public:
57   OsLayer();
58   virtual ~OsLayer();
59 
60   // Set the minimum amount of hugepages that should be available for testing.
61   // Must be set before Initialize().
SetMinimumHugepagesSize(int64 min_bytes)62   void SetMinimumHugepagesSize(int64 min_bytes) {
63     min_hugepages_bytes_ = min_bytes;
64   }
65 
66   // Set the minium amount of memory that should not be allocated. This only
67   // has any affect if hugepages are not used.
68   // Must be set before Initialize().
SetReserveSize(int64 reserve_mb)69   void SetReserveSize(int64 reserve_mb) {
70     reserve_mb_ = reserve_mb;
71   }
72 
73   // Set parameters needed to translate physical address to memory module.
SetDramMappingParams(uintptr_t channel_hash,int channel_width,vector<vector<string>> * channels)74   void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
75                             vector< vector<string> > *channels) {
76     channel_hash_ = channel_hash;
77     channel_width_ = channel_width;
78     channels_ = channels;
79   }
80 
81   // Initializes data strctures and open files.
82   // Returns false on error.
83   virtual bool Initialize();
84 
85   // Virtual to physical. This implementation is optional for
86   // subclasses to implement.
87   // Takes a pointer, and returns the corresponding bus address.
88   virtual uint64 VirtualToPhysical(void *vaddr);
89 
90   // Prints failed dimm. This implementation is optional for
91   // subclasses to implement.
92   // Takes a bus address and string, and prints the DIMM name
93   // into the string. Returns the DIMM number that corresponds to the
94   // address given, or -1 if unable to identify the DIMM number.
95   // Note that subclass implementations of FindDimm() MUST fill
96   // buf with at LEAST one non-whitespace character (provided len > 0).
97   virtual int FindDimm(uint64 addr, char *buf, int len);
98 
99   // Classifies addresses according to "regions"
100   // This may mean different things on different platforms.
101   virtual int32 FindRegion(uint64 paddr);
102   // Find cpu cores associated with a region. Either NUMA or arbitrary.
103   virtual cpu_set_t *FindCoreMask(int32 region);
104   // Return cpu cores associated with a region in a hex string.
105   virtual string FindCoreMaskFormat(int32 region);
106 
107   // Returns the HD device that contains this file.
108   virtual string FindFileDevice(string filename);
109 
110   // Returns a list of paths coresponding to HD devices found on this machine.
111   virtual list<string> FindFileDevices();
112 
113   // Polls for errors. This implementation is optional.
114   // This will poll once for errors and return zero iff no errors were found.
115   virtual int ErrorPoll();
116 
117   // Delay an appropriate amount of time between polling.
118   virtual void ErrorWait();
119 
120   // Report errors. This implementation is mandatory.
121   // This will output a machine readable line regarding the error.
122   virtual bool ErrorReport(const char *part, const char *symptom, int count);
123 
124   // Flushes page cache. Used to circumvent the page cache when doing disk
125   // I/O.  This will be a NOP until ActivateFlushPageCache() is called, which
126   // is typically done when opening a file with O_DIRECT fails.
127   // Returns false on error, true on success or NOP.
128   // Subclasses may implement this in machine specific ways..
129   virtual bool FlushPageCache(void);
130   // Enable FlushPageCache() to actually do the flush instead of being a NOP.
131   virtual void ActivateFlushPageCache(void);
132 
133   // Flushes cacheline. Used to distinguish read or write errors.
134   // Subclasses may implement this in machine specific ways..
135   // Takes a pointer, and flushed the cacheline containing that pointer.
136   virtual void Flush(void *vaddr);
137 
138   // Fast flush, for use in performance critical code.
139   // This is bound at compile time, and will not pick up
140   // any runtime machine configuration info.
FastFlush(void * vaddr)141   inline static void FastFlush(void *vaddr) {
142 #ifdef STRESSAPPTEST_CPU_PPC
143     asm volatile("dcbf 0,%0; sync" : : "r" (vaddr));
144 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
145     // Put mfence before and after clflush to make sure:
146     // 1. The write before the clflush is committed to memory bus;
147     // 2. The read after the clflush is hitting the memory bus.
148     //
149     // From Intel manual:
150     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
151     // to be ordered by any other fencing, serializing or other CLFLUSH
152     // instruction. For example, software can use an MFENCE instruction to
153     // insure that previous stores are included in the write-back.
154     asm volatile("mfence");
155     asm volatile("clflush (%0)" : : "r" (vaddr));
156     asm volatile("mfence");
157 #elif defined(STRESSAPPTEST_CPU_MIPS)
158     syscall(__NR_cacheflush, vaddr, 32, 0);
159 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
160     // ARMv7a cachelines are 8 words (32 bytes).
161     syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
162 #elif defined(STRESSAPPTEST_CPU_AARCH64)
163     asm volatile("dc cvau, %0" : : "r" (vaddr));
164     asm volatile("dsb ish");
165     asm volatile("ic ivau, %0" : : "r" (vaddr));
166     asm volatile("dsb ish");
167     asm volatile("isb");
168 #else
169   #warning "Unsupported CPU type: Unable to force cache flushes."
170 #endif
171   }
172 
173   // Fast flush, for use in performance critical code.
174   // This is bound at compile time, and will not pick up
175   // any runtime machine configuration info.  Takes a NULL-terminated
176   // array of addresses to flush.
FastFlushList(void ** vaddrs)177   inline static void FastFlushList(void **vaddrs) {
178 #ifdef STRESSAPPTEST_CPU_PPC
179     while (*vaddrs) {
180       asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
181     }
182     asm volatile("sync");
183 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
184     // Put mfence before and after clflush to make sure:
185     // 1. The write before the clflush is committed to memory bus;
186     // 2. The read after the clflush is hitting the memory bus.
187     //
188     // From Intel manual:
189     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
190     // to be ordered by any other fencing, serializing or other CLFLUSH
191     // instruction. For example, software can use an MFENCE instruction to
192     // insure that previous stores are included in the write-back.
193     asm volatile("mfence");
194     while (*vaddrs) {
195       asm volatile("clflush (%0)" : : "r" (*vaddrs++));
196     }
197     asm volatile("mfence");
198 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
199     while (*vaddrs) {
200       FastFlush(*vaddrs++);
201     }
202 #else
203     #warning "Unsupported CPU type: Unable to force cache flushes."
204 #endif
205   }
206 
207   // Fast flush hint, for use in performance critical code.
208   // This is bound at compile time, and will not pick up
209   // any runtime machine configuration info.  Note that this
210   // will not guarantee that a flush happens, but will at least
211   // hint that it should.  This is useful for speeding up
212   // parallel march algorithms.
FastFlushHint(void * vaddr)213   inline static void FastFlushHint(void *vaddr) {
214 #ifdef STRESSAPPTEST_CPU_PPC
215     asm volatile("dcbf 0,%0" : : "r" (vaddr));
216 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
217     // From Intel manual:
218     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
219     // to be ordered by any other fencing, serializing or other CLFLUSH
220     // instruction. For example, software can use an MFENCE instruction to
221     // insure that previous stores are included in the write-back.
222     asm volatile("clflush (%0)" : : "r" (vaddr));
223 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
224     FastFlush(vaddr);
225 #else
226     #warning "Unsupported CPU type: Unable to force cache flushes."
227 #endif
228   }
229 
230   // Fast flush, for use in performance critical code.
231   // This is bound at compile time, and will not pick up
232   // any runtime machine configuration info.  Sync's any
233   // transactions for ordering FastFlushHints.
FastFlushSync()234   inline static void FastFlushSync() {
235 #ifdef STRESSAPPTEST_CPU_PPC
236     asm volatile("sync");
237 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
238     // Put mfence before and after clflush to make sure:
239     // 1. The write before the clflush is committed to memory bus;
240     // 2. The read after the clflush is hitting the memory bus.
241     //
242     // From Intel manual:
243     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
244     // to be ordered by any other fencing, serializing or other CLFLUSH
245     // instruction. For example, software can use an MFENCE instruction to
246     // insure that previous stores are included in the write-back.
247     asm volatile("mfence");
248 #elif defined(STRESSAPPTEST_CPU_MIPS) || defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64)
249     // This is a NOP, FastFlushHint() always does a full flush, so there's
250     // nothing to do for FastFlushSync().
251 #else
252   #warning "Unsupported CPU type: Unable to force cache flushes."
253 #endif
254   }
255 
256   // Get time in cpu timer ticks. Useful for matching MCEs with software
257   // actions.
GetTimestamp(void)258   inline static uint64 GetTimestamp(void) {
259     uint64 tsc;
260 #ifdef STRESSAPPTEST_CPU_PPC
261     uint32 tbl, tbu, temp;
262     __asm __volatile(
263       "1:\n"
264       "mftbu  %2\n"
265       "mftb   %0\n"
266       "mftbu  %1\n"
267       "cmpw   %2,%1\n"
268       "bne    1b\n"
269       : "=r"(tbl), "=r"(tbu), "=r"(temp)
270       :
271       : "cc");
272 
273     tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl);
274 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
275     datacast_t data;
276     __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
277     tsc = data.l64;
278 #elif defined(STRESSAPPTEST_CPU_MIPS)
279     __asm __volatile("rdhwr  %0, $2\n" : "=r" (tsc));
280 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
281     #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
282     tsc = 0;
283 #elif defined(STRESSAPPTEST_CPU_AARCH64)
284     __asm __volatile("mrs %0, CNTVCT_EL0" : "=r" (tsc) : : );
285 #else
286     #warning "Unsupported CPU type: your timer may not function correctly"
287     tsc = 0;
288 #endif
289     return (tsc);
290   }
291 
292   // Find the free memory on the machine.
293   virtual int64 FindFreeMemSize();
294 
295   // Allocates test memory of length bytes.
296   // Subclasses must implement this.
297   // Call PepareTestMem to get a pointer.
298   virtual int64 AllocateAllMem();  // Returns length.
299   // Returns success.
300   virtual bool AllocateTestMem(int64 length, uint64 paddr_base);
301   virtual void FreeTestMem();
302 
303   // Prepares the memory for use. You must call this
304   // before using test memory, and after you are done.
305   virtual void *PrepareTestMem(uint64 offset, uint64 length);
306   virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length);
307 
308   // Machine type detected. Can we implement all these functions correctly?
309   // Returns true if machine type is detected and implemented.
310   virtual bool IsSupported();
311 
312   // Returns 32 for 32-bit, 64 for 64-bit.
313   virtual int AddressMode();
314   // Update OsLayer state regarding cpu support for various features.
315   virtual void GetFeatures();
316 
317   // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file.
318   virtual int PciOpen(int bus, int device, int function);
319   virtual void PciWrite(int fd, uint32 offset, uint32 value, int width);
320   virtual uint32 PciRead(int fd, uint32 offset, int width);
321 
322   // Read MSRs
323   virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data);
324   virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data);
325 
326   // Extract bits [n+len-1, n] from a 32 bit word.
327   // so GetBitField(0x0f00, 8, 4) == 0xf.
328   virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len);
329 
330   // Platform and CPU specific CPU-stressing function.
331   // Returns true on success, false otherwise.
332   virtual bool CpuStressWorkload();
333 
334   // Causes false errors for unittesting.
335   // Setting to "true" causes errors to be injected.
set_error_injection(bool errors)336   void set_error_injection(bool errors) { error_injection_ = errors; }
error_injection()337   bool error_injection() const { return error_injection_; }
338 
339   // Is SAT using normal malloc'd memory, or exotic mmap'd memory.
normal_mem()340   bool normal_mem() const { return normal_mem_; }
341 
342   // Get numa config, if available..
num_nodes()343   int num_nodes() const { return num_nodes_; }
num_cpus()344   int num_cpus() const { return num_cpus_; }
345 
346   // Handle to platform-specific error diagnoser.
347   ErrorDiag *error_diagnoser_;
348 
349   // Disambiguate between different "warm" memcopies.
350   virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
351                                unsigned int size_in_bytes,
352                                AdlerChecksum *checksum);
353 
354   // Store a callback to use to print
355   // app-specific info about the last error location.
356   // This call back is called with a physical address, and the app can fill in
357   // the most recent transaction that occurred at that address.
358   typedef bool (*ErrCallback)(uint64 paddr, string *buf);
set_err_log_callback(ErrCallback err_log_callback)359   void set_err_log_callback(
360     ErrCallback err_log_callback) {
361     err_log_callback_ = err_log_callback;
362   }
get_err_log_callback()363   ErrCallback get_err_log_callback() { return err_log_callback_; }
364 
365   // Set a clock object that can be overridden for use with unit tests.
SetClock(Clock * clock)366   void SetClock(Clock *clock) {
367     if (clock_) {
368       delete clock_;
369     }
370     clock_ = clock;
371     time_initialized_ = clock_->Now();
372   }
373 
374  protected:
375   void *testmem_;                // Location of test memory.
376   uint64 testmemsize_;           // Size of test memory.
377   int64 totalmemsize_;           // Size of available memory.
378   int64 min_hugepages_bytes_;    // Minimum hugepages size.
379   int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
380   bool  error_injection_;        // Do error injection?
381   bool  normal_mem_;             // Memory DMA capable?
382   bool  use_hugepages_;          // Use hugepage shmem?
383   bool  use_posix_shm_;          // Use 4k page shmem?
384   bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
385   bool  mmapped_allocation_;     // Was memory allocated using mmap()?
386   int   shmid_;                  // Handle to shmem
387   vector< vector<string> > *channels_;  // Memory module names per channel.
388   uint64 channel_hash_;          // Mask of address bits XORed for channel.
389   int channel_width_;            // Channel width in bits.
390 
391   int64 regionsize_;             // Size of memory "regions"
392   int   regioncount_;            // Number of memory "regions"
393   int   num_cpus_;               // Number of cpus in the system.
394   int   num_nodes_;              // Number of nodes in the system.
395   int   num_cpus_per_node_;      // Number of cpus per node in the system.
396   int   address_mode_;           // Are we running 32 or 64 bit?
397   bool  has_vector_;             // Do we have sse2/neon instructions?
398   bool  has_clflush_;            // Do we have clflush instructions?
399   bool  use_flush_page_cache_;   // Do we need to flush the page cache?
400 
401 
402   time_t time_initialized_;      // Start time of test.
403 
404   vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
405   vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.
406 
407   // Get file descriptor for dev msr.
408   virtual int OpenMSR(uint32 core, uint32 address);
409 
410   // Look up how many hugepages there are.
411   virtual int64 FindHugePages();
412 
413   // Link to find last transaction at an error location.
414   ErrCallback err_log_callback_;
415 
416   // Object to wrap the time function.
417   Clock *clock_;
418 
419  private:
420   DISALLOW_COPY_AND_ASSIGN(OsLayer);
421 };
422 
// Selects and returns the proper OS and hardware interface.  Does not call
// OsLayer::Initialize() on the new object.
// NOTE(review): which keys of |options| are consulted, and who is
// responsible for deleting the returned object, are not visible in this
// header; confirm in the implementation.
OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);
426 
427 #endif  // STRESSAPPTEST_OS_H_ NOLINT
428