// Copyright 2006 Google Inc. All Rights Reserved.
// Author: nsanders, menderico

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// os.cc : os and machine specific implementation
// This file includes an abstracted interface
// for linux-distro specific and HW specific
// interfaces.

#include "os.h"

#include <errno.h>
#include <fcntl.h>
#include <linux/types.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/ipc.h>
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
#include <unistd.h>

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000  // remove when glibc defines it
#endif

#include <string>
#include <list>

// This file must work with autoconf on its public version,
// so these includes are correct.
#include "sattypes.h"
#include "error_diag.h"
#include "clock.h"

// OsLayer initialization.
OsLayer::OsLayer() {
  testmem_ = 0;
  testmemsize_ = 0;
  totalmemsize_ = 0;
  min_hugepages_bytes_ = 0;
  reserve_mb_ = 0;
  normal_mem_ = true;
  use_hugepages_ = false;
  use_posix_shm_ = false;
  dynamic_mapped_shmem_ = false;
  mmapped_allocation_ = false;
  shmid_ = 0;
  channels_ = NULL;

  time_initialized_ = 0;

  regionsize_ = 0;
  regioncount_ = 1;
  num_cpus_ = 0;
  num_nodes_ = 0;
  num_cpus_per_node_ = 0;
  error_diagnoser_ = 0;
  err_log_callback_ = 0;
  error_injection_ = false;

  void *pvoid = 0;
  address_mode_ = sizeof(pvoid) * 8;

  has_clflush_ = false;
  has_vector_ = false;

  use_flush_page_cache_ = false;

  clock_ = NULL;
}

// OsLayer cleanup.
OsLayer::~OsLayer() {
  if (error_diagnoser_)
    delete error_diagnoser_;
  if (clock_)
    delete clock_;
}

// OsLayer initialization.
bool OsLayer::Initialize() {
  if (!clock_) {
    clock_ = new Clock();
  }

  time_initialized_ = clock_->Now();
  // Detect asm support.
  GetFeatures();

  if (num_cpus_ == 0) {
    num_nodes_ = 1;
    num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
    num_cpus_per_node_ = num_cpus_ / num_nodes_;
  }
  logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
  cpu_sets_.resize(num_nodes_);
  cpu_sets_valid_.resize(num_nodes_);
  // Create error diagnoser.
  error_diagnoser_ = new ErrorDiag();
  if (!error_diagnoser_->set_os(this))
    return false;
  return true;
}

// Machine type detected. Can we implement all these functions correctly?
bool OsLayer::IsSupported() {
  if (kOpenSource) {
    // There are no explicitly supported systems in open source version.
    return true;
  }

  // This is the default empty implementation.
  // SAT won't report full error information.
  return false;
}

int OsLayer::AddressMode() {
  // Detect 32/64 bit binary.
  void *pvoid = 0;
  return sizeof(pvoid) * 8;
}

// Translates user virtual to physical address.
uint64 OsLayer::VirtualToPhysical(void *vaddr) {
  uint64 frame, paddr, pfnmask, pagemask;
  int pagesize = sysconf(_SC_PAGESIZE);
  off64_t off = ((uintptr_t)vaddr) / pagesize * 8;
  int fd = open(kPagemapPath, O_RDONLY);

  /*
   * https://www.kernel.org/doc/Documentation/vm/pagemap.txt
   * API change (July 2015)
   * https://patchwork.kernel.org/patch/6787991/
   */

  if (fd < 0)
    return 0;

  if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
    int err = errno;
    string errtxt = ErrorString(err);
    logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
              kPagemapPath, err, errtxt.c_str());
    if (fd >= 0)
      close(fd);
    return 0;
  }
  close(fd);

  /* Check if page is present and not swapped. */
  if (!(frame & (1ULL << 63)) || (frame & (1ULL << 62)))
    return 0;

  /* pfn is bits 0-54. */
  pfnmask = ((1ULL << 55) - 1);
  /* Pagesize had better be a power of 2. */
  pagemask = pagesize - 1;

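  /* Illustrative example (hypothetical values): with 4 KiB pages, the entry
   * for vaddr 0x7f0000001234 sits at pagemap offset (vaddr / 4096) * 8; if
   * that entry's PFN field is 0x1a2b3, the physical address works out to
   * 0x1a2b3 * 0x1000 + 0x234 = 0x1a2b3234. */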
  paddr = ((frame & pfnmask) * pagesize) | ((uintptr_t)vaddr & pagemask);
  return paddr;
}

// Returns the HD device that contains this file.
string OsLayer::FindFileDevice(string filename) {
  return "hdUnknown";
}

// Returns a list of locations corresponding to HD devices.
list<string> OsLayer::FindFileDevices() {
  // No autodetection on unknown systems.
  list<string> locations;
  return locations;
}


// Get HW core features from cpuid instruction.
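// The x86 path below decodes CPUID leaf 1: EDX bit 19 reports CLFLUSH
// support and EDX bit 26 reports SSE2 (per the Intel/AMD CPUID references).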
void OsLayer::GetFeatures() {
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
  unsigned int eax = 1, ebx, ecx, edx;
  cpuid(&eax, &ebx, &ecx, &edx);
  has_clflush_ = (edx >> 19) & 1;
  has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.

  logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
            has_clflush_ ? "true" : "false",
            has_vector_ ? "true" : "false");
#elif defined(STRESSAPPTEST_CPU_PPC)
  // All PPC implementations have cache flush instructions.
  has_clflush_ = true;
#elif defined(STRESSAPPTEST_CPU_MIPS)
  // All MIPS implementations have cache flush instructions.
  has_clflush_ = true;
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
  // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
  // For now assume neon and don't run -W if you don't have it.
  has_vector_ = true;  // NEON.
#warning "Unsupported CPU type ARMV7A: unable to determine feature set."
#else
#warning "Unsupported CPU type: unable to determine feature set."
#endif
}


// Enable FlushPageCache to be functional instead of a NOP.
void OsLayer::ActivateFlushPageCache(void) {
  logprintf(9, "Log: page cache will be flushed as needed\n");
  use_flush_page_cache_ = true;
}

// Flush the page cache to ensure reads come from the disk.
bool OsLayer::FlushPageCache(void) {
  if (!use_flush_page_cache_)
    return true;

  // First, ask the kernel to write the cache to the disk.
  sync();

  // Second, ask the kernel to empty the cache by writing "1" to
  // "/proc/sys/vm/drop_caches".
  static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
  int dcfile = open(drop_caches_file, O_WRONLY);
  if (dcfile < 0) {
    int err = errno;
    string errtxt = ErrorString(err);
    logprintf(3, "Log: failed to open %s - err %d (%s)\n",
              drop_caches_file, err, errtxt.c_str());
    return false;
  }

  ssize_t bytes_written = write(dcfile, "1", 1);
  close(dcfile);

  if (bytes_written != 1) {
    int err = errno;
    string errtxt = ErrorString(err);
    logprintf(3, "Log: failed to write %s - err %d (%s)\n",
              drop_caches_file, err, errtxt.c_str());
    return false;
  }
  return true;
}


// We need to flush the cacheline here.
void OsLayer::Flush(void *vaddr) {
  // Use the generic flush. This function is just so we can override
  // this if we are so inclined.
  if (has_clflush_) {
    OsLayer::FastFlush(vaddr);
  }
}


// Run C or ASM copy as appropriate.
bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
                              unsigned int size_in_bytes,
                              AdlerChecksum *checksum) {
  if (has_vector_) {
    return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
  } else {
    return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
  }
}


// Translate physical address to memory module/chip name.
// Assumes interleaving between two memory channels based on the XOR of
// all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
// blocks with bits distributed from each chip in that channel.
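// Illustrative example: with channel_hash_ = 0x40, the XOR parity of the
// masked bits reduces to address bit 6, so consecutive 64-byte blocks
// alternate between the two channels.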
int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
  if (!channels_) {
    snprintf(buf, len, "DIMM Unknown");
    return -1;
  }

  // Find channel by XORing address bits in channel_hash mask.
  uint32 low = static_cast<uint32>(addr & channel_hash_);
  uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
  vector<string>& channel = (*channels_)[
      __builtin_parity(high) ^ __builtin_parity(low)];

  // Find dram chip by finding which byte within the channel
  // by address mod channel width, then divide the channel
  // evenly among the listed dram chips. Note, this will not work
  // with x4 dram.
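  // Illustrative example: with channel_width_ = 64 bits (8 bytes across the
  // channel) and 4 chips listed, byte offsets 0-1 map to chip 0, offsets 2-3
  // to chip 1, and so on.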
  int chip = (addr % (channel_width_ / 8)) /
             ((channel_width_ / 8) / channel.size());
  string name = channel[chip];
  snprintf(buf, len, "%s", name.c_str());
  return 1;
}


// Classifies addresses according to "regions".
// This isn't really implemented meaningfully here.
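// For example, with totalmemsize_ = 8 GB the first call below settles on a
// 1 GB region size and 8 regions; below 4 GB of memory the 512 MB minimum
// region size applies instead.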
int32 OsLayer::FindRegion(uint64 addr) {
  static bool warned = false;

  if (regionsize_ == 0) {
    regionsize_ = totalmemsize_ / 8;
    if (regionsize_ < 512 * kMegabyte)
      regionsize_ = 512 * kMegabyte;
    regioncount_ = totalmemsize_ / regionsize_;
    if (regioncount_ < 1) regioncount_ = 1;
  }

  int32 region_num = addr / regionsize_;
  if (region_num >= regioncount_) {
    if (!warned) {
      logprintf(0, "Log: region number %d exceeds region count %d\n",
                region_num, regioncount_);
      warned = true;
    }
    region_num = region_num % regioncount_;
  }
  return region_num;
}

// Report which cores are associated with a given region.
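// For example, on a machine reporting 2 nodes and 8 CPUs (4 per node),
// region 0 maps to CPUs 0-3 and region 1 to CPUs 4-7.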
cpu_set_t *OsLayer::FindCoreMask(int32 region) {
  sat_assert(region >= 0);
  region %= num_nodes_;
  if (!cpu_sets_valid_[region]) {
    CPU_ZERO(&cpu_sets_[region]);
    for (int i = 0; i < num_cpus_per_node_; ++i) {
      CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
    }
    cpu_sets_valid_[region] = true;
    logprintf(5, "Log: Region %d mask 0x%s\n",
              region, FindCoreMaskFormat(region).c_str());
  }
  return &cpu_sets_[region];
}

// Return cores associated with a given region in hex string.
string OsLayer::FindCoreMaskFormat(int32 region) {
  cpu_set_t* mask = FindCoreMask(region);
  string format = cpuset_format(mask);
  if (format.size() < 8)
    format = string(8 - format.size(), '0') + format;
  return format;
}

// Report an error in an easily parseable way.
bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
  time_t now = clock_->Now();
  int ttf = now - time_initialized_;
  if (strlen(symptom) && strlen(part)) {
    logprintf(0, "Report Error: %s : %s : %d : %ds\n",
              symptom, part, count, ttf);
  } else {
    // Log something so the error still shows up, but this won't break the
    // parser.
    logprintf(0, "Warning: Invalid Report Error: "
              "%s : %s : %d : %ds\n", symptom, part, count, ttf);
  }
  return true;
}

// Read the number of hugepages out of the kernel interface in proc.
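// Note that callers in this file convert the returned page count to bytes
// assuming 2 MB hugepages; systems with a different default hugepage size
// would need that assumption adjusted.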
int64 OsLayer::FindHugePages() {
  char buf[65] = "0";

  // This is a kernel interface to query the number of hugepages
  // available in the system.
  static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
  int hpfile = open(hugepages_info_file, O_RDONLY);

  ssize_t bytes_read = read(hpfile, buf, 64);
  close(hpfile);

  if (bytes_read <= 0) {
    logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
                  "read did not provide data\n");
    return 0;
  }

  if (bytes_read == 64) {
    logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
                 "is surprisingly large\n");
    return 0;
  }

  // Add a null termination to be string safe.
  buf[bytes_read] = '\0';
  // Read the page count.
  int64 pages = strtoull(buf, NULL, 10);  // NOLINT

  return pages;
}

int64 OsLayer::FindFreeMemSize() {
  int64 size = 0;
  int64 minsize = 0;
  if (totalmemsize_ > 0)
    return totalmemsize_;

  int64 pages = sysconf(_SC_PHYS_PAGES);
  int64 avpages = sysconf(_SC_AVPHYS_PAGES);
  int64 pagesize = sysconf(_SC_PAGESIZE);
  int64 physsize = pages * pagesize;
  int64 avphyssize = avpages * pagesize;

  // Assume 2MB hugepages.
  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;

  if ((pages == -1) || (pagesize == -1)) {
    logprintf(0, "Process Error: sysconf could not determine memory size.\n");
    return 0;
  }

  // We want to leave enough stuff for things to run.
  // If the user specified a minimum amount of memory to expect, require that.
  // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
  // If less than 2GB is present use 85% of what's available.
  // These are fairly arbitrary numbers that seem to work OK.
  //
  // TODO(nsanders): is there a more correct way to determine target
  // memory size?
  if (hugepagesize > 0) {
    if (min_hugepages_bytes_ > 0) {
      minsize = min_hugepages_bytes_;
    } else {
      minsize = hugepagesize;
    }
  } else {
    if (physsize < 2048LL * kMegabyte) {
      minsize = ((pages * 85) / 100) * pagesize;
    } else {
      minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
    }
    // Make sure that at least reserve_mb_ is left for the system.
    if (reserve_mb_ > 0) {
      int64 totalsize = pages * pagesize;
      int64 reserve_kb = reserve_mb_ * kMegabyte;
      if (reserve_kb > totalsize) {
        logprintf(0, "Procedural Error: %lld is bigger than the total memory "
                     "available %lld\n", reserve_kb, totalsize);
      } else if (reserve_kb > totalsize - minsize) {
        logprintf(5, "Warning: Overriding memory to use: original %lld, "
                     "current %lld\n", minsize, totalsize - reserve_kb);
        minsize = totalsize - reserve_kb;
      }
    }
  }

  // Use hugepage sizing if available.
  if (hugepagesize > 0) {
    if (hugepagesize < minsize) {
      logprintf(0, "Procedural Error: Not enough hugepages. "
                   "%lldMB available < %lldMB required.\n",
                hugepagesize / kMegabyte,
                minsize / kMegabyte);
      // Require the calculated minimum amount of memory.
      size = minsize;
    } else {
      // Require that we get all hugepages.
      size = hugepagesize;
    }
  } else {
    // Require the calculated minimum amount of memory.
    size = minsize;
  }

  logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
               "Targeting %lld MB (%lld%%)\n",
            physsize / kMegabyte,
            avphyssize / kMegabyte,
            hugepagesize / kMegabyte,
            size / kMegabyte,
            size * 100 / physsize);

  totalmemsize_ = size;
  return size;
}

// Allocates all memory available.
int64 OsLayer::AllocateAllMem() {
  int64 length = FindFreeMemSize();
  bool retval = AllocateTestMem(length, 0);
  if (retval)
    return length;
  else
    return 0;
}

// Allocate the target memory. This may be from malloc, hugepage pool
// or other platform specific sources.
bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
  // Try hugepages first.
  void *buf = 0;

  sat_assert(length >= 0);

  if (paddr_base)
    logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
                 " ignore.\n", paddr_base);

  // Determine optimal memory allocation path.
  bool prefer_hugepages = false;
  bool prefer_posix_shm = false;
  bool prefer_dynamic_mapping = false;

  // Are there enough hugepages?
  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
  // TODO(nsanders): Is there enough /dev/shm? Is there enough free memory?
  if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
    prefer_dynamic_mapping = true;
    prefer_posix_shm = true;
    logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
    logprintf(3, "Log: You may need to run "
                 "'sudo mount -o remount,size=100%% /dev/shm.'\n");
  } else if (hugepagesize >= length) {
    prefer_hugepages = true;
    logprintf(3, "Log: Prefer using hugepage allocation.\n");
  } else {
    logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
  }

#ifdef HAVE_SYS_SHM_H
  // Allocate hugepage mapped memory.
  if (prefer_hugepages) {
    do {  // Allow break statement.
      int shmid;
      void *shmaddr;

      if ((shmid = shmget(2, length,
              SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
        int err = errno;
        string errtxt = ErrorString(err);
        logprintf(3, "Log: failed to allocate shared hugepage "
                     "object - err %d (%s)\n",
                  err, errtxt.c_str());
        logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
        break;
      }

      shmaddr = shmat(shmid, NULL, 0);
      if (shmaddr == reinterpret_cast<void*>(-1)) {
        int err = errno;
        string errtxt = ErrorString(err);
        logprintf(0, "Log: failed to attach shared "
                     "hugepage object - err %d (%s).\n",
                  err, errtxt.c_str());
        if (shmctl(shmid, IPC_RMID, NULL) < 0) {
          int err = errno;
          string errtxt = ErrorString(err);
          logprintf(0, "Log: failed to remove shared "
                       "hugepage object - err %d (%s).\n",
                    err, errtxt.c_str());
        }
        break;
      }
      use_hugepages_ = true;
      shmid_ = shmid;
      buf = shmaddr;
      logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
                shmid, shmaddr);
    } while (0);
  }

  if ((!use_hugepages_) && prefer_posix_shm) {
    do {
      int shm_object;
      void *shmaddr = NULL;

      shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
      if (shm_object < 0) {
        int err = errno;
        string errtxt = ErrorString(err);
        logprintf(3, "Log: failed to allocate shared "
                     "smallpage object - err %d (%s)\n",
                  err, errtxt.c_str());
        break;
      }

      if (0 > ftruncate(shm_object, length)) {
        int err = errno;
        string errtxt = ErrorString(err);
        logprintf(3, "Log: failed to ftruncate shared "
                     "smallpage object - err %d (%s)\n",
                  err, errtxt.c_str());
        break;
      }

      // 32 bit linux apps can only use ~1.4G of address space.
      // Use dynamic mapping for allocations larger than that.
      // Currently perf hit is ~10% for this.
      if (prefer_dynamic_mapping) {
        dynamic_mapped_shmem_ = true;
      } else {
        // Do a full mapping here otherwise.
        shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
                         shm_object, 0);
        if (shmaddr == reinterpret_cast<void*>(-1)) {
          int err = errno;
          string errtxt = ErrorString(err);
          logprintf(0, "Log: failed to map shared "
                       "smallpage object - err %d (%s).\n",
                    err, errtxt.c_str());
          break;
        }
      }

      use_posix_shm_ = true;
      shmid_ = shm_object;
      buf = shmaddr;
      char location_message[256] = "";
      if (dynamic_mapped_shmem_) {
        sprintf(location_message, "mapped as needed");
      } else {
        sprintf(location_message, "at %p", shmaddr);
      }
      logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
                shm_object, location_message);
    } while (0);
    shm_unlink("/stressapptest");
  }
#endif  // HAVE_SYS_SHM_H

  if (!use_hugepages_ && !use_posix_shm_) {
    // If the page size is what SAT is expecting explicitly perform mmap()
    // allocation.
    if (sysconf(_SC_PAGESIZE) >= 4096) {
      void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (map_buf != MAP_FAILED) {
        buf = map_buf;
        mmapped_allocation_ = true;
        logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
      }
    }
    if (!mmapped_allocation_) {
      // Use memalign to ensure that blocks are aligned enough for disk direct
      // IO.
      buf = static_cast<char*>(memalign(4096, length));
      if (buf) {
        logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
      } else {
        logprintf(0, "Process Error: memalign returned 0\n");
        if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
          logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
                       "bit process. Please setup shared memory.\n");
        }
      }
    }
  }

  testmem_ = buf;
  if (buf || dynamic_mapped_shmem_) {
    testmemsize_ = length;
  } else {
    testmemsize_ = 0;
  }

  return (buf != 0) || dynamic_mapped_shmem_;
}

// Free the test memory.
void OsLayer::FreeTestMem() {
  if (testmem_) {
    if (use_hugepages_) {
#ifdef HAVE_SYS_SHM_H
      shmdt(testmem_);
      shmctl(shmid_, IPC_RMID, NULL);
#endif
    } else if (use_posix_shm_) {
      if (!dynamic_mapped_shmem_) {
        munmap(testmem_, testmemsize_);
      }
      close(shmid_);
    } else if (mmapped_allocation_) {
      munmap(testmem_, testmemsize_);
    } else {
      free(testmem_);
    }
    testmem_ = 0;
    testmemsize_ = 0;
  }
}


// Prepare the target memory. It may require mapping in, or this may be a noop.
void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
  sat_assert((offset + length) <= testmemsize_);
  if (dynamic_mapped_shmem_) {
    // TODO(nsanders): Check if we can support MAP_NONBLOCK,
    // and evaluate performance hit from not using it.
#ifdef HAVE_MMAP64
    void *mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
                     shmid_, offset);
#else
    void *mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
                     shmid_, offset);
#endif
    if (mapping == MAP_FAILED) {
      string errtxt = ErrorString(errno);
      logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
                   "error: %s.\n",
                offset, length, errtxt.c_str());
      sat_assert(0);
    }
    return mapping;
  }

  return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
}

// Release the test memory resources, if any.
void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
  if (dynamic_mapped_shmem_) {
    int retval = munmap(addr, length);
    if (retval == -1) {
      string errtxt = ErrorString(errno);
      logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
                   "error: %s.\n",
                addr, length, errtxt.c_str());
      sat_assert(0);
    }
  }
}

// No error polling on unknown systems.
int OsLayer::ErrorPoll() {
  return 0;
}

// Generally, poll for errors once per second.
void OsLayer::ErrorWait() {
  sat_sleep(1);
  return;
}

// Open a PCI bus-dev-func as a file and return its file descriptor.
// Error is indicated by return value less than zero.
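// For example, bus 0, device 2, function 0 corresponds to the path
// "/proc/bus/pci/00/02.0".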
int OsLayer::PciOpen(int bus, int device, int function) {
  char dev_file[256];

  snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
           bus, device, function);

  int fd = open(dev_file, O_RDWR);
  if (fd == -1) {
    logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
                 "function %d (errno %d).\n",
              bus, device, function, errno);
    return -1;
  }

  return fd;
}


// Read and write functions to access PCI config.
uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
  // Strict aliasing rules lawyers will cause data corruption
  // on cast pointers in some gccs.
  union {
    uint32 l32;
    uint16 l16;
    uint8 l8;
  } datacast;
  datacast.l32 = 0;
  uint32 size = width / 8;

  sat_assert((width == 32) || (width == 16) || (width == 8));
  sat_assert(offset <= (256 - size));

  if (lseek(fd, offset, SEEK_SET) < 0) {
    logprintf(0, "Process Error: Can't seek %x\n", offset);
    return 0;
  }
  if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    logprintf(0, "Process Error: Can't read %x\n", offset);
    return 0;
  }

  // Extract the data.
  switch (width) {
    case 8:
      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
      return datacast.l8;
    case 16:
      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
      return datacast.l16;
    case 32:
      return datacast.l32;
  }
  return 0;
}

void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
  // Strict aliasing rules lawyers will cause data corruption
  // on cast pointers in some gccs.
  union {
    uint32 l32;
    uint16 l16;
    uint8 l8;
  } datacast;
  datacast.l32 = 0;
  uint32 size = width / 8;

  sat_assert((width == 32) || (width == 16) || (width == 8));
  sat_assert(offset <= (256 - size));

  // Cram the data into the right alignment.
  switch (width) {
    case 8:
      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
      datacast.l8 = value;
      break;
    case 16:
      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
      datacast.l16 = value;
      break;
    case 32:
      datacast.l32 = value;
      break;
  }

  if (lseek(fd, offset, SEEK_SET) < 0) {
    logprintf(0, "Process Error: Can't seek %x\n", offset);
    return;
  }
  if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
    return;
  }

  return;
}


// Open dev msr.
int OsLayer::OpenMSR(uint32 core, uint32 address) {
  char buf[256];
  snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
  int fd = open(buf, O_RDWR);
  if (fd < 0)
    return fd;

  uint32 pos = lseek(fd, address, SEEK_SET);
  if (pos != address) {
    close(fd);
    logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
    return -1;
  }

  return fd;
}

bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
  int fd = OpenMSR(core, address);
  if (fd < 0)
    return false;

  // Read from the msr.
  bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));

  if (!res)
    logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);

  close(fd);

  return res;
}

bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
  int fd = OpenMSR(core, address);
  if (fd < 0)
    return false;

  // Write to the msr
  bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));

  if (!res)
    logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);

  close(fd);

  return res;
}

// Extract bits [n+len-1, n] from a 32 bit word.
// so GetBitField(0x0f00, 8, 4) == 0xf.
uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
  return (val >> n) & ((1 << len) - 1);
}

// Generic CPU stress workload that would work on any CPU/Platform.
// Float-point array moving average calculation.
bool OsLayer::CpuStressWorkload() {
  double float_arr[100];
  double sum = 0;
#ifdef HAVE_RAND_R
  unsigned int seed = 12345;
#endif

  // Initialize array with random numbers.
  for (int i = 0; i < 100; i++) {
#ifdef HAVE_RAND_R
    float_arr[i] = rand_r(&seed);
    if (rand_r(&seed) % 2)
      float_arr[i] *= -1.0;
#else
    srand(time(NULL));
    float_arr[i] = rand();  // NOLINT
    if (rand() % 2)  // NOLINT
      float_arr[i] *= -1.0;
#endif
  }

  // Calculate moving average.
  for (int i = 0; i < 100000000; i++) {
    float_arr[i % 100] =
        (float_arr[i % 100] + float_arr[(i + 1) % 100] +
         float_arr[(i + 99) % 100]) / 3;
    sum += float_arr[i % 100];
  }

  // Artificial printf so the loops do not get optimized away.
  if (sum == 0.0)
    logprintf(12, "Log: I'm Feeling Lucky!\n");
  return true;
}