240 lines
4.8 KiB
C
240 lines
4.8 KiB
C
/*
|
|
* Copyright (c) 2015
|
|
* The President and Fellows of Harvard College.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* bigfork - concurrent VM test that behaves somewhat better than
|
|
* parallelvm.
|
|
*
|
|
* This test is a mixture of forktest and parallelvm: it does nested
|
|
* forks like forktest, and aimless matrix operations like parallelvm;
|
|
* the goal is to serve as a performance benchmark more than as a
|
|
* stress test (though it can be that too) and in particular to
|
|
* exhibit less timing variance than parallelvm does. The variance is
|
|
* still fairly high, but the variance of parallelvm is horrific.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <err.h>
|
|
|
|
/* Number of nested fork stages the test goes through. */
#define BRANCHES 6

/*
 * Sizing: 6 branches gives 64 processes at the final stage, and the
 * whole test should use about 4M. Each process's memory load should
 * therefore be about 1/16 of a megabyte, i.e. 64K. That is 16384
 * ints, or four 64x64 matrices.
 */
#define DIM 64
|
|
|
|
static int m1[DIM*DIM], m2[DIM*DIM], m3[DIM*DIM], m4[DIM*DIM];
|
|
static const int right[BRANCHES] = {
|
|
536763422,
|
|
478946723,
|
|
375722852,
|
|
369910585,
|
|
328220902,
|
|
62977821,
|
|
};
|
|
static unsigned failures;
|
|
|
|
static
|
|
void
|
|
init(void)
|
|
{
|
|
unsigned i, j;
|
|
|
|
srandom(73771);
|
|
for (i=0; i<DIM; i++) {
|
|
for (j=0; j<DIM; j++) {
|
|
m1[i*DIM+j] = random() % 11 - 5;
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void
|
|
add(int *x, const int *a, const int *b)
|
|
{
|
|
unsigned i, j;
|
|
|
|
for (i=0; i<DIM; i++) {
|
|
for (j=0; j<DIM; j++) {
|
|
x[i*DIM+j] = a[i*DIM+j] + b[i*DIM+j];
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void
|
|
mul(int *x, const int *a, const int *b)
|
|
{
|
|
unsigned i, j, k;
|
|
|
|
for (i=0; i<DIM; i++) {
|
|
for (j=0; j<DIM; j++) {
|
|
x[i*DIM+j] = 0;
|
|
for (k=0; k<DIM; k++) {
|
|
x[i*DIM+j] += a[i*DIM+k] * b[k*DIM+j];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void
|
|
scale(int *x, const int *a, int b)
|
|
{
|
|
unsigned i, j;
|
|
|
|
for (i=0; i<DIM; i++) {
|
|
for (j=0; j<DIM; j++) {
|
|
x[i*DIM+j] = a[i*DIM+j] / b;
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void
|
|
grind(void)
|
|
{
|
|
/*
|
|
* compute: m2 = m1*m1, m3 = m2+m1, m4 = m3*m3, m1 = m4 / 2
|
|
*/
|
|
mul(m2, m1, m1);
|
|
add(m3, m2, m1);
|
|
mul(m4, m3, m3);
|
|
scale(m1, m4, 2);
|
|
}
|
|
|
|
static
|
|
int
|
|
trace(void)
|
|
{
|
|
unsigned i;
|
|
int val = 0;
|
|
|
|
for (i=0; i<DIM; i++) {
|
|
val += m1[i*DIM+i];
|
|
}
|
|
while (val < 0) {
|
|
val += 0x20000000;
|
|
}
|
|
return val % 0x20000000;
|
|
}
|
|
|
|
/*
 * fork() wrapper. Prints a warning when the fork fails; in every case
 * the value fork() returned is handed back to the caller unchanged.
 */
static
pid_t
dofork(void)
{
	pid_t child = fork();

	if (child >= 0) {
		return child;
	}
	warn("fork");
	return child;
}
|
|
|
|
static
|
|
void
|
|
dowait(pid_t pid)
|
|
{
|
|
int status;
|
|
|
|
if (pid == -1) {
|
|
failures++;
|
|
return;
|
|
}
|
|
if (pid == 0) {
|
|
exit(failures);
|
|
}
|
|
else {
|
|
if (waitpid(pid, &status, 0) < 0) {
|
|
warn("waitpid(%d)", pid);
|
|
}
|
|
else if (WIFSIGNALED(status)) {
|
|
warnx("pid %d: signal %d", pid, WTERMSIG(status));
|
|
}
|
|
else if (WEXITSTATUS(status) > 0) {
|
|
failures += WEXITSTATUS(status);
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void
|
|
dotest(void)
|
|
{
|
|
unsigned i, me;
|
|
pid_t pids[BRANCHES];
|
|
int t;
|
|
char msg[128];
|
|
|
|
me = 0;
|
|
for (i=0; i<BRANCHES; i++) {
|
|
pids[i] = dofork();
|
|
if (pids[i] == 0) {
|
|
me += 1U<<i;
|
|
}
|
|
grind();
|
|
t = trace();
|
|
if (t == right[i]) {
|
|
snprintf(msg, sizeof(msg),
|
|
"Stage %u #%u done: %d\n", i, me, trace());
|
|
}
|
|
else {
|
|
snprintf(msg, sizeof(msg),
|
|
"Stage %u #%u FAILED: got %d, expected %d\n",
|
|
i, me, t, right[i]);
|
|
failures++;
|
|
}
|
|
(void)write(STDOUT_FILENO, msg, strlen(msg));
|
|
}
|
|
|
|
for (i=BRANCHES; i-- > 0; ) {
|
|
dowait(pids[i]);
|
|
}
|
|
if (failures > 0) {
|
|
printf("%u failures.\n", failures);
|
|
}
|
|
else {
|
|
printf("Done.\n");
|
|
}
|
|
}
|
|
|
|
/*
 * Entry point: set up the starting matrix, then run the fork test.
 * The exit status is always 0; results are reported on standard
 * output by dotest().
 */
int
main(void)
{
	init();		/* seed and fill m1 */
	dotest();	/* fork, grind, check, and report */

	return 0;
}
|