/*
** A bunch of Ethernet protocols and tons of statistics about each.
** This file is paragraphed assuming you set tabs to 4 columns, not 8.
*/

/*#include <ctype.h>
#include <stdio.h>*/

/* you may choose a maximum of 1 of BOGGS, BIMODAL, or ITRAFFIC
** Choosing none of them results in standard Smurph traffic using a
** Poisson process (exponential inter-arrivals).
*/

// Compile-time switch tested with #if throughout this file.  When nonzero,
// maxInfo becomes 40000 bits, PacketStat only records after warmupTime, and
// Root overrides the offered load it read from the input.
#define BOGGS 0	// do we want a closed system pegged at 100% offered load?

// Expands to an expression over the three trace-flag variables, so it is
// only meaningful in run-time tests such as if(ITRAFFIC), never in #if.
#define ITRAFFIC (TraceArrivals || TraceSizes || TraceWho)

// Not referenced in the visible part of this file; presumably read by the
// included protocol files -- TODO confirm.
#define CONTINUOUS	0	// random delay continuous, rather than integer # of slots?
// When nonzero, profState[] is declared and profile(i) counts into it.
#define PROFILE 0		// profile the states? (not included in all protocols)

// Tested with #ifdef below, so merely being defined (whatever the value)
// makes trace() expand to nothing.
#define NDEBUG 1

// Not referenced in the visible part of this file.
#define TOO_LONG_NETWORK 0	// Are we futzing with illegally long networks?

#define MaxCCounter 16  // The standard maximum legal collision counter.

// Run-time mode flags.  Each one is spelled out as `int`: the original
// declarations relied on implicit int, which current compilers reject.

// Nonzero selects the bimodal traffic generator in Root.
static int BIMODAL = 0;

// Read from the input by Root; any nonzero value turns ITRAFFIC on.
static int TraceArrivals, TraceSizes, TraceWho;		// booleans

static int ASCII = 0;	/* is ITRAFFIC input in 4-column ASCII, or 6-byte binary? */

// Nonzero makes Root print the service-delay histogram at the end of a run.
static int DELAY_HISTOGRAM = 0;

enum {False = 0, True};
// Appended to a stream expression: `cout << x NL;` becomes `cout << x << '\n';`.
#define NL << '\n'		// kludge for adding newlines to end of C++ output.

// trace(foo): with NDEBUG defined it expands to nothing; otherwise it writes
// Time/Slot, the caller's `id` and foo to cerr, followed by a newline.
// The enabled form needs `Time` and `id` to be in scope at the call site.
#ifdef NDEBUG
#define trace(foo)
#else
#define trace(foo) cerr << Time/Slot << ' ' << id << ' ' << foo NL
#endif

// profile(i): when PROFILE is nonzero, counts one visit in profState[i]
// (100 slots, no bounds check); otherwise it expands to nothing.
#if PROFILE
static BIG profState[100];
 #define profile(i) ++profState[i]
#else
 #define profile(i)
#endif

// SMURPH `identify` statement naming this simulator -- NOTE(review): exact
// semantics are defined by SMURPH, not visible here.
identify Crazy_mixed_up_Ethernet_simulator;

/****************************************************************
*****************************************************************

From chapter 3 of 802.3: the bits seen as a packet goes by are:
7 octet preamble
1 octet start frame delimiter
---- "frame" begins here ---
2 or 6 octet destination address
2 or 6 octet source address
2 octet length (of LLC data field)
n octet LLC data
[m octets padding, if needed]
4 octet CRC
---- "frame" ends here ---

A frame must be between 64 and 1518 octets.  tcpdump packets are between
60 and 1514 octets; Boggs et al. also list the CRC, preamble, and inter-
frame time as "extra".  Thus, for simplicity, I'll take the "info" part
of a packet to be between 60 and 1514 octets, and have 12 octets of
"header" (preamble+SFD+CRC).

To complicate things, Smurph's getPacket function asks for a
minimum and maximum packet length, but only refers to the "info"
part defined above.  Thus, an 802.3 minimum frame length (everything
except preamble+SFD) is 64 octets; a Smurph one 60.

From now on, I refer to the preamble+SFD as a single 8-octet "Preamble".

*****************************************************************
****************************************************************/


// Frame geometry and timing constants.  Lengths are in bits.

const long minInfo = 60*8;		// Minimum "info" length
#if BOGGS
const long maxInfo = 40000;		// Maximum "info" length in the BOGGS setup
#else
const long maxInfo = 1514*8;	// Maximum "info" length
#endif
const long Preamble = 64;		// including 8 bit SFD
const long PreambleCRC = 96;	// 64b preamble+32b CRC
const long PSpace = 96;			// Inter-packet space in bits
const long JamL = 32;			// Length of the jamming signal in bits
const long Slot = 512;			// Maximum round-trip delay in bits (for backoff)
const long Slot2 = 256;			// Slot / 2

// Single bit Transmission time, in ITUs.  1 ITU is taken to be one
// nanosecond, so 100ns/bit = 10Mbits/sec.
const long TTime = 100;

// Queue limits.  Both are tested with #if in Root, so 0 disables the limit.
#define	MaxMessageQ 1000 // max number of messages to Q per station.
#define	GlobalMaxMessageQ 0 // max number of messages to Q globally.

// Run parameters; Root fills these in from the input file.
long
	ThinkTime,		// bit times to prepare new packet for transmission after
					// a successful transmission (Client->getPacket et al.);
					// also time from MaxCCounter failures to retry.
	mle,			// message length handed to the traffic generator
	otherMaxCCounter;
double mit;			// set by Root to mle / offered_load


// Capacity of every per-node array in this file.  Declared with an explicit
// type: the original `const maxNNodes` relied on implicit int.
const int maxNNodes = 1024;
int NNodes;			// number of stations in this run, read by Root

// Bookkeeping for the run-length and LRU-stack statistics.  All four arrays
// have static storage, so they start out zeroed before execution begins.

static long runLenCount[maxNNodes];		// packets in the current run, per node
static long referenceDepth[maxNNodes];	// hits per LRU stack depth
static int parent[maxNNodes];			// link toward the top of the LRU stack
static int child[maxNNodes];			// link away from the top of the LRU stack

/*
** initialization code needs to be stuck in somewhere
** for the above data structures:
**
** runLenCount = 0 for all nodes, plus a "dummy" NNodes+1st node
** referenceDepth = 0 for all node positions, plus a "dummy" NNodes+1st node
**
** parent[i] = i-1; child [i]=i+1
**
** with boundary conditions
**	parent[0] = dummy
**	parent[dummy] = -1
** to make sure that the first referenced node pushes out the dummy node
** without polluting the statistics gathering.
**
** Also, the final cleanup code needs to output the stack reference depth
** stuff in addition to the run length stuff.
*/

// Service-delay histogram; Root prints it when DELAY_HISTOGRAM is set.
#define MAX_DELAY_HIST 100000
long delayHist[MAX_DELAY_HIST];

// Per-station totals accumulated by PacketStat.  bitsPerSecond is the
// channel rate used to convert simulated time into seconds.
BIG bitCount[maxNNodes], bitsPerSecond = 10000000;

// Queue occupancy tracking; PacketStat updates these only when ITRAFFIC is on.
int queueSize[maxNNodes];
BIG queueSizeCum[maxNNodes]; /* running sum of (queue size)*(time spent at that size)*/
TIME prevQueueUpdate[maxNNodes];

TIME idleTime[maxNNodes];	/* time spent not serving, including ThinkTimes */
long packetCount[maxNNodes];
// One RVariable per station for each statistic; Root creates them for
// indices 0 .. NNodes+1.
RVariable *servTimeStat[maxNNodes], *runLenStat[maxNNodes],
	*collisionStat[maxNNodes], *realCollisionStat[maxNNodes],
	*qDelayStat[maxNNodes];
// Station kind read by Root: 0 Ether_Node, 1 Gateway, 2 Magic_Node, 3 BLAM_Node.
int stationType[maxNNodes];

// Durations in ITUs: TTime ITUs per bit times bitsPerSecond bits is one
// simulated second.  These initializers rely on bitsPerSecond being
// initialized above, earlier in this file.
#if BOGGS
TIME warmupTime = TTime * ((BIG)bitsPerSecond) * 5,
	 measureTime = TTime * ((BIG)bitsPerSecond) * 10;
#else
	 TIME measureTime = TTime * ((BIG)bitsPerSecond) * 300;	// 5 minutes
#endif

/*********************************************************************
**
** Both CReadInt and CReadFloat are similar to Smurph's "readIn", except I
** need them to work with C I/O routines (since C and C++ are not
** necessarily mixable), because I use C's fread to read input driven
** traffic (see iTraffic.cc).  They skip all non-digit input until they
** find a digit, and then interpret it as the appropriate kind of number.
** This allows comments to be in the input along with the input.  Be
** careful, however, that your comments do not contain any digits!
**
*********************************************************************/

// Read the next unsigned decimal integer from fp.
// Every character in front of the first digit is copied to cout, so comments
// in the input show up in the output; the number itself is echoed after a
// single space.  A leading '-' is skipped like any other non-digit.
// If the stream ends before a digit is found, EOF is echoed and returned.
int CReadInt(FILE *fp) {
	int ch;
	for(;;) {
		ch = getc(fp);
		if(ch == EOF || isdigit(ch))
			break;
		cout << (char)ch;
	}
	ungetc(ch, fp);		// put the first digit back for fscanf (no-op for EOF)

	int value = ch;		// stays EOF when there is nothing to convert
	fscanf(fp, "%d", &value);
	cout << ' ' << value;
	return value;
}


// Read the next floating-point number from fp.
// Characters in front of the number are copied to cout, so comments in the
// input show up in the output; the number is echoed after a single space.
// A number may start with a digit or with '.'.  A '.' that does not begin a
// number (for instance the full stop ending a comment) is echoed and skipped
// instead of aborting the read.
// Returns 0.0 if the stream ends before any number is found; the original
// returned an uninitialized value in that case.
double CReadFloat(FILE *fp) {
	double value = 0.0;
	for(;;) {
		int ch;
		while(!isdigit(ch = getc(fp)) && ch != '.' && ch != EOF)
			cout << (char)ch;
		if(ch == EOF)
			break;
		ungetc(ch, fp);
		if(fscanf(fp, "%lf", &value) == 1)
			break;
		// Only a '.' with no digits after it can fail to convert.  The
		// failed scan has already consumed that '.', so echo it and resume
		// the search.
		cout << '.';
	}
	cout << ' ' << value;
	return value;
}

// Follow the parent[] links upward from node `id` until reaching the node
// whose parent is negative.  That node is stored in *stackTop; the return
// value is the number of links followed (0 when `id` is itself the top).
// The assertion bounds the walk so a cycle in parent[] is reported.
int stackDepth(int id, int *stackTop)
{
	int hops = 0;
	for(*stackTop = id; parent[*stackTop] >= 0; *stackTop = parent[*stackTop])
	{
		++hops;
		assert (hops < NNodes+2, "LRU stack is corrupted!");
	}
	return hops;
}


/* This procedure should be called each time a packet is *successfully*
** transmitted.
*/
void PacketStat(int id, int RealCollisions, int CCounter, Packet *Buffer)
{
#if BOGGS
	if(Time >= warmupTime)
	{
		bitCount[id] += Buffer->TLength + ::PSpace;
#else
		bitCount[id] += Buffer->ILength;
#endif
		packetCount[id]++;
		qDelayStat[id]->update((double)(Buffer->TTime - Buffer->QTime));
		TIME delay = Time - Buffer->TTime;
		servTimeStat[id]->update((double)delay);
#if DELAY_HISTOGRAM
		delay /= TTime * 100;
		++delayHist[MIN((int)delay, MAX_DELAY_HIST - 1)];
#endif

		++runLenCount[id];
		realCollisionStat[id]->update((double)RealCollisions);
		collisionStat[id]->update((double)CCounter);

		// We now need to check whether this packet is continuing
		// our own run (where there is nothing left to do but gloat)
		// or we just established the start of our own run.

		if(runLenCount[id]==1)
		{
			static int dummySpot = 0;
			// this is the first packet in our run, so we must
			// update the runLengthStat and reset the runLengthCount
			// for the previous transmiter, if any, and also reset
			// our own reference stack position to the top.
		
			int OldTopStack;
			int mySpot = stackDepth(id, &OldTopStack);

			runLenStat[OldTopStack]->update((double)runLenCount[OldTopStack]);
			runLenCount[OldTopStack] = 0;

			// now work on the stack update stuff...
			assert(mySpot != 0, "Can't start a run from top of stack!");

			// First remove myself from the current stack position
			parent[child[id]] = parent[id];
			child[parent[id]] = child[id];

			// Then stick it onto the top
			assert(parent[OldTopStack] == -1, "Hey, parent[top] != -1");
			parent[id] = parent[OldTopStack];
			parent[OldTopStack] = id;
			child[id] = OldTopStack;

			// And finally, remember to update the reference depth counter
			if(mySpot < dummySpot)
				++referenceDepth[mySpot];
			else
				++dummySpot;
		}
		else
			++referenceDepth[0]; // another packet by top-of-stack

		if(ITRAFFIC)
		{
			/*
			** we divide by TTime just in an attempt to avoid overflow
			** in the sum, not because it has any physical meaning.
			** Read: KLUGE.
			*/
			queueSizeCum[id] += queueSize[id] * (Time - prevQueueUpdate[id]) / TTime;
			prevQueueUpdate[id] = Time;
			--queueSize[id];
		}

#if BOGGS
	}
#endif

}

#define SMURF 1
// various helper functions for statistics.
#include "gizmo-buckets.c"

// Protocol files.
#include "protocols/std_ether.cc"	// Standard Ethernet Protocol file.
#include "protocols/wayne.cc"	// The WAYNE protocol.  (two-node ethernet)
#include "protocols/magic.cc"	// WARNOCK_RESET, MART_SEARCH, WATERLINE, MAX_IDLE, etc.
#include "protocols/blam.cc"	// same as magic, except allows bursts.


// Various traffic patterns.
#include "iTraffic.cc"
#include "bimodal.cc"


/*
** The Root process is where we initialize the entire world in the state
** "Start", and summarize and destroy the world in the state "Stop".
** Close your eyes if you don't like blood and gore.
*/

process Root {

	// State Start reads the run description from stdin, builds the cable,
	// the stations and the traffic generator, and creates the statistics
	// objects.  State Stop, entered on Kernel DEATH, prints the summary.
	//
	// Input order: maxPackets, maxTime (seconds), maxCPU, otherMaxCCounter,
	// ThinkTime, mle, offered_load, TraceArrivals, TraceSizes, TraceWho,
	// NNodes, one station type per node, then the distance for every node
	// pair (i,j) with i < j.

	Link    *Cable;

	ITraffic *ITP;
	BTraffic *BTP;
	Traffic *TP;
	RVariable *bitCountStat, *packetCountStat, *globalServTimeStat,
		*globalQDelayStat, *globalRunLenStat,
		*globalCollisionStat, *globalRealCollisionStat,
		*queueStat, *perHostServStat, *perHostQDelayStat,
		*perHostCollisionStat, *perHostRealCollisionStat;
	int         i, j;
	DISTANCE    d;
	double      offered_load;

	states {Start, Stop};

	perform {

		state Start:
			setEtu (TTime);
			long maxPackets;
			TIME maxTime;
			double maxCPU;
			maxPackets = CReadInt(stdin);
			// read maxTime in seconds
			maxTime = TTime * bitsPerSecond * CReadInt(stdin);
			maxCPU = CReadFloat(stdin);

#if BOGGS
			setLimit (BIG_0, warmupTime + measureTime);
#else
			setLimit (maxPackets, maxTime, maxCPU);
#endif
			otherMaxCCounter = CReadInt(stdin);
			ThinkTime = CReadInt(stdin);
			mle = CReadInt(stdin);
			offered_load = CReadFloat(stdin);
#if BOGGS
			offered_load = 1000;
#endif
			mit = ((double)mle) / offered_load;

			TraceArrivals = CReadInt(stdin);
			TraceSizes = CReadInt(stdin);
			TraceWho = CReadInt(stdin);

			// ITRAFFIC is computed from the three trace flags, so the mode
			// check is only meaningful once they have been read.  It used
			// to run first, when ITRAFFIC was still always 0.
			Assert(ITRAFFIC+BIMODAL+BOGGS <= 1,
				"can choose a max of 1 of ITRAFFIC+BIMODAL+BOGGS");

			// propDelay
			NNodes = CReadInt(stdin);
			// cout << "NNodes " << NNodes NL;

			// Every per-node array holds maxNNodes entries, and the LRU
			// bookkeeping below also uses slots NNodes and NNodes+1.
			Assert(NNodes > 0 && NNodes + 2 <= maxNNodes,
				"NNodes must be between 1 and maxNNodes-2");

		  // Create the ethernet cable
		  Cable = create Link (NNodes, (PSpace + Slot) * TTime);
		  for (i = 0; i < NNodes; i++) {
		    // Create the stations and connect them to the bus
		    stationType[i] = CReadInt(stdin);
			switch(stationType[i])
			{
			case 0:	// regular node
				create Ether_Node;
				((Ether_Node*)TheStation)->Bus->connect (Cable);
				create Ether_Xmitter;
				create Ether_Receiver;
				break;
			case 1:	// "Smart" gateway node for the two-node protocol.
				create Gateway;
				((Gateway*)TheStation)->Bus->connect (Cable);
				create GateXmitter;
				create GateReceiver;
				break;
			case 2: // Magic node.
				create Magic_Node;
				((Magic_Node*)TheStation)->Bus->connect (Cable);
				create Magic_Xmitter;
				create Magic_Receiver;
				break;
			case 3: // BLAM node.
				create BLAM_Node;
				((BLAM_Node*)TheStation)->Bus->connect (Cable);
				create BLAM_Xmitter;
				create BLAM_Receiver;
				break;
			default: Assert(0, "unknown node type");
			}
#if MaxMessageQ
			// To avoid letting Smurph eat up too much memory at high offered
			// loads, don't let the station queue more messages than this:
			// (a few hard-coded configurations get ten times the limit)
			if(NNodes == 4 || (NNodes == 33 && (i==0 || i==8 || i==10 || i==18)))
				TheStation->setQSLimit(10 * MaxMessageQ);
			else
				TheStation->setQSLimit(MaxMessageQ);
#endif
		  }

#if GlobalMaxMessageQ
#if !ZZ_QSL
#BONEHEAD // Compile with -q if you want a GlobalMaxMessageQ.
#else
		  setQSLimit(GlobalMaxMessageQ);
#endif
#endif
		  // Define distances: one value, in bit times, per unordered pair.
		  for (i = 0; i < NNodes-1; i++)
		  {
		    for (j = i+1; j < NNodes; j++)
			{
				d = CReadInt(stdin);
				Port *Bus1, *Bus2;
				// The Bus port lives in a different station class for each
				// station type, hence the cast chains.
				Bus1 = stationType[i] == 0 ? ((Ether_Node*)idToStation(i))->Bus:
					   stationType[i] == 1 ? ((Gateway*)idToStation(i))->Bus :
					   stationType[i] == 2 ? ((Magic_Node*)idToStation(i))->Bus:
					   ((BLAM_Node*)idToStation(i))->Bus;
				Bus2 = stationType[j] == 0 ? ((Ether_Node*)idToStation(j))->Bus:
					   stationType[j] == 1 ? ((Gateway*)idToStation(j))->Bus :
					   stationType[j] == 2 ? ((Magic_Node*)idToStation(j))->Bus:
					   ((BLAM_Node*)idToStation(j))->Bus;
				setD (Bus1, Bus2, d * TTime);
				// cout << d << ' ';
			}
			// cout << '\n';
		  }

		  // Traffic
#if BOGGS
			cout << "BOGGS\n";
#else
		  if(ITRAFFIC) {
			  ITP = create ITraffic ("stdin");
			  create (System) ITGen (ITP);
			  mit = ((double)mle) / offered_load;
			  cout << "Input driven traffic.  Claimed offered load:\n";
			  cout << " Offered load: " << offered_load NL;
			  ITP->addSender ();
			  ITP->addReceiver ();
		  } else if(BIMODAL) {
			  const double bimodalSmallFrac = 0.85;
			  const int smallSize = 128, largeSize = 1024;
			  // mle becomes the mean of the two packet sizes.
			  mle = (int)(smallSize*bimodalSmallFrac + largeSize*(1-bimodalSmallFrac));
			  mit = ((double)mle) / offered_load;
			  BTP = create BTraffic(NNodes, bimodalSmallFrac, mit, smallSize, largeSize);
			  create (System) BTGen (BTP);
			  cout << " Offered load: " << mle / mit NL;
			  BTP->addSender ();
			  BTP->addReceiver ();
		  } else {
			  TP = create Traffic (MIT_exp+MLE_unf, mit, (double)mle, (double)mle);
			  cout << " Offered load: " << mle / mit NL;
			  TP->addSender ();
			  TP->addReceiver ();
		  }
#endif
		  Kernel->wait (DEATH, Stop);

          bitCountStat = create RVariable;
          packetCountStat = create RVariable;

          // NNodes+2 entries: the real stations plus the two LRU dummies.
          for(i=0; i<NNodes+2; i++)
		  {
			servTimeStat[i] = create RVariable;
			qDelayStat[i] = create RVariable;
          	runLenStat[i] = create RVariable;
          	collisionStat[i] = create RVariable;
          	realCollisionStat[i] = create RVariable;
			idleTime[i] = TIME_0;
		  }
		  globalServTimeStat = create RVariable;
		  globalQDelayStat = create RVariable;
		  globalRunLenStat = create RVariable;
		  globalCollisionStat = create RVariable;
		  globalRealCollisionStat = create RVariable;
		  queueStat = create RVariable;

			perHostServStat = create RVariable;
			perHostQDelayStat = create RVariable;
			perHostCollisionStat = create RVariable;
			perHostRealCollisionStat = create RVariable;

		/*
		** initialization code for LRU version of run-length.
		** runLenCount = 0 for all nodes, plus two "dummy" nodes at NN, NN+1.
		** referenceDepth = 0 for all nodes, plus dummies.
		**      - done in declaration, "static"
		** The dummies are at NNodes, NNodes+1, "real" nodes are 0,..NNodes-1
		**
		** The dummy end-of-real-stack marker should be position NNodes+1,
		** since we need to reserve position NNodes for the child of the
		** last node to avoid a special case with the last node with the
		** LRU stack.
		*/
		for(i=0; i < NNodes+1; i++)
		{
			parent[i] = i-1;
			child [i] = i+1;
		}
		// Dummy NNodes+1 starts on top (parent -1) with node 0 right below it.
		parent[0] = NNodes+1;
		parent[NNodes+1] = -1;
		child[NNodes+1] = 0;
		/* Also, the final cleanup code needs to output the stack
		** reference depth stuff in addition to the run length stuff.
		*/

		TrialInit();

	state Stop:
			cout << "CPU execution time: " << cpuTime() NL;
            Cable->printPfm ();
			Client->printPfm();

            cout << '\n' << NNodes << " Boggs Utilization: " << TTime * (Cable->NRBits+
                Cable->NRPackets * (BIG)(PSpace + PreambleCRC)) / (double) Time NL NL;

#if !BOGGS
			// Outside BOGGS the whole run is the measurement interval; this
			// local deliberately hides the global measureTime.
			TIME measureTime = Time;
#endif
			double seconds = measureTime / (double) TTime / bitsPerSecond,
				x, mom[2];

#if BOGGS
			cout << "\nBit Counts\n";
#else
			cout << "Utilizations as a fraction\n";
#endif
            for(i = 0; i < NNodes; i++)
            {
#if BOGGS
				cout << i << ' ' << bitCount[i] NL;
#else
                cout << "Util " << i << ' ' << (double)TTime * bitCount[i] / Time NL;
#endif
                bitCountStat->update((double)bitCount[i], 1);
            }
            bitCountStat->calculate(x, x, mom, TYPE_long);
            cout << NNodes << " Util: " <<
				// deduce the total by multiplying average times NNodes.
				NNodes * mom[0] * TTime / measureTime <<
#if BOGGS
				" stdDev: " << sqrt(fabs(mom[1])) / 10000 NL;
				// dividing stdDev by 10000 to turn bits/10sec to Kbits/sec.
#else
                " per-host-stdDev: " << sqrt(fabs(mom[1])) * TTime / measureTime NL NL;
#endif

			cout << "\nPacket Counts\n";
            long totalPackets = 0;
            for(i = 0; i < NNodes; i++)
            {
                totalPackets += packetCount[i];
#if BOGGS
				cout << i << ' ' << packetCount[i] NL;
#else
                cout << "Packets/s " << i << ' ' << packetCount[i] / seconds NL;
#endif
                packetCountStat->update((double)packetCount[i]);
            }
            packetCountStat->calculate(x, x, mom, TYPE_long);

#define T2ms(x) ((x)/TTime/10000)	// divide by 10000 because 10,000 bits/millisecond.
            cout << NNodes << " Packets/s: " << totalPackets / seconds <<
                " per-host-stdDev: " << sqrt(fabs(mom[1])) / seconds NL NL;

			cout << "service times, queueing delay (in ms), real Q size (and via Little)\n";
            for(i=0; i<NNodes; i++)
			{
				servTimeStat[i]->calculate(x, x, mom, TYPE_long);
				cout << "servTime " << i << " " << T2ms(mom[0]) << " " << T2ms(sqrt(fabs(mom[1])));
				combineRV(servTimeStat[i], globalServTimeStat, globalServTimeStat);
				perHostServStat->update(mom[0]);

				qDelayStat[i]->calculate(x, x, mom, TYPE_long);
				cout << " qDelay " << T2ms(mom[0]) << " " << T2ms(sqrt(fabs(mom[1])));
				combineRV(qDelayStat[i], globalQDelayStat, globalQDelayStat);
				perHostQDelayStat->update(mom[0]);

				// Little's law estimate: arrival rate times mean queueing
				// delay (mom[0] still holds the qDelay mean here).
				double lambda = packetCount[i] / (double)measureTime;
				double Nbar = lambda * mom[0];
				cout << " Qsize " << queueSizeCum[i]/(double)Time*TTime << " (" << Nbar << ")" NL;
				queueStat->update(Nbar);
			}
			double min, max, avServTime, avQDelay;
			globalServTimeStat->calculate(min, max, mom, TYPE_long);
			avServTime = mom[0];
            cout << NNodes << " ServTimeInMs: " << T2ms(mom[0]) <<
                " per-packet-stdDev: " << T2ms(sqrt(fabs(mom[1])));
				perHostServStat->calculate(x,x,mom, TYPE_long);
				cout << " per-host-stdDev: " << T2ms(sqrt(fabs(mom[1]))) NL;
			cout << NNodes << " MinServiceTime: " << T2ms(min) <<
				" max : " << T2ms(max) NL;

			globalQDelayStat->calculate(min, max, mom, TYPE_long);
			avQDelay = mom[0];
			cout << NNodes << " QDelayInMs: " << T2ms(mom[0]) <<
				" per-packet-stdDev: " << T2ms(sqrt(fabs(mom[1])));
				perHostQDelayStat->calculate(x,x,mom, TYPE_long);
				cout << " per-host-stdDev: " << T2ms(sqrt(fabs(mom[1]))) NL;
			cout << NNodes << " MinQDelay: " << T2ms(min) <<
							" max : " << T2ms(max) NL;

			queueStat->calculate(x,x,mom, TYPE_long);
			cout << NNodes << " AvGlobalQueueSize " << mom[0]*NNodes << " per-host-stdDev: "
				<< sqrt(fabs(mom[1])) NL NL;

			cout << "Excess Delays" NL;
			cout << NNodes << " AverageInus: " << ( avQDelay + avServTime -
				(double)(NNodes*TTime*(PSpace+PreambleCRC+mle)) )/TTime/10 NL;
			// divide by 10 translates bit-times into microseconds.

			double totIdleFrac = 0.0;
			for(i=0; i<NNodes; i++)
			{
				double idleFrac = idleTime[i] / (double) Time;
				cout << "Idle fraction " << i << " " << idleFrac NL;
				totIdleFrac += idleFrac;
			}
			cout << "av Idle fraction " << totIdleFrac / NNodes NL NL;

            for(i=0; i<NNodes; i++)
				cout << "StackReferenceDepth " << i << " " << (double)referenceDepth[i]/(totalPackets - NNodes) NL;
            for(i=0; i<NNodes; i++)
			{
				runLenStat[i]->calculate(x, x, mom, TYPE_long);
				cout << "Run length " << i << " " << mom[0] << " " << sqrt(fabs(mom[1])) NL;
				combineRV(runLenStat[i], globalRunLenStat, globalRunLenStat);
			}
			globalRunLenStat->calculate(min, max, mom, TYPE_long);
            cout << NNodes << " AverageRunLength: " << mom[0] << " per-run-stdDev: " <<
				sqrt(fabs(mom[1])) NL;
			cout << NNodes << " Min RunLength:" << min << " max: " << max NL;

			cout << "CCounter statistics" NL;
            for(i=0; i<NNodes; i++)
			{
				collisionStat[i]->calculate(x, x, mom, TYPE_long);
				cout << "CCounter " << i << " " << mom[0] << " " << sqrt(fabs(mom[1])) NL;
				combineRV(collisionStat[i], globalCollisionStat, globalCollisionStat);
				perHostCollisionStat->update(mom[0]);

			}
			globalCollisionStat->calculate(min, max, mom, TYPE_long);
            cout << NNodes << " AverageNumCollisions: " << mom[0]
				<< " per-packet-stdDev: " << sqrt(fabs(mom[1]));
				perHostCollisionStat->calculate(x,x, mom, TYPE_long);
				cout << " per-host-stdDev: " << sqrt(fabs(mom[1])) NL;
			cout << NNodes << " Min NumCollisions: " << min << " max: " << max NL;

			cout << "RealCollision statistics" NL;
            for(i=0; i<NNodes; i++)
			{
				realCollisionStat[i]->calculate(x, x, mom, TYPE_long);
				cout << "RealCollisions " << i << " " << mom[0] << " " << sqrt(fabs(mom[1])) NL;
				combineRV(realCollisionStat[i], globalRealCollisionStat, globalRealCollisionStat);
				perHostRealCollisionStat->update(mom[0]);
			}
			globalRealCollisionStat->calculate(min, max, mom, TYPE_long);
            cout << NNodes << " AverageNumRealCollisions: " << mom[0]
				<< " per-packet-stdDev: " << sqrt(fabs(mom[1]));
				perHostRealCollisionStat->calculate(x,x, mom, TYPE_long);
				cout << " per-host-stdDev: " << sqrt(fabs(mom[1])) NL;
			cout << NNodes << " Min NumRealCollisions: " << min << " max: " << max NL;

			if(DELAY_HISTOGRAM) {
				// Print buckets 0 .. index of the last non-empty bucket.
				int maxHist;
				for(maxHist = MAX_DELAY_HIST - 1; maxHist >= 0; --maxHist)
					if(delayHist[maxHist])
						break;
				cout << "Delay Histogram, " << maxHist << " entries" NL;
				for(i=0; i<=maxHist; i++)
					cout << i << ' ' << delayHist[i] NL;
			}
#if PROFILE
		for(i=0; i < sizeof(stateName)/sizeof(stateName[0]); i++)
			cout << stateName[i] << "	" << profState[i] NL;
#endif
		TrialAllDone();
	};
};

