Archive

Archive for July, 2010

HPET / RDTSC based userland gettimeofday

July 26th, 2010
 
/*-
 * Copyright (c) 2007, 2008, 2009, 2010 Gary "Monk" Stanley
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This software may not be used by individuals associated with 
 *    fpsmeter.com / fragaholics.de
 * 4. Neither the name of the Author may be used to endorse or promote products 
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      
 */
 
 
/* Proof of concept code for Linux. MAY work on other distros, but it will only use rdtsc */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <time.h>
#include <signal.h>
#include <fcntl.h>
#include <dlfcn.h>
#include <stdint.h>
#include <sched.h>
#include <unistd.h>
 
 
#include <assert.h>
#include <errno.h>
 
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
 
#include <linux/hpet.h>
#include <linux/rtc.h>
 
#define STACK_SIZE (1 * 1024 * 1024)
 
/*
 * Library-wide state. There is exactly one instance; it is filled in by
 * init() and read by the interposed gettimeofday().
 */
static struct
{
	// 0 = not (yet) initialised,
	// 1 = cachedTime is refreshed by the SIGIO helper (HPET or RTC timer),
	// 2 = cachedTime is refreshed from gettimeofday() itself, paced by the CPU counter.
	int initialised;
	// The next "gettimeofday" in link order, looked up with dlsym(RTLD_NEXT, ...) in init().
	int (*system_gettimeofday)(struct timeval *tp, void *tzp);
	// Last time obtained from system_gettimeofday; this is what callers receive.
	struct timeval cachedTime;
	// Threshold compared against the TSC delta in gettimeofday() (mode 2);
	// derived in init() from FASTTIME_FREQ and ticksPerUsec.
	int frequency;
	// Result of calibrate(): CPU counter ticks per microsecond.
	int ticksPerUsec;

	// Pid returned by clone() for the SIGIO helper, or -1 when there is none.
	int childPid;
	// getpid() at init() time. Not read anywhere in this file.
	int parentPid;

	// Descriptor for /dev/hpet or /dev/rtc, or -1 when none is open.
	int fd;
	// malloc()ed stack handed to clone() for the helper, or NULL.
	char* childStack;

} details;
// CPU counter value at the most recent cachedTime refresh in mode 2.
// Starts at -1 (all bits set) and is seeded with RDTSC() by init()'s fallback path.
static uint64_t lastTSCCount = -1;
 
/*
 * Read the CPU time-stamp counter (x86 "rdtsc").
 *
 * Returns the 64-bit counter value assembled from EDX:EAX.
 *
 * The asm is marked volatile: its result changes on every execution even
 * though it has no inputs, so without the qualifier the compiler is free to
 * merge two reads into one or hoist a read out of a loop.
 */
static inline uint64_t RDTSC(void)
{
	uint32_t lo, hi; // rdtsc delivers two 32-bit halves in EAX and EDX

	__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t) hi << 32) | lo;
}
 
// Return ticks per usec.
/*
 * Estimate how many CPU counter ticks elapse per microsecond.
 *
 * Samples the wall clock (details.system_gettimeofday) and RDTSC() around a
 * busy loop and divides the tick delta by the elapsed microseconds.
 *
 * Returns the estimate rounded to the nearest integer, always in the range
 * [1, MAX_TICKS_PER_USEC]. The result is used as a divisor/multiplier by the
 * caller, so it must never be 0, and the double -> int conversion must stay
 * in range.
 */
static int calibrate(void)
{
	enum
	{
		MAX_ATTEMPTS = 16,           // how often the sampling window may be extended
		SPIN_ITERATIONS = 100000,    // nops per attempt
		MAX_TICKS_PER_USEC = 1000000 // sanity cap (1 THz)
	};
	int attempt, i;
	unsigned long long startClock, endClock = 0;
	struct timeval startTime, endTime;
	double elapsedUsec = 0.0;
	double ticks;

	details.system_gettimeofday(&startTime, NULL);
	startClock = RDTSC();

	// Normally one pass is enough. If the wall clock has not advanced yet
	// (coarse clock) keep spinning; the window grows because the start
	// samples are kept.
	for (attempt = 0; attempt < MAX_ATTEMPTS && elapsedUsec <= 0.0; ++attempt)
	{
		for (i = 0; i < SPIN_ITERATIONS; ++i)
		{
			__asm__ __volatile__ ("nop");
		}
		details.system_gettimeofday(&endTime, NULL);
		endClock = RDTSC();

		// Subtract field by field so the large absolute values cancel
		// before the conversion to double.
		elapsedUsec = (endTime.tv_sec - startTime.tv_sec) * 1000000.0
				+ (endTime.tv_usec - startTime.tv_usec);
	}

	// Wall clock stuck or stepped backwards, or counter did not advance:
	// no usable measurement, report the smallest valid rate.
	if (elapsedUsec <= 0.0 || endClock <= startClock)
	{
		return 1;
	}

	ticks = (endClock - startClock) / elapsedUsec + 0.5; // round to nearest
	if (ticks < 1.0)
	{
		return 1;
	}
	if (ticks > MAX_TICKS_PER_USEC)
	{
		return MAX_TICKS_PER_USEC;
	}
	return (int) ticks;
}
 
 
/*
 * SIGIO handler installed by hpet_thread(): every timer signal refreshes
 * details.cachedTime from the real gettimeofday. The signal number is not
 * used.
 */
static void
hpet_trigger(int dummy)
{
	struct timeval *cache = &details.cachedTime;

	(void) dummy;
	details.system_gettimeofday(cache, NULL);
}
 
/*
 * Entry point of the clone()d helper.
 *
 * The helper exists only to receive the timer's asynchronous SIGIO signals,
 * so that they do not interrupt sleep() and similar calls in the
 * application's own threads. It registers hpet_trigger() for SIGIO (with
 * SA_RESTART and an empty mask) and then waits for signals forever; the
 * argument is unused.
 */
static int
hpet_thread(void* dummy)
{
	struct sigaction action;
	int rc = -1;

	sigemptyset(&action.sa_mask);
	action.sa_handler = hpet_trigger;
	action.sa_flags = SA_RESTART;

	rc = sigaction(SIGIO, &action, NULL);
	assert(rc != -1);

	for (;;)
	{
		pause();
	}
	return 0;
}
 
/*
 * Try to drive the time cache from /dev/hpet.
 *
 * Spawns the SIGIO helper (hpet_thread) with clone(), opens /dev/hpet,
 * programs a periodic interrupt of `freq` Hz and routes the resulting
 * SIGIO signals to the helper.
 *
 * freq: requested interrupt rate in Hz; must be positive.
 *
 * Returns 1 on success. On any failure returns 0 after releasing everything
 * acquired here (descriptor, helper, helper stack), leaving details.fd = -1,
 * details.childPid = -1 and details.childStack = NULL.
 *
 * Expects details.fd and details.childPid to be -1 on entry, as init()
 * arranges.
 */
static int
init_hpet(int freq)
{
	int fdFlags;
	struct hpet_info info;

	if (freq <= 0)
	{
		return 0;
	}

	// Child will have a new signal handler table, but share the same memory space
	// (new thread is created to avoid signal handler conflicts,
	// and to avoid waking the main thread up due to the hpet signals)
	details.childStack = malloc(STACK_SIZE);
	if (details.childStack == NULL)
	{
		// Out of memory is not fatal: the caller can fall back to another method.
		return 0;
	}
	details.childPid = clone(hpet_thread, details.childStack + STACK_SIZE - 64, CLONE_VM, NULL);
	if (details.childPid == -1)
	{
		goto out;
	}

	details.fd = open("/dev/hpet", O_RDONLY);
	if (details.fd < 0)
	{
		goto out;
	}

	// ioctl() is variadic; pass the by-value argument with the width the
	// driver reads.
	if (ioctl(details.fd, HPET_IRQFREQ, (unsigned long) freq) < 0)
	{
		goto out;
	}
	if (ioctl(details.fd, HPET_INFO, &info) < 0)
	{
		goto out;
	}
	// Periodic mode is only requested when the info flags are non-zero.
	if (info.hi_flags && ioctl(details.fd, HPET_EPI, 0) < 0)
	{
		goto out;
	}
	if (ioctl(details.fd, HPET_IE_ON, 0) < 0)
	{
		goto out;
	}

	// Deliver the descriptor's SIGIO to the helper rather than to this process.
	// NOTE(review): nothing here waits for the helper to have installed its
	// SIGIO handler before signals can start arriving - confirm this is benign.
	if ((fcntl(details.fd, F_SETOWN, details.childPid) == -1) ||
		((fdFlags = fcntl(details.fd, F_GETFL)) == -1) ||
		(fcntl(details.fd, F_SETFL, fdFlags | O_ASYNC) == -1))
	{
		goto out;
	}
	return 1;

	out:
	if (details.fd >= 0)
	{
		close(details.fd);
		details.fd = -1;
	}
	if (details.childPid != -1)
	{
		// The helper runs on childStack, so it must be gone before the stack
		// is freed. SIGKILL cannot be ignored, which guarantees the wait ends.
		// The helper was cloned without SIGCHLD as exit signal; a plain
		// waitpid() does not see such children, __WALL does.
		kill(details.childPid, SIGKILL);
		while (waitpid(details.childPid, NULL, __WALL) == -1 && errno == EINTR)
		{
			// interrupted by a signal: wait again
		}
		details.childPid = -1;
	}
	free(details.childStack);
	details.childStack = NULL;
	return 0;
}
 
 
/*
 * Try to drive the time cache from /dev/rtc.
 *
 * Spawns the SIGIO helper (hpet_thread) with clone(), opens /dev/rtc,
 * programs a periodic interrupt of `freq` Hz and routes the resulting
 * SIGIO signals to the helper.
 *
 * freq: requested interrupt rate in Hz; must be positive.
 *
 * Returns 1 on success. On any failure returns 0 after releasing everything
 * acquired here (descriptor, helper, helper stack), leaving details.fd = -1,
 * details.childPid = -1 and details.childStack = NULL.
 *
 * Expects details.fd and details.childPid to be -1 on entry, as init()
 * arranges.
 */
static int
init_rtc(int freq)
{
	int fdFlags;

	if (freq <= 0)
	{
		return 0;
	}

	// Child will have a new signal handler table, but share the same memory space
	// (new thread is created to avoid signal handler conflicts,
	// and to avoid waking the main thread up due to the rtc signals)
	details.childStack = malloc(STACK_SIZE);
	if (details.childStack == NULL)
	{
		// Out of memory is not fatal: the caller can fall back to another method.
		return 0;
	}
	details.childPid = clone(hpet_thread, details.childStack + STACK_SIZE - 64, CLONE_VM, NULL);
	if (details.childPid == -1)
	{
		goto out;
	}

	details.fd = open("/dev/rtc", O_RDONLY);
	if (details.fd < 0)
	{
		goto out;
	}

	// ioctl() is variadic; pass the by-value argument with the width the
	// driver reads.
	if (ioctl(details.fd, RTC_IRQP_SET, (unsigned long) freq) < 0)
	{
		goto out;
	}
	if (ioctl(details.fd, RTC_PIE_ON, 0) < 0)
	{
		goto out;
	}

	// Deliver the descriptor's SIGIO to the helper rather than to this process.
	// NOTE(review): nothing here waits for the helper to have installed its
	// SIGIO handler before signals can start arriving - confirm this is benign.
	if ((fcntl(details.fd, F_SETOWN, details.childPid) == -1) ||
		((fdFlags = fcntl(details.fd, F_GETFL)) == -1) ||
		(fcntl(details.fd, F_SETFL, fdFlags | O_ASYNC) == -1))
	{
		goto out;
	}
	return 1;

	out:
	if (details.fd >= 0)
	{
		close(details.fd);
		details.fd = -1;
	}
	if (details.childPid != -1)
	{
		// The helper runs on childStack, so it must be gone before the stack
		// is freed. SIGKILL cannot be ignored, which guarantees the wait ends.
		// The helper was cloned without SIGCHLD as exit signal; a plain
		// waitpid() does not see such children, __WALL does.
		kill(details.childPid, SIGKILL);
		while (waitpid(details.childPid, NULL, __WALL) == -1 && errno == EINTR)
		{
			// interrupted by a signal: wait again
		}
		details.childPid = -1;
	}
	free(details.childStack);
	details.childStack = NULL;
	return 0;
}
 
/*
 * Library constructor: resolve the real gettimeofday, calibrate the CPU
 * counter and pick a refresh method for the cached time.
 *
 * Environment:
 *   FASTTIME_FREQ  refresh rate in Hz (default 1024). Values that are not a
 *                  positive number no larger than MAX_FREQ_HZ are ignored.
 *   FASTTIME_ALT   if set and non-empty, skip the timer devices and use the
 *                  CPU-counter method.
 *
 * Methods are tried in the order HPET, RTC, CPU counter. On return
 * details.initialised is 1 (timer signals) or 2 (CPU counter) and
 * details.cachedTime holds a fresh timestamp.
 *
 * Exits the process with status 1 if the real gettimeofday cannot be found.
 */
static void __attribute__((constructor))
init()
{
	enum
	{
		DEFAULT_FREQ_HZ = 1024,
		MAX_FREQ_HZ = 1000000,         // 1 us period; struct timeval cannot resolve finer
		MAX_REFRESH_TICKS = 0x7fffffff // largest value details.frequency (an int) can hold
	};
	char* msg;
	char* freqEnv;
	char* freqEnd;
	char* altEnv;
	long parsedFreq;
	int rawFreq = DEFAULT_FREQ_HZ;
	int useAlt;
	double refreshTicks = 0.0;

	if (details.initialised)
	{
		// Re-running would leak the helper, its stack and the descriptor.
		fprintf(stderr, "libfasttime.so: Already initialised!\n");
		return;
	}

	details.initialised = 0;
	details.system_gettimeofday = NULL;
	details.cachedTime.tv_sec = 0;
	details.cachedTime.tv_usec = 0;
	details.frequency = 0;
	details.childPid = -1;
	details.parentPid = getpid();
	details.fd = -1;
	details.childStack = NULL;
	details.ticksPerUsec = 0;

	dlerror(); // discard any stale error so the check below is about this lookup
	details.system_gettimeofday = dlsym(RTLD_NEXT, "gettimeofday");
	msg = dlerror();
	if (msg != NULL || details.system_gettimeofday == NULL)
	{
		fprintf(stderr, "libfasttime.so: gettimeofday: dlsym failed: %s\n",
				msg ? msg : "symbol not found");
		exit(1);
	}

	details.ticksPerUsec = calibrate();
	//fprintf(stderr, "Ticks per usec = %d\n", details.ticksPerUsec);

	freqEnv = getenv("FASTTIME_FREQ");
	if (freqEnv)
	{
		errno = 0;
		parsedFreq = strtol(freqEnv, &freqEnd, 10);
		if (errno == 0 && freqEnd != freqEnv && parsedFreq > 0 && parsedFreq <= MAX_FREQ_HZ)
		{
			rawFreq = (int) parsedFreq;
		}
		else
		{
			fprintf(stderr, "libfasttime.so: ignoring invalid FASTTIME_FREQ \"%s\", using %d Hz\n",
					freqEnv, rawFreq);
		}
	}

	// details.frequency is compared against a CPU-counter delta in
	// gettimeofday(), so it has to be the length of one refresh period in
	// ticks: (ticks per second) / (refreshes per second). A higher
	// FASTTIME_FREQ therefore yields a smaller threshold. 0 (unusable
	// calibration) makes every call refresh.
	if (details.ticksPerUsec > 0)
	{
		refreshTicks = (details.ticksPerUsec * 1e6) / rawFreq;
	}
	if (refreshTicks > MAX_REFRESH_TICKS)
	{
		refreshTicks = MAX_REFRESH_TICKS;
	}
	details.frequency = (int) refreshTicks;

	altEnv = getenv("FASTTIME_ALT");
	useAlt = (altEnv && *altEnv);

	if (!useAlt && init_hpet(rawFreq))
	{
		details.initialised = 1;
		fprintf(stderr, "libfasttime.so: initialized using hpet\n");
	}
	else if (!useAlt && init_rtc(rawFreq))
	{
		details.initialised = 1;
		fprintf(stderr, "libfasttime.so: initialized using rtc\n");
	}
	else
	{
		lastTSCCount = RDTSC();
		details.initialised = 2;
		fprintf(stderr, "libfasttime.so: using alternative time method\n");
	}

	// Prime the cache so the first caller does not see the zeroed value.
	details.system_gettimeofday(&details.cachedTime, NULL);
}
 
/*
 * Library destructor: release the timer descriptor, stop the SIGIO helper
 * and free its stack.
 *
 * Order matters: the helper executes on details.childStack, so it is
 * terminated and reaped before that memory is freed.
 */
static void __attribute__((destructor))
cleanup()
{
	if (details.fd >= 0)
	{
		close(details.fd);
		details.fd = -1;
	}
	if (details.childPid != -1)
	{
		// details.parentPid is the pid recorded by init(). In a fork()ed copy
		// of the application the helper belongs to the original process and
		// must be left alone; only the process that created it stops it.
		if (getpid() == details.parentPid)
		{
			// SIGKILL cannot be ignored, which guarantees the wait ends.
			// The helper was cloned without SIGCHLD as exit signal; a plain
			// waitpid() does not see such children, __WALL does.
			kill(details.childPid, SIGKILL);
			while (waitpid(details.childPid, NULL, __WALL) == -1 && errno == EINTR)
			{
				// interrupted by a signal: wait again
			}
		}
		details.childPid = -1;
	}
	free(details.childStack);
	details.childStack = NULL;
}
 
 
/*
 * Interposed gettimeofday(): answers from details.cachedTime instead of
 * asking the system on every call.
 *
 * tp:  receives the cached time; may be NULL, in which case nothing is stored.
 * tzp: if non-NULL the call is handed to the real gettimeofday unchanged,
 *      because the cache holds no timezone data to fill it with.
 *
 * In mode 2 (CPU counter) the cache is refreshed here once more than
 * details.frequency ticks have passed since the last refresh, or when the
 * counter appears to have gone backwards. In mode 1 the cache is refreshed
 * by hpet_trigger() and this function only copies it.
 *
 * Returns 0, or the real gettimeofday's result when the call is delegated.
 *
 * NOTE(review): cachedTime is written by hpet_trigger() asynchronously and
 * read here without synchronization - confirm a torn tv_sec/tv_usec pair is
 * acceptable.
 */
int
gettimeofday(struct timeval *__restrict tp, __timezone_ptr_t tzp) // Copy declaration from sys/time.h
{
	if (tzp != NULL && details.system_gettimeofday != NULL)
	{
		return details.system_gettimeofday(tp, tzp);
	}

	if (details.initialised == 2)
	{
		uint64_t currentCount = RDTSC();
		int64_t diff = currentCount - lastTSCCount;
		if (diff > details.frequency || diff < 0)
		{
			details.system_gettimeofday(&details.cachedTime, NULL);
			lastTSCCount = currentCount;
		}
	}

	if (tp != NULL)
	{
		tp->tv_sec = details.cachedTime.tv_sec;
		tp->tv_usec = details.cachedTime.tv_usec;
	}

	return 0;
}

Main

Wtf..

July 22nd, 2010

Does anyone think running realtime kernels for gameservers is a complete waste of time?

Realtime kernels on x86 are slow, because of interrupt latency :(

Main

Latest project

July 20th, 2010

http://people.summit-servers.com/monk.pdf

Main