Is everything a scam?
Let me explain. I wrote a web server in C... let that sink in.
It's pretty simple: it uses `accept` to accept connections and `poll` to wait for activity on them. It's single-threaded and does almost nothing.
/*
 * Run a single-threaded HTTP server on PORT, bound to INADDR_ANY.
 *
 * One poll() set tracks the listening socket (slot 0) and every open client
 * connection (slots 1..nfds-1). Readable clients are handed to
 * handle_request() together with a shared BUFFER_SIZE scratch buffer.
 *
 * route: the route forwarded unchanged to handle_request() for each request.
 *
 * Does not return while the server is healthy. Returns 1 if the socket cannot
 * be created and -1 if bind() or listen() fails; a poll() failure terminates
 * the process with exit(-1).
 */
int webserver_listen_and_serve(Route route) {
    // Create a socket
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("webserver (socket)");
        return 1;
    }
    printf("socket created successfully\n");

    // Create the address to bind the socket to (zeroed so padding bytes are defined)
    struct sockaddr_in host_addr = {0};
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(PORT);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    // Bind the socket to the address
    if (bind(sockfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) != 0) {
        perror("webserver (bind)");
        close(sockfd);
        return -1;
    }
    printf("socket successfully bound to address\n");

    // Listen for incoming connections
    if (listen(sockfd, SOMAXCONN) != 0) {
        perror("webserver (listen)");
        close(sockfd);
        return -1;
    }
    printf("server listening for connections\n");

    struct pollfd pfds[MAX_CONNS] = {0};
    const nfds_t max_fds = sizeof(pfds) / sizeof(pfds[0]);
    nfds_t nfds = 1;
    pfds[0].fd = sockfd;
    char buffer[BUFFER_SIZE];

    for (;;) {
        // Only watch the listening socket while a free slot exists. When the
        // table is full, pending connections wait in the listen backlog
        // instead of overflowing pfds.
        pfds[0].events = (nfds < max_fds) ? POLLIN : 0;

        int ready = poll(pfds, nfds, -1);
        if (ready == -1) {
            perror("webserver (poll)");
            exit(-1);
        }

        if ((pfds[0].revents & POLLIN) && nfds < max_fds) {
            // The peer address is never used, so it is not requested.
            int newfd = accept(sockfd, NULL, NULL);
            if (newfd == -1) {
                perror("webserver (accept)");
            } else {
                printf("New connection of fd:%d\n", newfd);
                pfds[nfds].fd = newfd;
                pfds[nfds].events = POLLIN;
                // The slot may hold stale revents from a removed client;
                // clear it so the new socket is not read before poll() has
                // reported it readable.
                pfds[nfds].revents = 0;
                nfds++;
            }
        }

        nfds_t i = 1;
        while (i < nfds) {
            const short events = pfds[i].revents;
            int keep = 1;

            if (events & POLLIN) {
                // A zero return means the connection is finished; the
                // descriptor is not closed again here.
                keep = handle_request(pfds[i].fd, route, buffer);
            } else if (events & (POLLERR | POLLHUP | POLLNVAL)) {
                // Nothing to read and the socket is dead: drop it, otherwise
                // poll() would keep returning immediately for it.
                close(pfds[i].fd);
                keep = 0;
            }

            if (keep) {
                i++;
                continue;
            }

            // Remove slot i by moving the last entry into it. The index is
            // not advanced so the moved entry (with its own revents) is
            // examined next.
            pfds[i] = pfds[nfds - 1];
            nfds--;
        }
    }
    return 0;
}
`handle_request` looks like you would expect:
/*
 * Read one HTTP request from a client socket and answer it.
 *
 * sockfd: connected client socket to read the request from.
 * route:  the only known route; route.handler(sockfd) is called when the
 *         request URI equals route.path, otherwise not_found_handler(sockfd).
 * buffer: caller-owned scratch space of at least BUFFER_SIZE bytes; it is
 *         overwritten and tokenised in place.
 *
 * Returns headers.keep_alive: TRUE when the request carried a
 * "Connection: Keep-Alive" header line (exact, case-sensitive prefix match),
 * zero otherwise. Whenever 0 is returned, sockfd has already been closed here.
 *
 * NOTE(review): a request is assumed to arrive in a single read(); requests
 * split across several reads are not reassembled.
 */
int handle_request(int sockfd, Route route, char *buffer) {
    // Read from the socket, leaving one byte for the terminator that the
    // string functions below depend on.
    ssize_t valread = read(sockfd, buffer, BUFFER_SIZE - 1);
    if (valread < 0) {
        perror("webserver (read)");
        close(sockfd);
        return 0;
    }
    if (valread == 0) {
        // The peer closed the connection. Parsing now would reuse whatever
        // the previous request left in the buffer.
        close(sockfd);
        return 0;
    }
    buffer[valread] = '\0';

    char *wb = buffer;
    const char *line = strsep(&wb, "\n");

    // Read the request line. Each token is shorter than the line itself,
    // which is shorter than BUFFER_SIZE, so the unbounded %s cannot overflow.
    char method[BUFFER_SIZE], uri[BUFFER_SIZE], version[BUFFER_SIZE];
    version[0] = '\0';
    if (sscanf(line, "%s %s %s", method, uri, version) < 2) {
        // No method/URI pair: drop the connection rather than compare an
        // uninitialised uri.
        close(sockfd);
        return 0;
    }

    // Scan header lines up to the blank line ("\r" once split on '\n').
    Headers headers = {0};
    for (line = strsep(&wb, "\n"); line; line = strsep(&wb, "\n")) {
        if (strcmp("\r", line) == 0)
            break;
        if (strncmp("Connection: Keep-Alive", line, strlen("Connection: Keep-Alive")) == 0)
            headers.keep_alive = TRUE;
    }

    if (strcmp(uri, route.path) == 0) {
        route.handler(sockfd);
    } else {
        not_found_handler(sockfd);
    }

    if (!headers.keep_alive)
        close(sockfd);
    return headers.keep_alive;
}
I did some simple benchmarking and saw it can do about 200k requests per second on my laptop. For the heck of it, I also wrote a server in GoLang, one in Javascript, and one in Rust.
For comparison:
- Javascript server: 20k requests per second, using 20MB of memory.
- Golang server: 80k requests per second using 12MB of memory
- Rust server: 200k requests per second using about 10MB of memory.
- C server: 200k requests per second using about 1MB of memory
I have linked to the source code for each of the servers above. All servers were forced to run on only one core.
Here is the big question.
Why are modern technologies so energy inefficient? Have we gotten lazy as programmers? Have we decided that using 10x the power doesn't matter? Don't get me wrong, I know that there are times when being inefficient can be the right choice to get started. But committing to fundamentally flawed technologies and signing up for a significant technical debt seems shortsighted and naive.
Does my laptop have 32GB of RAM because it needs it, or because every link in the chain is getting lazy and using orders of magnitude more memory or processing power than it needs?
I stripped the unused symbols out of the rust code and saw that the code links an assortment of random things that are not needed:
nm ./target/release/test-rust
U __Unwind_Backtrace
U __Unwind_DeleteException
U __Unwind_GetDataRelBase
U __Unwind_GetIP
U __Unwind_GetIPInfo
U __Unwind_GetLanguageSpecificData
U __Unwind_GetRegionStart
U __Unwind_GetTextRelBase
U __Unwind_RaiseException
U __Unwind_Resume
U __Unwind_SetGR
U __Unwind_SetIP
U ___error
U __dyld_get_image_header
U __dyld_get_image_name
U __dyld_get_image_vmaddr_slide
U __dyld_image_count
0000000100000000 T __mh_execute_header
U __tlv_atexit
U __tlv_bootstrap
U _abort
U _accept
U _bind
U _bzero
U _calloc
U _clock_gettime
U _close
U _closedir
U _dispatch_release
U _dispatch_semaphore_create
U _dispatch_semaphore_signal
U _dispatch_semaphore_wait
U _dlsym
U _fcntl
U _free
U _fstat
U _getcwd
U _getenv
U _gettimeofday
U _kevent
U _kqueue
U _listen
U _malloc
U _memcmp
U _memcpy
U _memmove
U _memset
U _mmap
U _mprotect
U _munmap
U _open
U _opendir
U _posix_memalign
U _pow
U _pthread_attr_destroy
U _pthread_attr_init
U _pthread_attr_setstacksize
U _pthread_cond_destroy
U _pthread_cond_signal
U _pthread_cond_timedwait
U _pthread_cond_wait
U _pthread_create
U _pthread_detach
U _pthread_get_stackaddr_np
U _pthread_get_stacksize_np
U _pthread_join
U _pthread_mutex_destroy
U _pthread_mutex_init
U _pthread_mutex_lock
U _pthread_mutex_trylock
U _pthread_mutex_unlock
U _pthread_mutexattr_destroy
U _pthread_mutexattr_init
U _pthread_mutexattr_settype
U _pthread_rwlock_destroy
U _pthread_rwlock_rdlock
U _pthread_rwlock_unlock
U _pthread_self
U _pthread_setname_np
U _read
U _readdir_r
U _realloc
U _recv
U _sched_yield
U _send
U _setsockopt
U _shutdown
U _sigaction
U _sigaltstack
U _signal
U _socket
U _socketpair
U _strerror_r
U _strlen
U _sysconf
U _waitpid
U _write
U _writev
This code links a lot of `pthread` symbols for a program that is single-threaded, not to mention the dynamic symbol loading.
For comparison, the C binary's symbol list looks like this (and I really wish it had inlined the `str*` calls):
nm ../test-c/webserver
U ___chkstk_darwin
U ___stack_chk_fail
U ___stack_chk_guard
0000000100000000 T __mh_execute_header
U _accept
U _bind
U _bzero
U _close
U _exit
U _getsockname
U _listen
U _perror
U _poll
U _printf
U _puts
U _read
U _socket
U _sscanf
U _strcmp
U _strlen
U _strncmp
U _strsep
U _write
As a side note, the Rust binary is about 10 MB, while the C binary is about 34KB.
Why do I care about this?
Smaller is faster in every way. If the binary is small, it will copy, load, and execute faster. Everything costs money. We run services on dedicated servers. And being able to fit 10x the servers into memory affects unit economics.
As a side note, the C program will most likely not suffer bit-rot from fancy new features being rolled out in the next version of the language. JavaScript, Rust, and Go, by contrast, are moving targets; keeping up with them will mean returning to the codebase every few months to ensure things are still working.
Unless there are bugs, the C code will be written once and updated when requirements change. Not when the spec changes.