We wrote the simplest possible TCP server (with minor logging) to examine the memory footprint (see tcp-server.go below)
The server simply accepts connections and does nothing. It is being run on an Ubuntu 12.04.4 LTS server (kernel 3.2.0-61-generic) with Go version go1.3 linux/amd64.
The attached benchmarking program (pulse.go) creates, in this example, 10k connections, disconnects them after 30 seconds, repeats this cycle three times, and then continuously repeats small pulses of 1k connections/disconnections. The command used to test was ./pulse -big=10000 -bs=30.
The first attached graph is obtained by recording runtime.ReadMemStats when the number of clients has changed by a multiple of 500, and the second graph is the RES memory size seen by “top” for the server process.
The server starts with a negligible 1.6KB of memory. Then the memory is set by the “big” pulses of 10k connections at ~60MB (as seen by top), or at about 16MB “SystemMemory” as seen by ReadMemStats. As expected, when the 10K pulses end, the in-use memory drops, and eventually the program starts releasing memory back to OS as evidenced by the grey “Released Memory” line.
The problem is that the System Memory (and correspondingly, the RES memory seen by “top”) never drops significantly (although it drops a little as seen in the second graph).
We would expect that after the 10K pulses end, memory would continue to be released until the RES size is the minimum needed for handling each 1k pulse (which is 8m RES as seen by “top” and 2MB in-use reported by runtime.ReadMemStats). Instead, the RES stays at about 56MB and in-use never drops from its highest value of 60MB at all.
We want to ensure scalability for irregular traffic with occasional spikes as well as be able to run multiple servers on the same box that have spikes at different times. Is there a way to effectively ensure that as much memory is released back to the system as possible in a reasonable time frame?
Code https://gist.github.com/eugene-bulkin/e8d690b4db144f468bc5 :
server.go:
package main
import (
"net"
"log"
"runtime"
"sync"
)
var m sync.Mutex
var num_clients = 0
var cycle = 0
func printMem() {
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
log.Printf("Cycle #%3d: %5d clients | System: %8d Inuse: %8d Released: %8d Objects: %6d\n", cycle, num_clients, ms.HeapSys, ms.HeapInuse, ms.HeapReleased, ms.HeapObjects)
}
func handleConnection(conn net.Conn) {
//log.Println("Accepted connection:", conn.RemoteAddr())
m.Lock()
num_clients++
if num_clients % 500 == 0 {
printMem()
}
m.Unlock()
buffer := make([]byte, 256)
for {
_, err := conn.Read(buffer)
if err != nil {
//log.Println("Lost connection:", conn.RemoteAddr())
err := conn.Close()
if err != nil {
log.Println("Connection close error:", err)
}
m.Lock()
num_clients--
if num_clients % 500 == 0 {
printMem()
}
if num_clients == 0 {
cycle++
}
m.Unlock()
break
}
}
}
func main() {
printMem()
cycle++
listener, err := net.Listen("tcp", ":3033")
if err != nil {
log.Fatal("Could not listen.")
}
for {
conn, err := listener.Accept()
if err != nil {
log.Println("Could not listen to client:", err)
continue
}
go handleConnection(conn)
}
}
pulse.go:
package main
import (
"flag"
"net"
"sync"
"log"
"time"
)
var (
numBig = flag.Int("big", 4000, "Number of connections in big pulse")
bigIters = flag.Int("i", 3, "Number of iterations of big pulse")
bigSep = flag.Int("bs", 5, "Number of seconds between big pulses")
numSmall = flag.Int("small", 1000, "Number of connections in small pulse")
smallSep = flag.Int("ss", 20, "Number of seconds between small pulses")
linger = flag.Int("l", 4, "How long connections should linger before being disconnected")
)
var m sync.Mutex
var active_conns = 0
var connections = make(map[net.Conn] bool)
func pulse(n int, linger int) {
var wg sync.WaitGroup
log.Printf("Connecting %d client(s)...\n", n)
for i := 0; i < n; i++ {
wg.Add(1)
go func() {
m.Lock()
defer m.Unlock()
defer wg.Done()
active_conns++
conn, err := net.Dial("tcp", ":3033")
if err != nil {
log.Panicln("Unable to connect: ", err)
return
}
connections[conn] = true
}()
}
wg.Wait()
if len(connections) != n {
log.Fatalf("Unable to connect all %d client(s).\n", n)
}
log.Printf("Connected %d client(s).\n", n)
time.Sleep(time.Duration(linger) * time.Second)
for conn := range connections {
active_conns--
err := conn.Close()
if err != nil {
log.Panicln("Unable to close connection:", err)
conn = nil
continue
}
delete(connections, conn)
conn = nil
}
if len(connections) > 0 {
log.Fatalf("Unable to disconnect all %d client(s) [%d remain].\n", n, len(connections))
}
log.Printf("Disconnected %d client(s).\n", n)
}
func main() {
flag.Parse()
for i := 0; i < *bigIters; i++ {
pulse(*numBig, *linger)
time.Sleep(time.Duration(*bigSep) * time.Second)
}
for {
pulse(*numSmall, *linger)
time.Sleep(time.Duration(*smallSep) * time.Second)
}
}