共计 6647 个字符,预计需要花费 17 分钟才能阅读完成。
缘起
最近浏览 <<Go 微服务实战 >> (刘金亮, 2021.1)
本系列笔记拟采纳 golang 练习之
案例需要 (聊天服务器)
- 用户能够连贯到服务器。
- 用户能够设定本人的用户名。
- 用户能够向服务器发送音讯,同时服务器也会向其余用户播送该音讯。
指标 (Day 3)
- 在 Day 2 中, 已根本实现功能性编码, 测试也通过了, 明天尝试压测, 并诊断是否存在内存和 routine 透露
过程
- 批改单元测试代码, 并发客户端减少到 50 个, 每个客户端随机 1 - 3 秒发送一条音讯
- 增加 ILoggingService, 以更不便的采集全局日志
- 应用 pprof 包的 api, 采集 heap 和 goroutine 的诊断日志
- 应用 go tool pprof < 诊断日志 >, 察看是否存在内存和 routine 透露
- 发现单元测试总是 failed, 总有若干 tChatClient, 服务端未打印敞开日志
- 查看 routine 诊断日志, 发现存在 routine 透露
- 在 tChatClient 中增加更具体的函数调用日志, 发现是 closeChan 通道写入阻塞导致
- 修复 routine 透露的 bug, 加大并发客户端到 500 个, 并进行多轮测试, 察看是否存在内存和 routine 透露
单元测试
- ChatServer_test.go
- 增大客户端并发数, 并增加 pprof 诊断
- 诊断 ILoggingService.AllLogs(), 判断所有客户端是否实现连贯 / 断开的闭环
package chat_server | |
import ( | |
"fmt" | |
cs "learning/gooop/chat_server" | |
"math/rand" | |
"os" | |
"runtime" | |
"runtime/pprof" | |
"strings" | |
"sync" | |
"testing" | |
"time" | |
) | |
func Test_ChatServer(t *testing.T) {fnAssertTrue := func(b bool, msg string) { | |
if !b {t.Fatal(msg) | |
} | |
} | |
// create dump file | |
memProfile, err := os.Create("/home/ioly/chat_server_mem.profile") | |
if err != nil {t.Fatal(err) | |
} | |
cpuProfile, err := os.Create("/home/ioly/chat_server_cpu.profile") | |
if err != nil {t.Fatal(err) | |
} | |
rtnProfile, err := os.Create("/home/ioly/chat_server_routine.profile") | |
if err != nil {t.Fatal(err) | |
} | |
err = pprof.StartCPUProfile(cpuProfile) | |
if err != nil {t.Fatal(err) | |
} | |
defer pprof.StopCPUProfile() | |
port := 3333 | |
server := cs.NewChatServer() | |
err = server.Open(port) | |
if err != nil {t.Fatal(err) | |
} | |
clientCount := 500 | |
wg := &sync.WaitGroup{} | |
rnd := rand.New(rand.NewSource(time.Now().UnixNano())) | |
address := fmt.Sprintf("localhost:%v", port) | |
for i := 0;i < clientCount;i++ {err, client := cs.DialChatClient(address) | |
if err != nil {t.Fatal(err) | |
} | |
id := fmt.Sprintf("c%02d", i) | |
//client.RecvHandler(func(client cs.IChatClient, msg cs.IMsg) {// t.Logf("%v recv: %v\n", id, msg) | |
//}) | |
wg.Add(1) | |
go func() {client.SetName(id) | |
client.Send(&cs.NameMsg{id}) | |
n := 0 | |
duration := rnd.Intn(3) + 1 | |
for range time.Tick(time.Duration(duration) * time.Second) {client.Send(&cs.ChatMsg{id, fmt.Sprintf("msg %02d from %v", n, id) }) | |
n++ | |
if n > 10 {break} | |
} | |
client.Close() | |
wg.Done()}()} | |
// wait and set status | |
done := []bool{false} | |
go func() {wg.Wait() | |
done[0] = true | |
}() | |
// print passed seconds | |
passedSeconds := 0 | |
for range time.Tick(time.Second) { | |
passedSeconds++ | |
t.Logf("%v seconds passed", passedSeconds) | |
if done[0] {break} | |
} | |
time.Sleep(5*time.Second) | |
server.Close() | |
// dump heap file | |
runtime.GC() | |
err = pprof.Lookup("heap").WriteTo(memProfile, 0) | |
if err != nil {t.Fatal(err) | |
} | |
err = pprof.Lookup("goroutine").WriteTo(rtnProfile, 0) | |
if err != nil {t.Fatal(err) | |
} | |
// check server logs | |
logs := cs.Logging.AllLogs() | |
fnHasLog := func(log string) bool { | |
for _,it := range logs {if strings.Contains(it, log) {return true} | |
} | |
return false | |
} | |
for i := 0;i < clientCount;i++ {msg := fmt.Sprintf("tChatClient.postConnClosed, c%02d, serverFlag=false", i) | |
fnAssertTrue(fnHasLog(msg), "expecting log:" + msg) | |
msg = fmt.Sprintf("tChatServer.handleIncomingConn, clientCount=%v", i + 1) | |
fnAssertTrue(fnHasLog(msg), "expecting log:" + msg) | |
msg = fmt.Sprintf("tChatClient.postConnClosed, c%02d, serverFlag=true", i) | |
fnAssertTrue(fnHasLog(msg), "expecting log:" + msg) | |
msg = fmt.Sprintf("tChatServer.handleClientClosed, c%02d", i) | |
fnAssertTrue(fnHasLog(msg), "expecting log:" + msg) | |
} | |
} |
测试输入
并发 500, 仅开端局部
tChatClient.beginWrite, write error, c496, serverFlag=true | |
tChatClient.beginWrite, <- closeChan, c496, serverFlag=true | |
tChatClient.postConnClosed, c496, serverFlag=true | |
tChatServer.handleClientClosed, c496 | |
tChatServer.handleClientClosed, c496, clientCount=1 | |
tChatClient.closeConn, c498, serverFlag=false | |
tChatClient.beginWrite, <- closeChan, c498, serverFlag=false | |
tChatClient.beginRead, read error, c498, serverFlag=false | |
tChatClient.postConnClosed, c498, serverFlag=false | |
tChatClient.beginRead, read error, c498, serverFlag=true | |
tChatClient.closeConn, c498, serverFlag=true | |
tChatClient.beginWrite, <- closeChan, c498, serverFlag=true | |
tChatClient.postConnClosed, c498, serverFlag=true | |
tChatServer.handleClientClosed, c498 | |
tChatServer.handleClientClosed, c498, clientCount=0 | |
ChatServer_test.go:97: 34 seconds passed | |
--- PASS: Test_ChatServer (39.50s) | |
PASS | |
ok command-line-arguments 39.535s |
内存透露诊断
无泄漏
$ go tool pprof ~/chat_server_mem.profile | |
File: chat_server.test | |
Type: inuse_space | |
Time: Mar 9, 2021 at 8:56am (CST) | |
Entering interactive mode (type "help" for commands, "o" for options) | |
(pprof) top | |
Showing nodes accounting for 8461.35kB, 100% of 8461.35kB total | |
Showing top 10 nodes out of 15 | |
flat flat% sum% cum cum% | |
5720.44kB 67.61% 67.61% 5720.44kB 67.61% time.startTimer | |
1184.27kB 14.00% 81.60% 1184.27kB 14.00% runtime/pprof.StartCPUProfile | |
1024.38kB 12.11% 93.71% 1024.38kB 12.11% runtime.malg | |
532.26kB 6.29% 100% 532.26kB 6.29% runtime/pprof.(*profMap).lookup | |
0 0% 100% 1184.27kB 14.00% command-line-arguments.Test_ChatServer | |
0 0% 100% 5720.44kB 67.61% learning/gooop/chat_server.(*tChatClient).beginWrite | |
0 0% 100% 1024.38kB 12.11% runtime.mstart | |
0 0% 100% 1024.38kB 12.11% runtime.newproc.func1 | |
0 0% 100% 1024.38kB 12.11% runtime.newproc1 | |
0 0% 100% 1024.38kB 12.11% runtime.systemstack | |
(pprof) quit |
routine 透露诊断 (修复前)
有透露时的诊断日志 (并发 50)
$ go tool pprof ~/chat_server_routine.profile | |
File: chat_server.test | |
Type: goroutine | |
Time: Mar 9, 2021 at 9:12am (CST) | |
Entering interactive mode (type "help" for commands, "o" for options) | |
(pprof) top | |
Showing nodes accounting for 4, 100% of 4 total | |
Showing top 10 nodes out of 21 | |
flat flat% sum% cum cum% | |
3 75.00% 75.00% 3 75.00% runtime.gopark | |
1 25.00% 100% 1 25.00% runtime/pprof.runtime_goroutineProfileWithLabels | |
0 0% 100% 1 25.00% command-line-arguments.Test_ChatServer | |
0 0% 100% 1 25.00% learning/gooop/chat_server.(*tChatClient).beginWrite | |
0 0% 100% 1 25.00% learning/gooop/chat_server.(*tChatClient).closeConn | |
0 0% 100% 1 25.00% main.main | |
0 0% 100% 1 25.00% runtime.chanrecv | |
0 0% 100% 1 25.00% runtime.chanrecv1 | |
0 0% 100% 1 25.00% runtime.chansend | |
0 0% 100% 1 25.00% runtime.chansend1 | |
(pprof) |
能够看到 closeConn 函数阻塞
func (me *tChatClient) closeConn() {if !atomic.CompareAndSwapInt32(&me.closeFlag, 0, 1) {return} | |
Logging.Logf("tChatClient.closeConn, %v, serverFlag=%v", me.name, me.serverFlag) | |
me.closeChan <- true | |
} |
从代码看, closeConn 函数阻塞只有一种可能, 就是 closeChan 阻塞.
这个起因有点诡异, 因为曾经加了 atomic.CompareAndSwapInt32 爱护,
依然导致阻塞, 惟一起因是并发环境下, closeChan 尚未开始读取, 是无奈写入的.
修复 routine 透露
就是给导致写阻塞的 closeChan 加上缓冲, 大小等于 closeConn 函数的可能调用入口数量
func openChatClient(conn net.Conn, serverFlag bool) IChatClient { | |
it := &tChatClient{ | |
conn: conn, | |
openFlag: 0, | |
closeFlag: 0, | |
serverFlag: serverFlag, | |
closeChan: make(chan bool, 3), // 给 closeChan 增加缓冲区!! | |
sendChan: make(chan IMsg, gMaxPendingSend), | |
name: "anonymous", | |
sendLogs: []IMsg{}, | |
dropLogs: []IMsg{}, | |
recvLogs: []IMsg{}, | |
} | |
it.open() | |
return it | |
} |
routine 透露诊断 (修复后)
修复后, 加大并发到 500, 未发现显著 routine 透露 (不存在业务代码导致挂起的 routine)
$ go tool pprof ~/chat_server_routine.profile | |
File: chat_server.test | |
Type: goroutine | |
Time: Mar 9, 2021 at 9:28am (CST) | |
Entering interactive mode (type "help" for commands, "o" for options) | |
(pprof) top | |
Showing nodes accounting for 3, 100% of 3 total | |
Showing top 10 nodes out of 17 | |
flat flat% sum% cum cum% | |
2 66.67% 66.67% 2 66.67% runtime.gopark | |
1 33.33% 100% 1 33.33% runtime/pprof.runtime_goroutineProfileWithLabels | |
0 0% 100% 1 33.33% command-line-arguments.Test_ChatServer | |
0 0% 100% 1 33.33% main.main | |
0 0% 100% 1 33.33% runtime.chanrecv | |
0 0% 100% 1 33.33% runtime.chanrecv1 | |
0 0% 100% 1 33.33% runtime.main | |
0 0% 100% 1 33.33% runtime/pprof.(*Profile).WriteTo | |
0 0% 100% 1 33.33% runtime/pprof.profileWriter | |
0 0% 100% 1 33.33% runtime/pprof.writeGoroutine | |
(pprof) |
(end)
正文完