Skip to content

Commit 74fde91

Browse files
author
Maxime Peim
committed
fix: fsm connection write deadline
A write deadline of 1 second is set once during connection initialisation and is never changed until the connection reaches the established state. So, if the connection doesn't send its open message and its first keepalive within 1 second, the peering is never established. Under heavy load this is an issue: while testing with 10k peers without routes, most of the peers never connected. Instead, it is preferable to set a deadline of 1s for each write.
1 parent c9c1c12 commit 74fde91

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

pkg/server/fsm.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,6 @@ func (s *adminStateRaw) Store(state adminState) {
208208
}
209209

210210
func initializeConn(fsm *fsm, conn net.Conn) {
211-
conn.SetWriteDeadline(time.Now().Add(time.Second))
212-
213211
fsm.lock.Lock()
214212
if err := setPeerConnTTL(fsm, conn); err != nil {
215213
fsm.logger.Warn("cannot set TTL",
@@ -285,6 +283,7 @@ func (ocm *outgoingConnManager) run(ch chan<- outgoingConn) {
285283
fsm.lock.Unlock()
286284
b, _ := open.Serialize()
287285

286+
conn.SetWriteDeadline(time.Now().Add(time.Second))
288287
if _, err := conn.Write(b); err != nil {
289288
conn.Close()
290289
continue
@@ -906,6 +905,7 @@ func (h *fsmHandler) active(ctx context.Context) (bgp.FSMState, *fsmStateReason)
906905
fsm.lock.Unlock()
907906

908907
b, _ := m.Serialize()
908+
conn.SetWriteDeadline(time.Now().Add(time.Second))
909909
_, err := conn.Write(b)
910910
if err == nil {
911911
fsm.bgpMessageStateUpdate(m.Header.Type, false)
@@ -914,6 +914,7 @@ func (h *fsmHandler) active(ctx context.Context) (bgp.FSMState, *fsmStateReason)
914914
return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmWriteFailed, nil, []byte(err.Error()))
915915
case result := <-fsm.outgoingConnCh:
916916
b, _ := bgp.NewBGPKeepAliveMessage().Serialize()
917+
result.conn.SetWriteDeadline(time.Now().Add(time.Second))
917918
if _, err := result.conn.Write(b); err != nil {
918919
result.conn.Close()
919920
fsm.logger.Warn("failed to send keepalive on outgoing connection", slog.String("Error", err.Error()))
@@ -1408,6 +1409,7 @@ func (h *fsmHandler) opensent(ctx context.Context) (bgp.FSMState, *fsmStateReaso
14081409
}
14091410

14101411
b, _ := bgp.NewBGPKeepAliveMessage().Serialize()
1412+
fsm.conn.SetWriteDeadline(time.Now().Add(time.Second))
14111413
if _, err := fsm.conn.Write(b); err != nil {
14121414
fsm.conn.Close()
14131415
return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmWriteFailed, nil, nil)
@@ -1452,6 +1454,7 @@ func (h *fsmHandler) opensent(ctx context.Context) (bgp.FSMState, *fsmStateReaso
14521454
}
14531455
}
14541456
b, _ := bgp.NewBGPKeepAliveMessage().Serialize()
1457+
fsm.conn.SetWriteDeadline(time.Now().Add(time.Second))
14551458
if _, err := fsm.conn.Write(b); err != nil {
14561459
fsm.conn.Close()
14571460
fsm.logger.Warn("failed to send keepalive on outgoing connection", slog.String("Error", err.Error()))
@@ -1552,6 +1555,7 @@ func (h *fsmHandler) openconfirm(ctx context.Context) (bgp.FSMState, *fsmStateRe
15521555
m := bgp.NewBGPKeepAliveMessage()
15531556
b, _ := m.Serialize()
15541557
// TODO: check error
1558+
fsm.conn.SetWriteDeadline(time.Now().Add(time.Second))
15551559
fsm.conn.Write(b)
15561560
fsm.bgpMessageStateUpdate(m.Header.Type, false)
15571561
case e := <-recvChan:

0 commit comments

Comments
 (0)