Page MenuHome Accel-ppp

падение accel-pppd 1.12.0-72-ged7b287 при одновременном поднятии 2.5к ipoe пользователей
Closed, ResolvedPublicBUG

Assigned To
Authored By
triar
Sep 1 2020, 18:12
Referenced Files
F2960: auth-fail.log-5-sec-tail
Sep 2 2020, 04:47
F2959: accel-ppp.log-5-sec-tail
Sep 2 2020, 04:47
F2962: emerg.log
Sep 2 2020, 04:47
F2961: debug.log-5-sec-tail
Sep 2 2020, 04:47
F2958: accel-ppp.conf
Sep 2 2020, 04:38
F2953: debug.log-5sec-tail
Sep 1 2020, 20:41
F2952: accel-ppp.log-5sec-tail
Sep 1 2020, 20:41
F2950: accel-ppp.log-5sec-tail
Sep 1 2020, 18:31

Description

при старте (одновременном поднятии сессий) получаем сильный рост нагрузки и падение accel-pppd
сервер
E5-2650 v0 - 2 шт. (16 ядер всего, HT отключен), 32G Ram
карточка intel 82599ES

accel-ppp и shared вланы
accel-ppp version 1.12.0-72-ged7b287
bird версии 2 (fullview присутствует)
ipv6 + ipv6-pd
NAT отсутствует
conntrack в ядре отключен
каждый клиент получает реальный IP
kernel 5.4.38-gentoo
ixgbe 5.7.1

в GRUB в параметры ядра добавлено:

# Append parameters to the linux kernel command line

GRUB_CMDLINE_LINUX="mitigations=off"

ниже backtrace

Type "apropos word" to search for commands related to "word"...
Reading symbols from /usr/local/accel-ppp-09-05-2020-debug/sbin/accel-pppd...
[New LWP 4985]
[New LWP 4972]
[New LWP 4978]
[New LWP 4979]
[New LWP 4981]
[New LWP 4982]
[New LWP 4983]
[New LWP 4984]
[New LWP 4986]
[New LWP 4987]
[New LWP 4988]
[New LWP 4989]
[New LWP 4990]
[New LWP 4991]
[New LWP 4992]
[New LWP 4993]
[New LWP 4994]
[New LWP 4995]
[New LWP 4996]
[New LWP 4997]
[New LWP 4998]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `/usr/local/accel-ppp-09-05-2020-debug/sbin/accel-pppd -c /etc/accel-ppp.conf -p'.
Program terminated with signal SIGABRT, Aborted.
#0  0x00007ffff7a46731 in raise () from /lib64/libc.so.6
[Current thread is 1 (Thread 0x7ffff63e5700 (LWP 4985))]
(gdb) bt full
#0  0x00007ffff7a46731 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007ffff7a3055b in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x00007ffff78252c6 in find_pd (ses=0x7fffe00ec9b0) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/radius/radius.c:751
        pd = 0x7fffe00eca80
        rpd = 0x1
#3  0x00007ffff7824446 in get_ipv6 (ses=0x7fffe00ec9b0) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/radius/radius.c:482
        rpd = 0x54
#4  0x0000555555580fde in ipdb_get_ipv6 (ses=0x7fffe00ec9b0) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/ipdb.c:40
        ipdb = 0x7ffff782c540 <ipdb>
        it = 0x7fffb8184ff8
#5  0x00007ffff79b93c2 in __ipoe_session_activate (ses=0x7fffe00ec8c8) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/ctrl/ipoe/ipoe.c:993
        addr = 0
        gw = 0
        serv = 0x5555556025e8
#6  0x00007ffff7fc5c75 in ctx_thread (ctx=0x7fffe00ecc08) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/triton/triton.c:272
        h = 0x8de16cc641645400
        t = 0x7fffc410fc78
        call = 0x7fffd40357c8
        tt = 1
        events = 32767
#7  0x00007ffff7fc5960 in triton_thread (thread=0x555555628228) at /usr/src/accel-ppp-code-09-05-2020-DEBUG/accel-pppd/triton/triton.c:192
        set = {__val = {516, 0 <repeats 15 times>}}
        sig = 10
        need_free = 0
        stack = 0x0
#8  0x00007ffff7f93ea7 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#9  0x00007ffff7b0721f in clone () from /lib64/libc.so.6
No symbol table info available.

Details

Protocol
IPoE
Version
при одновременном поднятии 2.5к ipoe пользователей

Event Timeline

[ipoe]
ipv6=1
verbose=5
username=lua:username
lua-file=/etc/accel-ppp.lua
password=empty
lease-time=21600
renew-time=10800
max-lease-time=43200
soft-terminate=0

gw-ip-address=gateway-ip-address-spec
gw-ip-address=gateway-ip-address-spec
....
gw-ip-address=gateway-ip-address-spec

local-net=local-net-spec
local-net=local-net-spec
.....
local-net=local-net-spec

unit-cache=2000

mode=L2
ip-unnumbered=1

idle-timeout=600

vlan-timeout=300
vlan-name=%I.%N

vlan-mon=re:b0\.[0-9]+,100-4090

# moving networks from SRV-OLD-2020

interface=re:b0\.1234\.517,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1268\.825,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1267\.824,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1266\.502,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1235\.611,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1237\.534,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1239\.524,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1243\.512,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1240\.826,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1241\.610,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1238\.506,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1242\.213,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1249\.533,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1248\.515,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1246\.532,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1255\.504,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1256\.531,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1257\.528,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1258\.529,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1259\.530,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1245\.821,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1
interface=re:b0\.1244\.203,start=up,start=dhcpv4,ifcfg=1,shared=1,proxy-arp=1

attr-l4-redirect=L4-Redirect
attr-l4-redirect-ipset=L4-Redirect-ipset

[radius]
dictionary=/usr/local/accel-ppp-09-05-2020/share/accel-ppp/radius/dictionary
nas-identifier=new-accel-ppp
nas-ip-address=a.a.a.a
gw-ip-address=a.a.a.a
server=b.b.b.b,asdfuohladfhjsidfuyksdhfisduf1,auth-port=1812,acct-port=1813,req-limit=150,fail-timeout=0,max-fail=50,weight=1
dae-server=a.a.a.a:3799,asdfuohladfhjsidfuyksdhfisduf1
verbose=100
timeout=1
max-try=5
acct-interim-interval=300
acct-timeout=0
acct-on=1
attr-tunnel-type=NAS-Identifier

This comment was removed by triar.

снова падение

accel-ppp version 1.12.0-92-g38b6104

(gdb) bt full
#0 0x00007ffff7a46731 in raise () from /lib64/libc.so.6
No symbol table info available.
#1 0x00007ffff7a3055b in abort () from /lib64/libc.so.6
No symbol table info available.
#2 0x00007ffff7823460 in find_pd (ses=0x7fffec1dd3e0) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/radius/radius.c:761

pd = 0x7fffec1dd4b0
rpd = 0x7ffff66e5c00

#3 0x00007ffff78225e0 in get_ipv6 (ses=0x7fffec1dd3e0) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/radius/radius.c:492

rpd = 0x1f79b5de3

#4 0x0000555555581086 in ipdb_get_ipv6 (ses=0x7fffec1dd3e0) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/ipdb.c:40

ipdb = 0x7ffff782b540 <ipdb>
it = 0x900000001

#5 0x00007ffff79b8424 in __ipoe_session_activate (ses=0x7fffec1dd2f8) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/ctrl/ipoe/ipoe.c:1004

addr = 0
gw = 0
serv = 0x555555605a68

#6 0x00007ffff7fc5c80 in ctx_thread (ctx=0x7fffe83858c8) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/triton/triton.c:273

h = 0x3d665100b3c78700
t = 0x7fffd03e7388
call = 0x7fffc841fa68
tt = 1
events = 32767

#7 0x00007ffff7fc5960 in triton_thread (thread=0x555555627b58) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/triton/triton.c:192

set = {__val = {516, 0 <repeats 15 times>}}
sig = 10
need_free = 0
stack = 0x0

#8 0x00007ffff7f93ea7 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#9 0x00007ffff7b0721f in clone () from /lib64/libc.so.6
No symbol table info available.

triar triaged this task as High priority.Sep 2 2020, 05:08

ошибка повторилась до применения патча

бэктрейс

Thread 9 "accel-pppd" received signal SIGABRT, Aborted.
[Switching to Thread 0x7ffff62fa700 (LWP 5077)]
0x00007ffff7a46731 in raise () from /lib64/libc.so.6
(gdb) bt full
#0 0x00007ffff7a46731 in raise () from /lib64/libc.so.6
No symbol table info available.
#1 0x00007ffff7a3055b in abort () from /lib64/libc.so.6
No symbol table info available.
#2 0x00007ffff7823460 in find_pd (ses=0x7fffd41c4770) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/radius/radius.c:761

pd = 0x7fffd41c4840
rpd = 0x5555555d3228

#3 0x00007ffff7822a04 in ses_acct_start (ses=0x7fffd41c4770) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/radius/radius.c:572

rpd = 0x7ffff7fd0090

#4 0x00007ffff7fc9a28 in triton_event_fire (ev_id=10, arg=0x7fffd41c4770) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/triton/event.c:103

ev = 0x5555555ec560
h = 0x5555555ec580

#5 0x000055555555d322 in ap_session_ifup (ses=0x7fffd41c4770) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/ifcfg.c:59
No locals.
#6 0x000055555555bc63 in ap_session_activate (ses=0x7fffd41c4770) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/session.c:145
No locals.
#7 0x00007ffff79b84ea in __ipoe_session_activate (ses=0x7fffd41c4688) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/ctrl/ipoe/ipoe.c:1018

addr = 0
gw = 0
serv = 0x5555555e8718

#8 0x00007ffff7fc5c80 in ctx_thread (ctx=0x7fffec154868) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/triton/triton.c:273

h = 0x1f1a066759fa000
t = 0x7fffec10e808
call = 0x7fffc41e7f18
tt = 1
events = 32767

#9 0x00007ffff7fc5960 in triton_thread (thread=0x5555556266b8) at /usr/src/accel-ppp-code-20-08-2020-DEBUG/accel-pppd/triton/triton.c:192

set = {__val = {516, 0 <repeats 15 times>}}
sig = 10
need_free = 0
stack = 0x0

#10 0x00007ffff7f93ea7 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#11 0x00007ffff7b0721f in clone () from /lib64/libc.so.6
No symbol table info available.

Dimka88 changed the task status from Open to Needs testing.Sep 6 2020, 07:07
Dimka88 claimed this task.