bad net state and not rebootable

From: Randy Bush <randy_at_psg.com>
Date: Fri, 10 Oct 2008 08:29:54 -0700
in the last few days, i have had strange hangs on two servers, where they
were pingable and i could log in on the console port, but there was no
response to ssh and other net services.  they happily ran arbitrary
programs.  but they hung in reboot and required a power-bar whack.

now it is a third server and one where i have no power-bar.

FreeBSD srv0.iad.rg.net 8.0-CURRENT FreeBSD 8.0-CURRENT #19: Mon Sep 15
01:23:26 UTC 2008     root_at_srv0.iad.rg.net:/usr/obj/usr/src/sys/SRV0  i386

rmac.psg.com:/Users/randy/config> ping srv0
PING srv0.iad.rg.net (198.180.150.1): 56 data bytes
64 bytes from 198.180.150.1: icmp_seq=0 ttl=51 time=136.989 ms
64 bytes from 198.180.150.1: icmp_seq=1 ttl=51 time=145.055 ms
64 bytes from 198.180.150.1: icmp_seq=2 ttl=51 time=144.968 ms
64 bytes from 198.180.150.1: icmp_seq=3 ttl=51 time=144.957 ms

ifconfig looks ok

srv0.iad.rg.net:/root# ifconfig bge0
bge0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 1500
        options=9b<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,VLAN_HWCSUM>
        ether 00:30:48:82:11:a2
        inet 198.180.150.1 netmask 0xffffff80 broadcast 198.180.150.127
        inet6 fe80::230:48ff:fe82:11a2%bge0 prefixlen 64 scopeid 0x1
        inet 174.128.32.42 netmask 0xfffff000 broadcast 174.128.47.255
        inet6 2001:418:8006::42 prefixlen 64
        media: Ethernet autoselect (1000baseT <full-duplex>)
        status: active

srv0.iad.rg.net:/root# netstat -in
Name    Mtu Network       Address              Ipkts Ierrs    Opkts
Oerrs  Coll
bge0   1500 <Link#1>      00:30:48:82:11:a2 22158026     0 23134038
0     0
bge0   1500 198.180.150.0 198.180.150.1      9420649     -  9338420
-     -
bge0   1500 fe80:1::230:4 fe80:1::230:48ff:        0     -        1
-     -
bge0   1500 174.128.32.0/ 174.128.32.42          116     -        0
-     -
bge0   1500 2001:418:8006 2001:418:8006::42 10460449     - 14334303
-     -
bge1*  1500 <Link#2>      00:30:48:82:11:a3        0     0        0
0     0
lo0   16384 <Link#3>                         1492574     0  1492574
0     0
lo0   16384 127.0.0.0/8   127.0.0.1           915886     -   915886
-     -
lo0   16384 ::1/128       ::1                   2556     -     2567
-     -
lo0   16384 fe80:3::1/64  fe80:3::1                0     -        0
-     -

ping looks normal

srv0.iad.rg.net:/root# ping 147.28.0.39
PING 147.28.0.39 (147.28.0.39): 56 data bytes
64 bytes from 147.28.0.39: icmp_seq=0 ttl=56 time=76.845 ms
^C
--- 147.28.0.39 ping statistics ---
1 packets transmitted, 1 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 76.845/76.845/76.845/0.000 ms

until you realize that i waited two minutes before hitting ^C

srv0.iad.rg.net:/root# df
Filesystem         1024-blocks    Used    Avail Capacity  Mounted on
/dev/mirror/gm0s1a      507630  258478   208542    55%    /
devfs                        1       1        0   100%    /dev
/dev/mirror/gm0s1d       63214     152    58006     0%    /root
/dev/mirror/gm0s1g    71055144 5565276 59805458     9%    /usr
/dev/mirror/gm0s1e     1012974  497442   434496    53%    /var
/dev/mirror/gm0s1f     1012974   18596   913342     2%    /var/spool
procfs                       4       4        0   100%    /proc
/dev/md0                126702      34   116532     0%    /tmp

srv0.iad.rg.net:/root# gmirror status
      Name    Status  Components
mirror/gm0  COMPLETE  ad4
                      ad6

srv0.iad.rg.net:/root# /bin/ps -auxww
USER        PID %CPU %MEM   VSZ   RSS  TT  STAT STARTED      TIME COMMAND
nagios    78154  0.3  0.4  6816  3932  ??  I    12:04AM   0:00.00
/usr/local/bin/nagios -d /usr/local/etc/nagios/nagios.cfg
nagios    78155  0.3  0.2  3552  1616  ??  I    12:04AM   0:00.00 sh -c
/usr/local/libexec/nagios/check_ssh 210.138.216.50
nagios    78156  0.3  0.1  4228  1324  ??  I    12:04AM   0:00.00
/usr/local/libexec/nagios/check_ssh 210.138.216.50
smokeping 64574  0.1  1.3 15944 13264  ??  I    15Sep08   3:57.48
/usr/local/bin/smokeping [FPing] (perl)
root          0  0.0  0.0     0    48  ??  DLs  15Sep08   0:02.80 [kernel]
root          1  0.0  0.0  1888   328  ??  ILs  15Sep08   1:09.48
/sbin/init --
root          2  0.0  0.0     0     8  ??  DL   15Sep08   0:49.11 [g_event]
root          3  0.0  0.0     0     8  ??  DL   15Sep08   6:13.69 [g_up]
root          4  0.0  0.0     0     8  ??  DL   15Sep08   6:03.08 [g_down]
root          5  0.0  0.0     0     8  ??  DL   15Sep08   0:01.78
[pagedaemon]
root          6  0.0  0.0     0     8  ??  DL   15Sep08   0:00.00 [vmdaemon]
root          7  0.0  0.0     0     8  ??  DL   15Sep08   0:00.02 [pagezero]
root          8  0.0  0.0     0     8  ??  DL   15Sep08   0:08.11
[bufdaemon]
root          9  0.0  0.0     0     8  ??  DL   15Sep08  38:55.53 [syncer]
root         10  0.0  0.0     0     8  ??  DL   15Sep08   0:00.00 [audit]
root         11  0.0  0.0     0     8  ??  RL   15Sep08 34612:02.65 [idle]
root         12  0.0  0.0     0   112  ??  WL   15Sep08  31:11.27 [intr]
root         13  0.0  0.0     0     8  ??  DL   15Sep08   1:25.55 [yarrow]
root         14  0.0  0.0     0     8  ??  DL   15Sep08   0:08.97 [vnlru]
root         15  0.0  0.0     0     8  ??  DL   15Sep08   1:29.57
[softdepflush]
root         16  0.0  0.0     0     8  ??  DL   15Sep08   1:49.61
[g_mirror gm0]
root        553  0.0  0.0  1888   396  ??  Is   15Sep08   0:00.13 /sbin/devd
root        712  0.0  0.0     0     8  ??  DL   15Sep08   0:01.01 [md0]
root        887  0.0  0.2  4432  2044  ??  I    15Sep08   0:00.55
/usr/local/sbin/smartd -p /var/run/smartd.pid -c /usr/local/etc/smartd.conf
www         941  0.0  0.6  9984  6180  ??  I    25Sep08   0:25.62
/usr/local/sbin/httpd -DNOHTTPACCEPT
root        945  0.0  0.3  4796  2608  ??  Is   15Sep08   1:12.80
/usr/sbin/ntpd -c /etc/ntp.conf -g -p /var/run/ntpd.pid -f
/var/db/ntpd.drift
mailnull   1022  0.0  0.3  8244  3204  ??  Is   15Sep08   0:19.31
/usr/local/sbin/exim -bd -q1m (exim-4.69-0)
root       1047  0.0  0.3  4236  2804  ??  Is   15Sep08   1:33.09
/usr/sbin/dhcpd -cf /etc/dhcpd.conf
root       1085  0.0  0.3  5232  2956  ??  Is   15Sep08   0:05.31
/usr/sbin/sshd
root       1095  0.0  0.1  3300  1404  ??  Is   15Sep08   0:03.33
/usr/sbin/cron -s
root       1163  0.0  0.1  3272  1288  ??  I    15Sep08   0:00.00
/usr/libexec/getty std.9600 ttyu1
www        2139  0.0  0.6  9984  6240  ??  I    25Sep08   0:26.15
/usr/local/sbin/httpd -DNOHTTPACCEPT
unbound   27209  0.0  0.8 12888  8612  ??  Is   Sat12AM   0:20.46
/usr/local/sbin/unbound
www       40079  0.0  0.6  9984  6208  ??  I    30Sep08   0:14.57
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       40296  0.0  0.6  9984  6200  ??  I    30Sep08   0:13.84
/usr/local/sbin/httpd -DNOHTTPACCEPT
root      41167  0.0  0.5  9980  5520  ??  Is   15Sep08   0:45.77
/usr/local/sbin/httpd -DNOHTTPACCEPT
smokeping 43819  0.0  1.6 20388 16216  ??  Is   Wed11PM   0:00.01
/usr/local/bin/smokeping (perl)
smokeping 43820  0.0  1.6 20416 16240  ??  I    Wed11PM   0:11.41
/usr/local/bin/smokeping [FPing] (perl)
root      55477  0.0  0.1  3272  1404  ??  Is   18Sep08   0:08.62
/usr/sbin/syslogd -s
smokeping 64573  0.0  1.3 15912 13188  ??  Is   15Sep08   0:00.01
/usr/local/bin/smokeping (perl)
root      77052  0.0  0.1  3428  1504  ??  Is   12:00AM   0:00.01
/usr/local/sbin/sshguard
smokeping 77944  0.0  0.1  3212  1296  ??  I    12:04AM   0:00.03
/usr/local/sbin/fping -C 20 -q -B1 -r1 -i10 r0 iphone1 afnog.org
work0.psg.com soek0 srv0 psg1 iphone0 rip psg0 raid1 drinx.linx.net r1
psg bbgp psg2 ver0 ran rip1.psg.com
root      78307  0.0  0.4  8244  3732  ??  Is   12:04AM   0:00.02
/usr/local/sbin/exim -Mc 1Ko5UP-000KMv-OH (exim-4.69-0)
mailnull  78309  0.0  0.4  8244  3740  ??  I    12:04AM   0:00.00
/usr/local/sbin/exim -Mc 1Ko5UP-000KMv-OH (exim-4.69-0)
root      78313  0.0  0.4  8244  3732  ??  Is   12:04AM   0:00.02
/usr/local/sbin/exim -Mc 1Ko5UP-000KN0-Sj (exim-4.69-0)
mailnull  78314  0.0  0.4  8244  3744  ??  I    12:04AM   0:00.00
/usr/local/sbin/exim -Mc 1Ko5UP-000KN0-Sj (exim-4.69-0)
root      78333  0.0  0.4  8244  3732  ??  Is   12:04AM   0:00.02
/usr/local/sbin/exim -Mc 1Ko5UR-000KNP-5u (exim-4.69-0)
mailnull  78334  0.0  0.4  8244  3740  ??  I    12:04AM   0:00.00
/usr/local/sbin/exim -Mc 1Ko5UR-000KNP-5u (exim-4.69-0)
root      78347  0.0  0.4  8280  3752  ??  Is   12:04AM   0:00.02
/usr/local/sbin/exim -Mc 1Ko5UU-000KNe-0s (exim-4.69-0)
mailnull  78348  0.0  0.4  8280  3764  ??  I    12:04AM   0:00.00
/usr/local/sbin/exim -Mc 1Ko5UU-000KNe-0s (exim-4.69-0)
root      78371  0.0  0.3  6576  3148  ??  Is    8:37AM   0:00.05 sshd:
[accepted] (sshd)
sshd      78372  0.0  0.3  6576  3168  ??  I     8:38AM   0:00.01 sshd:
[net] (sshd)
root      78391  0.0  0.3  6576  3148  ??  Is    3:02PM   0:00.05 sshd:
[accepted] (sshd)
sshd      78392  0.0  0.3  6576  3160  ??  I     3:02PM   0:00.01 sshd:
[net] (sshd)
nagios    79837  0.0  0.4  6816  3920  ??  Is   23Sep08 1023:38.11
/usr/local/bin/nagios -d /usr/local/etc/nagios/nagios.cfg
www       86137  0.0  1.3 18464 13524  ??  Is   Thu10AM   0:00.44
/usr/local/bin/speedy_backend /usr/local/smokeping/htdocs/smokeping.cgi
target=Westin
www       86138  0.0  3.8 43960 39536  ??  I    Thu10AM   2:14.61
/usr/local/bin/speedy_backend /usr/local/smokeping/htdocs/smokeping.cgi
target=Westin
www       87153  0.0  0.6  9984  6196  ??  I    25Sep08   0:27.22
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       90538  0.0  0.6  9984  6188  ??  I    25Sep08   0:25.02
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       92815  0.0  0.6  9984  6200  ??  I    25Sep08   0:25.94
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       95051  0.0  0.6  9984  6200  ??  I    25Sep08   0:25.50
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       96166  0.0  0.6  9984  6188  ??  I    25Sep08   0:24.96
/usr/local/sbin/httpd -DNOHTTPACCEPT
www       97331  0.0  0.6  9984  6212  ??  I    25Sep08   0:24.87
/usr/local/sbin/httpd -DNOHTTPACCEPT
root       1154  0.0  0.1  3272  1292  v0  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv0
root       1155  0.0  0.1  3272  1292  v1  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv1
root       1156  0.0  0.1  3272  1292  v2  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv2
root       1157  0.0  0.1  3272  1292  v3  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv3
root       1158  0.0  0.1  3272  1292  v4  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv4
root       1159  0.0  0.1  3272  1292  v5  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv5
root       1160  0.0  0.1  3272  1292  v6  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv6
root       1161  0.0  0.1  3272  1292  v7  Is+  15Sep08   0:00.00
/usr/libexec/getty Pc ttyv7
tacacs      852  0.0  0.1  3472  1460  u0- I    15Sep08   0:19.88
/usr/local/bin/tac_plus -C /usr/local/etc/tac_plus.conf
root       1162  0.0  0.2  3720  1704  u0  Is   15Sep08   0:00.16 login
[pam] (login)
root      78396  0.0  0.2  4484  2304  u0  S     3:03PM   0:00.04 -bash
(bash)
root      78433  0.0  0.1  3348  1436  u0  R+    3:15PM   0:00.00
/bin/ps -auxww

srv0.iad.rg.net:/root# netstat -nf inet
Active Internet connections
Proto Recv-Q Send-Q  Local Address          Foreign Address       (state)
tcp4      63      0 198.180.150.1.443      209.20.186.192.59658
ESTABLISHED
tcp4     178      0 198.180.150.1.443      209.20.186.192.59655   CLOSE_WAIT
tcp4     549      0 198.180.150.1.443      209.20.186.192.59654
ESTABLISHED
tcp4       0      0 198.180.150.1.49       206.223.132.86.12837   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12835   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12834   TIME_WAIT
tcp4       0      0 198.180.150.1.22       209.20.186.192.59606   CLOSE_WAIT
tcp4       0      0 198.180.150.1.*        209.20.186.192.59602   CLOSED
tcp4     469      0 198.180.150.1.*        209.20.186.192.59601   CLOSED
tcp4     549      0 198.180.150.1.*        209.20.186.192.59600   CLOSED
tcp4     458      0 198.180.150.1.*        209.20.186.192.59599   CLOSED
tcp4       0      0 198.180.150.1.49       206.223.132.86.12833   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12832   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12831   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12830   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12829   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12828   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12827   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12826   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12825   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12824   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12823   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12822   TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.40309  TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.40306  TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.40226  TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.40016  TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.39811  TIME_WAIT
tcp4       0      0 198.180.150.1.49       198.180.150.121.39767  TIME_WAIT
tcp4       0      0 198.180.150.1.22       218.25.54.152.51578    CLOSE_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12821   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12820   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12819   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12818   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12817   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12816   TIME_WAIT
tcp4       0      0 198.180.150.1.*        221.192.199.36.2106    CLOSED
tcp4       0      0 198.180.150.1.49       206.223.132.86.12815   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12814   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12813   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12812   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12811   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12810   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12809   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12808   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12807   TIME_WAIT
tcp4       0      0 198.180.150.1.49       206.223.132.86.12806   TIME_WAIT
tcp4       0      0 198.180.150.1.*        209.20.186.192.56192   CLOSED
tcp4       0      0 198.180.150.1.49       206.223.132.86.12805   TIME_WAIT
tcp4      37      0 198.180.150.1.*        209.20.186.192.56179   CLOSED
tcp4       0      0 198.180.150.1.49       206.223.132.86.12804   TIME_WAIT
tcp4       0      0 198.180.150.1.*        209.20.186.192.56084   CLOSED
tcp4     517      0 198.180.150.1.*        209.20.186.192.56039   CLOSED
tcp4       0      0 198.180.150.1.45052    147.28.0.62.25         SYN_SENT
tcp4       0      0 198.180.150.1.60087    147.28.0.62.25         SYN_SENT
tcp4       0      0 198.180.150.1.37996    147.28.0.62.25         SYN_SENT
tcp4       0      0 198.180.150.1.25993    147.28.0.62.25         SYN_SENT
tcp4       0      0 198.180.150.1.58187    210.138.216.50.22      SYN_SENT
udp4       0      0 127.0.0.1.11458        127.0.0.1.53
udp4       0      0 127.0.0.1.60611        127.0.0.1.53
udp4       0      0 127.0.0.1.26925        127.0.0.1.53
udp4       0      0 127.0.0.1.52800        127.0.0.1.53
udp4       0      0 127.0.0.1.57827        127.0.0.1.53
udp4       0      0 127.0.0.1.29704        127.0.0.1.53
udp4       0      0 127.0.0.1.49628        127.0.0.1.53
udp4       0      0 127.0.0.1.46057        127.0.0.1.53
udp4       0      0 127.0.0.1.56978        127.0.0.1.53
udp4       0      0 127.0.0.1.58174        127.0.0.1.53
udp4       0      0 127.0.0.1.30269        127.0.0.1.53
udp4       0      0 127.0.0.1.45296        127.0.0.1.53
udp4       0      0 127.0.0.1.123          *.*
udp4       0      0 174.128.32.42.123      *.*
udp4       0      0 198.180.150.1.123      *.*

206.223.132.86 is the local oob cisco, and 49 is tacacs.  so was someone
trying an ssh attack on the local oob server (the one i am using to get
to the console port)?

no lsof installed and can't fetch it now.

any ideas before i try to reboot?

randy
Received on Fri Oct 10 2008 - 13:29:55 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:39:36 UTC