HPMC User Guide v 1.00
© 2022 Bassem W. Jamaleddine
The HPMC can be connected to multiple nodes over a switch.
The following OFED packages are required:
ofed-scripts rdma-core rdma-core-devel libibverbs libibverbs-utils librdmacm librdmacm-utils libibumad ibacm iwpmd srp_daemon mstflint ofed-docs compat-rdma
■ Probe The Infiniband HCA Cards
# lspci -nn | grep HCA
03:03 root@HPMC9: /mm03fs/MPI # lspci -nn | grep HCA 5e:00.0 InfiniBand [0c06]: QLogic Corp. IBA7322 QDR InfiniBand HCA [1077:7322] (rev 02) 5f:00.0 InfiniBand [0c06]: QLogic Corp. IBA7322 QDR InfiniBand HCA [1077:7322] (rev 02)
# grep -i 1fc1 /lib/modules/3.10.0-693.el7.x86_64/modules.*
03:07 root@HPMC9: /mm03fs/MPI # grep -i 1fc1 /lib/modules/3.10.0-693.el7.x86_64/modules.* /lib/modules/3.10.0-693.el7.x86_64/modules.alias:alias pci:v00001FC1d0000000Dsv*sd*bc*sc*i* ib_ipath /lib/modules/3.10.0-693.el7.x86_64/modules.alias:alias pci:v00001FC1d00000010sv*sd*bc*sc*i* ib_qib
■ Adding the Infiniband to the Network Services
Edit the ifcfg-ib0 file:
# vi /etc/sysconfig/network-scripts/ifcfg-ib0
NAME=ib0 DEVICE=ib0 TYPE=InfiniBand # to check if connected, cat /sys/class/net/ib0/mode CONNECTED_MODE=no PROXY_METHOD=none BOOTPROTO=static STARTMODE=auto NM_CONTROLLED=no # NOTE if subnet then possible with 192.168.1.X as # IPADDR=192.168.1.119 NETWORK=192.168.1.0 BROADCAST=192.168.1.255 # also make changes in ifcfg-ib0.8002 IPADDR=172.16.0.132 PREFIX=16 NETMASK=255.255.0.0 NETWORK=172.16.0.0 BROADCAST=172.16.255.255 # ONBOOT=yes DEFROUTE=yes MTU=65520 HWADDR=80:00:00:03:FE:80:00:00:00:00:00:00:00:11:75:00:00:6E:EF:26
■ Check Infiniband Modules and Services
Check the Infiniband related modules:
# lsmod | grep ib
03:12 root@HPMC9: /mm03fs/MPI # lsmod | grep ib ib_ucm 22589 0 ib_uverbs 64636 16 ib_ucm,rdma_ucm ib_iser 47813 0 rdma_cm 54426 3 rpcrdma,ib_iser,rdma_ucm ib_umad 22080 8 libiscsi 57233 1 ib_iser ib_ipoib 110142 0 scsi_transport_iscsi 99909 2 ib_iser,libiscsi ib_cm 47287 3 rdma_cm,ib_ucm,ib_ipoib ib_qib 362592 4 rdmavt 59235 1 ib_qib ib_core 211874 15 rdma_cm,i40iw,ib_cm,iw_cm,rpcrdma,ib_qib,ib_ucm,rdmavt,ib_iser,ib_umad,ib_uverbs,rdma_ucm,ib_ipoib dca 15130 1 ib_qib libnvdimm 132047 1 nfit libahci 31992 1 ahci libata 238896 2 ahci,libahci
Check that the opensm service is enabled:
# systemctl list-unit-files | grep opensm
03:07 root@HPMC9 # systemctl list-unit-files | grep open opensm.service enabled
■ Query the Infiniband Connectivity
# ibnodes
03:14 root@HPMC9: /mm03fs/MPI # ibnodes Ca : 0x00117500006ef940 ports 1 "HPMC7 qib0" Ca : 0x0011750000794fd8 ports 1 "HPMC9 qib1" Ca : 0x00117500006eef26 ports 1 "HPMC9 qib0" Switch : 0x00066a00e3004e75 ports 36 "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0
# ibv_devices
03:18 root@HPMC9: /mm03fs/MPI # ibv_devices
device node GUID
------ ----------------
qib0 00117500006eef26
qib1 0011750000794fd8
i40iw0 002590bc3ab90000
i40iw1 002590bc3ab80000
# ibnodes
03:18 root@HPMC9: /mm03fs/MPI # ibnodes Ca : 0x00117500006ef940 ports 1 "HPMC7 qib0" Ca : 0x0011750000794fd8 ports 1 "HPMC9 qib1" Ca : 0x00117500006eef26 ports 1 "HPMC9 qib0" Switch : 0x00066a00e3004e75 ports 36 "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0
# ibnetdiscover
03:18 root@HPMC9: /mm03fs/MPI # ibnetdiscover # # Topology file: generated on Fri Apr 17 03:18:50 2026 # # Initiated from node 00117500006eef26 port 00117500006eef26 vendid=0x66a devid=0x7320 sysimgguid=0x66a00e3004e75 switchguid=0x66a00e3004e75(66a00e3004e75) Switch 36 "S-00066a00e3004e75" # "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0 [10] "H-00117500006eef26"[1](117500006eef26) # "HPMC9 qib0" lid 2 4xQDR [11] "H-00117500006ef940"[1](117500006ef940) # "HPMC7 qib0" lid 5 4xDDR [12] "H-0011750000794fd8"[1](11750000794fd8) # "HPMC9 qib1" lid 1 4xQDR vendid=0x1175 devid=0x7322 sysimgguid=0x117500006ef940 caguid=0x117500006ef940 Ca 1 "H-00117500006ef940" # "HPMC7 qib0" [1](117500006ef940) "S-00066a00e3004e75"[11] # lid 5 lmc 0 "QLogic 12200-18 GUID=0x00066a00e3004e75" lid 4 4xDDR vendid=0x1175 devid=0x7322 sysimgguid=0x117500006eef26 caguid=0x11750000794fd8 Ca 1 "H-0011750000794fd8" # "HPMC9 qib1" [1](11750000794fd8) "S-00066a00e3004e75"[12] # lid 1 lmc 0 "QLogic 12200-18 GUID=0x00066a00e3004e75" lid 4 4xQDR vendid=0x1175 devid=0x7322 sysimgguid=0x117500006eef26 caguid=0x117500006eef26 Ca 1 "H-00117500006eef26" # "HPMC9 qib0" [1](117500006eef26) "S-00066a00e3004e75"[10] #
■ Query the Infiniband Devices Info
# ibv_devinfo -d qib0 -v
03:18 root@HPMC9: /mm03fs/MPI # ibv_devinfo -d qib0 -v
hca_id: qib0
transport: InfiniBand (0)
fw_ver: 0.0.0
node_guid: 0011:7500:006e:ef26
sys_image_guid: 0011:7500:006e:ef26
vendor_id: 0x1175
vendor_part_id: 29474
hw_ver: 0x2
board_id: InfiniPath_QLE7340
phys_port_cnt: 1
max_mr_size: 0xffffffffffffffff
page_size_cap: 0x1000
max_qp: 16384
max_qp_wr: 16383
device_cap_flags: 0x00003d06
BAD_PKEY_CNTR
BAD_QKEY_CNTR
SHUTDOWN_PORT
PORT_ACTIVE_EVENT
SYS_IMAGE_GUID
RC_RNR_NAK_GEN
SRQ_RESIZE
max_sge: 96
max_sge_rd: 96
max_cq: 131071
max_cqe: 196607
max_mr: 0
max_pd: 65535
max_qp_rd_atom: 16
max_ee_rd_atom: 0
max_res_rd_atom: 0
max_qp_init_rd_atom: 255
max_ee_init_rd_atom: 0
atomic_cap: ATOMIC_GLOB (2)
max_ee: 0
max_rdd: 0
max_mw: 0
max_raw_ipv6_qp: 0
max_raw_ethy_qp: 0
max_mcast_grp: 16384
max_mcast_qp_attach: 16
max_total_mcast_qp_attach: 262144
max_ah: 65535
max_fmr: 0
max_srq: 1024
max_srq_wr: 131071
max_srq_sge: 128
max_pkeys: 4
local_ca_ack_delay: 0
general_odp_caps:
rc_odp_caps:
NO SUPPORT
uc_odp_caps:
NO SUPPORT
ud_odp_caps:
NO SUPPORT
completion_timestamp_mask not supported
core clock not supported
device_cap_flags_ex: 0x0
tso_caps:
max_tso: 0
rss_caps:
max_rwq_indirection_tables: 0
max_rwq_indirection_table_size: 0
rx_hash_function: 0x0
rx_hash_fields_mask: 0x0
max_wq_type_rq: 0
packet_pacing_caps:
qp_rate_limit_min: 0kbps
qp_rate_limit_max: 0kbps
tag matching not supported
port: 1
state: PORT_ACTIVE (4)
max_mtu: 4096 (5)
active_mtu: 2048 (4)
sm_lid: 2
port_lid: 2
port_lmc: 0x00
link_layer: InfiniBand
max_msg_sz: 0x80000000
port_cap_flags: 0x0761086a
max_vl_num: 2 (2)
bad_pkey_cntr: 0x0
qkey_viol_cntr: 0x0
sm_sl: 0
pkey_tbl_len: 4
gid_tbl_len: 5
subnet_timeout: 18
init_type_reply: 0
active_width: 4X (2)
active_speed: 10.0 Gbps (4)
phys_state: LINK_UP (5)
GID[ 0]: fe80:0000:0000:0000:0011:7500:006e:ef26
# ibv_devinfo -d qib1 -v
03:22 root@HPMC9: /mm03fs/MPI # ibv_devinfo -d qib1 -v
hca_id: qib1
transport: InfiniBand (0)
fw_ver: 0.0.0
node_guid: 0011:7500:0079:4fd8
sys_image_guid: 0011:7500:006e:ef26
vendor_id: 0x1175
vendor_part_id: 29474
hw_ver: 0x2
board_id: InfiniPath_QLE7340
phys_port_cnt: 1
max_mr_size: 0xffffffffffffffff
page_size_cap: 0x1000
max_qp: 16384
max_qp_wr: 16383
device_cap_flags: 0x00003d06
BAD_PKEY_CNTR
BAD_QKEY_CNTR
SHUTDOWN_PORT
PORT_ACTIVE_EVENT
SYS_IMAGE_GUID
RC_RNR_NAK_GEN
SRQ_RESIZE
max_sge: 96
max_sge_rd: 96
max_cq: 131071
max_cqe: 196607
max_mr: 0
max_pd: 65535
max_qp_rd_atom: 16
max_ee_rd_atom: 0
max_res_rd_atom: 0
max_qp_init_rd_atom: 255
max_ee_init_rd_atom: 0
atomic_cap: ATOMIC_GLOB (2)
max_ee: 0
max_rdd: 0
max_mw: 0
max_raw_ipv6_qp: 0
max_raw_ethy_qp: 0
max_mcast_grp: 16384
max_mcast_qp_attach: 16
max_total_mcast_qp_attach: 262144
max_ah: 65535
max_fmr: 0
max_srq: 1024
max_srq_wr: 131071
max_srq_sge: 128
max_pkeys: 4
local_ca_ack_delay: 0
general_odp_caps:
rc_odp_caps:
NO SUPPORT
uc_odp_caps:
NO SUPPORT
ud_odp_caps:
NO SUPPORT
completion_timestamp_mask not supported
core clock not supported
device_cap_flags_ex: 0x0
tso_caps:
max_tso: 0
rss_caps:
max_rwq_indirection_tables: 0
max_rwq_indirection_table_size: 0
rx_hash_function: 0x0
rx_hash_fields_mask: 0x0
max_wq_type_rq: 0
packet_pacing_caps:
qp_rate_limit_min: 0kbps
qp_rate_limit_max: 0kbps
tag matching not supported
port: 1
state: PORT_ACTIVE (4)
max_mtu: 4096 (5)
active_mtu: 2048 (4)
sm_lid: 2
port_lid: 1
port_lmc: 0x00
link_layer: InfiniBand
max_msg_sz: 0x80000000
port_cap_flags: 0x07610868
max_vl_num: 2 (2)
bad_pkey_cntr: 0x0
qkey_viol_cntr: 0x0
sm_sl: 0
pkey_tbl_len: 4
gid_tbl_len: 5
subnet_timeout: 18
init_type_reply: 0
active_width: 4X (2)
active_speed: 10.0 Gbps (4)
phys_state: LINK_UP (5)
GID[ 0]: fe80:0000:0000:0000:0011:7500:0079:4fd8
■ Display the Network Interface Parameters
# ifconfig
br0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.0.131 netmask 255.255.255.0 broadcast 192.168.0.255
inet6 fe80::225:90ff:febc:3ab8 prefixlen 64 scopeid 0x20<link>
ether 00:25:90:bc:3a:b8 txqueuelen 1000 (Ethernet)
RX packets 717476 bytes 900282208 (858.5 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 371423 bytes 886594089 (845.5 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet6 fe80::225:90ff:febc:3ab8 prefixlen 64 scopeid 0x20<link>
ether 00:25:90:bc:3a:b8 txqueuelen 1000 (Ethernet)
RX packets 719099 bytes 910433990 (868.2 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 948123 bytes 924656789 (881.8 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eno2: flags=4099<UP,BROADCAST,MULTICAST> mtu 1500
ether 00:25:90:bc:3a:b9 txqueuelen 1000 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 2044
inet 172.16.0.132 netmask 255.255.0.0 broadcast 172.16.255.255
inet6 fe80::211:7500:6e:ef26 prefixlen 64 scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
infiniband 80:00:00:03:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00 txqueuelen 256 (InfiniBand)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 65520
inet 172.16.0.133 netmask 255.255.0.0 broadcast 172.16.255.255
inet6 fe80::211:7500:79:4fd8 prefixlen 64 scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
infiniband 80:00:00:03:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00 txqueuelen 256 (InfiniBand)
RX packets 6 bytes 336 (336.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ib0.8002: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 2044
inet6 fe80::211:7500:6e:ef26 prefixlen 64 scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
infiniband 80:00:00:07:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00 txqueuelen 256 (InfiniBand)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ib1.8002: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 65520
inet 172.16.0.133 netmask 255.255.0.0 broadcast 172.16.255.255
inet6 fe80::211:7500:79:4fd8 prefixlen 64 scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
infiniband 80:00:00:07:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00 txqueuelen 256 (InfiniBand)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1 (Local Loopback)
RX packets 80871 bytes 864928828 (824.8 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 80871 bytes 864928828 (824.8 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
mic0: flags=67<UP,BROADCAST,RUNNING> mtu 64512
ether 0a:c9:28:ae:ba:9f txqueuelen 1000 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0