HPMC User Guide v 1.00
© 2022 Bassem W. Jamaleddine


4-1

   Configuring Infiniband for RDMA

The HPMC can be connected to multiple nodes over a switch.

The following OFED packages are required:
ofed-scripts
rdma-core
rdma-core-devel
libibverbs
libibverbs-utils
librdmacm
librdmacm-utils
libibumad
ibacm
iwpmd
srp_daemon
mstflint
ofed-docs
compat-rdma




■ Probe The Infiniband HCA Cards


lspci -nn | grep HCA

03:03 root@HPMC9: /mm03fs/MPI #  lspci -nn | grep HCA
5e:00.0 InfiniBand [0c06]: QLogic Corp. IBA7322 QDR InfiniBand HCA [1077:7322] (rev 02)
5f:00.0 InfiniBand [0c06]: QLogic Corp. IBA7322 QDR InfiniBand HCA [1077:7322] (rev 02)


grep -i 1fc1 /lib/modules/3.10.0-693.el7.x86_64/modules.*



03:07 root@HPMC9: /mm03fs/MPI #  grep -i 1fc1 /lib/modules/3.10.0-693.el7.x86_64/modules.*
/lib/modules/3.10.0-693.el7.x86_64/modules.alias:alias pci:v00001FC1d0000000Dsv*sd*bc*sc*i* ib_ipath
/lib/modules/3.10.0-693.el7.x86_64/modules.alias:alias pci:v00001FC1d00000010sv*sd*bc*sc*i* ib_qib

 

■ Adding the Infiniband Interface to the Network Services


Edit the ifcfg-ib0 configuration file:

vi /etc/sysconfig/network-scripts/ifcfg-ib0

NAME=ib0 
DEVICE=ib0
TYPE=InfiniBand
# to check whether connected mode is active: cat /sys/class/net/ib0/mode
CONNECTED_MODE=no
PROXY_METHOD=none
BOOTPROTO=static
STARTMODE=auto
NM_CONTROLLED=no
# NOTE: to use a 192.168.1.X subnet instead, set for example
#    IPADDR=192.168.1.119 NETWORK=192.168.1.0 BROADCAST=192.168.1.255
#    and make the matching changes in ifcfg-ib0.8002
IPADDR=172.16.0.132
PREFIX=16
NETMASK=255.255.0.0
NETWORK=172.16.0.0
BROADCAST=172.16.255.255
#
ONBOOT=yes
DEFROUTE=yes
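# NOTE: 65520 is the IPoIB connected-mode MTU; with CONNECTED_MODE=no
#    (datagram mode) the interface comes up with a smaller MTU (e.g. 2044)
MTU=65520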
HWADDR=80:00:00:03:FE:80:00:00:00:00:00:00:00:11:75:00:00:6E:EF:26




 

■ Check Infiniband Modules and Services

Check that the Infiniband-related kernel modules are loaded:

lsmod | grep ib

03:12 root@HPMC9: /mm03fs/MPI #  lsmod | grep ib
ib_ucm                 22589  0 
ib_uverbs              64636  16 ib_ucm,rdma_ucm
ib_iser                47813  0 
rdma_cm                54426  3 rpcrdma,ib_iser,rdma_ucm
ib_umad                22080  8 
libiscsi               57233  1 ib_iser
ib_ipoib              110142  0 
scsi_transport_iscsi    99909  2 ib_iser,libiscsi
ib_cm                  47287  3 rdma_cm,ib_ucm,ib_ipoib
ib_qib                362592  4 
rdmavt                 59235  1 ib_qib
ib_core               211874  15 rdma_cm,i40iw,ib_cm,iw_cm,rpcrdma,ib_qib,ib_ucm,rdmavt,ib_iser,ib_umad,ib_uverbs,rdma_ucm,ib_ipoib
dca                    15130  1 ib_qib
libnvdimm             132047  1 nfit
libahci                31992  1 ahci
libata                238896  2 ahci,libahci


Check that the opensm (subnet manager) service is enabled:

systemctl list-unit-files | grep opensm

03:07 root@HPMC9 #  systemctl list-unit-files    | grep open
opensm.service                                enabled 



 

■ Query the Infiniband Connectivity


ibnodes

03:14 root@HPMC9: /mm03fs/MPI #  ibnodes
Ca      : 0x00117500006ef940 ports 1 "HPMC7 qib0"
Ca      : 0x0011750000794fd8 ports 1 "HPMC9 qib1"
Ca      : 0x00117500006eef26 ports 1 "HPMC9 qib0"
Switch  : 0x00066a00e3004e75 ports 36 "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0




ibv_devices

03:18 root@HPMC9: /mm03fs/MPI #  ibv_devices
    device                 node GUID
    ------              ----------------
    qib0                00117500006eef26
    qib1                0011750000794fd8
    i40iw0              002590bc3ab90000
    i40iw1              002590bc3ab80000




ibnodes

03:18 root@HPMC9: /mm03fs/MPI #  ibnodes    
Ca      : 0x00117500006ef940 ports 1 "HPMC7 qib0"
Ca      : 0x0011750000794fd8 ports 1 "HPMC9 qib1"
Ca      : 0x00117500006eef26 ports 1 "HPMC9 qib0"
Switch  : 0x00066a00e3004e75 ports 36 "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0



ibnetdiscover

03:18 root@HPMC9: /mm03fs/MPI #  ibnetdiscover
#
# Topology file: generated on Fri Apr 17 03:18:50 2026
#
# Initiated from node 00117500006eef26 port 00117500006eef26

vendid=0x66a
devid=0x7320
sysimgguid=0x66a00e3004e75
switchguid=0x66a00e3004e75(66a00e3004e75)
Switch  36 "S-00066a00e3004e75"         # "QLogic 12200-18 GUID=0x00066a00e3004e75" base port 0 lid 4 lmc 0
[10]    "H-00117500006eef26"[1](117500006eef26)                 # "HPMC9 qib0" lid 2 4xQDR
[11]    "H-00117500006ef940"[1](117500006ef940)                 # "HPMC7 qib0" lid 5 4xDDR
[12]    "H-0011750000794fd8"[1](11750000794fd8)                 # "HPMC9 qib1" lid 1 4xQDR

vendid=0x1175
devid=0x7322
sysimgguid=0x117500006ef940
caguid=0x117500006ef940
Ca      1 "H-00117500006ef940"          # "HPMC7 qib0"
[1](117500006ef940)     "S-00066a00e3004e75"[11]                # lid 5 lmc 0 "QLogic 12200-18 GUID=0x00066a00e3004e75" lid 4 4xDDR

vendid=0x1175
devid=0x7322
sysimgguid=0x117500006eef26
caguid=0x11750000794fd8
Ca      1 "H-0011750000794fd8"          # "HPMC9 qib1"
[1](11750000794fd8)     "S-00066a00e3004e75"[12]                # lid 1 lmc 0 "QLogic 12200-18 GUID=0x00066a00e3004e75" lid 4 4xQDR

vendid=0x1175
devid=0x7322
sysimgguid=0x117500006eef26
caguid=0x117500006eef26
Ca      1 "H-00117500006eef26"          # "HPMC9 qib0"
[1](117500006eef26)     "S-00066a00e3004e75"[10]                # lid 2 lmc 0 "QLogic 12200-18 GUID=0x00066a00e3004e75" lid 4 4xQDR

 

■ Query the Infiniband Devices Info


ibv_devinfo -d qib0 -v

03:18 root@HPMC9: /mm03fs/MPI #  ibv_devinfo -d qib0 -v
hca_id: qib0
        transport:                      InfiniBand (0)
        fw_ver:                         0.0.0
        node_guid:                      0011:7500:006e:ef26
        sys_image_guid:                 0011:7500:006e:ef26
        vendor_id:                      0x1175
        vendor_part_id:                 29474
        hw_ver:                         0x2
        board_id:                       InfiniPath_QLE7340
        phys_port_cnt:                  1
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0x1000
        max_qp:                         16384
        max_qp_wr:                      16383
        device_cap_flags:               0x00003d06
                                        BAD_PKEY_CNTR
                                        BAD_QKEY_CNTR
                                        SHUTDOWN_PORT
                                        PORT_ACTIVE_EVENT
                                        SYS_IMAGE_GUID
                                        RC_RNR_NAK_GEN
                                        SRQ_RESIZE
        max_sge:                        96
        max_sge_rd:                     96
        max_cq:                         131071
        max_cqe:                        196607
        max_mr:                         0
        max_pd:                         65535
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                0
        max_qp_init_rd_atom:            255
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_GLOB (2)
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         0
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                0
        max_mcast_grp:                  16384
        max_mcast_qp_attach:            16
        max_total_mcast_qp_attach:      262144
        max_ah:                         65535
        max_fmr:                        0
        max_srq:                        1024
        max_srq_wr:                     131071
        max_srq_sge:                    128
        max_pkeys:                      4
        local_ca_ack_delay:             0
        general_odp_caps:
        rc_odp_caps:
                                        NO SUPPORT
        uc_odp_caps:
                                        NO SUPPORT
        ud_odp_caps:
                                        NO SUPPORT
        completion_timestamp_mask not supported
        core clock not supported
        device_cap_flags_ex:            0x0
        tso_caps:
        max_tso:                        0
        rss_caps:
                max_rwq_indirection_tables:                     0
                max_rwq_indirection_table_size:                 0
                rx_hash_function:                               0x0
                rx_hash_fields_mask:                            0x0
        max_wq_type_rq:                 0
        packet_pacing_caps:
                qp_rate_limit_min:      0kbps
                qp_rate_limit_max:      0kbps
        tag matching not supported
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                4096 (5)
                        active_mtu:             2048 (4)
                        sm_lid:                 2
                        port_lid:               2
                        port_lmc:               0x00
                        link_layer:             InfiniBand
                        max_msg_sz:             0x80000000
                        port_cap_flags:         0x0761086a
                        max_vl_num:             2 (2)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           4
                        gid_tbl_len:            5
                        subnet_timeout:         18
                        init_type_reply:        0
                        active_width:           4X (2)
                        active_speed:           10.0 Gbps (4)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:0011:7500:006e:ef26


ibv_devinfo -d qib1 -v

03:22 root@HPMC9: /mm03fs/MPI #  ibv_devinfo -d qib1 -v
hca_id: qib1
        transport:                      InfiniBand (0)
        fw_ver:                         0.0.0
        node_guid:                      0011:7500:0079:4fd8
        sys_image_guid:                 0011:7500:006e:ef26
        vendor_id:                      0x1175
        vendor_part_id:                 29474
        hw_ver:                         0x2
        board_id:                       InfiniPath_QLE7340
        phys_port_cnt:                  1
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0x1000
        max_qp:                         16384
        max_qp_wr:                      16383
        device_cap_flags:               0x00003d06
                                        BAD_PKEY_CNTR
                                        BAD_QKEY_CNTR
                                        SHUTDOWN_PORT
                                        PORT_ACTIVE_EVENT
                                        SYS_IMAGE_GUID
                                        RC_RNR_NAK_GEN
                                        SRQ_RESIZE
        max_sge:                        96
        max_sge_rd:                     96
        max_cq:                         131071
        max_cqe:                        196607
        max_mr:                         0
        max_pd:                         65535
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                0
        max_qp_init_rd_atom:            255
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_GLOB (2)
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         0
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                0
        max_mcast_grp:                  16384
        max_mcast_qp_attach:            16
        max_total_mcast_qp_attach:      262144
        max_ah:                         65535
        max_fmr:                        0
        max_srq:                        1024
        max_srq_wr:                     131071
        max_srq_sge:                    128
        max_pkeys:                      4
        local_ca_ack_delay:             0
        general_odp_caps:
        rc_odp_caps:
                                        NO SUPPORT
        uc_odp_caps:
                                        NO SUPPORT
        ud_odp_caps:
                                        NO SUPPORT
        completion_timestamp_mask not supported
        core clock not supported
        device_cap_flags_ex:            0x0
        tso_caps:
        max_tso:                        0
        rss_caps:
                max_rwq_indirection_tables:                     0
                max_rwq_indirection_table_size:                 0
                rx_hash_function:                               0x0
                rx_hash_fields_mask:                            0x0
        max_wq_type_rq:                 0
        packet_pacing_caps:
                qp_rate_limit_min:      0kbps
                qp_rate_limit_max:      0kbps
        tag matching not supported
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                4096 (5)
                        active_mtu:             2048 (4)
                        sm_lid:                 2
                        port_lid:               1
                        port_lmc:               0x00
                        link_layer:             InfiniBand
                        max_msg_sz:             0x80000000
                        port_cap_flags:         0x07610868
                        max_vl_num:             2 (2)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           4
                        gid_tbl_len:            5
                        subnet_timeout:         18
                        init_type_reply:        0
                        active_width:           4X (2)
                        active_speed:           10.0 Gbps (4)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:0011:7500:0079:4fd8

 

■ Display the Network Interface Parameters


ifconfig

br0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.0.131  netmask 255.255.255.0  broadcast 192.168.0.255
        inet6 fe80::225:90ff:febc:3ab8  prefixlen 64  scopeid 0x20<link>
        ether 00:25:90:bc:3a:b8  txqueuelen 1000  (Ethernet)
        RX packets 717476  bytes 900282208 (858.5 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 371423  bytes 886594089 (845.5 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet6 fe80::225:90ff:febc:3ab8  prefixlen 64  scopeid 0x20<link>
        ether 00:25:90:bc:3a:b8  txqueuelen 1000  (Ethernet)
        RX packets 719099  bytes 910433990 (868.2 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 948123  bytes 924656789 (881.8 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eno2: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 00:25:90:bc:3a:b9  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 172.16.0.132  netmask 255.255.0.0  broadcast 172.16.255.255
        inet6 fe80::211:7500:6e:ef26  prefixlen 64  scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
        infiniband 80:00:00:03:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 65520
        inet 172.16.0.133  netmask 255.255.0.0  broadcast 172.16.255.255
        inet6 fe80::211:7500:79:4fd8  prefixlen 64  scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
        infiniband 80:00:00:03:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 6  bytes 336 (336.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ib0.8002: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet6 fe80::211:7500:6e:ef26  prefixlen 64  scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
        infiniband 80:00:00:07:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ib1.8002: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 65520
        inet 172.16.0.133  netmask 255.255.0.0  broadcast 172.16.255.255
        inet6 fe80::211:7500:79:4fd8  prefixlen 64  scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
        infiniband 80:00:00:07:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1  (Local Loopback)
        RX packets 80871  bytes 864928828 (824.8 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 80871  bytes 864928828 (824.8 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

mic0: flags=67<UP,BROADCAST,RUNNING>  mtu 64512
        ether 0a:c9:28:ae:ba:9f  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0