We are experiencing a very strange phenomenon:
We run fhgfs over a dual-rail InfiniBand connection on our cluster login nodes.
Also we maintain a client on one of the old cluster nodes. When doing chmod on a number of small files, we experience massive differences in metadata operation performance.
On the new node n001 we get a sum of around 200-600 ops/sec; on the old node bighead1 we get 1800-2000 ops/sec. (Basically, the metaserver is
All of the nodes have InfiniBand. The only difference we have found so far is in the installed versions of fhgfs-client and OFED.
Does anyone have an idea how to track down this issue?
Thanks for input,
Michael
Here are the settings:
New Node (Slow one):
[root@n001 fhgfs]# ofed_info
OFED-internal-2.1-1.0.6:
Installed Packages
Name : fhgfs-client
Arch : noarch
Version : 2012.10.r15
#
# --- Section 1: [Settings] ---
#
logLevel = 3
logType = helperd
logClientID = false
logHelperdIP =
connPortShift = 0
connMgmtdPortUDP = 8008
connMgmtdPortTCP = 8008
connClientPortUDP = 8004
connHelperdPortTCP = 8006
connUseSDP = false
connUseRDMA = true
connRDMABufSize = 16384
connRDMABufNum = 128
connRDMATypeOfService = 0
connMaxInternodeNum = 16
connInterfacesFile =
connNetFilterFile =
connNonPrimaryExpiration = 10000
connCommRetrySecs = 0
tuneNumWorkers = 0
tunePreferredMetaFile =
tunePreferredStorageFile =
tuneFileCacheType = buffered
tuneRemoteFSync = false
tuneUseGlobalFileLocks = false
sysCreateHardlinksAsSymlinks = true
sysMountSanityCheckMS = 11000
sysSyncOnClose = false
sysSessionCheckOnClose = false
quotaEnabled = true
Old Node (Fast one):
[mruepp@bighead1 ~]$ ofed_info
MLNX_OFED_LINUX-1.5.3-4.0.42 (OFED-1.5.3-4.0.42):
Name : fhgfs-client
Arch : noarch
Version : 2012.10.r10
#
# --- Section 1: [Settings] ---
#
logLevel = 3
logType = helperd
logClientID = false
logHelperdIP =
connPortShift = 0
connMgmtdPortUDP = 8008
connMgmtdPortTCP = 8008
connClientPortUDP = 8004
connHelperdPortTCP = 8006
connUseSDP = false
connUseRDMA = true
connRDMABufSize = 16384
connRDMABufNum = 128
connRDMATypeOfService = 0
connMaxInternodeNum = 12
connInterfacesFile =
connNetFilterFile =
connNonPrimaryExpiration = 10000
connCommRetrySecs = 0
tuneNumWorkers = 0
tunePreferredMetaFile =
tunePreferredStorageFile =
tuneFileCacheType = buffered
tuneRemoteFSync = false
tuneUseGlobalFileLocks = true
sysMgmtdHost = 10.110.84.20
sysCreateHardlinksAsSymlinks = true
sysMountSanityCheckMS = 11000
sysSyncOnClose = false
sysSessionCheckOnClose = false
Settings of the Metaserver:
# --- Section 1: [Settings] ---
#
logLevel = 3
logNoDate = false
logStdFile = /var/log/fhgfs-meta.log
logNumLines = 50000
logNumRotatedFiles = 5
connPortShift = 0
connMgmtdPortUDP = 8008
connMgmtdPortTCP = 8008
connMetaPortUDP = 8005
connMetaPortTCP = 8005
connUseSDP = false
connUseRDMA = true
connRDMATypeOfService = 0
connBacklogTCP = 128
connMaxInternodeNum = 32
connInterfacesFile =
connNetFilterFile =
connNonPrimaryExpiration = 10000
storeMetaDirectory = /mnt/fhgfsMeta
storeAllowFirstRunInit = false
storeUseExtendedAttribs = true
tuneNumWorkers = 128
tuneBindToNumaZone =
tuneTargetChooser = randomized
tuneRotateMirrorTargets = false
tuneUsePerUserMsgQueues = false
runDaemonized = true