Hi,
I’ve been trialing SLES 12 and HAE.
The setup consists of a 2-node active-passive cluster with NFS.
The problem I have is that the fence_scsi STONITH agent doesn’t seem to work.
I’m getting this error:
pengine[7241]: warning: unpack_rsc_op_failure: Processing failed op start for storage-fence on fs009b: unknown error (1)
and crm status shows:
storage-fence_start_0 on fs009a 'unknown error' (1): call=18, status=Error, last-rc-change='Wed Jun 17 00:51:40 2015', queued=0ms, exec=1093ms
storage-fence_start_0 on fs009b 'unknown error' (1): call=18, status=Error, last-rc-change='Wed Jun 17 00:56:42 2015', queued=0ms, exec=1101ms
2015-06-17T01:34:29.156751+02:00 hrzgiufs009a stonithd[25547]: warning: log_operation: storage-fence:25670 [ ERROR:root:Failed: nodename or key is required ]
2015-06-17T01:34:29.156988+02:00 hrzgiufs009a stonithd[25547]: warning: log_operation: storage-fence:25670 [ ]
2015-06-17T01:34:29.157234+02:00 hrzgiufs009a stonithd[25547]: warning: log_operation: storage-fence:25670 [ ERROR:root:Please use '-h' for usage ]
2015-06-17T01:34:29.157460+02:00 hrzgiufs009a stonithd[25547]: warning: log_operation: storage-fence:25670 [ ]
On the other hand, running the fence_scsi command manually works:
fs009a:~ # fence_scsi -d /dev/mapper/mpath_test-3par -n fs009b -o off
Success: Already OFF
fs009a:~ # fence_scsi -d /dev/mapper/mpath_test-3par -n fs009a -o on
Success: Already ON
I’m guessing that fence_scsi, when it is run by the cluster (via stonithd) rather than by hand, isn’t being supplied with the key/nodename.
Here’s some config:
# crm configure show
node 739719956: fs009a \\
attributes maintenance=off standby=off
node 739719957: fs009b \\
attributes maintenance=off standby=off
primitive clusterIP IPaddr2 \\
params ip=172.23.59.22 cidr_netmask=25 \\
op monitor interval=10s timeout=20s \\
op stop interval=0s timeout=20s \\
op start interval=0 timeout=20s
primitive fs_storage_test Filesystem \\
params device="/dev/mapper/mpath_test-3par_part1" directory="/TEST" fstype=ext4 \\
op monitor timeout=40 interval=20 \\
op start timeout=60 interval=0 \\
op stop timeout=60 interval=0 \\
meta target-role=Started
primitive nfs-server systemd:nfsserver \\
op monitor interval=60 timeout=15 \\
op start interval=0 timeout=15 \\
op stop interval=0 timeout=15
primitive storage-fence stonith:fence_scsi \\
params action=off devices="/dev/mapper/mpath_test-3par" verbose=false \\
op monitor interval=60s timeout=0s \\
meta target-role=Started
group nas-service clusterIP fs_storage_test nfs-server \\
meta target-role=Started
location constraint-location-a nas-service 100: fs009a
property cib-bootstrap-options: \\
dc-version=1.1.12-ad083a8 \\
cluster-infrastructure=corosync \\
cluster-name=fs009 \\
stonith-enabled=true \\
no-quorum-policy=stop \\
last-lrm-refresh=1434493344
rsc_defaults rsc-options: \\
resource-stickiness=100
[CODE]totem {
#The maximum number of messages that may be sent by one processor on receipt of the token.
max_messages: 20
#The virtual synchrony filter type used to identify a primary component. Change with care.
vsftype: none
#Used for mutual node authentication
crypto_cipher: none
#HMAC/SHA1 should be used to authenticate all messages
secauth: on
#How many token retransmits should be attempted before forming a new configuration.
token_retransmits_before_loss_const: 10
#How many threads should be used to encrypt and send messages. Only meaningful when secauth is turned on
threads: 16
interface {
#Network Address to be bind for this interface setting
bindnetaddr: 172.23.59.0
#The multicast port to be used
mcastport: 5405
#The ringnumber assigned to this interface setting
ringnumber: 0
#Time-to-live for cluster communication packets
ttl: 1
}
interface {
#Network Address to be bind for this interface setting
bindnetaddr: 10.99.99.0
#The multicast port to be used
mcastport: 5405
#The ringnumber assigned to this interface setting
ringnumber: 1
}
#How long to wait for consensus to be achieved before starting a new round of membership configuration.
consensus: 6000
#To make sure the auto-generated nodeid is positive
clear_node_high_bit: yes
#Used for mutual node authentication
crypto_hash: none
#The mode for redundant ring. None is used when only 1 interface is specified; otherwise, only active or passive may be chosen
rrp_mode: active
#How long to wait for join messages in membership protocol. in ms
join: 60
#This specifies the name of cluster
cluster_name: fs009
#Timeout for a token lost. in ms
token: 5000
#The only valid version is 2
version: 2
#Transport protocol
transport: udpu
}
nodelist {
node {
#ring0 address
ring0_addr: fs009a
#ring1 address
ring1_addr: fs009a-2
}
node {
#ring0 address
ring0_addr: fs009b
#ring1 address
ring1_addr: fs009b-2
}
}
logging {
#Log to a specified file
to_logfile: no
#Log timestamp as well
timestamp: on
#Facility in syslog
syslog_facility: daemon
logger_subsys {
#Enable debug for this logger.
debug: on
#This specifies the subsystem identity (name) for which logging is specified
subsys: QUORUM
}
#Log to syslog
to_syslog: yes
#Whether or not turning on the debug information in the log
[/CODE]
I’ve found what looks like the same issue described here: https://access.redhat.com/solutions/1421063
Any ideas?