#266 added a health check to the PPM and improved the reliability of RTU hardware state reporting

This commit is contained in:
Mikayla
2023-06-22 19:46:17 +00:00
parent a02529b9f7
commit 712c7a8f3b
15 changed files with 99 additions and 74 deletions

View File

@@ -10,6 +10,7 @@ function boilerv_rtu.new(boiler)
local unit = rtu.init_unit()
-- disable auto fault clearing
boiler.__p_clear_fault()
boiler.__p_disable_afc()
-- discrete inputs --

View File

@@ -10,6 +10,7 @@ function envd_rtu.new(envd)
local unit = rtu.init_unit()
-- disable auto fault clearing
envd.__p_clear_fault()
envd.__p_disable_afc()
-- discrete inputs --

View File

@@ -10,6 +10,7 @@ function imatrix_rtu.new(imatrix)
local unit = rtu.init_unit()
-- disable auto fault clearing
imatrix.__p_clear_fault()
imatrix.__p_disable_afc()
-- discrete inputs --

View File

@@ -10,6 +10,7 @@ function sna_rtu.new(sna)
local unit = rtu.init_unit()
-- disable auto fault clearing
sna.__p_clear_fault()
sna.__p_disable_afc()
-- discrete inputs --

View File

@@ -10,6 +10,7 @@ function sps_rtu.new(sps)
local unit = rtu.init_unit()
-- disable auto fault clearing
sps.__p_clear_fault()
sps.__p_disable_afc()
-- discrete inputs --

View File

@@ -10,6 +10,7 @@ function turbinev_rtu.new(turbine)
local unit = rtu.init_unit()
-- disable auto fault clearing
turbine.__p_clear_fault()
turbine.__p_disable_afc()
-- discrete inputs --

View File

@@ -28,7 +28,7 @@ local sna_rtu = require("rtu.dev.sna_rtu")
local sps_rtu = require("rtu.dev.sps_rtu")
local turbinev_rtu = require("rtu.dev.turbinev_rtu")
local RTU_VERSION = "v1.3.6"
local RTU_VERSION = "v1.3.7"
local RTU_UNIT_TYPE = types.RTU_UNIT_TYPE
local RTU_UNIT_HW_STATE = databus.RTU_UNIT_HW_STATE
@@ -236,18 +236,19 @@ local function main()
---@class rtu_unit_registry_entry
local unit = {
uid = 0, ---@type integer
name = "redstone_io", ---@type string
type = RTU_UNIT_TYPE.REDSTONE, ---@type RTU_UNIT_TYPE
index = entry_idx, ---@type integer
reactor = io_reactor, ---@type integer
device = capabilities, ---@type table use device field for redstone ports
is_multiblock = false, ---@type boolean
formed = nil, ---@type boolean|nil
rtu = rs_rtu, ---@type rtu_device|rtu_rs_device
uid = 0, ---@type integer
name = "redstone_io", ---@type string
type = RTU_UNIT_TYPE.REDSTONE, ---@type RTU_UNIT_TYPE
index = entry_idx, ---@type integer
reactor = io_reactor, ---@type integer
device = capabilities, ---@type table use device field for redstone ports
is_multiblock = false, ---@type boolean
formed = nil, ---@type boolean|nil
hw_state = RTU_UNIT_HW_STATE.OK, ---@type RTU_UNIT_HW_STATE
rtu = rs_rtu, ---@type rtu_device|rtu_rs_device
modbus_io = modbus.new(rs_rtu, false),
pkt_queue = nil, ---@type mqueue|nil
thread = nil ---@type parallel_thread|nil
pkt_queue = nil, ---@type mqueue|nil
thread = nil ---@type parallel_thread|nil
}
table.insert(units, unit)
@@ -261,7 +262,7 @@ local function main()
unit.uid = #units
databus.tx_unit_hw_status(unit.uid, RTU_UNIT_HW_STATE.OK)
databus.tx_unit_hw_status(unit.uid, unit.hw_state)
end
end
@@ -403,6 +404,7 @@ local function main()
device = device, ---@type table
is_multiblock = is_multiblock, ---@type boolean
formed = formed, ---@type boolean|nil
hw_state = RTU_UNIT_HW_STATE.OFFLINE, ---@type RTU_UNIT_HW_STATE
rtu = rtu_iface, ---@type rtu_device|rtu_rs_device
modbus_io = modbus.new(rtu_iface, true),
pkt_queue = mqueue.new(), ---@type mqueue|nil
@@ -422,19 +424,21 @@ local function main()
rtu_unit.uid = #units
-- report hardware status
-- determine hardware status
if rtu_unit.type == RTU_UNIT_TYPE.VIRTUAL then
databus.tx_unit_hw_status(rtu_unit.uid, RTU_UNIT_HW_STATE.OFFLINE)
rtu_unit.hw_state = RTU_UNIT_HW_STATE.OFFLINE
else
if rtu_unit.is_multiblock then
databus.tx_unit_hw_status(rtu_unit.uid, util.trinary(rtu_unit.formed == true, RTU_UNIT_HW_STATE.OK, RTU_UNIT_HW_STATE.UNFORMED))
rtu_unit.hw_state = util.trinary(rtu_unit.formed == true, RTU_UNIT_HW_STATE.OK, RTU_UNIT_HW_STATE.UNFORMED)
elseif faulted then
databus.tx_unit_hw_status(rtu_unit.uid, RTU_UNIT_HW_STATE.FAULTED)
rtu_unit.hw_state = RTU_UNIT_HW_STATE.FAULTED
else
databus.tx_unit_hw_status(rtu_unit.uid, RTU_UNIT_HW_STATE.OK)
rtu_unit.hw_state = RTU_UNIT_HW_STATE.OK
end
end
-- report hardware status
databus.tx_unit_hw_status(rtu_unit.uid, rtu_unit.hw_state)
end
-- we made it through all that trusting-user-to-write-a-config-file chaos

View File

@@ -105,13 +105,15 @@ function threads.thread__main(smem)
for i = 1, #units do
-- find disconnected device
if units[i].device == device then
-- we are going to let the PPM prevent crashes
-- return fault flags/codes to MODBUS queries
-- rely on the PPM to prevent crashes; failures will then be indicated in replies to MODBUS queries
local unit = units[i] ---@type rtu_unit_registry_entry
local type_name = types.rtu_type_to_string(unit.type)
println_ts(util.c("lost the ", type_name, " on interface ", unit.name))
log.warning(util.c("lost the ", type_name, " unit peripheral on interface ", unit.name))
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OFFLINE)
unit.hw_state = UNIT_HW_STATE.OFFLINE
databus.tx_unit_hw_status(unit.uid, unit.hw_state)
break
end
end
@@ -144,6 +146,8 @@ function threads.thread__main(smem)
-- note: cannot check isFormed as that would yield this coroutine and consume events
if unit.name == param1 then
local resend_advert = false
local faulted = false
local unknown = false
-- found, re-link
unit.device = device
@@ -177,57 +181,58 @@ function threads.thread__main(smem)
end
if unit.type == RTU_UNIT_TYPE.BOILER_VALVE then
unit.rtu = boilerv_rtu.new(device)
unit.rtu, faulted = boilerv_rtu.new(device)
-- if not formed, indexing the multiblock functions would have resulted in a PPM fault
unit.formed = util.trinary(device.__p_is_faulted(), false, nil)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
unit.formed = util.trinary(faulted, false, nil)
elseif unit.type == RTU_UNIT_TYPE.TURBINE_VALVE then
unit.rtu = turbinev_rtu.new(device)
unit.rtu, faulted = turbinev_rtu.new(device)
-- if not formed, indexing the multiblock functions would have resulted in a PPM fault
unit.formed = util.trinary(device.__p_is_faulted(), false, nil)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
unit.formed = util.trinary(faulted, false, nil)
elseif unit.type == RTU_UNIT_TYPE.IMATRIX then
unit.rtu = imatrix_rtu.new(device)
unit.rtu, faulted = imatrix_rtu.new(device)
-- if not formed, indexing the multiblock functions would have resulted in a PPM fault
unit.formed = util.trinary(device.__p_is_faulted(), false, nil)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
unit.formed = util.trinary(faulted, false, nil)
elseif unit.type == RTU_UNIT_TYPE.SPS then
unit.rtu = sps_rtu.new(device)
unit.rtu, faulted = sps_rtu.new(device)
-- if not formed, indexing the multiblock functions would have resulted in a PPM fault
unit.formed = util.trinary(device.__p_is_faulted(), false, nil)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
unit.formed = util.trinary(faulted, false, nil)
elseif unit.type == RTU_UNIT_TYPE.SNA then
unit.rtu = sna_rtu.new(device)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OK)
unit.rtu, faulted = sna_rtu.new(device)
elseif unit.type == RTU_UNIT_TYPE.ENV_DETECTOR then
unit.rtu = envd_rtu.new(device)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OK)
unit.rtu, faulted = envd_rtu.new(device)
else
unknown = true
log.error(util.c("failed to identify reconnected RTU unit type (", unit.name, ")"), true)
end
if unit.is_multiblock then
if (unit.formed == false) then
unit.hw_state = UNIT_HW_STATE.UNFORMED
if unit.formed == false then
log.info(util.c("assuming ", unit.name, " is not formed due to PPM faults while initializing"))
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
end
elseif device.__p_is_faulted() then
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.FAULTED)
elseif faulted then
unit.hw_state = UNIT_HW_STATE.FAULTED
elseif not unknown then
unit.hw_state = UNIT_HW_STATE.OK
else
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OK)
unit.hw_state = UNIT_HW_STATE.OFFLINE
end
unit.modbus_io = modbus.new(unit.rtu, true)
databus.tx_unit_hw_status(unit.uid, unit.hw_state)
local type_name = types.rtu_type_to_string(unit.type)
local message = util.c("reconnected the ", type_name, " on interface ", unit.name)
println_ts(message)
log.info(message)
if not unknown then
unit.modbus_io = modbus.new(unit.rtu, true)
if resend_advert then
rtu_comms.send_advertisement(units)
else
rtu_comms.send_remounted(unit.uid)
local type_name = types.rtu_type_to_string(unit.type)
local message = util.c("reconnected the ", type_name, " on interface ", unit.name)
println_ts(message)
log.info(message)
if resend_advert then
rtu_comms.send_advertisement(units)
else
rtu_comms.send_remounted(unit.uid)
end
end
end
end
@@ -391,13 +396,6 @@ function threads.thread__unit_comms(smem, unit)
-- received a packet
local _, reply = unit.modbus_io.handle_packet(msg.message)
rtu_comms.send_modbus(reply)
-- check if there was a problem and update the hardware state if so
local frame = reply.get()
if unit.formed and (bit.band(frame.func_code, types.MODBUS_FCODE.ERROR_FLAG) ~= 0) and
(frame.data[1] == types.MODBUS_EXCODE.SERVER_DEVICE_FAIL) then
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.FAULTED)
end
end
end
@@ -413,12 +411,10 @@ function threads.thread__unit_comms(smem, unit)
if unit.formed == nil then
unit.formed = is_formed
if is_formed then databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OK) end
if is_formed then unit.hw_state = UNIT_HW_STATE.OK end
end
if not unit.formed then
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
end
if not unit.formed then unit.hw_state = UNIT_HW_STATE.UNFORMED end
if (not unit.formed) and is_formed then
-- newly re-formed
@@ -463,11 +459,11 @@ function threads.thread__unit_comms(smem, unit)
if unit.formed and faulted then
-- something is still wrong, so it can't be marked as formed yet
unit.formed = false
unit.hw_state = UNIT_HW_STATE.UNFORMED
log.info(util.c("assuming ", unit.name, " is not formed due to PPM faults while initializing"))
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.UNFORMED)
else
unit.hw_state = UNIT_HW_STATE.OK
rtu_comms.send_remounted(unit.uid)
databus.tx_unit_hw_status(unit.uid, UNIT_HW_STATE.OK)
end
local type_name = types.rtu_type_to_string(unit.type)
@@ -484,6 +480,16 @@ function threads.thread__unit_comms(smem, unit)
unit.formed = is_formed
end
-- check hardware status
if unit.device.__p_is_healthy() then
if unit.hw_state == UNIT_HW_STATE.FAULTED then unit.hw_state = UNIT_HW_STATE.OK end
else
if unit.hw_state == UNIT_HW_STATE.OK then unit.hw_state = UNIT_HW_STATE.FAULTED end
end
-- update hw status
databus.tx_unit_hw_status(unit.uid, unit.hw_state)
-- check for termination request
if rtu_state.shutdown then
log.info("rtu unit thread exiting -> " .. short_name)