From 26c6010ce0f32943616647d52dddeab721e62362 Mon Sep 17 00:00:00 2001 From: Mikayla Fischler Date: Sat, 21 May 2022 13:56:14 -0400 Subject: [PATCH] #56 pcall threads and restart on crash (unless shutting down) --- reactor-plc/startup.lua | 6 +- reactor-plc/threads.lua | 122 ++++++++++++++++++++++++++++++++++++---- rtu/startup.lua | 6 +- rtu/threads.lua | 69 +++++++++++++++++++++-- 4 files changed, 181 insertions(+), 22 deletions(-) diff --git a/reactor-plc/startup.lua b/reactor-plc/startup.lua index 86e7722..c401961 100644 --- a/reactor-plc/startup.lua +++ b/reactor-plc/startup.lua @@ -13,7 +13,7 @@ local config = require("reactor-plc.config") local plc = require("reactor-plc.plc") local threads = require("reactor-plc.threads") -local R_PLC_VERSION = "alpha-v0.6.9" +local R_PLC_VERSION = "alpha-v0.7.0" local print = util.print local println = util.println @@ -156,7 +156,7 @@ if __shared_memory.networked then local sp_ctrl_thread = threads.thread__setpoint_control(__shared_memory) -- run threads - parallel.waitForAll(main_thread.exec, rps_thread.exec, comms_thread_tx.exec, comms_thread_rx.exec, sp_ctrl_thread.exec) + parallel.waitForAll(main_thread.p_exec, rps_thread.p_exec, comms_thread_tx.p_exec, comms_thread_rx.p_exec, sp_ctrl_thread.p_exec) if plc_state.init_ok then -- send status one last time after RPS shutdown @@ -168,7 +168,7 @@ if __shared_memory.networked then end else -- run threads, excluding comms - parallel.waitForAll(main_thread.exec, rps_thread.exec) + parallel.waitForAll(main_thread.p_exec, rps_thread.p_exec) end println_ts("exited") diff --git a/reactor-plc/threads.lua b/reactor-plc/threads.lua index 9531792..561f72c 100644 --- a/reactor-plc/threads.lua +++ b/reactor-plc/threads.lua @@ -31,8 +31,10 @@ local MQ__COMM_CMD = { ---@param smem plc_shared_memory ---@param init function threads.thread__main = function (smem, init) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () 
log.debug("main thread init, clock inactive") -- send status updates at 2Hz (every 10 server ticks) (every loop tick) @@ -183,14 +185,38 @@ threads.thread__main = function (smem, init) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local plc_state = smem.plc_state + + while not plc_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + -- if status is true, then we are probably exiting, so this won't matter + -- if not, we need to restart the clock + -- this thread cannot be slept because it would otherwise miss events (namely "terminate") + if not plc_state.shutdown then + log.info("main thread restarting now...") + +---@diagnostic disable-next-line: undefined-field + os.queueEvent("clock_start") + end + end + end + + return public end -- RPS operation thread ---@param smem plc_shared_memory threads.thread__rps = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("rps thread start") -- load in from shared memory @@ -301,14 +327,35 @@ threads.thread__rps = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local plc_state = smem.plc_state + local rps = smem.plc_sys.rps + + while not plc_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not plc_state.shutdown then + if plc_state.init_ok then rps.scram() end + log.info("rps thread restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end -- communications sender thread ---@param smem plc_shared_memory threads.thread__comms_tx = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () 
log.debug("comms tx thread start") -- load in from shared memory @@ -355,14 +402,33 @@ threads.thread__comms_tx = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local plc_state = smem.plc_state + + while not plc_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not plc_state.shutdown then + log.info("comms tx thread restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end -- communications handler thread ---@param smem plc_shared_memory threads.thread__comms_rx = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("comms rx thread start") -- load in from shared memory @@ -408,14 +474,33 @@ threads.thread__comms_rx = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local plc_state = smem.plc_state + + while not plc_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not plc_state.shutdown then + log.info("comms rx thread restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end -- apply setpoints ---@param smem plc_shared_memory threads.thread__setpoint_control = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("setpoint control thread start") -- load in from shared memory @@ -511,7 +596,24 @@ threads.thread__setpoint_control = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local plc_state = smem.plc_state + + while not plc_state.shutdown do + 
local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not plc_state.shutdown then + log.info("setpoint control thread restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end return threads diff --git a/rtu/startup.lua b/rtu/startup.lua index 90578ce..003a2d6 100644 --- a/rtu/startup.lua +++ b/rtu/startup.lua @@ -24,7 +24,7 @@ local imatrix_rtu = require("rtu.dev.imatrix_rtu") local turbine_rtu = require("rtu.dev.turbine_rtu") local turbinev_rtu = require("rtu.dev.turbinev_rtu") -local RTU_VERSION = "alpha-v0.6.8" +local RTU_VERSION = "alpha-v0.7.0" local rtu_t = types.rtu_t @@ -272,10 +272,10 @@ local main_thread = threads.thread__main(__shared_memory) local comms_thread = threads.thread__comms(__shared_memory) -- assemble thread list -local _threads = { main_thread.exec, comms_thread.exec } +local _threads = { main_thread.p_exec, comms_thread.p_exec } for i = 1, #units do if units[i].thread ~= nil then - table.insert(_threads, units[i].thread.exec) + table.insert(_threads, units[i].thread.p_exec) end end diff --git a/rtu/threads.lua b/rtu/threads.lua index a3d19d7..3d83acf 100644 --- a/rtu/threads.lua +++ b/rtu/threads.lua @@ -28,8 +28,10 @@ local COMMS_SLEEP = 100 -- (100ms, 2 ticks) -- main thread ---@param smem rtu_shared_memory threads.thread__main = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("main thread start") -- main loop clock @@ -152,14 +154,33 @@ threads.thread__main = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local rtu_state = smem.rtu_state + + while not rtu_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not rtu_state.shutdown then + log.info("main thread restarting in 5 seconds...") + 
util.psleep(5) + end + end + end + + return public end -- communications handler thread ---@param smem rtu_shared_memory threads.thread__comms = function (smem) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("comms thread start") -- load in from shared memory @@ -205,15 +226,34 @@ threads.thread__comms = function (smem) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local rtu_state = smem.rtu_state + + while not rtu_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not rtu_state.shutdown then + log.info("comms thread restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end -- per-unit communications handler thread ---@param smem rtu_shared_memory ---@param unit rtu_unit_registry_entry threads.thread__unit_comms = function (smem, unit) + local public = {} ---@class thread + -- execute thread - local exec = function () + public.exec = function () log.debug("rtu unit thread start -> " .. unit.name .. "(" .. unit.type .. ")") -- load in from shared memory @@ -256,7 +296,24 @@ threads.thread__unit_comms = function (smem, unit) end end - return { exec = exec } + -- execute the thread in a protected mode, retrying it on return if not shutting down + public.p_exec = function () + local rtu_state = smem.rtu_state + + while not rtu_state.shutdown do + local status, result = pcall(public.exec) + if status == false then + log.fatal(result) + end + + if not rtu_state.shutdown then + log.info("rtu unit thread " .. unit.name .. "(" .. unit.type .. ") restarting in 5 seconds...") + util.psleep(5) + end + end + end + + return public end return threads