service-runner.js (3754B)
const { spawn } = require("child_process");
const path = require("path");

// Supervisor script: starts server.js as a child process, polls its health
// endpoint, and restarts it (with exponential backoff) when it exits or
// repeatedly fails the health check.

const ROOT_DIR = path.join(__dirname, "..");
const SERVER_ENTRY = path.join(ROOT_DIR, "server.js");

/**
 * Reads a numeric setting from the environment.
 *
 * Unset or empty variables yield `fallback`. Values that do not parse to a
 * finite, non-negative number also yield `fallback` (with a warning) so that
 * a typo cannot turn into a NaN timer delay or a never-reached threshold.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is missing or invalid.
 * @returns {number} The parsed value or `fallback`.
 */
function envNumber(name, fallback) {
  const raw = process.env[name];
  if (!raw) return fallback;
  const value = Number(raw);
  if (Number.isFinite(value) && value >= 0) return value;
  log(`Ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
  return fallback;
}

const PORT = envNumber("PORT", 3000);
const HEALTHCHECK_URL = process.env.HEALTHCHECK_URL || `http://127.0.0.1:${PORT}/api/health`;
const HEALTHCHECK_MS = envNumber("HEALTHCHECK_MS", 15_000);
const HEALTHCHECK_TIMEOUT_MS = envNumber("HEALTHCHECK_TIMEOUT_MS", 4_000);
const HEALTHCHECK_FAILS = envNumber("HEALTHCHECK_FAILS", 3);
const RESTART_MIN_MS = envNumber("RESTART_MIN_MS", 2_000);
const RESTART_MAX_MS = envNumber("RESTART_MAX_MS", 30_000);

let child = null; // currently supervised process, or null between restarts
let stopping = false; // set once shutdown() has been requested
let checkTimer = null; // interval handle for periodic health checks
let restarting = false; // true while a restart is pending
let backoffMs = RESTART_MIN_MS; // delay applied to the next restart
let consecutiveHealthFails = 0;

/**
 * Writes a message to stdout with the runner prefix.
 * @param {string} msg - Text to log.
 */
function log(msg) {
  console.log(`[runner] ${msg}`);
}

/**
 * Tells whether a spawned process is still running.
 *
 * `proc.killed` is not used here: it only records that a signal was sent,
 * not that the process has terminated.
 *
 * @param {import("child_process").ChildProcess | null} proc - Process to inspect.
 * @returns {boolean} True when the process was started and has not exited.
 */
function isAlive(proc) {
  return Boolean(proc) && proc.pid !== undefined && proc.exitCode === null && proc.signalCode === null;
}

/**
 * Sends a signal to a process, ignoring errors raised by kill().
 * @param {import("child_process").ChildProcess} proc - Target process.
 * @param {NodeJS.Signals} signal - Signal name to send.
 */
function sendSignal(proc, signal) {
  try {
    proc.kill(signal);
  } catch {
    // Best effort: the process may already be gone.
  }
}

/**
 * Performs one health check against HEALTHCHECK_URL.
 *
 * A check passes when the response is 2xx and its JSON body has `ok === true`.
 * A pass resets the failure counter and the restart backoff. After
 * HEALTHCHECK_FAILS consecutive failures the child is restarted.
 *
 * @returns {Promise<void>}
 */
async function healthcheck() {
  const target = child;
  // Skip when there is no child or it has already been signalled to stop.
  if (!target || target.killed) return;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), HEALTHCHECK_TIMEOUT_MS);
  try {
    const res = await fetch(HEALTHCHECK_URL, { signal: controller.signal, cache: "no-store" });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const body = await res.json().catch(() => ({}));
    if (!body || body.ok !== true) throw new Error("Invalid health payload");
    // The result belongs to `target`; ignore it if the child changed meanwhile.
    if (child !== target) return;
    consecutiveHealthFails = 0;
    // The server is healthy again, so the next failure starts from the minimum delay.
    backoffMs = RESTART_MIN_MS;
  } catch (e) {
    // Do not count a failure against a different (or already stopping) process.
    if (stopping || child !== target || target.killed) return;
    consecutiveHealthFails += 1;
    log(`Healthcheck failed (${consecutiveHealthFails}/${HEALTHCHECK_FAILS}): ${e?.message || e}`);
    if (consecutiveHealthFails >= HEALTHCHECK_FAILS) {
      log("Healthcheck threshold exceeded. Restarting server process.");
      restartChild("healthcheck");
    }
  } finally {
    clearTimeout(timer);
  }
}

/**
 * (Re)starts the periodic health check timer, at least 1000 ms apart.
 */
function scheduleChecks() {
  if (checkTimer) clearInterval(checkTimer);
  checkTimer = setInterval(() => {
    healthcheck().catch((e) => {
      log(`Healthcheck error: ${e?.message || e}`);
    });
  }, Math.max(1000, HEALTHCHECK_MS));
}

/**
 * Stops the periodic health check timer.
 */
function clearChecks() {
  if (checkTimer) clearInterval(checkTimer);
  checkTimer = null;
}

/**
 * Spawns server.js with the current Node binary and wires up its lifecycle
 * handlers. The new process becomes the supervised `child`.
 */
function spawnChild() {
  consecutiveHealthFails = 0;
  const proc = spawn(process.execPath, [SERVER_ENTRY], {
    cwd: ROOT_DIR,
    env: process.env,
    stdio: "inherit",
  });
  child = proc;
  log(`Started server (pid ${proc.pid})`);

  // Without a listener an 'error' event would crash the runner.
  proc.on("error", (err) => {
    log(`Server process error: ${err?.message || err}`);
    // No pid means the spawn itself failed, so no 'exit' event will follow.
    if (proc.pid === undefined && child === proc) {
      child = null;
      restartChild("spawn-error");
    }
  });

  proc.on("exit", (code, signal) => {
    const detail = signal ? `signal=${signal}` : `code=${code}`;
    log(`Server exited (${detail})`);
    // A process that is no longer the supervised child must not touch state.
    if (child !== proc) return;
    child = null;
    if (stopping) {
      // The child is gone; no need to wait for the force-kill timer.
      process.exit(0);
    }
    restartChild("exit");
  });
}

/**
 * Schedules a restart of the child after the current backoff delay.
 *
 * A running child is sent SIGTERM immediately. If it is still running when
 * the delay elapses it is sent SIGKILL, and the replacement is only spawned
 * after it has exited. Calls made while stopping or while a restart is
 * already pending are ignored.
 *
 * @param {string} reason - Short label included in the log line.
 */
function restartChild(reason) {
  if (stopping || restarting) return;
  restarting = true;
  clearChecks();
  const delay = backoffMs;
  backoffMs = Math.min(RESTART_MAX_MS, Math.floor(backoffMs * 1.8));

  const startReplacement = () => {
    restarting = false;
    if (stopping) return;
    spawnChild();
    scheduleChecks();
  };

  const finishRestart = () => {
    const previous = child;
    if (stopping || !isAlive(previous)) {
      startReplacement();
      return;
    }
    log("Server did not exit after SIGTERM. Sending SIGKILL.");
    // Registered after the handler in spawnChild, so it runs once `child` is cleared.
    previous.once("exit", startReplacement);
    sendSignal(previous, "SIGKILL");
  };

  if (isAlive(child)) sendSignal(child, "SIGTERM");
  log(`Restart scheduled in ${delay}ms (reason: ${reason})`);
  setTimeout(finishRestart, delay);
}

/**
 * Stops the runner: sends SIGTERM to the child, escalates to SIGKILL after
 * 5 seconds if it is still running, then exits with code 0.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 */
function shutdown(signal) {
  if (stopping) return;
  stopping = true;
  clearChecks();
  log(`Stopping runner (${signal})`);
  const proc = child;
  if (!isAlive(proc)) {
    process.exit(0);
    return;
  }
  sendSignal(proc, "SIGTERM");
  setTimeout(() => {
    if (isAlive(proc)) sendSignal(proc, "SIGKILL");
    process.exit(0);
  }, 5_000);
}

process.on("SIGINT", () => shutdown("SIGINT"));
process.on("SIGTERM", () => shutdown("SIGTERM"));

log(`Health URL: ${HEALTHCHECK_URL}`);
spawnChild();
scheduleChecks();