bzl

self-hosted ephemeral community engine
Log | Files | Refs | README | LICENSE

service-runner.js (3754B)


      1 const { spawn } = require("child_process");
      2 const path = require("path");
      3 
      4 const ROOT_DIR = path.join(__dirname, "..");
      5 const SERVER_ENTRY = path.join(ROOT_DIR, "server.js");
      6 const PORT = Number(process.env.PORT || 3000);
      7 const HEALTHCHECK_URL = process.env.HEALTHCHECK_URL || `http://127.0.0.1:${PORT}/api/health`;
      8 const HEALTHCHECK_MS = Number(process.env.HEALTHCHECK_MS || 15_000);
      9 const HEALTHCHECK_TIMEOUT_MS = Number(process.env.HEALTHCHECK_TIMEOUT_MS || 4_000);
     10 const HEALTHCHECK_FAILS = Number(process.env.HEALTHCHECK_FAILS || 3);
     11 const RESTART_MIN_MS = Number(process.env.RESTART_MIN_MS || 2_000);
     12 const RESTART_MAX_MS = Number(process.env.RESTART_MAX_MS || 30_000);
     13 
     14 let child = null;
     15 let stopping = false;
     16 let checkTimer = null;
     17 let restarting = false;
     18 let backoffMs = RESTART_MIN_MS;
     19 let consecutiveHealthFails = 0;
     20 
     21 function log(msg) {
     22   console.log(`[runner] ${msg}`);
     23 }
     24 
     25 async function healthcheck() {
     26   if (!child || child.killed) return;
     27   const controller = new AbortController();
     28   const timer = setTimeout(() => controller.abort(), HEALTHCHECK_TIMEOUT_MS);
     29   try {
     30     const res = await fetch(HEALTHCHECK_URL, { signal: controller.signal, cache: "no-store" });
     31     if (!res.ok) throw new Error(`HTTP ${res.status}`);
     32     const body = await res.json().catch(() => ({}));
     33     if (!body || body.ok !== true) throw new Error("Invalid health payload");
     34     consecutiveHealthFails = 0;
     35   } catch (e) {
     36     consecutiveHealthFails += 1;
     37     log(`Healthcheck failed (${consecutiveHealthFails}/${HEALTHCHECK_FAILS}): ${e?.message || e}`);
     38     if (consecutiveHealthFails >= HEALTHCHECK_FAILS) {
     39       log("Healthcheck threshold exceeded. Restarting server process.");
     40       restartChild("healthcheck");
     41     }
     42   } finally {
     43     clearTimeout(timer);
     44   }
     45 }
     46 
     47 function scheduleChecks() {
     48   if (checkTimer) clearInterval(checkTimer);
     49   checkTimer = setInterval(() => {
     50     healthcheck().catch(() => {
     51       // ignore
     52     });
     53   }, Math.max(1000, HEALTHCHECK_MS));
     54 }
     55 
     56 function clearChecks() {
     57   if (checkTimer) clearInterval(checkTimer);
     58   checkTimer = null;
     59 }
     60 
     61 function spawnChild() {
     62   consecutiveHealthFails = 0;
     63   const nodeExe = process.execPath;
     64   child = spawn(nodeExe, [SERVER_ENTRY], {
     65     cwd: ROOT_DIR,
     66     env: process.env,
     67     stdio: "inherit"
     68   });
     69   log(`Started server (pid ${child.pid})`);
     70 
     71   child.on("exit", (code, signal) => {
     72     const wasStopping = stopping;
     73     const detail = signal ? `signal=${signal}` : `code=${code}`;
     74     log(`Server exited (${detail})`);
     75     child = null;
     76     if (wasStopping) return;
     77     restartChild("exit");
     78   });
     79 }
     80 
     81 function restartChild(reason) {
     82   if (stopping || restarting) return;
     83   restarting = true;
     84   clearChecks();
     85   const delay = backoffMs;
     86   backoffMs = Math.min(RESTART_MAX_MS, Math.floor(backoffMs * 1.8));
     87 
     88   const finishRestart = () => {
     89     restarting = false;
     90     if (stopping) return;
     91     spawnChild();
     92     scheduleChecks();
     93   };
     94 
     95   if (child && !child.killed) {
     96     try {
     97       child.kill("SIGTERM");
     98     } catch {
     99       // ignore
    100     }
    101   }
    102   log(`Restart scheduled in ${delay}ms (reason: ${reason})`);
    103   setTimeout(finishRestart, delay);
    104 }
    105 
    106 function shutdown(signal) {
    107   if (stopping) return;
    108   stopping = true;
    109   clearChecks();
    110   log(`Stopping runner (${signal})`);
    111   if (child && !child.killed) {
    112     try {
    113       child.kill("SIGTERM");
    114     } catch {
    115       // ignore
    116     }
    117     setTimeout(() => {
    118       if (child && !child.killed) {
    119         try {
    120           child.kill("SIGKILL");
    121         } catch {
    122           // ignore
    123         }
    124       }
    125       process.exit(0);
    126     }, 5_000);
    127   } else {
    128     process.exit(0);
    129   }
    130 }
    131 
    132 process.on("SIGINT", () => shutdown("SIGINT"));
    133 process.on("SIGTERM", () => shutdown("SIGTERM"));
    134 
    135 log(`Health URL: ${HEALTHCHECK_URL}`);
    136 spawnChild();
    137 scheduleChecks();