From ceb8048889df1e9843433a2594bd301ed99f70cc Mon Sep 17 00:00:00 2001 From: Budleigh Salterton Date: Mon, 3 Jul 2023 15:31:45 +0200 Subject: [PATCH 01/62] Todo comments and changes for review --- packages/host/src/lib/csi-controller.ts | 10 +++++++- packages/host/src/lib/host.ts | 31 ++++++++++++++----------- packages/host/src/lib/socket-server.ts | 12 +++++++--- packages/runner/src/bin/start-runner.ts | 12 +++++++++- packages/runner/src/runner.ts | 1 + 5 files changed, 48 insertions(+), 18 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index da3e4114f..a72829edf 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -66,6 +66,8 @@ const BPMux = require("bpmux").BPMux; /** * Handles all Instance lifecycle, exposes instance's HTTP API. + * + * @todo write interface for CSIController and CSIDispatcher */ export class CSIController extends TypedEmitter { id: string; @@ -269,6 +271,10 @@ export class CSIController extends TypedEmitter { this.emit("end", code); } + /** + * @todo add comment + * @todo move this to CSIDispatcher - that would be one for all sequences + */ startInstance() { this._instanceAdapter = getInstanceAdapter(this.adapter, this.sthConfig, this.id); @@ -280,6 +286,7 @@ export class CSIController extends TypedEmitter { instanceAdapterExitDelay: this.sthConfig.timings.instanceAdapterExitDelay }; + // @todo this also is moved to CSIDispatcher in entirety const instanceMain = async () => { try { this.status = InstanceStatus.STARTING; @@ -324,7 +331,8 @@ export class CSIController extends TypedEmitter { return error; }); - + + // @todo - this should be checked by CSIController, but Dispatcher should know about this via event listener. 
this.instancePromise.finally(() => { this.heartBeatResolver?.res(this.id); }); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 65cfac16c..fcafa64fc 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -155,19 +155,6 @@ export class Host implements IComponent { onInstanceRequest: (socket: Duplex) => { this.api.server.emit("connection", socket); }, }; - /** - * Sets listener for connections to socket server. - */ - private attachListeners() { - this.socketServer.on("connect", async (id, streams) => { - this.logger.debug("Instance connected", id); - - await this.instancesStore[id].handleInstanceConnect( - streams - ); - }); - } - public get service(): string { return name; } @@ -954,6 +941,7 @@ export class Host implements IComponent { this.logger.info("Start sequence", sequence.id, sequence.config.name); try { + // @todo - this line should be done by CSIDispatcher const csic = await this.startCSIController(sequence, payload); await this.cpmConnector?.sendInstanceInfo({ @@ -995,11 +983,28 @@ export class Host implements IComponent { } } + /** + * Sets listener for connections to socket server. + */ + private attachListeners() { + this.socketServer.on("connect", async (id, streams) => { + this.logger.debug("Instance connected", id); + + // @todo this should be a call to CSIDispatcher + // @todo CSIDispatcher should receive a reference to instanceStore. + await this.instancesStore[id].handleInstanceConnect( + streams + ); + }); + } + /** * Creates new CSIController {@link CSIController} object and handles its events. * * @param {SequenceInfo} sequence Sequence info object. * @param {STHRestAPI.StartSequencePayload} payload App start configuration. 
+ * @todo This should be started by onConnect from `this..attachListeners` + * @todo Move this to CSI Dispatcher */ async startCSIController(sequence: SequenceInfo, payload: STHRestAPI.StartSequencePayload): Promise { const communicationHandler = new CommunicationHandler(); diff --git a/packages/host/src/lib/socket-server.ts b/packages/host/src/lib/socket-server.ts index ac48cc069..ca49c9fb3 100644 --- a/packages/host/src/lib/socket-server.ts +++ b/packages/host/src/lib/socket-server.ts @@ -45,9 +45,15 @@ export class SocketServer extends TypedEmitter implements IComponent { }); const id = await new Promise((resolve) => { - connection.once("readable", () => { - resolve(connection.read(36).toString()); - }); + const immediateData = connection.read(36); + + if (!immediateData) { + connection.once("readable", () => { + resolve(connection.read(36).toString()); + }); + } else { + resolve(immediateData); + } }); const channel = await new Promise((resolve) => { diff --git a/packages/runner/src/bin/start-runner.ts b/packages/runner/src/bin/start-runner.ts index 520a92ad8..be2ac454c 100755 --- a/packages/runner/src/bin/start-runner.ts +++ b/packages/runner/src/bin/start-runner.ts @@ -10,6 +10,16 @@ const sequencePath: string = process.env.SEQUENCE_PATH?.replace(/.js$/, "") + ". 
const instancesServerPort = process.env.INSTANCES_SERVER_PORT; const instancesServerHost = process.env.INSTANCES_SERVER_HOST; const instanceId = process.env.INSTANCE_ID; +const instanceConnectJSON = process.env.INSTANCE_CONNECT_JSON; + +let connectInfo; + +try { + connectInfo = JSON.parse(instanceConnectJSON); +} catch { + console.error("Error while parsing connection information."); + process.exit(RunnerExitCode.INVALID_ENV_VARS); +} if (!instancesServerPort || instancesServerPort !== parseInt(instancesServerPort, 10).toString()) { console.error("Incorrect run argument: instancesServerPort"); @@ -43,7 +53,7 @@ const hostClient = new HostClient(+instancesServerPort, instancesServerHost); * @param fifosPath - fifo files path */ -const runner: Runner = new Runner(sequencePath, hostClient, instanceId); +const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo); runner.main() .catch(e => { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index f83151edf..2c9d1bb2b 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -455,6 +455,7 @@ export class Runner implements IComponent { } sendHandshakeMessage() { + // TODO: send connection info MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, {}], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); From 321f78d9f9d7116f8b5140b653737013d31cce06 Mon Sep 17 00:00:00 2001 From: Budleigh Salterton Date: Mon, 3 Jul 2023 13:41:22 +0000 Subject: [PATCH 02/62] Add needed info to Ping Interface --- packages/host/src/lib/csi-controller.ts | 4 ++-- packages/types/src/messages/handshake.ts | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index a72829edf..d932597c3 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -66,7 +66,7 @@ const BPMux = require("bpmux").BPMux; /** * Handles 
all Instance lifecycle, exposes instance's HTTP API. - * + * * @todo write interface for CSIController and CSIDispatcher */ export class CSIController extends TypedEmitter { @@ -331,7 +331,7 @@ export class CSIController extends TypedEmitter { return error; }); - + // @todo - this should be checked by CSIController, but Dispatcher should know about this via event listener. this.instancePromise.finally(() => { this.heartBeatResolver?.res(this.id); diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 07494eb7a..70be89e35 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -1,11 +1,17 @@ import { RunnerMessageCode } from "@scramjet/symbols"; +import { SequenceInfo } from "../sequence-adapter"; +import { StartSequencePayload } from "../rest-api-sth"; /** * Runner sends a handshake message to the Cloud Server Host (CSH) after it is. * Runner is then waiting to receive the handshake acknowledge message back (PONG) * from the CSH to start the Sequence. */ -export type HandshakeMessage = { msgCode: RunnerMessageCode.PING }; +export type HandshakeMessage = { + msgCode: RunnerMessageCode.PING, + sequence: SequenceInfo, + payload: StartSequencePayload +}; export type PingMessageData = { ports?: Record } From 05eb95e86d7a78a9504fcc01deeefdd1eac91420 Mon Sep 17 00:00:00 2001 From: Budleigh Salterton Date: Mon, 3 Jul 2023 13:47:23 +0000 Subject: [PATCH 03/62] Add runner exception --- packages/runner/src/bin/start-runner.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/runner/src/bin/start-runner.ts b/packages/runner/src/bin/start-runner.ts index be2ac454c..271fdc058 100755 --- a/packages/runner/src/bin/start-runner.ts +++ b/packages/runner/src/bin/start-runner.ts @@ -10,12 +10,13 @@ const sequencePath: string = process.env.SEQUENCE_PATH?.replace(/.js$/, "") + ". 
const instancesServerPort = process.env.INSTANCES_SERVER_PORT; const instancesServerHost = process.env.INSTANCES_SERVER_HOST; const instanceId = process.env.INSTANCE_ID; -const instanceConnectJSON = process.env.INSTANCE_CONNECT_JSON; +const connectInfoJSON = process.env.INSTANCE_CONNECT_JSON; let connectInfo; try { - connectInfo = JSON.parse(instanceConnectJSON); + if (!connectInfoJSON) throw new Error("Connection JSON is required."); + connectInfo = JSON.parse(connectInfoJSON); } catch { console.error("Error while parsing connection information."); process.exit(RunnerExitCode.INVALID_ENV_VARS); @@ -53,7 +54,7 @@ const hostClient = new HostClient(+instancesServerPort, instancesServerHost); * @param fifosPath - fifo files path */ -const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo); +const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo, connectInfoJSON); runner.main() .catch(e => { From 1f3871e6f79f82636fd3dd9ffe69154a11860b3b Mon Sep 17 00:00:00 2001 From: Piotr Date: Tue, 11 Jul 2023 09:00:41 +0000 Subject: [PATCH 04/62] csi dispatcher --- packages/host/src/lib/csi-dispatcher.ts | 61 ++++++++++++++++++++++ packages/host/src/lib/host.ts | 69 +++++++++++++------------ 2 files changed, 96 insertions(+), 34 deletions(-) create mode 100644 packages/host/src/lib/csi-dispatcher.ts diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts new file mode 100644 index 000000000..313313e44 --- /dev/null +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -0,0 +1,61 @@ +import { ObjLogger } from "@scramjet/obj-logger"; +import { HostProxy, IObjectLogger, InstanceConfig, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; +import { SocketServer } from "./socket-server"; +import { InstanceStore } from "./instance-store"; +import { CSIController } from "./csi-controller"; +import { CommunicationHandler, IDProvider } from "@scramjet/model"; +import { 
StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; +import { getInstanceAdapter } from "@scramjet/adapters"; +import SequenceStore from "./sequenceStore"; + +export class CSIDispatcher { + public logger: IObjectLogger; + //private socketServer: SocketServer; + public instancesStore: typeof InstanceStore; + //private sequenceStore: Map; + private STHConfig: STHConfiguration; + + constructor(_socketServer: SocketServer, instancesStore: typeof InstanceStore, _sequenceStore: SequenceStore, STHConfig: STHConfiguration) { + this.logger = new ObjLogger(this); + //this.socketServer = socketServer; + this.instancesStore = instancesStore; + //this.sequenceStore = sequenceStore; + this.STHConfig = STHConfig; + } + + createCSIController(id: string, sequence: SequenceInfo, payload: StartSequencePayload, communicationHandler: CommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { + const csiController = new CSIController(id, sequence, payload, communicationHandler, config, instanceProxy); + + csiController.logger.pipe(this.logger); + this.logger.trace("CSIController created", id); + + this.instancesStore[id] = csiController; + + return csiController; + } + + async startRunner(sequence: SequenceInfo, payload: STHRestAPI.StartSequencePayload) { + const limits = { + memory: payload.limits?.memory || this.STHConfig.docker.runner.maxMem + }; + const id = IDProvider.generate(); + + const instanceAdapter = getInstanceAdapter(this.STHConfig.runtimeAdapter, this.STHConfig, id); + + await instanceAdapter.init(); + + const instanceConfig: InstanceConfig = { + ...sequence.config, + limits: limits, + instanceAdapterExitDelay: this.STHConfig.timings.instanceAdapterExitDelay + }; + + await instanceAdapter.run( + instanceConfig, + this.STHConfig.host.instancesServerPort, + id + ); + + return { id }; + } +} diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index fcafa64fc..334bfad96 100644 --- a/packages/host/src/lib/host.ts +++ 
b/packages/host/src/lib/host.ts @@ -44,7 +44,7 @@ import { ConfigService, development } from "@scramjet/sth-config"; import { isStartSequenceDTO, readJsonFile, defer, FileBuilder } from "@scramjet/utility"; import { inspect } from "util"; import { auditMiddleware, logger as auditMiddlewareLogger } from "./middlewares/audit"; -import { AuditedRequest, Auditor } from "./auditor"; +import { Auditor } from "./auditor"; import { getTelemetryAdapter, ITelemetryAdapter } from "@scramjet/telemetry"; import { cpus, totalmem } from "os"; import { S3Client } from "./s3-client"; @@ -56,6 +56,7 @@ import { ContentType } from "./serviceDiscovery/contentType"; import SequenceStore from "./sequenceStore"; import { GetSequenceResponse } from "@scramjet/types/src/rest-api-sth"; import { loadModule, logger as loadModuleLogger } from "@scramjet/module-loader"; +import { CSIDispatcher } from "./csi-dispatcher"; const buildInfo = readJsonFile("build.info", __dirname, ".."); const packageFile = findPackage(__dirname).next(); @@ -151,6 +152,8 @@ export class Host implements IComponent { */ s3Client?: S3Client; + csiDispatcher: CSIDispatcher; + private instanceProxy: HostProxy = { onInstanceRequest: (socket: Duplex) => { this.api.server.emit("connection", socket); }, }; @@ -237,6 +240,10 @@ export class Host implements IComponent { this.instanceBase = `${this.config.host.apiBase}/instance`; this.topicsBase = `${this.config.host.apiBase}/topic`; + this.csiDispatcher = new CSIDispatcher(this.socketServer, this.instancesStore, this.sequenceStore, sthConfig); + + this.csiDispatcher.logger.pipe(this.logger); + if (this.config.host.apiBase.includes(":")) { throw new HostError("API_CONFIGURATION_ERROR", "Can't expose an API on paths including a semicolon..."); } @@ -439,7 +446,7 @@ export class Host implements IComponent { this.logger.warn("Sequence id not found for startup config", seqenceConfig); return; } - + // @todo dispatcher await this.startCSIController(sequence, { appConfig: 
seqenceConfig.appConfig || {}, args: seqenceConfig.args, @@ -942,36 +949,31 @@ export class Host implements IComponent { try { // @todo - this line should be done by CSIDispatcher - const csic = await this.startCSIController(sequence, payload); - - await this.cpmConnector?.sendInstanceInfo({ - id: csic.id, - appConfig: csic.appConfig, - args: csic.args, - sequence: (info => { - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const { instances, ...rest } = info; - - return rest; - })(sequence), - ports: csic.info.ports, - created: csic.info.created, - started: csic.info.started, - status: csic.status, - }, InstanceMessageCode.INSTANCE_STARTED); - - this.logger.debug("Instance limits", csic.limits); - this.auditor.auditInstanceStart(csic.id, req as AuditedRequest, csic.limits); - this.pushTelemetry("Instance started", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); - - csic.on("hourChime", () => { - this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); - }); + const runner = await this.csiDispatcher.startRunner(sequence, payload); + + // await this.cpmConnector?.sendInstanceInfo({ + // id: csic.id, + // appConfig: csic.appConfig, + // args: csic.args, + // sequence: sequenceId, + // ports: csic.info.ports, + // created: csic.info.created, + // started: csic.info.started, + // status: csic.status, + // }, InstanceMessageCode.INSTANCE_STARTED); + + // this.logger.debug("Instance limits", csic.limits); + // this.auditor.auditInstanceStart(csic.id, req as AuditedRequest, csic.limits); + // this.pushTelemetry("Instance started", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); + + // csic.on("hourChime", () => { + // this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); + // }); return { opStatus: ReasonPhrases.OK, - message: `Sequence 
${csic.id} starting`, - id: csic.id + message: `Sequence ${runner.id} starting`, + id: runner.id }; } catch (error: any) { this.pushTelemetry("Instance start failed", { error: error.message }, "error"); @@ -992,6 +994,9 @@ export class Host implements IComponent { // @todo this should be a call to CSIDispatcher // @todo CSIDispatcher should receive a reference to instanceStore. + if (!this.instancesStore[id]) { + this.csiDispatcher.createCSIController(id, {} as SequenceInfo, {} as STHRestAPI.StartSequencePayload, new CommunicationHandler(), this.config, this.instanceProxy); + } await this.instancesStore[id].handleInstanceConnect( streams ); @@ -1012,15 +1017,11 @@ export class Host implements IComponent { if (isDevelopment) this.logger.debug("CSIC start payload", payload); - const csic = new CSIController(id, sequence, payload, communicationHandler, this.config, this.instanceProxy); + const csic = this.csiDispatcher.createCSIController(id, sequence, payload, communicationHandler, this.config, this.instanceProxy); csic.logger.pipe(this.logger, { end: false }); communicationHandler.logger.pipe(this.logger, { end: false }); - this.logger.trace("CSIController created", id); - - this.instancesStore[id] = csic; - csic.on("error", (err) => { this.pushTelemetry("Instance error", { ...err }, "error"); this.logger.error("CSIController errored", err.message, err.exitcode); From 416580b0454cffc8a996489e43fc572e88c8e91c Mon Sep 17 00:00:00 2001 From: Piotr Date: Wed, 12 Jul 2023 15:16:10 +0000 Subject: [PATCH 05/62] Start instance in dispatcher --- .../adapters/src/docker-instance-adapter.ts | 6 +- packages/adapters/src/get-runner-env.ts | 3 +- .../src/kubernetes-instance-adapter.ts | 6 +- .../adapters/src/process-instance-adapter.ts | 13 +- packages/adapters/src/types.ts | 3 +- packages/host/src/lib/csi-controller.ts | 23 +-- packages/host/src/lib/csi-dispatcher.ts | 134 +++++++++++++- packages/host/src/lib/host.ts | 165 ++++-------------- 
packages/runner/src/bin/start-runner.ts | 12 +- packages/runner/src/runner.ts | 10 +- packages/types/src/lifecycle-adapters.ts | 3 +- packages/types/src/messages/handshake.ts | 2 +- 12 files changed, 202 insertions(+), 178 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 760aaa719..0e498a231 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -12,6 +12,7 @@ import { RunnerContainerConfiguration, InstanceLimits, STHConfiguration, + SequenceInfo, } from "@scramjet/types"; import path from "path"; import { DockerodeDockerHelper } from "./dockerode-docker-helper"; @@ -168,7 +169,7 @@ IComponent { } // eslint-disable-next-line complexity - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string): Promise { + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { if (config.type !== "docker") { throw new Error("Docker instance adapter run with invalid runner config"); } @@ -191,7 +192,8 @@ IComponent { instancesServerPort, instancesServerHost: networkSetup.host, instanceId, - pipesPath: "" + pipesPath: "", + sequenceInfo }).map(([k, v]) => `${k}=${v}`); this.logger.debug("Runner will start with envs", envs); diff --git a/packages/adapters/src/get-runner-env.ts b/packages/adapters/src/get-runner-env.ts index e658ac99e..39074cf5f 100644 --- a/packages/adapters/src/get-runner-env.ts +++ b/packages/adapters/src/get-runner-env.ts @@ -9,7 +9,7 @@ import { RunnerEnvConfig, RunnerEnvironmentVariables } from "./types"; * @returns env vars */ export function getRunnerEnvVariables({ - sequencePath, instancesServerPort, instancesServerHost, instanceId, pipesPath, paths = "posix" + sequencePath, instancesServerPort, instancesServerHost, instanceId, pipesPath, paths = "posix", sequenceInfo }: RunnerEnvConfig, extra: Record = {}): 
RunnerEnvironmentVariables { const join = path[paths].join; @@ -23,6 +23,7 @@ export function getRunnerEnvVariables({ INSTANCE_ID: instanceId, PIPES_LOCATION: pipesPath, CRASH_LOG: join(pipesPath, "crash_log"), + SEQUENCE_INFO: JSON.stringify(sequenceInfo), ...extra }; } diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index e91ff7bde..47134c0e9 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -9,6 +9,7 @@ import { IObjectLogger, K8SAdapterConfiguration, MonitoringMessageData, + SequenceInfo, STHConfiguration, } from "@scramjet/types"; @@ -88,7 +89,7 @@ IComponent { }; } - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string): Promise { + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { if (config.type !== "kubernetes") { throw new Error(`Invalid config type for kubernetes adapter: ${config.type}`); } @@ -109,7 +110,8 @@ IComponent { instancesServerPort, instancesServerHost: this.adapterConfig.sthPodHost, instanceId, - pipesPath: "" + pipesPath: "", + sequenceInfo }).map(([name, value]) => ({ name, value })); const runnerImage = config.engines.python3 diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index fa317b584..39e19efe0 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -9,7 +9,8 @@ import { STHConfiguration, InstanceLimits, IObjectLogger, MonitoringMessageData, - SequenceConfig + SequenceConfig, + SequenceInfo } from "@scramjet/types"; import { ChildProcess, spawn } from "child_process"; @@ -115,7 +116,8 @@ class ProcessInstanceAdapter implements return pythonpath; } - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string): Promise { + async 
run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { + console.log("config type", config.type) if (config.type !== "process") { throw new Error("Process instance adapter run with invalid runner config"); } @@ -134,7 +136,8 @@ class ProcessInstanceAdapter implements instancesServerHost: "127.0.0.1", instancesServerPort, instanceId, - pipesPath: "" + pipesPath: "", + sequenceInfo }, { PYTHONPATH: this.getPythonpath(config.sequenceDir), }); @@ -148,6 +151,10 @@ class ProcessInstanceAdapter implements this.logger.trace("Runner process is running", runnerProcess.pid); + // @todo exit here with pid + // then promise waiting for process with given pid finish (endOfRun) + // how to connect to a process knowing id of it? + this.runnerProcess = runnerProcess; const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>( diff --git a/packages/adapters/src/types.ts b/packages/adapters/src/types.ts index b5ec64fbc..3c4ff4742 100644 --- a/packages/adapters/src/types.ts +++ b/packages/adapters/src/types.ts @@ -1,4 +1,4 @@ -import { ExitCode, InstanceId, IObjectLogger } from "@scramjet/types"; +import { ExitCode, InstanceId, IObjectLogger, SequenceInfo } from "@scramjet/types"; import { ContainerStats, NetworkInspectInfo } from "dockerode"; import { PathLike } from "fs"; import { Stream, Writable } from "stream"; @@ -314,6 +314,7 @@ export type RunnerEnvConfig = { instancesServerPort: number; instancesServerHost: string; instanceId: InstanceId; + sequenceInfo: SequenceInfo } export type RunnerEnvironmentVariables = Partial<{ diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index d932597c3..cda6ee818 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -10,7 +10,6 @@ import { ReadableStream, SequenceInfo, WritableStream, - InstanceConfig, ILifeCycleAdapterRun, MessageDataType, IObjectLogger, @@ -197,7 
+196,7 @@ export class CSIController extends TypedEmitter { this.logger = new ObjLogger(this, { id }); this.logger.debug("Constructor executed"); - this.info.created = new Date(); + this.status = InstanceStatus.INITIALIZING; this.upStreams = [ @@ -215,7 +214,7 @@ export class CSIController extends TypedEmitter { async start() { const i = new Promise((res, rej) => { this.initResolver = { res, rej }; - this.startInstance(); + //this.startInstance(); }); i.then(() => this.main()).catch(async (e) => { @@ -280,27 +279,13 @@ export class CSIController extends TypedEmitter { this._instanceAdapter.logger.pipe(this.logger, { end: false }); - const instanceConfig: InstanceConfig = { - ...this.sequence.config, - limits: this.limits, - instanceAdapterExitDelay: this.sthConfig.timings.instanceAdapterExitDelay - }; - // @todo this also is moved to CSIDispatcher in entirety const instanceMain = async () => { try { this.status = InstanceStatus.STARTING; - await this.instanceAdapter.init(); - this.logger.trace("Streams hooked and routed"); - this.endOfSequence = this.instanceAdapter.run( - instanceConfig, - this.sthConfig.host.instancesServerPort, - this.id - ); - this.logger.trace("Sequence initialized"); const exitcode = await this.endOfSequence; @@ -539,12 +524,14 @@ export class CSIController extends TypedEmitter { } this.info.ports = message[1].ports; + this.sequence = message[1].sequenceInfo; if (this.controlDataStream) { const pongMsg: HandshakeAcknowledgeMessage = { msgCode: RunnerMessageCode.PONG, appConfig: this.appConfig, - args: this.args + args: this.args, + //runtimeId:? 
}; await this.controlDataStream.whenWrote(MessageUtilities.serializeMessage(pongMsg)); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 313313e44..09f7c4a1e 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -1,5 +1,5 @@ import { ObjLogger } from "@scramjet/obj-logger"; -import { HostProxy, IObjectLogger, InstanceConfig, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; +import { HostProxy, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; import { SocketServer } from "./socket-server"; import { InstanceStore } from "./instance-store"; import { CSIController } from "./csi-controller"; @@ -7,30 +7,138 @@ import { CommunicationHandler, IDProvider } from "@scramjet/model"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { getInstanceAdapter } from "@scramjet/adapters"; import SequenceStore from "./sequenceStore"; +import { TypedEmitter } from "@scramjet/utility"; +import { RunnerMessageCode } from "@scramjet/symbols"; -export class CSIDispatcher { +type errorEventData = {id:string, err: any } +type endEventData = {id:string, code:number } +type Events = { + pang: (payload: MessageDataType) => void; + hourChime: () => void; + error: (data: errorEventData) => void; + stop: (code: number) => void; + end: (data: endEventData) => void; + terminated: (data: endEventData) => void; +}; + +export class CSIDispatcher extends TypedEmitter { public logger: IObjectLogger; //private socketServer: SocketServer; public instancesStore: typeof InstanceStore; - //private sequenceStore: Map; private STHConfig: STHConfiguration; constructor(_socketServer: SocketServer, instancesStore: typeof InstanceStore, _sequenceStore: SequenceStore, STHConfig: STHConfiguration) { + super(); this.logger = new ObjLogger(this); //this.socketServer = socketServer; this.instancesStore = 
instancesStore; - //this.sequenceStore = sequenceStore; this.STHConfig = STHConfig; } createCSIController(id: string, sequence: SequenceInfo, payload: StartSequencePayload, communicationHandler: CommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { + sequence.instances = sequence.instances || []; + const csiController = new CSIController(id, sequence, payload, communicationHandler, config, instanceProxy); csiController.logger.pipe(this.logger); this.logger.trace("CSIController created", id); - this.instancesStore[id] = csiController; + csiController.logger.pipe(this.logger, { end: false }); + communicationHandler.logger.pipe(this.logger, { end: false }); + + csiController.on("error", (err) => { + //this.pushTelemetry("Instance error", { ...err }, "error"); + this.logger.error("CSIController errored", err.message, err.exitcode); + this.emit("error", { id, err }); + }); + + csiController.on("pang", async (data) => { + this.logger.trace("PANG received", data); + + if (data.requires && !csiController.inputRouted) { + this.logger.trace("Routing Sequence input to topic", data.requires); + + // await this.serviceDiscovery.routeTopicToStream( + // { topic: data.requires, contentType: data.contentType! }, + // csiController.getInputStream() + // ); + + csiController.inputRouted = true; + + // await this.serviceDiscovery.update({ + // requires: data.requires, contentType: data.contentType!, topicName: data.requires + // }); + } + + if (data.provides && !csiController.outputRouted) { + this.logger.trace("Routing Sequence output to topic", data.provides); + // await this.serviceDiscovery.routeStreamToTopic( + // csiController.getOutputStream(), + // { topic: data.provides, contentType: data.contentType! 
}, + // csiController.id + // ); + + csiController.outputRouted = true; + + // await this.serviceDiscovery.update({ + // provides: data.provides, contentType: data.contentType!, topicName: data.provides + // }); + } + }); + + csiController.on("end", async (code) => { + this.logger.trace("csiControllerontrolled ended", `Exit code: ${code}`); + // if (csiController.provides && csiController.provides !== "") { + // csiController.getOutputStream()!.unpipe(this.serviceDiscovery.getData( + // { + // topic: csiController.provides, + // contentType: "" + // } + // ) as Writable); + // } + + csiController.logger.unpipe(this.logger); + + delete InstanceStore[csiController.id]; + + sequence.instances.filter(a => a !== id); + + // await this.cpmConnector?.sendInstanceInfo({ + // id: csiController.id, + // sequence: sequence.id + // }, InstanceMessageCode.INSTANCE_ENDED); + + // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); + this.emit("end", { id, code }); + }); + + csiController.once("terminated", (code) => { + // if (csiController.requires && csiController.requires !== "") { + // (this.serviceDiscovery.getData({ + // topic: csiController.requires, + // contentType: "", + // }) as Readable + // ).unpipe(csiController.getInputStream()!); + // } + + // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); + // this.pushTelemetry("Instance ended", { + // executionTime: csiController.info.ended && csiController.info.started ? 
((csiController.info.ended?.getTime() - csiController.info.started.getTime()) / 1000).toString() : "-1", + // id: csiController.id, + // code: code.toString(), + // seqId: csiController.sequence.id + // }); + this.emit("terminated", { id, code }); + }); + + csiController.start().then(() => {}, () => {}); + + this.logger.trace("csiController started", id); + + sequence.instances.push(id); + + this.instancesStore[id] = csiController; return csiController; } @@ -53,9 +161,21 @@ export class CSIDispatcher { await instanceAdapter.run( instanceConfig, this.STHConfig.host.instancesServerPort, - id + id, + sequence, + //payload ); + // @todo more instance info + return { + id, + appConfig: payload.appConfig, + args: payload.args, + sequenceId: sequence.id, + info: { - return { id }; + }, + limits, + sequence + }; } } diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 334bfad96..f1a84ae45 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -24,14 +24,13 @@ import { STHRestAPI, } from "@scramjet/types"; import { CommunicationHandler, HostError, IDProvider } from "@scramjet/model"; -import { HostHeaders, InstanceMessageCode, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; +import { HostHeaders, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { ObjLogger, prettyPrint } from "@scramjet/obj-logger"; import { LoadCheck, LoadCheckConfig } from "@scramjet/load-check"; import { getSequenceAdapter, initializeRuntimeAdapters } from "@scramjet/adapters"; import { CPMConnector } from "./cpm-connector"; -import { CSIController } from "./csi-controller"; import { CommonLogsPipe } from "./common-logs-pipe"; import { InstanceStore } from "./instance-store"; @@ -50,9 +49,8 @@ import { cpus, totalmem } from "os"; import { S3Client } from "./s3-client"; import { DuplexStream } from "@scramjet/api-server"; import { readFileSync } from "fs"; -import TopicId from "./serviceDiscovery/topicId"; 
+ import TopicRouter from "./serviceDiscovery/topicRouter"; -import { ContentType } from "./serviceDiscovery/contentType"; import SequenceStore from "./sequenceStore"; import { GetSequenceResponse } from "@scramjet/types/src/rest-api-sth"; import { loadModule, logger as loadModuleLogger } from "@scramjet/module-loader"; @@ -244,6 +242,8 @@ export class Host implements IComponent { this.csiDispatcher.logger.pipe(this.logger); + this.attachDispatcherEvents(); + if (this.config.host.apiBase.includes(":")) { throw new HostError("API_CONFIGURATION_ERROR", "Can't expose an API on paths including a semicolon..."); } @@ -271,6 +271,10 @@ export class Host implements IComponent { return monitoringServer.start(); } + attachDispatcherEvents() { + //this.csiDispatcher.on(); + } + getId() { let id = this.config.host.id; @@ -447,7 +451,7 @@ export class Host implements IComponent { return; } // @todo dispatcher - await this.startCSIController(sequence, { + await this.csiDispatcher.startRunner(sequence, { appConfig: seqenceConfig.appConfig || {}, args: seqenceConfig.args, instanceId: seqenceConfig.instanceId @@ -948,23 +952,23 @@ export class Host implements IComponent { this.logger.info("Start sequence", sequence.id, sequence.config.name); try { - // @todo - this line should be done by CSIDispatcher const runner = await this.csiDispatcher.startRunner(sequence, payload); + // @todo more info // await this.cpmConnector?.sendInstanceInfo({ - // id: csic.id, - // appConfig: csic.appConfig, - // args: csic.args, - // sequence: sequenceId, - // ports: csic.info.ports, - // created: csic.info.created, - // started: csic.info.started, - // status: csic.status, + // id: runner.id, + // appConfig: payload.appConfig, + // args: payload.args, + // sequence: sequence.id, + // // ports: runner.info.ports + // // created: csic.info.created, + // // started: csic.info.started, + // // status: csic.status, // }, InstanceMessageCode.INSTANCE_STARTED); - // this.logger.debug("Instance limits", 
csic.limits); - // this.auditor.auditInstanceStart(csic.id, req as AuditedRequest, csic.limits); - // this.pushTelemetry("Instance started", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); + //this.logger.debug("Instance limits", runner.limits); + //this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); + //this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); // csic.on("hourChime", () => { // this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); @@ -992,130 +996,25 @@ export class Host implements IComponent { this.socketServer.on("connect", async (id, streams) => { this.logger.debug("Instance connected", id); - // @todo this should be a call to CSIDispatcher - // @todo CSIDispatcher should receive a reference to instanceStore. + // @todo need more instance info if (!this.instancesStore[id]) { - this.csiDispatcher.createCSIController(id, {} as SequenceInfo, {} as STHRestAPI.StartSequencePayload, new CommunicationHandler(), this.config, this.instanceProxy); + this.logger.info("creating new CSIController for runner connecting"); + this.csiDispatcher.createCSIController( + id, + {} as SequenceInfo, + {} as STHRestAPI.StartSequencePayload, + new CommunicationHandler(), + this.config, + this.instanceProxy + ); } + await this.instancesStore[id].handleInstanceConnect( streams ); }); } - /** - * Creates new CSIController {@link CSIController} object and handles its events. - * - * @param {SequenceInfo} sequence Sequence info object. - * @param {STHRestAPI.StartSequencePayload} payload App start configuration. 
- * @todo This should be started by onConnect from `this..attachListeners` - * @todo Move this to CSI Dispatcher - */ - async startCSIController(sequence: SequenceInfo, payload: STHRestAPI.StartSequencePayload): Promise { - const communicationHandler = new CommunicationHandler(); - const id = payload.instanceId || IDProvider.generate(); - - if (isDevelopment) this.logger.debug("CSIC start payload", payload); - - const csic = this.csiDispatcher.createCSIController(id, sequence, payload, communicationHandler, this.config, this.instanceProxy); - - csic.logger.pipe(this.logger, { end: false }); - communicationHandler.logger.pipe(this.logger, { end: false }); - - csic.on("error", (err) => { - this.pushTelemetry("Instance error", { ...err }, "error"); - this.logger.error("CSIController errored", err.message, err.exitcode); - }); - - // eslint-disable-next-line complexity - csic.on("pang", async (data) => { - this.logger.trace("PANG received", data); - - if ((data.requires || data.provides) && !data.contentType) { - this.logger.warn("Missing topic content-type"); - } - - if (data.requires && !csic.inputRouted && data.contentType) { - this.logger.trace("Routing Sequence input to topic", data.requires); - - await this.serviceDiscovery.routeTopicToStream( - { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, - csic.getInputStream() - ); - - csic.inputRouted = true; - - await this.serviceDiscovery.update({ - requires: data.requires, contentType: data.contentType!, topicName: data.requires - }); - } - - if (data.provides && !csic.outputRouted && data.contentType) { - this.logger.trace("Routing Sequence output to topic", data.provides); - await this.serviceDiscovery.routeStreamToTopic( - csic.getOutputStream(), - { topic: new TopicId(data.provides), contentType: data.contentType as ContentType }, - // csic.id - ); - - csic.outputRouted = true; - - await this.serviceDiscovery.update({ - provides: data.provides, contentType: data.contentType!, 
topicName: data.provides - }); - } - }); - - csic.on("end", async (code) => { - this.logger.trace("CSIController ended", `Exit code: ${code}`); - - if (csic.provides && csic.provides !== "") { - const topic = this.serviceDiscovery.getTopic(new TopicId(csic.provides)); - - if (topic) csic.getOutputStream()!.unpipe(topic); - } - - csic.logger.unpipe(this.logger); - - delete InstanceStore[csic.id]; - - sequence.instances = sequence.instances.filter(item => { - return item !== id; - }); - - await this.cpmConnector?.sendInstanceInfo({ - id: csic.id, - sequence: sequence - }, InstanceMessageCode.INSTANCE_ENDED); - - this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); - }); - - csic.once("terminated", (code) => { - if (csic.requires && csic.requires !== "") { - const topic = this.serviceDiscovery.getTopic(new TopicId(csic.requires)); - - if (topic) topic.unpipe(csic.getInputStream()); - } - - this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); - this.pushTelemetry("Instance ended", { - executionTime: csic.info.ended && csic.info.started ? ((csic.info.ended?.getTime() - csic.info.started.getTime()) / 1000).toString() : "-1", - id: csic.id, - code: code.toString(), - seqId: csic.sequence.id - }); - }); - - await csic.start(); - - this.logger.trace("CSIController started", id); - - sequence.instances.push(id); - - return csic; - } - /** * Returns list of all Sequences. 
* diff --git a/packages/runner/src/bin/start-runner.ts b/packages/runner/src/bin/start-runner.ts index 271fdc058..ff7e54bda 100755 --- a/packages/runner/src/bin/start-runner.ts +++ b/packages/runner/src/bin/start-runner.ts @@ -2,7 +2,7 @@ import { Runner } from "../runner"; import fs from "fs"; -import { AppConfig } from "@scramjet/types"; +import { AppConfig, SequenceInfo } from "@scramjet/types"; import { HostClient } from "../host-client"; import { RunnerExitCode } from "@scramjet/symbols"; @@ -10,13 +10,13 @@ const sequencePath: string = process.env.SEQUENCE_PATH?.replace(/.js$/, "") + ". const instancesServerPort = process.env.INSTANCES_SERVER_PORT; const instancesServerHost = process.env.INSTANCES_SERVER_HOST; const instanceId = process.env.INSTANCE_ID; -const connectInfoJSON = process.env.INSTANCE_CONNECT_JSON; +const sequenceInfo = process.env.SEQUENCE_INFO; -let connectInfo; +let connectInfo: SequenceInfo; try { - if (!connectInfoJSON) throw new Error("Connection JSON is required."); - connectInfo = JSON.parse(connectInfoJSON); + if (!sequenceInfo) throw new Error("Connection JSON is required."); + connectInfo = JSON.parse(sequenceInfo); } catch { console.error("Error while parsing connection information."); process.exit(RunnerExitCode.INVALID_ENV_VARS); @@ -54,7 +54,7 @@ const hostClient = new HostClient(+instancesServerPort, instancesServerHost); * @param fifosPath - fifo files path */ -const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo, connectInfoJSON); +const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo); runner.main() .catch(e => { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 2c9d1bb2b..b892c53df 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -15,7 +15,8 @@ import { SynchronousStreamable, HasTopicInformation, IObjectLogger, - HostClient + HostClient, + SequenceInfo } from "@scramjet/types"; import { RunnerError } 
from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; @@ -120,12 +121,15 @@ export class Runner implements IComponent { private inputDataStream: DataStream; private outputDataStream: DataStream; + private sequenceInfo: SequenceInfo constructor( private sequencePath: string, private hostClient: IHostClient, - private instanceId: string + private instanceId: string, + sequenceInfo: SequenceInfo ) { + this.sequenceInfo = sequenceInfo; this.emitter = new EventEmitter(); this.logger = new ObjLogger(this, { id: instanceId }); @@ -456,7 +460,7 @@ export class Runner implements IComponent { sendHandshakeMessage() { // TODO: send connection info - MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, {}], this.hostClient.monitorStream); + MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, {sequenceInfo: this.sequenceInfo}], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); } diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 3db5800fd..245ab08ed 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -3,6 +3,7 @@ import { MaybePromise } from "./utils"; import { InstanceConfig } from "./runner-config"; import { IObjectLogger } from "./object-logger"; import { InstanceLimits } from "./instance-limits"; +import { SequenceInfo } from "./sequence-adapter"; export type ExitCode = number; @@ -36,7 +37,7 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. 
*/ - run(config: InstanceConfig, instancesServerPort: number, instanceId: string): Promise; + run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; monitorRate(rps: number): this; diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 70be89e35..85fa94dbe 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -13,7 +13,7 @@ export type HandshakeMessage = { payload: StartSequencePayload }; -export type PingMessageData = { ports?: Record } +export type PingMessageData = { ports?: Record; sequenceInfo: SequenceInfo } export type PangMessageData = { requires?: string, From 799ecb784ca29895f9190ec21d190f7ca763de76 Mon Sep 17 00:00:00 2001 From: Budleigh Salterton Date: Thu, 13 Jul 2023 11:50:30 +0000 Subject: [PATCH 06/62] InstanceAdapter..dispatch method --- .editorconfig | 2 +- .eslintrc | 4 ++-- packages/host/src/lib/csi-controller.ts | 2 ++ packages/host/src/lib/csi-dispatcher.ts | 6 ++---- packages/host/src/lib/host.ts | 1 + packages/types/src/lifecycle-adapters.ts | 20 +++++++++++++++----- 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.editorconfig b/.editorconfig index d70c2286e..dc8b8f095 100644 --- a/.editorconfig +++ b/.editorconfig @@ -5,7 +5,7 @@ indent_style = space indent_size = 4 trim_trailing_whitespace = true insert_final_newline = true -max_line_length = 120 +max_line_length = 180 end_of_line = lf charset = utf-8 diff --git a/.eslintrc b/.eslintrc index 3563e60e0..ff90a7755 100644 --- a/.eslintrc +++ b/.eslintrc @@ -209,9 +209,9 @@ "max-len": [ "warn", { - "code": 120, + "code": 180, "tabWidth": 4, - "comments": 120, + "comments": 180, "ignoreComments": false, "ignoreTrailingComments": true, "ignoreUrls": true, diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index cda6ee818..26559f0b9 100644 --- 
a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -526,6 +526,8 @@ export class CSIController extends TypedEmitter { this.info.ports = message[1].ports; this.sequence = message[1].sequenceInfo; + // TODO: add message to initiate the instance adapter + if (this.controlDataStream) { const pongMsg: HandshakeAcknowledgeMessage = { msgCode: RunnerMessageCode.PONG, diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 09f7c4a1e..9f0501c39 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -149,16 +149,14 @@ export class CSIDispatcher extends TypedEmitter { const id = IDProvider.generate(); const instanceAdapter = getInstanceAdapter(this.STHConfig.runtimeAdapter, this.STHConfig, id); - - await instanceAdapter.init(); - const instanceConfig: InstanceConfig = { ...sequence.config, limits: limits, instanceAdapterExitDelay: this.STHConfig.timings.instanceAdapterExitDelay }; - await instanceAdapter.run( + await instanceAdapter.init(); + await instanceAdapter.dispatch( instanceConfig, this.STHConfig.host.instancesServerPort, id, diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index f1a84ae45..0450e6146 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -953,6 +953,7 @@ export class Host implements IComponent { try { const runner = await this.csiDispatcher.startRunner(sequence, payload); + // @todo more info // await this.cpmConnector?.sendInstanceInfo({ diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 245ab08ed..d094dece3 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -24,12 +24,26 @@ export interface ILifeCycleAdapterMain { // TODO: THIS is forceful removal - let's think about refactor. 
remove(): MaybePromise; - getCrashLog(): Promise + monitorRate(rps: number): this; + + stats(msg: MonitoringMessageData): Promise; + + getCrashLog(): Promise; + + waitUntilExit(config: InstanceConfig, instanceId: string, sequenceInfo: SequenceInfo): Promise; + } // @TODO create ISequenceAdapter interface export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { limits: InstanceLimits; + /** + * Initiates runner start + * + * @param {InstanceConfig} Runner configuration. + * @returns {ExitCode} Runner exit code. + */ + dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; /** * Starts Runner. @@ -38,10 +52,6 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { * @returns {ExitCode} Runner exit code. */ run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; - - monitorRate(rps: number): this; - - stats(msg: MonitoringMessageData): Promise; } export type LifeCycleError = any | (Error & { exitCode?: number, errorMessage?: string }); From a532cd31e98a4bca26a3151ceee91ff3cda7dc37 Mon Sep 17 00:00:00 2001 From: Budleigh Salterton Date: Thu, 13 Jul 2023 14:55:27 +0000 Subject: [PATCH 07/62] Refactoring runner connect start --- .../adapters/src/docker-instance-adapter.ts | 13 +++++++-- .../adapters/src/dockerode-docker-helper.ts | 6 ++++ packages/adapters/src/types.ts | 8 +++++ packages/host/src/lib/csi-controller.ts | 29 ++++++------------- packages/host/src/lib/csi-dispatcher.ts | 1 - packages/host/src/lib/socket-server.ts | 2 +- packages/runner/src/runner.ts | 4 +-- packages/types/src/communication-handler.ts | 3 ++ packages/types/src/instance.ts | 4 +++ packages/types/src/lifecycle-adapters.ts | 8 ++--- packages/types/src/messages/handshake.ts | 2 +- .../types/src/rest-api-sth/start-sequence.ts | 12 ++------ packages/types/src/runner-connect.ts | 11 +++++++ 13 files changed, 62 insertions(+), 41 deletions(-) create 
mode 100644 packages/types/src/runner-connect.ts diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 0e498a231..95d012da0 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -168,8 +168,13 @@ IComponent { }; } - // eslint-disable-next-line complexity async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { + await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo); + return this.waitUntilExit(config, instanceId, sequenceInfo); + } + + // eslint-disable-next-line complexity + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { if (config.type !== "docker") { throw new Error("Docker instance adapter run with invalid runner config"); } @@ -205,7 +210,8 @@ IComponent { { mountPoint: config.sequenceDir, volume: config.id, writeable: false } ], labels: { - "scramjet.sequence.name": config.name + "scramjet.sequence.name": config.name, + "scramjet.instance.id": instanceId }, ports: this.resources.ports, publishAllPorts: true, @@ -220,8 +226,11 @@ IComponent { this.resources.containerId = containerId; this.logger.trace("Container is running", containerId); + } + async waitUntilExit(instanceId: string): Promise { try { + const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); const { statusCode } = await this.dockerHelper.wait(containerId); this.logger.debug("Container exited", statusCode); diff --git a/packages/adapters/src/dockerode-docker-helper.ts b/packages/adapters/src/dockerode-docker-helper.ts index d87351d8f..46523033e 100644 --- a/packages/adapters/src/dockerode-docker-helper.ts +++ b/packages/adapters/src/dockerode-docker-helper.ts @@ -139,6 +139,12 @@ export class DockerodeDockerHelper implements IDockerHelper { return id; } + 
async getContainerIdByLabel(label: string, value: string): Promise { + const result = await this.dockerode.listContainers({ label: `${label}=${value}` }); + + return result[0]!.Id; + } + /** * Start container with provided id. * diff --git a/packages/adapters/src/types.ts b/packages/adapters/src/types.ts index 3c4ff4742..13b5b3cb1 100644 --- a/packages/adapters/src/types.ts +++ b/packages/adapters/src/types.ts @@ -187,6 +187,14 @@ export interface IDockerHelper { */ translateVolumesConfig: (volumeConfigs: DockerAdapterVolumeConfig[]) => any; + /** + * Gets first found container by a given label + * + * @param {string} label the label + * @param {string} value label value. + */ + getContainerIdByLabel(label: string, value: string): Promise; + /** * Creates Docker container from provided image with attached volumes and local directories. * diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 26559f0b9..21a32ebe1 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -26,7 +26,6 @@ import { import { AppError, CSIControllerError, - CommunicationHandler, HostError, MessageUtilities, InstanceAdapterError, @@ -76,7 +75,6 @@ export class CSIController extends TypedEmitter { private keepAliveRequested?: boolean; private _lastStats?: MonitoringMessageData; private bpmux: any; - private adapter: string; get lastStats(): InstanceStats { return { @@ -88,8 +86,6 @@ export class CSIController extends TypedEmitter { } }; } - hostProxy: HostProxy; - sthConfig: STHConfiguration; limits: InstanceLimits = {}; sequence: SequenceInfo; appConfig: AppConfig; @@ -164,36 +160,28 @@ export class CSIController extends TypedEmitter { private downStreams?: DownstreamStreamsConfig; private upStreams: PassThroughStreamsConfig; - communicationHandler: ICommunicationHandler; - constructor( - id: string, - sequence: SequenceInfo, - payload: STHRestAPI.StartSequencePayload, - communicationHandler: 
CommunicationHandler, - sthConfig: STHConfiguration, - hostProxy: HostProxy, - chosenAdapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter + private handshakeMessage: MessageDataType, + public communicationHandler: ICommunicationHandler, + private sthConfig: STHConfiguration, + private hostProxy: HostProxy, + private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter ) { super(); - this.id = id; - this.adapter = chosenAdapter; - this.sequence = sequence; + this.id = this.handshakeMessage.id; + this.sequence = this.handshakeMessage.sequenceInfo; this.appConfig = payload.appConfig; - this.sthConfig = sthConfig; this.args = payload.args; this.outputTopic = payload.outputTopic; this.inputTopic = payload.inputTopic; - this.hostProxy = hostProxy; this.limits = { memory: payload.limits?.memory || sthConfig.docker.runner.maxMem }; this.instanceLifetimeExtensionDelay = +sthConfig.timings.instanceLifetimeExtensionDelay; - this.communicationHandler = communicationHandler; - this.logger = new ObjLogger(this, { id }); + this.logger = new ObjLogger(this, { id: this.id }); this.logger.debug("Constructor executed"); @@ -516,6 +504,7 @@ export class CSIController extends TypedEmitter { this.upStreams[CC.MONITORING].resume(); } + // TODO: refactor out of CSI Controller - this should be in async handleHandshake(message: EncodedMessage) { this.logger.debug("PING received", message); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 9f0501c39..b4b0c1c9b 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -170,7 +170,6 @@ export class CSIDispatcher extends TypedEmitter { args: payload.args, sequenceId: sequence.id, info: { - }, limits, sequence diff --git a/packages/host/src/lib/socket-server.ts b/packages/host/src/lib/socket-server.ts index ca49c9fb3..b17017180 100644 --- a/packages/host/src/lib/socket-server.ts +++ 
b/packages/host/src/lib/socket-server.ts @@ -46,7 +46,7 @@ export class SocketServer extends TypedEmitter implements IComponent { const id = await new Promise((resolve) => { const immediateData = connection.read(36); - + if (!immediateData) { connection.once("readable", () => { resolve(connection.read(36).toString()); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index b892c53df..ddcaab303 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -121,7 +121,7 @@ export class Runner implements IComponent { private inputDataStream: DataStream; private outputDataStream: DataStream; - private sequenceInfo: SequenceInfo + private sequenceInfo: SequenceInfo; constructor( private sequencePath: string, @@ -460,7 +460,7 @@ export class Runner implements IComponent { sendHandshakeMessage() { // TODO: send connection info - MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, {sequenceInfo: this.sequenceInfo}], this.hostClient.monitorStream); + MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, { id: this.instanceId, sequenceInfo: this.sequenceInfo }], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); } diff --git a/packages/types/src/communication-handler.ts b/packages/types/src/communication-handler.ts index f1e844f5a..eae6f8007 100644 --- a/packages/types/src/communication-handler.ts +++ b/packages/types/src/communication-handler.ts @@ -7,6 +7,7 @@ import { MonitoringMessageCode, UpstreamStreamsConfig } from "./message-streams"; import { MaybePromise } from "./utils"; +import { InstanceConnectionInfo } from "./instance"; export type MonitoringMessageHandler = (msg: EncodedMessage) => void; @@ -18,6 +19,8 @@ export type ControlMessageHandler = export interface ICommunicationHandler { logger: IObjectLogger; + waitForHandshake(): Promise; + hookUpstreamStreams(str: UpstreamStreamsConfig): this; hookDownstreamStreams(str: DownstreamStreamsConfig): this; diff --git 
a/packages/types/src/instance.ts b/packages/types/src/instance.ts index 7008e7be0..abef13195 100644 --- a/packages/types/src/instance.ts +++ b/packages/types/src/instance.ts @@ -11,3 +11,7 @@ export const enum InstanceStatus { COMPLETED ="completed", ERRORED = "errored", } + +export type InstanceConnectionInfo = { + +} diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index d094dece3..a17f57240 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -30,15 +30,15 @@ export interface ILifeCycleAdapterMain { getCrashLog(): Promise; - waitUntilExit(config: InstanceConfig, instanceId: string, sequenceInfo: SequenceInfo): Promise; - + waitUntilExit(instanceId: string): Promise; } // @TODO create ISequenceAdapter interface export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { limits: InstanceLimits; + /** - * Initiates runner start + * Initiates runner start without waiting for the result * * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. @@ -46,7 +46,7 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; /** - * Starts Runner. + * Starts Runner - in essence does `dispatch` and then `waitUntilExit`. * * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. 
diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 85fa94dbe..501738967 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -13,7 +13,7 @@ export type HandshakeMessage = { payload: StartSequencePayload }; -export type PingMessageData = { ports?: Record; sequenceInfo: SequenceInfo } +export type PingMessageData = { id: string, ports?: Record; sequenceInfo: SequenceInfo, payload: StartSequencePayload } export type PangMessageData = { requires?: string, diff --git a/packages/types/src/rest-api-sth/start-sequence.ts b/packages/types/src/rest-api-sth/start-sequence.ts index 9711d338e..66898c0eb 100644 --- a/packages/types/src/rest-api-sth/start-sequence.ts +++ b/packages/types/src/rest-api-sth/start-sequence.ts @@ -1,13 +1,5 @@ -import { AppConfig } from "../app-config"; -import { InstanceLimits } from "../instance-limits"; +import { RunnerConnectInfo } from "../runner-connect"; export type StartSequenceResponse = { id: string } -export type StartSequencePayload = { - appConfig: AppConfig, - args?: any[], - outputTopic?: string, - inputTopic?: string, - limits?: InstanceLimits, - instanceId?: string -} +export type StartSequencePayload = RunnerConnectInfo; diff --git a/packages/types/src/runner-connect.ts b/packages/types/src/runner-connect.ts new file mode 100644 index 000000000..f848eb56c --- /dev/null +++ b/packages/types/src/runner-connect.ts @@ -0,0 +1,11 @@ +import { AppConfig } from "./app-config"; +import { InstanceLimits } from "./instance-limits"; + +export type RunnerConnectInfo = { + appConfig: AppConfig, + args?: any[], + outputTopic?: string, + inputTopic?: string, + limits?: InstanceLimits, + instanceId?: string +} From 21118b9999a21b5a5517ae6ea247412ec01e0182 Mon Sep 17 00:00:00 2001 From: Piotr Date: Wed, 2 Aug 2023 08:02:57 +0000 Subject: [PATCH 08/62] pushing payload to runner/ docker run&disptach --- .../adapters/src/docker-instance-adapter.ts | 12 
++++++----- packages/adapters/src/types.ts | 2 ++ packages/host/src/lib/csi-controller.ts | 20 +++++++++---------- packages/host/src/lib/csi-dispatcher.ts | 19 +++++++++++------- packages/host/src/lib/host.ts | 12 +++++++++++ packages/types/src/lifecycle-adapters.ts | 7 ++++--- packages/types/src/messages/handshake.ts | 2 +- 7 files changed, 48 insertions(+), 26 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 95d012da0..994b2c37c 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -22,6 +22,7 @@ import { STH_DOCKER_NETWORK, isHostSpawnedInDockerContainer, getHostname } from import { ObjLogger } from "@scramjet/obj-logger"; import { getRunnerEnvEntries } from "./get-runner-env"; import { Readable } from "stream"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; /** * Adapter for running Instance by Runner executed in Docker container. 
@@ -168,13 +169,13 @@ IComponent { }; } - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { - await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo); + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo, payload); return this.waitUntilExit(config, instanceId, sequenceInfo); } // eslint-disable-next-line complexity - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { if (config.type !== "docker") { throw new Error("Docker instance adapter run with invalid runner config"); } @@ -198,7 +199,8 @@ IComponent { instancesServerHost: networkSetup.host, instanceId, pipesPath: "", - sequenceInfo + sequenceInfo, + payload }).map(([k, v]) => `${k}=${v}`); this.logger.debug("Runner will start with envs", envs); @@ -228,7 +230,7 @@ IComponent { this.logger.trace("Container is running", containerId); } - async waitUntilExit(instanceId: string): Promise { + async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { try { const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); const { statusCode } = await this.dockerHelper.wait(containerId); diff --git a/packages/adapters/src/types.ts b/packages/adapters/src/types.ts index 13b5b3cb1..d5a392c75 100644 --- a/packages/adapters/src/types.ts +++ b/packages/adapters/src/types.ts @@ -1,4 +1,5 @@ import { ExitCode, InstanceId, IObjectLogger, SequenceInfo } from "@scramjet/types"; +import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import 
{ ContainerStats, NetworkInspectInfo } from "dockerode"; import { PathLike } from "fs"; import { Stream, Writable } from "stream"; @@ -323,6 +324,7 @@ export type RunnerEnvConfig = { instancesServerHost: string; instanceId: InstanceId; sequenceInfo: SequenceInfo + payload?: StartSequencePayload } export type RunnerEnvironmentVariables = Partial<{ diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 21a32ebe1..0e089fd94 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -4,7 +4,6 @@ import { DownstreamStreamsConfig, EncodedMessage, HandshakeAcknowledgeMessage, - ICommunicationHandler, ParsedMessage, PassThroughStreamsConfig, ReadableStream, @@ -22,13 +21,14 @@ import { OpResponse, StopSequenceMessageData, HostProxy, + ICommunicationHandler, } from "@scramjet/types"; import { AppError, CSIControllerError, HostError, MessageUtilities, - InstanceAdapterError, + InstanceAdapterError } from "@scramjet/model"; import { CommunicationChannel as CC, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { Duplex, PassThrough, Readable } from "stream"; @@ -165,18 +165,18 @@ export class CSIController extends TypedEmitter { public communicationHandler: ICommunicationHandler, private sthConfig: STHConfiguration, private hostProxy: HostProxy, - private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter + private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter, ) { super(); this.id = this.handshakeMessage.id; - this.sequence = this.handshakeMessage.sequenceInfo; - this.appConfig = payload.appConfig; - this.args = payload.args; - this.outputTopic = payload.outputTopic; - this.inputTopic = payload.inputTopic; + this.sequence = this.handshakeMessage.sequence; + this.appConfig = this.handshakeMessage.payload.appConfig; + this.args = this.handshakeMessage.payload.args; + this.outputTopic = this.handshakeMessage.payload.outputTopic; 
+ this.inputTopic = this.handshakeMessage.payload.inputTopic; this.limits = { - memory: payload.limits?.memory || sthConfig.docker.runner.maxMem + memory: handshakeMessage.payload.limits?.memory || sthConfig.docker.runner.maxMem }; this.instanceLifetimeExtensionDelay = +sthConfig.timings.instanceLifetimeExtensionDelay; @@ -513,7 +513,7 @@ export class CSIController extends TypedEmitter { } this.info.ports = message[1].ports; - this.sequence = message[1].sequenceInfo; + this.sequence = message[1].sequence; // TODO: add message to initiate the instance adapter diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index b4b0c1c9b..3d576ab71 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -1,9 +1,9 @@ import { ObjLogger } from "@scramjet/obj-logger"; -import { HostProxy, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; +import { HostProxy, ICommunicationHandler, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; import { SocketServer } from "./socket-server"; import { InstanceStore } from "./instance-store"; import { CSIController } from "./csi-controller"; -import { CommunicationHandler, IDProvider } from "@scramjet/model"; +import { IDProvider } from "@scramjet/model"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { getInstanceAdapter } from "@scramjet/adapters"; import SequenceStore from "./sequenceStore"; @@ -35,10 +35,15 @@ export class CSIDispatcher extends TypedEmitter { this.STHConfig = STHConfig; } - createCSIController(id: string, sequence: SequenceInfo, payload: StartSequencePayload, communicationHandler: CommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { - sequence.instances = sequence.instances || []; - - const csiController = new CSIController(id, sequence, payload, 
communicationHandler, config, instanceProxy); + async createCSIController( + id: string, + sequence: SequenceInfo, + payload: StartSequencePayload, + communicationHandler: ICommunicationHandler, + config: STHConfiguration, + instanceProxy: HostProxy) { + sequence.instances = sequence.instances || new Set(); + const csiController = new CSIController({ id, sequence, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); csiController.logger.pipe(this.logger); this.logger.trace("CSIController created", id); @@ -161,7 +166,7 @@ export class CSIDispatcher extends TypedEmitter { this.STHConfig.host.instancesServerPort, id, sequence, - //payload + payload ); // @todo more instance info return { diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 0450e6146..38a2fa835 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1000,6 +1000,7 @@ export class Host implements IComponent { // @todo need more instance info if (!this.instancesStore[id]) { this.logger.info("creating new CSIController for runner connecting"); +<<<<<<< HEAD this.csiDispatcher.createCSIController( id, {} as SequenceInfo, @@ -1008,6 +1009,17 @@ export class Host implements IComponent { this.config, this.instanceProxy ); +||||||| constructed merge base + this.csiDispatcher.createCSIController(id, {} as SequenceInfo, {} as STHRestAPI.StartSequencePayload, new CommunicationHandler(), this.config, this.instanceProxy); +======= + await this.csiDispatcher.createCSIController( + id, + {} as SequenceInfo, + {} as STHRestAPI.StartSequencePayload, + new CommunicationHandler(), + this.config, + this.instanceProxy); +>>>>>>> pushing payload to runner/ docker run&disptach } await this.instancesStore[id].handleInstanceConnect( diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index a17f57240..6dc23e06e 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ 
b/packages/types/src/lifecycle-adapters.ts @@ -4,6 +4,7 @@ import { InstanceConfig } from "./runner-config"; import { IObjectLogger } from "./object-logger"; import { InstanceLimits } from "./instance-limits"; import { SequenceInfo } from "./sequence-adapter"; +import { RunnerConnectInfo } from "./runner-connect"; export type ExitCode = number; @@ -30,7 +31,7 @@ export interface ILifeCycleAdapterMain { getCrashLog(): Promise; - waitUntilExit(instanceId: string): Promise; + waitUntilExit(config: InstanceConfig, instanceId: string, sequenceInfo: SequenceInfo): Promise; } // @TODO create ISequenceAdapter interface @@ -43,7 +44,7 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. */ - dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; + dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise; /** * Starts Runner - in essence does `dispatch` and then `waitUntilExit`. @@ -51,7 +52,7 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. 
*/ - run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise; + run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise; } export type LifeCycleError = any | (Error & { exitCode?: number, errorMessage?: string }); diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 501738967..b3f65380d 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -13,7 +13,7 @@ export type HandshakeMessage = { payload: StartSequencePayload }; -export type PingMessageData = { id: string, ports?: Record; sequenceInfo: SequenceInfo, payload: StartSequencePayload } +export type PingMessageData = { id: string, ports?: Record; sequence: SequenceInfo, payload: StartSequencePayload } export type PangMessageData = { requires?: string, From d70edba43e6510b09f60b2a2d59d13a7b68009b7 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 12 Sep 2023 14:43:14 +0000 Subject: [PATCH 09/62] Reconnect. 
Fix starting instance --- packages/adapters/src/get-runner-env.ts | 3 ++- .../src/kubernetes-instance-adapter.ts | 9 ++++++++ .../adapters/src/process-instance-adapter.ts | 10 ++++++++- packages/host/src/lib/csi-controller.ts | 14 +++++++------ packages/host/src/lib/csi-dispatcher.ts | 14 +++++++------ packages/host/src/lib/host.ts | 13 ------------ packages/model/src/stream-handler.ts | 9 ++++++++ packages/runner/src/bin/start-runner.ts | 14 ++++++++++++- packages/runner/src/runner.ts | 21 +++++++++++++++++-- packages/types/src/lifecycle-adapters.ts | 2 +- packages/types/src/messages/handshake.ts | 12 ++++++++--- 11 files changed, 87 insertions(+), 34 deletions(-) diff --git a/packages/adapters/src/get-runner-env.ts b/packages/adapters/src/get-runner-env.ts index 39074cf5f..7cdb9cbd6 100644 --- a/packages/adapters/src/get-runner-env.ts +++ b/packages/adapters/src/get-runner-env.ts @@ -9,7 +9,7 @@ import { RunnerEnvConfig, RunnerEnvironmentVariables } from "./types"; * @returns env vars */ export function getRunnerEnvVariables({ - sequencePath, instancesServerPort, instancesServerHost, instanceId, pipesPath, paths = "posix", sequenceInfo + sequencePath, instancesServerPort, instancesServerHost, instanceId, pipesPath, paths = "posix", sequenceInfo, payload }: RunnerEnvConfig, extra: Record = {}): RunnerEnvironmentVariables { const join = path[paths].join; @@ -24,6 +24,7 @@ export function getRunnerEnvVariables({ PIPES_LOCATION: pipesPath, CRASH_LOG: join(pipesPath, "crash_log"), SEQUENCE_INFO: JSON.stringify(sequenceInfo), + RUNNER_CONNECT_INFO: JSON.stringify(payload), ...extra }; } diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index 47134c0e9..6f5f508f0 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -21,6 +21,7 @@ import { adapterConfigDecoder } from "./kubernetes-config-decoder"; import { getRunnerEnvEntries } 
from "./get-runner-env"; import { PassThrough } from "stream"; import { RunnerExitCode } from "@scramjet/symbols"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; /** * Adapter for running Instance by Runner executed in separate process. @@ -88,6 +89,9 @@ IComponent { } }; } + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + throw Error("not implemented"); + } async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { if (config.type !== "kubernetes") { @@ -181,6 +185,11 @@ IComponent { return 0; } + async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { + throw Error("Not implemented"); + } + + async cleanup(): Promise { await this.remove(this.adapterConfig.timeout); } diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 39e19efe0..f6c52d514 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -16,6 +16,7 @@ import { ChildProcess, spawn } from "child_process"; import path from "path"; import { getRunnerEnvVariables } from "./get-runner-env"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; const isTSNode = !!(process as any)[Symbol.for("ts-node.register.instance")]; const gotPython = "\n _ \n __ _____ _ __ ___ ___| |\n \\ \\ /\\ / / _ \\| '_ \\/ __|_ / |\n \\ V V / (_) | | | \\__ \\/ /|_|\n \\_/\\_/ \\___/|_| |_|___/___(_) 🐍\n"; @@ -116,8 +117,11 @@ class ProcessInstanceAdapter implements return pythonpath; } + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + throw Error("not implemented"); + } + async run(config: InstanceConfig, instancesServerPort: number, 
instanceId: string, sequenceInfo: SequenceInfo): Promise { - console.log("config type", config.type) if (config.type !== "process") { throw new Error("Process instance adapter run with invalid runner config"); } @@ -177,6 +181,10 @@ class ProcessInstanceAdapter implements return statusCode; } + async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { + throw Error("Not implemented"); + } + /** * Performs cleanup after Runner end. * Removes fifos used to communication with runner. diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 0e089fd94..a4aac7326 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -21,7 +21,7 @@ import { OpResponse, StopSequenceMessageData, HostProxy, - ICommunicationHandler, + ICommunicationHandler } from "@scramjet/types"; import { AppError, @@ -165,12 +165,12 @@ export class CSIController extends TypedEmitter { public communicationHandler: ICommunicationHandler, private sthConfig: STHConfiguration, private hostProxy: HostProxy, - private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter, + private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter ) { super(); this.id = this.handshakeMessage.id; - this.sequence = this.handshakeMessage.sequence; + this.sequence = this.handshakeMessage.sequenceInfo; this.appConfig = this.handshakeMessage.payload.appConfig; this.args = this.handshakeMessage.payload.args; this.outputTopic = this.handshakeMessage.payload.outputTopic; @@ -202,7 +202,7 @@ export class CSIController extends TypedEmitter { async start() { const i = new Promise((res, rej) => { this.initResolver = { res, rej }; - //this.startInstance(); + this.startInstance(); }); i.then(() => this.main()).catch(async (e) => { @@ -267,6 +267,8 @@ export class CSIController extends TypedEmitter { this._instanceAdapter.logger.pipe(this.logger, { end: false }); + 
this.endOfSequence = this._instanceAdapter.waitUntilExit(undefined, this.id, this.sequence); + // @todo this also is moved to CSIDispatcher in entirety const instanceMain = async () => { try { @@ -513,7 +515,7 @@ export class CSIController extends TypedEmitter { } this.info.ports = message[1].ports; - this.sequence = message[1].sequence; + this.sequence = message[1].sequenceInfo; // TODO: add message to initiate the instance adapter @@ -531,7 +533,7 @@ export class CSIController extends TypedEmitter { } this.info.started = new Date(); - this.logger.info("Instance started", this.info); + this.logger.info("Instance started", JSON.stringify(message, undefined, 4)); } async handleInstanceConnect(streams: DownstreamStreamsConfig) { diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 3d576ab71..376ad42c4 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -37,16 +37,16 @@ export class CSIDispatcher extends TypedEmitter { async createCSIController( id: string, - sequence: SequenceInfo, + sequenceInfo: SequenceInfo, payload: StartSequencePayload, communicationHandler: ICommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { - sequence.instances = sequence.instances || new Set(); - const csiController = new CSIController({ id, sequence, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); + sequenceInfo.instances = sequenceInfo.instances || new Set(); + const csiController = new CSIController({ id, sequenceInfo, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); csiController.logger.pipe(this.logger); - this.logger.trace("CSIController created", id); + this.logger.trace("CSIController created", id, sequenceInfo); csiController.logger.pipe(this.logger, { end: false }); communicationHandler.logger.pipe(this.logger, { end: false }); @@ -107,7 +107,7 @@ export class CSIDispatcher extends 
TypedEmitter { delete InstanceStore[csiController.id]; - sequence.instances.filter(a => a !== id); + sequenceInfo.instances.filter(a => a !== id); // await this.cpmConnector?.sendInstanceInfo({ // id: csiController.id, @@ -141,9 +141,10 @@ export class CSIDispatcher extends TypedEmitter { this.logger.trace("csiController started", id); - sequence.instances.push(id); + sequenceInfo.instances.push(id); this.instancesStore[id] = csiController; + return csiController; } @@ -168,6 +169,7 @@ export class CSIDispatcher extends TypedEmitter { sequence, payload ); + // @todo more instance info return { id, diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 38a2fa835..18ce9a8b2 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1000,18 +1000,6 @@ export class Host implements IComponent { // @todo need more instance info if (!this.instancesStore[id]) { this.logger.info("creating new CSIController for runner connecting"); -<<<<<<< HEAD - this.csiDispatcher.createCSIController( - id, - {} as SequenceInfo, - {} as STHRestAPI.StartSequencePayload, - new CommunicationHandler(), - this.config, - this.instanceProxy - ); -||||||| constructed merge base - this.csiDispatcher.createCSIController(id, {} as SequenceInfo, {} as STHRestAPI.StartSequencePayload, new CommunicationHandler(), this.config, this.instanceProxy); -======= await this.csiDispatcher.createCSIController( id, {} as SequenceInfo, @@ -1019,7 +1007,6 @@ export class Host implements IComponent { new CommunicationHandler(), this.config, this.instanceProxy); ->>>>>>> pushing payload to runner/ docker run&disptach } await this.instancesStore[id].handleInstanceConnect( diff --git a/packages/model/src/stream-handler.ts b/packages/model/src/stream-handler.ts index f26dfbdc3..a884c8369 100644 --- a/packages/model/src/stream-handler.ts +++ b/packages/model/src/stream-handler.ts @@ -7,6 +7,7 @@ import { EncodedMonitoringMessage, ICommunicationHandler, IObjectLogger, + 
InstanceConnectionInfo, LoggerOutput, MaybePromise, MessageDataType, @@ -147,6 +148,14 @@ export class CommunicationHandler implements ICommunicationHandler { return this; } + waitForHandshake(): Promise { + return new Promise((res) => { + this.addMonitoringHandler(RunnerMessageCode.PING, (msg) => { + res(msg); + }); + }) + } + pipeMessageStreams() { if (this._piped) { this.logger.error("pipeMessageStreams called twice"); diff --git a/packages/runner/src/bin/start-runner.ts b/packages/runner/src/bin/start-runner.ts index ff7e54bda..1eae1764b 100755 --- a/packages/runner/src/bin/start-runner.ts +++ b/packages/runner/src/bin/start-runner.ts @@ -5,14 +5,26 @@ import fs from "fs"; import { AppConfig, SequenceInfo } from "@scramjet/types"; import { HostClient } from "../host-client"; import { RunnerExitCode } from "@scramjet/symbols"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; const sequencePath: string = process.env.SEQUENCE_PATH?.replace(/.js$/, "") + ".js"; const instancesServerPort = process.env.INSTANCES_SERVER_PORT; const instancesServerHost = process.env.INSTANCES_SERVER_HOST; const instanceId = process.env.INSTANCE_ID; const sequenceInfo = process.env.SEQUENCE_INFO; +const runnerConnectInfo = process.env.RUNNER_CONNECT_INFO; let connectInfo: SequenceInfo; +let parsedRunnerConnectInfo: RunnerConnectInfo; + +try { + if (!runnerConnectInfo) throw new Error("Connection JSON is required."); + parsedRunnerConnectInfo = JSON.parse(runnerConnectInfo); +} catch { + console.error("Error while parsing connection information."); + process.exit(RunnerExitCode.INVALID_ENV_VARS); +} + try { if (!sequenceInfo) throw new Error("Connection JSON is required."); @@ -54,7 +66,7 @@ const hostClient = new HostClient(+instancesServerPort, instancesServerHost); * @param fifosPath - fifo files path */ -const runner: Runner = new Runner(sequencePath, hostClient, instanceId, connectInfo); +const runner: Runner = new Runner(sequencePath, hostClient, instanceId, 
connectInfo, parsedRunnerConnectInfo); runner.main() .catch(e => { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index ddcaab303..ae5696506 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -33,7 +33,12 @@ import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; import { MessageUtils } from "./message-utils"; import { HostClient as HostApiClient } from "@scramjet/api-client"; import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; +<<<<<<< HEAD import { ManagerClient } from "@scramjet/manager-api-client"; +||||||| constructed merge base +======= +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; +>>>>>>> Reconnect. Fix starting instance // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; @@ -123,15 +128,22 @@ export class Runner implements IComponent { private outputDataStream: DataStream; private sequenceInfo: SequenceInfo; + private runnerConnectInfo: RunnerConnectInfo = { + appConfig: {} + }; + constructor( private sequencePath: string, private hostClient: IHostClient, private instanceId: string, - sequenceInfo: SequenceInfo + sequenceInfo: SequenceInfo, + runnerConnectInfo: RunnerConnectInfo ) { this.sequenceInfo = sequenceInfo; this.emitter = new EventEmitter(); + this.runnerConnectInfo = runnerConnectInfo; + this.logger = new ObjLogger(this, { id: instanceId }); hostClient.logger.pipe(this.logger); @@ -460,7 +472,12 @@ export class Runner implements IComponent { sendHandshakeMessage() { // TODO: send connection info - MessageUtils.writeMessageOnStream([RunnerMessageCode.PING, { id: this.instanceId, sequenceInfo: this.sequenceInfo }], this.hostClient.monitorStream); + MessageUtils.writeMessageOnStream([ + RunnerMessageCode.PING, { + id: this.instanceId, + sequenceInfo: this.sequenceInfo, + payload: this.runnerConnectInfo + }], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); } 
diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 6dc23e06e..86fa60936 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -31,7 +31,7 @@ export interface ILifeCycleAdapterMain { getCrashLog(): Promise; - waitUntilExit(config: InstanceConfig, instanceId: string, sequenceInfo: SequenceInfo): Promise; + waitUntilExit(config: InstanceConfig | undefined, instanceId: string, sequenceInfo: SequenceInfo): Promise; } // @TODO create ISequenceAdapter interface diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index b3f65380d..64f33a20b 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -10,13 +10,19 @@ import { StartSequencePayload } from "../rest-api-sth"; export type HandshakeMessage = { msgCode: RunnerMessageCode.PING, sequence: SequenceInfo, - payload: StartSequencePayload + payload: StartSequencePayload, + sequenceInfo: SequenceInfo }; -export type PingMessageData = { id: string, ports?: Record; sequence: SequenceInfo, payload: StartSequencePayload } +export type PingMessageData = { + id: string; + ports?: Record; + payload: StartSequencePayload; + sequenceInfo: SequenceInfo; +}; export type PangMessageData = { requires?: string, contentType?: string, provides?: string -} +}; From bd0b54fc77b5985ec5e1e56d4a05deae947b07f5 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 13 Sep 2023 13:12:04 +0000 Subject: [PATCH 10/62] Reconnect start seq with process adapter [wip] --- .../adapters/src/process-instance-adapter.ts | 19 +++++++-------- packages/host/src/lib/csi-controller.ts | 5 ++++ packages/host/src/lib/csi-dispatcher.ts | 2 ++ packages/model/src/stream-handler.ts | 2 ++ packages/runner/src/runner.ts | 23 ++++++++++++++++++- packages/symbols/src/runner-message-code.ts | 1 + packages/types/src/message-streams.ts | 5 +++- 7 files changed, 46 insertions(+), 11 
deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index f6c52d514..015c574d1 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -117,11 +117,12 @@ class ProcessInstanceAdapter implements return pythonpath; } - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { - throw Error("not implemented"); + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo, payload); + return this.waitUntilExit(config, instanceId, sequenceInfo); } - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { if (config.type !== "process") { throw new Error("Process instance adapter run with invalid runner config"); } @@ -141,7 +142,8 @@ class ProcessInstanceAdapter implements instancesServerPort, instanceId, pipesPath: "", - sequenceInfo + sequenceInfo, + payload }, { PYTHONPATH: this.getPythonpath(config.sequenceDir), }); @@ -160,12 +162,14 @@ class ProcessInstanceAdapter implements // how to connect to a process knowing id of it? 
this.runnerProcess = runnerProcess; + } + async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>( - (res) => runnerProcess.on("exit", (code, sig) => res([code, sig])) + (res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig])) ); - this.logger.trace("Runner process exited", runnerProcess.pid); + this.logger.trace("Runner process exited", this.runnerProcess?.pid); if (statusCode === null) { this.logger.warn("Runner was killed by a signal, and didn't return a status code", signal); @@ -179,10 +183,7 @@ class ProcessInstanceAdapter implements } return statusCode; - } - async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { - throw Error("Not implemented"); } /** diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index a4aac7326..dc8e3f870 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -485,6 +485,11 @@ export class CSIController extends TypedEmitter { this.heartBeatTick(); message[1] = stats; + + await this.controlDataStream?.whenWrote( + MessageUtilities.serializeMessage({ msgCode: RunnerMessageCode.MONITORING_REPLY }) + ); + return message; }, true); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 376ad42c4..f79f3b6f6 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -161,6 +161,8 @@ export class CSIDispatcher extends TypedEmitter { instanceAdapterExitDelay: this.STHConfig.timings.instanceAdapterExitDelay }; + instanceAdapter.logger.pipe(this.logger); + await instanceAdapter.init(); await instanceAdapter.dispatch( instanceConfig, diff --git a/packages/model/src/stream-handler.ts b/packages/model/src/stream-handler.ts index a884c8369..79c96c297 100644 --- 
a/packages/model/src/stream-handler.ts +++ b/packages/model/src/stream-handler.ts @@ -50,6 +50,7 @@ type MonitoringMessageHandlerList = { type ControlMessageHandlerList = { [RunnerMessageCode.KILL]: ConfiguredMessageHandler[]; [RunnerMessageCode.MONITORING_RATE]: ConfiguredMessageHandler[]; + [RunnerMessageCode.MONITORING_REPLY]: ConfiguredMessageHandler[]; [RunnerMessageCode.STOP]: ConfiguredMessageHandler[]; [RunnerMessageCode.PONG]: ConfiguredMessageHandler[]; [RunnerMessageCode.INPUT_CONTENT_TYPE]: ConfiguredMessageHandler[]; @@ -87,6 +88,7 @@ export class CommunicationHandler implements ICommunicationHandler { this.controlHandlerHash = { [RunnerMessageCode.KILL]: [], [RunnerMessageCode.MONITORING_RATE]: [], + [RunnerMessageCode.MONITORING_REPLY]: [], [RunnerMessageCode.STOP]: [], [RunnerMessageCode.EVENT]: [], [RunnerMessageCode.PONG]: [], diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index ae5696506..23bb4f9f2 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -119,6 +119,7 @@ export class Runner implements IComponent { private monitoringInterval?: NodeJS.Timeout; private keepAliveRequested?: boolean; + private monitoringMessageReplyTimeout?: NodeJS.Timeout; private stopExpected: boolean = false; handshakeResolver?: { res: Function, rej: Function }; @@ -195,6 +196,11 @@ export class Runner implements IComponent { this.emitter.emit(eventData.eventName, eventData.message); break; + case RunnerMessageCode.MONITORING_REPLY: + if (this.monitoringMessageReplyTimeout) { + clearTimeout(this.monitoringMessageReplyTimeout); + } + break; default: break; } @@ -247,6 +253,15 @@ export class Runner implements IComponent { MessageUtils.writeMessageOnStream( [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream ); + + this.monitoringMessageReplyTimeout = setTimeout(() => { + this.handleDisconnect(); + }, 500); + } + + async handleDisconnect() { + this.logger.info("Handling disconnect...."); + 
await this.premain(); } async handleKillRequest(): Promise { @@ -301,7 +316,7 @@ export class Runner implements IComponent { .finally(() => process.exit()); } - async main() { + async premain() { await this.hostClient.init(this.instanceId); this.redirectOutputs(); @@ -323,6 +338,12 @@ export class Runner implements IComponent { this.logger.debug("Handshake received"); + return { appConfig, args }; + } + + async main() { + const { appConfig, args } = await this.premain(); + this.initAppContext(appConfig as X); await this.reportHealth(); diff --git a/packages/symbols/src/runner-message-code.ts b/packages/symbols/src/runner-message-code.ts index c2bce5ede..9c96dff62 100644 --- a/packages/symbols/src/runner-message-code.ts +++ b/packages/symbols/src/runner-message-code.ts @@ -15,5 +15,6 @@ export enum RunnerMessageCode { STOP = 4001, KILL = 4002, MONITORING_RATE = 4003, + MONITORING_REPLY = 4004, EVENT = 5001, } diff --git a/packages/types/src/message-streams.ts b/packages/types/src/message-streams.ts index edc0ceef5..96a157a68 100644 --- a/packages/types/src/message-streams.ts +++ b/packages/types/src/message-streams.ts @@ -43,6 +43,7 @@ import { LoadCheckStat } from "./load-check-stat"; import { NetworkInfo } from "./network-info"; import { SequenceCompleteMessageData } from "./messages/sequence-complete"; import { KillMessageData } from "./messages/kill-sequence"; +import { MonitoringReplyMessage, MonitoringReplyMessageData } from "./messages/monitor-reply"; export type MessageType = T extends RunnerMessageCode.ACKNOWLEDGE ? AcknowledgeMessage : @@ -53,6 +54,7 @@ export type MessageType = T extends RunnerMessageCode.KILL ? KillSequenceMessage : T extends RunnerMessageCode.MONITORING ? MonitoringMessage : T extends RunnerMessageCode.MONITORING_RATE ? MonitoringRateMessage : + T extends RunnerMessageCode.MONITORING_REPLY ? MonitoringReplyMessage : T extends RunnerMessageCode.STOP ? StopSequenceMessage : T extends RunnerMessageCode.PING ? 
HandshakeMessage : T extends RunnerMessageCode.PONG ? HandshakeAcknowledgeMessage : @@ -71,6 +73,7 @@ export type MessageDataType = T extends RunnerMessageCode.KILL ? KillMessageData : T extends RunnerMessageCode.MONITORING ? MonitoringMessageData : T extends RunnerMessageCode.MONITORING_RATE ? MonitoringRateMessageData : + T extends RunnerMessageCode.MONITORING_REPLY ? MonitoringReplyMessageData : T extends RunnerMessageCode.STOP ? StopSequenceMessageData : T extends RunnerMessageCode.PING ? PingMessageData : T extends RunnerMessageCode.PONG ? HandshakeAcknowledgeMessageData : @@ -93,7 +96,7 @@ export type EncodedMessage< > = [T, MessageDataType]; export type ControlMessageCode = - RunnerMessageCode.KILL | RunnerMessageCode.MONITORING_RATE | RunnerMessageCode.STOP | RunnerMessageCode.EVENT | + RunnerMessageCode.KILL | RunnerMessageCode.MONITORING_RATE | RunnerMessageCode.MONITORING_REPLY | RunnerMessageCode.STOP | RunnerMessageCode.EVENT | RunnerMessageCode.PONG | CPMMessageCode.STH_ID | CPMMessageCode.KEY_REVOKED | CPMMessageCode.LIMIT_EXCEEDED | CPMMessageCode.ID_DROP | RunnerMessageCode.INPUT_CONTENT_TYPE; From b1f37d3d26188f219c4bb8e57380b719b8cd3c2a Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 13 Sep 2023 13:53:19 +0000 Subject: [PATCH 11/62] Reconnect instance on STH restart (wip) --- packages/adapters/src/process-instance-adapter.ts | 4 +++- packages/runner/src/host-client.ts | 14 ++++++++++---- packages/runner/src/runner.ts | 15 +++++++++++---- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 015c574d1..6ed8348ac 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -151,7 +151,9 @@ class ProcessInstanceAdapter implements this.logger.debug("Spawning Runner process with command", runnerCommand); this.logger.trace("Runner process environment", env); - const 
runnerProcess = spawn(runnerCommand[0], runnerCommand.slice(1), { env }); + const runnerProcess = spawn(runnerCommand[0], runnerCommand.slice(1), { env, detached: true }); + + runnerProcess.unref(); this.crashLogStreams = Promise.all([runnerProcess.stdout, runnerProcess.stderr].map(streamToString)); diff --git a/packages/runner/src/host-client.ts b/packages/runner/src/host-client.ts index 5635b6dd0..aec42528a 100644 --- a/packages/runner/src/host-client.ts +++ b/packages/runner/src/host-client.ts @@ -44,9 +44,15 @@ class HostClient implements IHostClient { Array.from(Array(9)) .map(() => { // Error handling for each connection is process crash for now - const connection = net.createConnection(this.instancesServerPort, this.instancesServerHost); - - connection.setNoDelay(true); + let connection: Socket; + + try { + connection = net.createConnection(this.instancesServerPort, this.instancesServerHost); + connection.on("error", () => {}); + connection.setNoDelay(true); + } catch (e) { + return Promise.reject(e); + } return new Promise(res => { connection.on("connect", () => res(connection)); @@ -62,7 +68,7 @@ class HostClient implements IHostClient { return connection; }); }) - ); + ).catch((e) => {}); this._streams = openConnections as HostOpenConnections; diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 23bb4f9f2..dc455595e 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -70,7 +70,8 @@ const overrideMap: Map = new Map(); function overrideStandardStream(oldStream: Writable, newStream: Writable) { if (overrideMap.has(oldStream)) { - throw new Error("Attempt to override stream more than once"); + //throw new Error("Attempt to override stream more than once"); + revertStandardStream(oldStream); } const write = oldStream.write; @@ -260,8 +261,9 @@ export class Runner implements IComponent { } async handleDisconnect() { - this.logger.info("Handling disconnect...."); - await this.premain(); + 
this.logger.info("Reinitializing...."); + + this.premain(); } async handleKillRequest(): Promise { @@ -317,7 +319,12 @@ export class Runner implements IComponent { } async premain() { - await this.hostClient.init(this.instanceId); + try { + await this.hostClient.init(this.instanceId); + } catch (e) { + await defer(2000); + this.premain(); + } this.redirectOutputs(); From 00455b0adc71eadf13726ae57266c64eb2b1f025 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 15 Sep 2023 14:36:39 +0000 Subject: [PATCH 12/62] Reconnect. Write runner exitcode to file. read in process-adapter --- .../adapters/src/process-instance-adapter.ts | 85 +++++++++++++++---- packages/host/src/lib/csi-controller.ts | 17 +++- packages/host/src/lib/csi-dispatcher.ts | 2 +- packages/host/src/lib/host.ts | 1 + packages/runner/src/runner.ts | 40 ++++++--- packages/types/src/lifecycle-adapters.ts | 2 + packages/types/src/runner-connect.ts | 13 +-- 7 files changed, 120 insertions(+), 40 deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 6ed8348ac..8765fc4cd 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -1,22 +1,25 @@ import { ObjLogger } from "@scramjet/obj-logger"; -import { streamToString } from "@scramjet/utility"; -import { STHConfiguration, +import { ExitCode, IComponent, ILifeCycleAdapterMain, ILifeCycleAdapterRun, + IObjectLogger, InstanceConfig, InstanceLimits, - IObjectLogger, MonitoringMessageData, + STHConfiguration, SequenceConfig, SequenceInfo } from "@scramjet/types"; +import { streamToString } from "@scramjet/utility"; import { ChildProcess, spawn } from "child_process"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; +import { constants } from "fs"; +import { access, readFile, rm } from "fs/promises"; import path from "path"; import { getRunnerEnvVariables } from "./get-runner-env"; -import { 
RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; const isTSNode = !!(process as any)[Symbol.for("ts-node.register.instance")]; const gotPython = "\n _ \n __ _____ _ __ ___ ___| |\n \\ \\ /\\ / / _ \\| '_ \\/ __|_ / |\n \\ V V / (_) | | | \\__ \\/ /|_|\n \\_/\\_/ \\___/|_| |_|___/___(_) 🐍\n"; @@ -31,6 +34,8 @@ class ProcessInstanceAdapter implements logger: IObjectLogger; sthConfig: STHConfiguration; + processPID: number = -1; + private runnerProcess?: ChildProcess; private crashLogStreams?: Promise; private _limits?: InstanceLimits = {}; @@ -45,6 +50,7 @@ class ProcessInstanceAdapter implements this.logger = new ObjLogger(this); this.sthConfig = config; } + id?: string | undefined; async init(): Promise { // noop @@ -117,6 +123,11 @@ class ProcessInstanceAdapter implements return pythonpath; } + setRunner(system: Record): void { + this.logger.info("--------- Setting system from runner", system); + this.processPID = parseInt(system.processPID, 10); + } + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo, payload); return this.waitUntilExit(config, instanceId, sequenceInfo); @@ -166,26 +177,66 @@ class ProcessInstanceAdapter implements this.runnerProcess = runnerProcess; } - async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { - const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>( - (res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig])) - ); + getRunnerInfo(): RunnerConnectInfo["system"] { + return { + processPID: this.processPID.toString() + } + } - this.logger.trace("Runner process exited", this.runnerProcess?.pid); + async waitUntilExit(config: InstanceConfig, instanceId: string, _sequenceInfo: SequenceInfo): Promise { + if (this.runnerProcess) { + const [statusCode, signal] = 
await new Promise<[number | null, NodeJS.Signals | null]>( + (res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig])) + ); - if (statusCode === null) { - this.logger.warn("Runner was killed by a signal, and didn't return a status code", signal); + this.logger.trace("Runner process exited", this.runnerProcess?.pid); - // Probably SIGIKLL - return 137; - } + if (statusCode === null) { + this.logger.warn("Runner was killed by a signal, and didn't return a status code", signal); + + // Probably SIGIKLL + return 137; + } - if (statusCode > 0) { - this.logger.debug("Process returned non-zero status code", statusCode); + if (statusCode > 0) { + this.logger.debug("Process returned non-zero status code", statusCode); + } + + return statusCode; } - return statusCode; + // When no process reference Wait for file created by runner + return new Promise((res, reject) => { + const interval = setInterval(async() => { + if (this.processPID < 1) return; + + const filePath = `/tmp/runner-${this.processPID}`; + + try { + await access(filePath, constants.F_OK) + clearInterval(interval); + + const data = await readFile(filePath, 'utf8').catch((readErr) => { + this.logger.error(`Cant' read runner exit code from: ${readErr}`); + reject(readErr); + return; + }) + + this.logger.debug("exitCode saved to file by runner:", data, filePath); + + rm(filePath).then(() => { + this.logger.debug("File removed"); + }, (err) => { + this.logger.error("Can't remove exitcode file"); + }) + + res(parseInt(data!, 10)); + } catch (err) { + /** file not exists */ + }; + }, 1000); + }); } /** diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index dc8e3f870..4be838fb0 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -43,6 +43,7 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { cancellableDefer, CancellablePromise, defer, promiseTimeout, TypedEmitter } from "@scramjet/utility"; 
import { ObjLogger } from "@scramjet/obj-logger"; import { ReasonPhrases } from "http-status-codes"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; /** * @TODO: Runner exits after 10secs and k8s client checks status every 500ms so we need to give it some time @@ -87,6 +88,7 @@ export class CSIController extends TypedEmitter { }; } limits: InstanceLimits = {}; + runnerSystemInfo: RunnerConnectInfo["system"]; sequence: SequenceInfo; appConfig: AppConfig; instancePromise?: Promise<{ message: string, exitcode: number; status: InstanceStatus }>; @@ -170,6 +172,7 @@ export class CSIController extends TypedEmitter { super(); this.id = this.handshakeMessage.id; + this.runnerSystemInfo = this.handshakeMessage.payload.system; this.sequence = this.handshakeMessage.sequenceInfo; this.appConfig = this.handshakeMessage.payload.appConfig; this.args = this.handshakeMessage.payload.args; @@ -513,10 +516,14 @@ export class CSIController extends TypedEmitter { // TODO: refactor out of CSI Controller - this should be in async handleHandshake(message: EncodedMessage) { - this.logger.debug("PING received", message); + this.logger.debug("PING received", JSON.stringify(message)); - if (!message[1].ports) { - this.logger.trace("Received a PING message but didn't receive ports config"); + if (message[1].ports) { + this.logger.trace("Received a PING message with ports config"); + } + + if (this.instanceAdapter.setRunner) { + this.instanceAdapter.setRunner(message[1].payload.system); } this.info.ports = message[1].ports; @@ -538,7 +545,7 @@ export class CSIController extends TypedEmitter { } this.info.started = new Date(); - this.logger.info("Instance started", JSON.stringify(message, undefined, 4)); + this.logger.info("Instance started", JSON.stringify(message, undefined)); } async handleInstanceConnect(streams: DownstreamStreamsConfig) { @@ -552,6 +559,7 @@ export class CSIController extends TypedEmitter { streams[8]?.end(); }); this.bpmux.on("peer_multiplex", 
(socket: Duplex, _data: any) => this.hostProxy.onInstanceRequest(socket)); + await once(this, "pang"); this.initResolver?.res(); } catch (e: any) { @@ -656,6 +664,7 @@ export class CSIController extends TypedEmitter { localEmitter.lastEvents[event.eventName] = event; localEmitter.emit(event.eventName, event); }); + this.router.upstream("/events/:name", async (req: ParsedMessage, res: ServerResponse) => { const name = req.params?.name; diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index f79f3b6f6..5ccb520a5 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -43,9 +43,9 @@ export class CSIDispatcher extends TypedEmitter { config: STHConfiguration, instanceProxy: HostProxy) { sequenceInfo.instances = sequenceInfo.instances || new Set(); + const csiController = new CSIController({ id, sequenceInfo, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); - csiController.logger.pipe(this.logger); this.logger.trace("CSIController created", id, sequenceInfo); csiController.logger.pipe(this.logger, { end: false }); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 18ce9a8b2..7c6b2664f 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1000,6 +1000,7 @@ export class Host implements IComponent { // @todo need more instance info if (!this.instancesStore[id]) { this.logger.info("creating new CSIController for runner connecting"); + await this.csiDispatcher.createCSIController( id, {} as SequenceInfo, diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index dc455595e..f88aae7af 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -1,3 +1,6 @@ +import { RunnerError } from "@scramjet/model"; +import { ObjLogger } from "@scramjet/obj-logger"; +import { RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { AppConfig, 
ApplicationFunction, @@ -6,21 +9,18 @@ import { EncodedMonitoringMessage, EventMessageData, HandshakeAcknowledgeMessageData, + HasTopicInformation, + HostClient, IComponent, IHostClient, + IObjectLogger, MaybePromise, MonitoringRateMessageData, + SequenceInfo, StopSequenceMessageData, Streamable, - SynchronousStreamable, - HasTopicInformation, - IObjectLogger, - HostClient, - SequenceInfo + SynchronousStreamable } from "@scramjet/types"; -import { RunnerError } from "@scramjet/model"; -import { ObjLogger } from "@scramjet/obj-logger"; -import { RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { defer } from "@scramjet/utility"; import { BufferStream, DataStream, StringStream } from "scramjet"; @@ -28,9 +28,6 @@ import { BufferStream, DataStream, StringStream } from "scramjet"; import { EventEmitter } from "events"; import { Readable, Writable } from "stream"; -import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; -import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; -import { MessageUtils } from "./message-utils"; import { HostClient as HostApiClient } from "@scramjet/api-client"; import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; <<<<<<< HEAD @@ -38,7 +35,15 @@ import { ManagerClient } from "@scramjet/manager-api-client"; ||||||| constructed merge base ======= import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; +<<<<<<< HEAD >>>>>>> Reconnect. Fix starting instance +||||||| constructed merge base +======= +import { writeFileSync } from "fs"; +import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; +import { MessageUtils } from "./message-utils"; +import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; +>>>>>>> Reconnect. Write runner exitcode to file. 
read in process-adapter // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; @@ -164,6 +169,12 @@ export class Runner implements IComponent { throw e; }); + + process.on("beforeExit", (code)=> { + const filepath = `/tmp/runner-${process.pid.toString()}`; + + writeFileSync(filepath, code.toString()); + }); } get context(): RunnerAppContext { @@ -504,7 +515,12 @@ export class Runner implements IComponent { RunnerMessageCode.PING, { id: this.instanceId, sequenceInfo: this.sequenceInfo, - payload: this.runnerConnectInfo + payload: { + ...this.runnerConnectInfo, + system: { + processPID: process.pid.toString() + } + } }], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 86fa60936..919eb9483 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -36,6 +36,8 @@ export interface ILifeCycleAdapterMain { // @TODO create ISequenceAdapter interface export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { + setRunner?(system: RunnerConnectInfo["system"]): void; + limits: InstanceLimits; /** diff --git a/packages/types/src/runner-connect.ts b/packages/types/src/runner-connect.ts index f848eb56c..6b101e292 100644 --- a/packages/types/src/runner-connect.ts +++ b/packages/types/src/runner-connect.ts @@ -2,10 +2,11 @@ import { AppConfig } from "./app-config"; import { InstanceLimits } from "./instance-limits"; export type RunnerConnectInfo = { - appConfig: AppConfig, - args?: any[], - outputTopic?: string, - inputTopic?: string, - limits?: InstanceLimits, - instanceId?: string + appConfig: AppConfig; + args?: any[]; + outputTopic?: string; + inputTopic?: string; + limits?: InstanceLimits; + instanceId?: string; + system?: Record; } From 0b9306406718b7e19d2df80e7ae10ee2853e07d7 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Sun, 17 Sep 2023 00:16:20 
+0000 Subject: [PATCH 13/62] Starting Instance wait for CSI create --- packages/host/src/lib/csi-dispatcher.ts | 28 ++++++++++++----- packages/host/src/lib/host.ts | 40 ++++++++++++------------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 5ccb520a5..dd6a69c1b 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -1,14 +1,14 @@ +import { getInstanceAdapter } from "@scramjet/adapters"; +import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; +import { RunnerMessageCode } from "@scramjet/symbols"; import { HostProxy, ICommunicationHandler, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; -import { SocketServer } from "./socket-server"; -import { InstanceStore } from "./instance-store"; -import { CSIController } from "./csi-controller"; -import { IDProvider } from "@scramjet/model"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; -import { getInstanceAdapter } from "@scramjet/adapters"; -import SequenceStore from "./sequenceStore"; import { TypedEmitter } from "@scramjet/utility"; -import { RunnerMessageCode } from "@scramjet/symbols"; +import { CSIController } from "./csi-controller"; +import { InstanceStore } from "./instance-store"; +import { SocketServer } from "./socket-server"; +import SequenceStore from "./sequenceStore"; type errorEventData = {id:string, err: any } type endEventData = {id:string, code:number } @@ -19,6 +19,7 @@ type Events = { stop: (code: number) => void; end: (data: endEventData) => void; terminated: (data: endEventData) => void; + established: (id: string) => void; }; export class CSIDispatcher extends TypedEmitter { @@ -145,6 +146,8 @@ export class CSIDispatcher extends TypedEmitter { this.instancesStore[id] = csiController; + this.emit("established", id); 
+ return csiController; } @@ -172,6 +175,17 @@ export class CSIDispatcher extends TypedEmitter { payload ); + await new Promise((resolve, _reject) => { + const resolveFunction = (eventId: string) => { + if (eventId === id) { + resolve(); + this.off("established", resolveFunction); + } + }; + + this.on("established", resolveFunction); + }); + // @todo more instance info return { id, diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 7c6b2664f..97a735058 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1,10 +1,12 @@ import findPackage from "find-package-json"; import { ReasonPhrases, StatusCodes } from "http-status-codes"; -import { Duplex } from "stream"; import { IncomingHttpHeaders, IncomingMessage, Server, ServerResponse } from "http"; import { AddressInfo } from "net"; +import { Duplex } from "stream"; +import { CommunicationHandler, HostError, IDProvider } from "@scramjet/model"; +import { HostHeaders, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { APIExpose, CPMConnectorOptions, @@ -21,37 +23,35 @@ import { SequenceInfo, StartSequenceDTO, STHConfiguration, - STHRestAPI, + STHRestAPI } from "@scramjet/types"; -import { CommunicationHandler, HostError, IDProvider } from "@scramjet/model"; -import { HostHeaders, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; -import { ObjLogger, prettyPrint } from "@scramjet/obj-logger"; -import { LoadCheck, LoadCheckConfig } from "@scramjet/load-check"; import { getSequenceAdapter, initializeRuntimeAdapters } from "@scramjet/adapters"; +import { LoadCheck, LoadCheckConfig } from "@scramjet/load-check"; +import { ObjLogger, prettyPrint } from "@scramjet/obj-logger"; -import { CPMConnector } from "./cpm-connector"; import { CommonLogsPipe } from "./common-logs-pipe"; +import { CPMConnector } from "./cpm-connector"; import { InstanceStore } from "./instance-store"; -import { ServiceDiscovery } from 
"./serviceDiscovery/sd-adapter"; -import { SocketServer } from "./socket-server"; -import { DataStream } from "scramjet"; -import { optionsMiddleware } from "./middlewares/options"; -import { corsMiddleware } from "./middlewares/cors"; +import { DuplexStream } from "@scramjet/api-server"; import { ConfigService, development } from "@scramjet/sth-config"; -import { isStartSequenceDTO, readJsonFile, defer, FileBuilder } from "@scramjet/utility"; -import { inspect } from "util"; -import { auditMiddleware, logger as auditMiddlewareLogger } from "./middlewares/audit"; -import { Auditor } from "./auditor"; import { getTelemetryAdapter, ITelemetryAdapter } from "@scramjet/telemetry"; -import { cpus, totalmem } from "os"; -import { S3Client } from "./s3-client"; -import { DuplexStream } from "@scramjet/api-server"; +import { defer, FileBuilder, isStartSequenceDTO, readJsonFile } from "@scramjet/utility"; import { readFileSync } from "fs"; +import { cpus, totalmem } from "os"; +import { DataStream } from "scramjet"; +import { inspect } from "util"; +import { Auditor } from "./auditor"; -import TopicRouter from "./serviceDiscovery/topicRouter"; +import { auditMiddleware, logger as auditMiddlewareLogger } from "./middlewares/audit"; +import { corsMiddleware } from "./middlewares/cors"; +import { optionsMiddleware } from "./middlewares/options"; +import { S3Client } from "./s3-client"; +import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; +import { SocketServer } from "./socket-server"; import SequenceStore from "./sequenceStore"; +import TopicRouter from "./serviceDiscovery/topicRouter"; import { GetSequenceResponse } from "@scramjet/types/src/rest-api-sth"; import { loadModule, logger as loadModuleLogger } from "@scramjet/module-loader"; import { CSIDispatcher } from "./csi-dispatcher"; From 23735c4f26e3c51f5bc3ba8ec248956ccdc1acdd Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 18 Sep 2023 11:31:54 +0000 Subject: [PATCH 14/62] Reconnect. 
Fix lint issues --- .../src/kubernetes-instance-adapter.ts | 16 ++--- .../adapters/src/process-instance-adapter.ts | 20 +++--- packages/host/src/lib/csi-controller.ts | 50 +++++++-------- packages/host/src/lib/csi-dispatcher.ts | 25 +++++--- packages/host/src/lib/host.ts | 41 +++++++------ packages/model/src/stream-handler.ts | 4 +- packages/runner/src/bin/start-runner.ts | 1 - packages/runner/src/host-client.ts | 10 +-- packages/runner/src/runner.ts | 61 ++++++++++--------- packages/types/src/messages/monitor-reply.ts | 9 +++ packages/utility/src/index.ts | 17 +++--- 11 files changed, 141 insertions(+), 113 deletions(-) create mode 100644 packages/types/src/messages/monitor-reply.ts diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index 6f5f508f0..05e13125d 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -13,15 +13,15 @@ import { STHConfiguration, } from "@scramjet/types"; -import path from "path"; import { ObjLogger } from "@scramjet/obj-logger"; +import { RunnerExitCode } from "@scramjet/symbols"; +import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; import { createReadStream } from "fs"; +import path from "path"; +import { PassThrough } from "stream"; +import { getRunnerEnvEntries } from "./get-runner-env"; import { KubernetesClientAdapter } from "./kubernetes-client-adapter"; import { adapterConfigDecoder } from "./kubernetes-config-decoder"; -import { getRunnerEnvEntries } from "./get-runner-env"; -import { PassThrough } from "stream"; -import { RunnerExitCode } from "@scramjet/symbols"; -import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; /** * Adapter for running Instance by Runner executed in separate process. 
@@ -89,7 +89,7 @@ IComponent { } }; } - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + async dispatch(_config: InstanceConfig, _instancesServerPort: number, _instanceId: string, _sequenceInfo: SequenceInfo, _payload: RunnerConnectInfo): Promise { throw Error("not implemented"); } @@ -185,11 +185,11 @@ IComponent { return 0; } - async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { + async waitUntilExit(_config: InstanceConfig, _instanceId:string, _sequenceInfo: SequenceInfo): Promise { + this.logger.debug("WaitUntilExit", [_config, _instanceId, _sequenceInfo]); throw Error("Not implemented"); } - async cleanup(): Promise { await this.remove(this.adapterConfig.timeout); } diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 8765fc4cd..05f6849d7 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -180,10 +180,10 @@ class ProcessInstanceAdapter implements getRunnerInfo(): RunnerConnectInfo["system"] { return { processPID: this.processPID.toString() - } + }; } - async waitUntilExit(config: InstanceConfig, instanceId: string, _sequenceInfo: SequenceInfo): Promise { + async waitUntilExit(_config: InstanceConfig, _instanceId: string, _sequenceInfo: SequenceInfo): Promise { if (this.runnerProcess) { const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>( (res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig])) @@ -207,34 +207,34 @@ class ProcessInstanceAdapter implements // When no process reference Wait for file created by runner return new Promise((res, reject) => { - const interval = setInterval(async() => { + const interval = setInterval(async () => { if (this.processPID < 1) return; const filePath = 
`/tmp/runner-${this.processPID}`; try { - await access(filePath, constants.F_OK) + await access(filePath, constants.F_OK); clearInterval(interval); - const data = await readFile(filePath, 'utf8').catch((readErr) => { + const data = await readFile(filePath, "utf8").catch((readErr) => { this.logger.error(`Cant' read runner exit code from: ${readErr}`); reject(readErr); return; - }) + }); this.logger.debug("exitCode saved to file by runner:", data, filePath); rm(filePath).then(() => { this.logger.debug("File removed"); - }, (err) => { - this.logger.error("Can't remove exitcode file"); - }) + }, (err: any) => { + this.logger.error("Can't remove exitcode file", err); + }); res(parseInt(data!, 10)); } catch (err) { /** file not exists */ - }; + } }, 1000); }); } diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 4be838fb0..fcc4360af 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -1,49 +1,49 @@ +import { + AppError, + CSIControllerError, + HostError, + InstanceAdapterError, + MessageUtilities +} from "@scramjet/model"; +import { development } from "@scramjet/sth-config"; +import { CommunicationChannel as CC, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { APIRoute, AppConfig, DownstreamStreamsConfig, EncodedMessage, HandshakeAcknowledgeMessage, - ParsedMessage, - PassThroughStreamsConfig, - ReadableStream, - SequenceInfo, - WritableStream, + HostProxy, + ICommunicationHandler, ILifeCycleAdapterRun, - MessageDataType, - IObjectLogger, - STHRestAPI, - STHConfiguration, InstanceLimits, + InstanceStats, InstanceStatus, + IObjectLogger, + MessageDataType, MonitoringMessageData, - InstanceStats, OpResponse, + ParsedMessage, + PassThroughStreamsConfig, + ReadableStream, + SequenceInfo, + STHConfiguration, + STHRestAPI, StopSequenceMessageData, - HostProxy, - ICommunicationHandler + WritableStream } from "@scramjet/types"; -import { - AppError, - 
CSIControllerError, - HostError, - MessageUtilities, - InstanceAdapterError -} from "@scramjet/model"; -import { CommunicationChannel as CC, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { Duplex, PassThrough, Readable } from "stream"; -import { development } from "@scramjet/sth-config"; -import { DataStream } from "scramjet"; +import { DuplexStream, getRouter } from "@scramjet/api-server"; import { EventEmitter, once } from "events"; import { ServerResponse } from "http"; -import { DuplexStream, getRouter } from "@scramjet/api-server"; +import { DataStream } from "scramjet"; import { getInstanceAdapter } from "@scramjet/adapters"; -import { cancellableDefer, CancellablePromise, defer, promiseTimeout, TypedEmitter } from "@scramjet/utility"; import { ObjLogger } from "@scramjet/obj-logger"; -import { ReasonPhrases } from "http-status-codes"; import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; +import { cancellableDefer, CancellablePromise, defer, promiseTimeout, TypedEmitter } from "@scramjet/utility"; +import { ReasonPhrases } from "http-status-codes"; /** * @TODO: Runner exits after 10secs and k8s client checks status every 500ms so we need to give it some time diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index dd6a69c1b..6a779596d 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -7,8 +7,8 @@ import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { TypedEmitter } from "@scramjet/utility"; import { CSIController } from "./csi-controller"; import { InstanceStore } from "./instance-store"; -import { SocketServer } from "./socket-server"; import SequenceStore from "./sequenceStore"; +import { SocketServer } from "./socket-server"; type errorEventData = {id:string, err: any } type endEventData = {id:string, code:number } @@ -43,7 +43,7 @@ export class CSIDispatcher extends TypedEmitter { 
communicationHandler: ICommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { - sequenceInfo.instances = sequenceInfo.instances || new Set(); + sequenceInfo.instances = sequenceInfo.instances || []; const csiController = new CSIController({ id, sequenceInfo, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); @@ -58,10 +58,15 @@ export class CSIDispatcher extends TypedEmitter { this.emit("error", { id, err }); }); + // eslint-disable-next-line complexity csiController.on("pang", async (data) => { this.logger.trace("PANG received", data); - if (data.requires && !csiController.inputRouted) { + if ((data.requires || data.provides) && !data.contentType) { + this.logger.warn("Missing topic content-type"); + } + + if (data.requires && !csiController.inputRouted && data.contentType) { this.logger.trace("Routing Sequence input to topic", data.requires); // await this.serviceDiscovery.routeTopicToStream( @@ -76,7 +81,7 @@ export class CSIDispatcher extends TypedEmitter { // }); } - if (data.provides && !csiController.outputRouted) { + if (data.provides && !csiController.outputRouted && data.contentType) { this.logger.trace("Routing Sequence output to topic", data.provides); // await this.serviceDiscovery.routeStreamToTopic( // csiController.getOutputStream(), @@ -108,7 +113,9 @@ export class CSIDispatcher extends TypedEmitter { delete InstanceStore[csiController.id]; - sequenceInfo.instances.filter(a => a !== id); + sequenceInfo.instances = sequenceInfo.instances.filter(item => { + return item !== id; + }); // await this.cpmConnector?.sendInstanceInfo({ // id: csiController.id, @@ -130,7 +137,9 @@ export class CSIDispatcher extends TypedEmitter { // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); // this.pushTelemetry("Instance ended", { - // executionTime: csiController.info.ended && csiController.info.started ? 
((csiController.info.ended?.getTime() - csiController.info.started.getTime()) / 1000).toString() : "-1", + // executionTime: csiController.info.ended && csiController.info.started + // ? ((csiController.info.ended?.getTime() - csiController.info.started.getTime()) / 1000).toString() + // : "-1", // id: csiController.id, // code: code.toString(), // seqId: csiController.sequence.id @@ -138,7 +147,9 @@ export class CSIDispatcher extends TypedEmitter { this.emit("terminated", { id, code }); }); - csiController.start().then(() => {}, () => {}); + csiController.start().catch(() => { + //@TODO: handle start error; + }); this.logger.trace("csiController started", id); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 97a735058..a12d8739e 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -20,10 +20,10 @@ import { OpResponse, ParsedMessage, PublicSTHConfiguration, - SequenceInfo, - StartSequenceDTO, STHConfiguration, - STHRestAPI + STHRestAPI, + SequenceInfo, + StartSequenceDTO } from "@scramjet/types"; import { getSequenceAdapter, initializeRuntimeAdapters } from "@scramjet/adapters"; @@ -42,17 +42,17 @@ import { readFileSync } from "fs"; import { cpus, totalmem } from "os"; import { DataStream } from "scramjet"; import { inspect } from "util"; -import { Auditor } from "./auditor"; +import { AuditedRequest, Auditor } from "./auditor"; import { auditMiddleware, logger as auditMiddlewareLogger } from "./middlewares/audit"; import { corsMiddleware } from "./middlewares/cors"; import { optionsMiddleware } from "./middlewares/options"; import { S3Client } from "./s3-client"; import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; import { SocketServer } from "./socket-server"; + import SequenceStore from "./sequenceStore"; import TopicRouter from "./serviceDiscovery/topicRouter"; -import { GetSequenceResponse } from "@scramjet/types/src/rest-api-sth"; import { loadModule, logger as loadModuleLogger } from 
"@scramjet/module-loader"; import { CSIDispatcher } from "./csi-dispatcher"; @@ -185,6 +185,7 @@ export class Host implements IComponent { constructor(apiServer: APIExpose, socketServer: SocketServer, sthConfig: STHConfiguration) { this.config = sthConfig; this.publicConfig = ConfigService.getConfigInfo(sthConfig); + this.sequenceStore = new SequenceStore(); this.logger = new ObjLogger( this, @@ -672,7 +673,7 @@ export class Host implements IComponent { this.logger.trace("Sequence removed:", id); // eslint-disable-next-line max-len - await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_DELETED, sequenceInfo as unknown as GetSequenceResponse); + await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_DELETED, sequenceInfo as unknown as STHRestAPI.GetSequenceResponse); this.auditor.auditSequence(id, SequenceMessageCode.SEQUENCE_DELETED); return { @@ -805,7 +806,7 @@ export class Host implements IComponent { this.logger.trace(`Sequence identified: ${config.id}`); // eslint-disable-next-line max-len - await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_CREATED, config as unknown as GetSequenceResponse); + await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_CREATED, config as unknown as STHRestAPI.GetSequenceResponse); this.auditor.auditSequence(id, SequenceMessageCode.SEQUENCE_CREATED); this.pushTelemetry("Sequence uploaded", { language: config.language.toLowerCase(), seqId: id }); @@ -955,21 +956,25 @@ export class Host implements IComponent { const runner = await this.csiDispatcher.startRunner(sequence, payload); // @todo more info - // await this.cpmConnector?.sendInstanceInfo({ // id: runner.id, - // appConfig: payload.appConfig, - // args: payload.args, - // sequence: sequence.id, - // // ports: runner.info.ports - // // created: csic.info.created, - // // started: csic.info.started, - // // status: csic.status, + // appConfig: runner.appConfig, + // args: runner.args, + // 
sequence: (info => { + // // eslint-disable-next-line @typescript-eslint/no-unused-vars + // const { instances, ...rest } = info; + + // return rest; + // })(sequence), + // ports: runner.info.ports, + // created: csic.info.created, + // started: csic.info.started, + // status: csic.status, // }, InstanceMessageCode.INSTANCE_STARTED); - //this.logger.debug("Instance limits", runner.limits); - //this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); - //this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); + this.logger.debug("Instance limits", runner.limits); + this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); + this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); // csic.on("hourChime", () => { // this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); diff --git a/packages/model/src/stream-handler.ts b/packages/model/src/stream-handler.ts index 79c96c297..2b5d258e7 100644 --- a/packages/model/src/stream-handler.ts +++ b/packages/model/src/stream-handler.ts @@ -1,3 +1,4 @@ +import { ObjLogger } from "@scramjet/obj-logger"; import { CommunicationChannel as CC, CPMMessageCode, RunnerMessageCode } from "@scramjet/symbols"; import { ControlMessageCode, @@ -18,7 +19,6 @@ import { UpstreamStreamsConfig, WritableStream } from "@scramjet/types"; -import { ObjLogger } from "@scramjet/obj-logger"; import { DataStream, StringStream } from "scramjet"; import { PassThrough, Readable, Writable } from "stream"; @@ -155,7 +155,7 @@ export class CommunicationHandler implements ICommunicationHandler { this.addMonitoringHandler(RunnerMessageCode.PING, (msg) => { res(msg); }); - }) + }); } pipeMessageStreams() { diff --git a/packages/runner/src/bin/start-runner.ts b/packages/runner/src/bin/start-runner.ts 
index 1eae1764b..63f1803b2 100755 --- a/packages/runner/src/bin/start-runner.ts +++ b/packages/runner/src/bin/start-runner.ts @@ -25,7 +25,6 @@ try { process.exit(RunnerExitCode.INVALID_ENV_VARS); } - try { if (!sequenceInfo) throw new Error("Connection JSON is required."); connectInfo = JSON.parse(sequenceInfo); diff --git a/packages/runner/src/host-client.ts b/packages/runner/src/host-client.ts index aec42528a..213169a28 100644 --- a/packages/runner/src/host-client.ts +++ b/packages/runner/src/host-client.ts @@ -1,9 +1,9 @@ /* eslint-disable dot-notation */ -import { IHostClient, IObjectLogger, UpstreamStreamsConfig, } from "@scramjet/types"; -import { CommunicationChannel as CC } from "@scramjet/symbols"; -import net, { createConnection, Socket } from "net"; import { ObjLogger } from "@scramjet/obj-logger"; +import { CommunicationChannel as CC } from "@scramjet/symbols"; +import { IHostClient, IObjectLogger, UpstreamStreamsConfig, } from "@scramjet/types"; import { Agent } from "http"; +import net, { Socket, createConnection } from "net"; type HostOpenConnections = [ net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket @@ -68,7 +68,9 @@ class HostClient implements IHostClient { return connection; }); }) - ).catch((e) => {}); + ).catch((_e) => { + //@TODO: handle error. 
+ }); this._streams = openConnections as HostOpenConnections; diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index f88aae7af..c588afd63 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -30,20 +30,14 @@ import { Readable, Writable } from "stream"; import { HostClient as HostApiClient } from "@scramjet/api-client"; import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; -<<<<<<< HEAD import { ManagerClient } from "@scramjet/manager-api-client"; -||||||| constructed merge base -======= + import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; -<<<<<<< HEAD ->>>>>>> Reconnect. Fix starting instance -||||||| constructed merge base -======= + import { writeFileSync } from "fs"; import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; import { MessageUtils } from "./message-utils"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; ->>>>>>> Reconnect. Write runner exitcode to file. read in process-adapter // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; @@ -73,6 +67,23 @@ export function isSynchronousStreamable(obj: SynchronousStreamable | Primit const overrideMap: Map = new Map(); +function revertStandardStream(oldStream: Writable) { + if (overrideMap.has(oldStream)) { + const { write, drainCb, errorCb } = overrideMap.get(oldStream) as OverrideConfig; + + // @ts-ignore - this is ok, we're doing this on purpose! 
+ delete oldStream.write; + + // if prototypic write is there, then no change needed + if (oldStream.write !== write) + oldStream.write = write; + + oldStream.off("drain", drainCb); + oldStream.off("error", errorCb); + overrideMap.delete(oldStream); + } +} + function overrideStandardStream(oldStream: Writable, newStream: Writable) { if (overrideMap.has(oldStream)) { //throw new Error("Attempt to override stream more than once"); @@ -97,23 +108,6 @@ function overrideStandardStream(oldStream: Writable, newStream: Writable) { overrideMap.set(oldStream, { write, drainCb, errorCb }); } -function revertStandardStream(oldStream: Writable) { - if (overrideMap.has(oldStream)) { - const { write, drainCb, errorCb } = overrideMap.get(oldStream) as OverrideConfig; - - // @ts-ignore - this is ok, we're doing this on purpose! - delete oldStream.write; - - // if prototypic write is there, then no change needed - if (oldStream.write !== write) - oldStream.write = write; - - oldStream.off("drain", drainCb); - oldStream.off("error", errorCb); - overrideMap.delete(oldStream); - } -} - /** * Runtime environment for sequence code. 
* Communicates with Host with data transferred to/from Sequence, health info, @@ -170,7 +164,7 @@ export class Runner implements IComponent { throw e; }); - process.on("beforeExit", (code)=> { + process.on("beforeExit", (code) => { const filepath = `/tmp/runner-${process.pid.toString()}`; writeFileSync(filepath, code.toString()); @@ -266,15 +260,17 @@ export class Runner implements IComponent { [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream ); - this.monitoringMessageReplyTimeout = setTimeout(() => { - this.handleDisconnect(); + this.monitoringMessageReplyTimeout = setTimeout(async () => { + await this.handleDisconnect(); }, 500); } async handleDisconnect() { this.logger.info("Reinitializing...."); - this.premain(); + this.premain().catch((e) => { + this.logger.error("Premain error", e); + }); } async handleKillRequest(): Promise { @@ -333,8 +329,13 @@ export class Runner implements IComponent { try { await this.hostClient.init(this.instanceId); } catch (e) { + this.logger.error("hostClient init error", e); + await defer(2000); - this.premain(); + + this.premain().catch((err: any) => { + this.logger.error("Premain error", err); + }); } this.redirectOutputs(); diff --git a/packages/types/src/messages/monitor-reply.ts b/packages/types/src/messages/monitor-reply.ts new file mode 100644 index 000000000..699b35f0a --- /dev/null +++ b/packages/types/src/messages/monitor-reply.ts @@ -0,0 +1,9 @@ +import { RunnerMessageCode } from "@scramjet/symbols"; + +export type MonitoringReplyMessageData = {}; + +/** + * Message instructing Runner how often to emit monitoring messages. + * This message type is sent from CSIController. 
+ */ +export type MonitoringReplyMessage = { msgCode: RunnerMessageCode.MONITORING_REPLY} & MonitoringReplyMessageData; diff --git a/packages/utility/src/index.ts b/packages/utility/src/index.ts index 44d9406c6..8028bc519 100644 --- a/packages/utility/src/index.ts +++ b/packages/utility/src/index.ts @@ -1,15 +1,16 @@ +export * from "./config"; +export * from "./constants"; export * from "./defer"; +export * from "./file"; export * from "./free-ports-finder"; +export * from "./keygen"; export * from "./merge"; +export * from "./normalize-url"; export * from "./promise-timeout"; -export * from "./read-streamed-json"; -export * from "./typeguards"; -export * from "./typed-emitter"; export * from "./read-json-file"; -export * from "./normalize-url"; +export * from "./read-streamed-json"; export * from "./stream-to-string"; -export * from "./config"; -export * from "./file"; -export * from "./constants"; +export * from "./typed-emitter"; +export * from "./typeguards"; export * from "./validators"; -export * from "./keygen"; + From 8cbbf0e7d101df459fc07a7a04c986ca38219cc3 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 18 Sep 2023 16:33:15 +0000 Subject: [PATCH 15/62] Reconnect. 
k8s adapter run --- .../src/kubernetes-instance-adapter.ts | 32 +++++++++---------- .../adapters/src/process-instance-adapter.ts | 9 +++--- packages/model/src/stream-handler.ts | 1 + packages/runner/src/runner.ts | 15 +++++---- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index 05e13125d..39add3c03 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -39,6 +39,8 @@ IComponent { private adapterConfig: K8SAdapterConfiguration; private _limits?: InstanceLimits = {}; + stdErrorStream?: PassThrough; + get limits() { return this._limits || {} as InstanceLimits; } private set limits(value: InstanceLimits) { this._limits = value; } @@ -89,17 +91,13 @@ IComponent { } }; } - async dispatch(_config: InstanceConfig, _instancesServerPort: number, _instanceId: string, _sequenceInfo: SequenceInfo, _payload: RunnerConnectInfo): Promise { - throw Error("not implemented"); - } - - async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, _payload: RunnerConnectInfo): Promise { if (config.type !== "kubernetes") { throw new Error(`Invalid config type for kubernetes adapter: ${config.type}`); } if (this.adapterConfig.quotaName && await this.kubeClient.isPodsLimitReached(this.adapterConfig.quotaName)) { - return RunnerExitCode.PODS_LIMIT_REACHED; + throw Error(RunnerExitCode.PODS_LIMIT_REACHED.toString()); } this.limits = config.limits; @@ -154,25 +152,27 @@ IComponent { // This means runner pod was unable to start. So it went from "Pending" to "Failed" state directly. // Return 1 which is Linux exit code for "General Error" since we are not able // to determine what happened exactly. 
- return startPodStatus.code || 137; + return; } this.logger.debug("Copy sequence files to Runner"); const compressedStream = createReadStream(path.join(config.sequenceDir, "compressed.tar.gz")); - const stdErrorStream = new PassThrough(); - stdErrorStream.on("data", (data) => { this.logger.error("POD stderr", data.toString()); }); + this.stdErrorStream = new PassThrough(); + this.stdErrorStream.on("data", (data) => { this.logger.error("POD stderr", data.toString()); }); - await this.kubeClient.exec(runnerName, runnerName, ["unpack.sh", "/package"], process.stdout, stdErrorStream, compressedStream, 2); + await this.kubeClient.exec(runnerName, runnerName, ["unpack.sh", "/package"], process.stdout, this.stdErrorStream, compressedStream, 2); + } - const exitPodStatus = await this.kubeClient.waitForPodStatus(runnerName, ["Succeeded", "Failed", "Unknown"]); + async waitUntilExit(_config: InstanceConfig, _instanceId: string, _sequenceInfo: SequenceInfo): Promise { + const exitPodStatus = await this.kubeClient.waitForPodStatus(this._runnerName!, ["Succeeded", "Failed", "Unknown"]); - stdErrorStream.end(); + this.stdErrorStream?.end(); if (exitPodStatus.status !== "Succeeded") { this.logger.error("Runner stopped incorrectly", exitPodStatus); - this.logger.error("Container failure reason is: ", await this.kubeClient.getPodTerminatedContainerReason(runnerName)); + this.logger.error("Container failure reason is: ", await this.kubeClient.getPodTerminatedContainerReason(this._runnerName!)); return exitPodStatus.code || 137; } @@ -185,9 +185,9 @@ IComponent { return 0; } - async waitUntilExit(_config: InstanceConfig, _instanceId:string, _sequenceInfo: SequenceInfo): Promise { - this.logger.debug("WaitUntilExit", [_config, _instanceId, _sequenceInfo]); - throw Error("Not implemented"); + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + await this.dispatch(config, 
instancesServerPort, instanceId, sequenceInfo, payload); + return this.waitUntilExit(config, instanceId, sequenceInfo); } async cleanup(): Promise { diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 05f6849d7..45dfb6eb3 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -60,7 +60,10 @@ class ProcessInstanceAdapter implements if (!runnerProcess) { // Runner process not initialized yet - return msg; + return { + ...msg, + processId: this.processPID + }; } return { @@ -170,10 +173,6 @@ class ProcessInstanceAdapter implements this.logger.trace("Runner process is running", runnerProcess.pid); - // @todo exit here with pid - // then promise waiting for process with given pid finish (endOfRun) - // how to connect to a process knowing id of it? - this.runnerProcess = runnerProcess; } diff --git a/packages/model/src/stream-handler.ts b/packages/model/src/stream-handler.ts index 2b5d258e7..e4a30d2c5 100644 --- a/packages/model/src/stream-handler.ts +++ b/packages/model/src/stream-handler.ts @@ -163,6 +163,7 @@ export class CommunicationHandler implements ICommunicationHandler { this.logger.error("pipeMessageStreams called twice"); throw new Error("pipeMessageStreams called twice"); } + this._piped = true; if (!this.downstreams || !this.upstreams) { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index c588afd63..a9e13adc2 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -39,6 +39,15 @@ import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; import { MessageUtils } from "./message-utils"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; +process.once("beforeExit", (code) => { + const filepath = `/tmp/runner-${process.pid.toString()}`; + + writeFileSync(filepath, code.toString()); + + // eslint-disable-next-line no-console + 
console.log("Runner exit"); +}); + // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; @@ -163,12 +172,6 @@ export class Runner implements IComponent { throw e; }); - - process.on("beforeExit", (code) => { - const filepath = `/tmp/runner-${process.pid.toString()}`; - - writeFileSync(filepath, code.toString()); - }); } get context(): RunnerAppContext { From e569f3e73d735a393668064a990732860d674a58 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 25 Sep 2023 12:02:53 +0000 Subject: [PATCH 16/62] Reconnect. logs. [wip] --- packages/host/src/lib/csi-controller.ts | 5 ++++- packages/runner/src/runner.ts | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index fcc4360af..f635c5920 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -186,7 +186,10 @@ export class CSIController extends TypedEmitter { this.logger = new ObjLogger(this, { id: this.id }); - this.logger.debug("Constructor executed"); + this.logger.debug("Constructor executed", arguments); + + // eslint-disable-next-line no-console + console.log("Constructor executed", arguments); this.status = InstanceStatus.INITIALIZING; diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index a9e13adc2..a9785958b 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -328,7 +328,9 @@ export class Runner implements IComponent { .finally(() => process.exit()); } - async premain() { + async premain(): Promise<{ appConfig: AppConfig, args: any}> { + this.logger.debug("premain"); + try { await this.hostClient.init(this.instanceId); } catch (e) { @@ -336,9 +338,7 @@ export class Runner implements IComponent { await defer(2000); - this.premain().catch((err: any) => { - this.logger.error("Premain error", err); - }); + return await this.premain(); } this.redirectOutputs(); @@ 
-358,7 +358,7 @@ export class Runner implements IComponent { const { appConfig, args } = await this.waitForHandshakeResponse(); - this.logger.debug("Handshake received"); + this.logger.debug("Handshake received", appConfig, args); return { appConfig, args }; } From 97ff56a2fe36f2738fbba49937433d61afa01037 Mon Sep 17 00:00:00 2001 From: Piotr Date: Mon, 25 Sep 2023 12:28:50 +0000 Subject: [PATCH 17/62] fix runner appConfig WIP --- packages/runner/src/runner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index a9785958b..676fc6b0f 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -356,7 +356,7 @@ export class Runner implements IComponent { this.sendHandshakeMessage(); - const { appConfig, args } = await this.waitForHandshakeResponse(); + const { args, appConfig } = this.runnerConnectInfo; this.logger.debug("Handshake received", appConfig, args); From 91f0572bd279d05efe387c9313a34ad3faad627e Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 25 Sep 2023 22:24:20 +0000 Subject: [PATCH 18/62] Restore piping instance --- packages/host/src/lib/csi-controller.ts | 14 +-- packages/host/src/lib/csi-dispatcher.ts | 109 ++++++++++-------- packages/host/src/lib/host.ts | 6 +- .../src/lib/serviceDiscovery/sd-adapter.ts | 2 + 4 files changed, 75 insertions(+), 56 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index f635c5920..0de68df81 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -63,6 +63,7 @@ type Events = { const BPMux = require("bpmux").BPMux; +export type CSIControllerInfo = { ports?: any; created?: Date; started?: Date; ended?: Date; }; /** * Handles all Instance lifecycle, exposes instance's HTTP API. 
* @@ -95,7 +96,7 @@ export class CSIController extends TypedEmitter { args: Array | undefined; controlDataStream?: DataStream; router?: APIRoute; - info: { ports?: any; created?: Date; started?: Date; ended?: Date; } = {}; + info: CSIControllerInfo = {}; status: InstanceStatus; terminated?: { exitcode: number; reason: string; }; provides?: string; @@ -111,6 +112,8 @@ export class CSIController extends TypedEmitter { apiOutput = new PassThrough(); apiInputEnabled = true; + executionTime: number = -1; + /** * Topic to which the output stream should be routed */ @@ -186,11 +189,6 @@ export class CSIController extends TypedEmitter { this.logger = new ObjLogger(this, { id: this.id }); - this.logger.debug("Constructor executed", arguments); - - // eslint-disable-next-line no-console - console.log("Constructor executed", arguments); - this.status = InstanceStatus.INITIALIZING; this.upStreams = [ @@ -205,8 +203,8 @@ export class CSIController extends TypedEmitter { ]; } - async start() { - const i = new Promise((res, rej) => { + async start(): Promise { + const i = new Promise((res, rej) => { this.initResolver = { res, rej }; this.startInstance(); }); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 6a779596d..39363bc0a 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -5,13 +5,16 @@ import { RunnerMessageCode } from "@scramjet/symbols"; import { HostProxy, ICommunicationHandler, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { TypedEmitter } from "@scramjet/utility"; -import { CSIController } from "./csi-controller"; +import { CSIController, CSIControllerInfo } from "./csi-controller"; import { InstanceStore } from "./instance-store"; -import SequenceStore from "./sequenceStore"; -import { SocketServer } from 
"./socket-server"; +import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; +import { ContentType } from "./serviceDiscovery/contentType"; +import TopicId from "./serviceDiscovery/topicId"; +import { Readable, Writable } from "stream"; type errorEventData = {id:string, err: any } -type endEventData = {id:string, code:number } +type endEventData = { id: string, code: number, info: CSIControllerInfo & { executionTime: number }, sequence: SequenceInfo}; + type Events = { pang: (payload: MessageDataType) => void; hourChime: () => void; @@ -24,16 +27,16 @@ type Events = { export class CSIDispatcher extends TypedEmitter { public logger: IObjectLogger; - //private socketServer: SocketServer; public instancesStore: typeof InstanceStore; private STHConfig: STHConfiguration; + private serviceDiscovery: ServiceDiscovery; - constructor(_socketServer: SocketServer, instancesStore: typeof InstanceStore, _sequenceStore: SequenceStore, STHConfig: STHConfiguration) { + constructor(instancesStore: typeof InstanceStore, serviceDiscovery: ServiceDiscovery, STHConfig: STHConfiguration) { super(); this.logger = new ObjLogger(this); - //this.socketServer = socketServer; this.instancesStore = instancesStore; this.STHConfig = STHConfig; + this.serviceDiscovery = serviceDiscovery; } async createCSIController( @@ -53,7 +56,6 @@ export class CSIDispatcher extends TypedEmitter { communicationHandler.logger.pipe(this.logger, { end: false }); csiController.on("error", (err) => { - //this.pushTelemetry("Instance error", { ...err }, "error"); this.logger.error("CSIController errored", err.message, err.exitcode); this.emit("error", { id, err }); }); @@ -67,47 +69,46 @@ export class CSIDispatcher extends TypedEmitter { } if (data.requires && !csiController.inputRouted && data.contentType) { - this.logger.trace("Routing Sequence input to topic", data.requires); + this.logger.trace("Routing Sequence topic to input", data.requires); - // await this.serviceDiscovery.routeTopicToStream( - // { 
topic: data.requires, contentType: data.contentType! }, - // csiController.getInputStream() - // ); + await this.serviceDiscovery.routeTopicToStream( + { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, + csiController.getInputStream() + ); csiController.inputRouted = true; - // await this.serviceDiscovery.update({ - // requires: data.requires, contentType: data.contentType!, topicName: data.requires - // }); + await this.serviceDiscovery.update({ + requires: data.requires, contentType: data.contentType, topicName: data.requires + }); } if (data.provides && !csiController.outputRouted && data.contentType) { this.logger.trace("Routing Sequence output to topic", data.provides); - // await this.serviceDiscovery.routeStreamToTopic( - // csiController.getOutputStream(), - // { topic: data.provides, contentType: data.contentType! }, - // csiController.id - // ); + await this.serviceDiscovery.routeStreamToTopic( + csiController.getOutputStream(), + { topic: new TopicId(data.provides), contentType: data.contentType as ContentType } + ); csiController.outputRouted = true; - // await this.serviceDiscovery.update({ - // provides: data.provides, contentType: data.contentType!, topicName: data.provides - // }); + await this.serviceDiscovery.update({ + provides: data.provides, contentType: data.contentType!, topicName: data.provides + }); } }); csiController.on("end", async (code) => { - this.logger.trace("csiControllerontrolled ended", `Exit code: ${code}`); - - // if (csiController.provides && csiController.provides !== "") { - // csiController.getOutputStream()!.unpipe(this.serviceDiscovery.getData( - // { - // topic: csiController.provides, - // contentType: "" - // } - // ) as Writable); - // } + this.logger.trace("csiControllerontrolled ended", `id: ${csiController.id}`, `Exit code: ${code}`); + + if (csiController.provides && csiController.provides !== "") { + csiController.getOutputStream().unpipe(this.serviceDiscovery.getData( + { + 
topic: new TopicId(csiController.provides), + contentType: "" as ContentType + } + ) as Writable); + } csiController.logger.unpipe(this.logger); @@ -123,17 +124,24 @@ export class CSIDispatcher extends TypedEmitter { // }, InstanceMessageCode.INSTANCE_ENDED); // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); - this.emit("end", { id, code }); + this.emit("end", { + id, + code, + info: { + executionTime: csiController.executionTime + }, + sequence: csiController.sequence + }); }); csiController.once("terminated", (code) => { - // if (csiController.requires && csiController.requires !== "") { - // (this.serviceDiscovery.getData({ - // topic: csiController.requires, - // contentType: "", - // }) as Readable - // ).unpipe(csiController.getInputStream()!); - // } + if (csiController.requires && csiController.requires !== "") { + (this.serviceDiscovery.getData({ + topic: new TopicId(csiController.requires), + contentType: "" as ContentType, + }) as Readable + ).unpipe(csiController.getInputStream()!); + } // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); // this.pushTelemetry("Instance ended", { @@ -144,11 +152,20 @@ export class CSIDispatcher extends TypedEmitter { // code: code.toString(), // seqId: csiController.sequence.id // }); - this.emit("terminated", { id, code }); + + this.emit("terminated", { + id, + code, + info: { + executionTime: csiController.executionTime + }, + sequence: csiController.sequence + }); }); - csiController.start().catch(() => { - //@TODO: handle start error; + csiController.start().catch((e) => { + this.logger.error("CSIC start error", csiController.id, e); + throw new Error("CSIC start error"); }); this.logger.trace("csiController started", id); @@ -189,8 +206,8 @@ export class CSIDispatcher extends TypedEmitter { await new Promise((resolve, _reject) => { const resolveFunction = (eventId: string) => { if (eventId === id) { - resolve(); this.off("established", resolveFunction); + resolve(); } }; diff 
--git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index a12d8739e..eb27da72e 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -239,7 +239,7 @@ export class Host implements IComponent { this.instanceBase = `${this.config.host.apiBase}/instance`; this.topicsBase = `${this.config.host.apiBase}/topic`; - this.csiDispatcher = new CSIDispatcher(this.socketServer, this.instancesStore, this.sequenceStore, sthConfig); + this.csiDispatcher = new CSIDispatcher(this.instancesStore, this.serviceDiscovery, sthConfig); this.csiDispatcher.logger.pipe(this.logger); @@ -273,7 +273,9 @@ export class Host implements IComponent { } attachDispatcherEvents() { - //this.csiDispatcher.on(); + this.csiDispatcher.on("error", (errorData) => { + this.pushTelemetry("Instance error", { ...errorData }, "error"); + }); } getId() { diff --git a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts index 48fc7bf8b..f82a86e32 100644 --- a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts +++ b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts @@ -76,6 +76,8 @@ export class ServiceDiscovery { const topic = this.topicsController.get(topicName); if (topic) { + config.contentType ||= topic.contentType; + if (topic.contentType !== config.contentType) { this.logger.error("Content-type mismatch, existing and requested ", topic.contentType, config.contentType); throw new Error("Content-type mismatch"); From 6a4b31968e87d603c78c263accef9c1510acb9a5 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 26 Sep 2023 09:12:13 +0000 Subject: [PATCH 19/62] reconnect. 
runner pid in docker, k8s [TEST] --- bdd/step-definitions/e2e/host-steps.ts | 46 +++++++++++-------- .../adapters/src/docker-instance-adapter.ts | 7 ++- .../adapters/src/process-instance-adapter.ts | 3 +- packages/host/src/lib/csi-controller.ts | 17 ++++++- packages/host/src/lib/csi-dispatcher.ts | 6 +-- packages/host/src/lib/host.ts | 10 +++- 6 files changed, 64 insertions(+), 25 deletions(-) diff --git a/bdd/step-definitions/e2e/host-steps.ts b/bdd/step-definitions/e2e/host-steps.ts index 86c327e0f..fe335b12b 100644 --- a/bdd/step-definitions/e2e/host-steps.ts +++ b/bdd/step-definitions/e2e/host-steps.ts @@ -430,29 +430,39 @@ When("send kill message to instance", async function(this: CustomWorld) { assert.ok(resp); }); +// eslint-disable-next-line complexity When("get runner PID", { timeout: 31000 }, async function(this: CustomWorld) { let success: any; let tries = 0; - while (!success && tries < 3) { - if (process.env.RUNTIME_ADAPTER === "kubernetes") { - // @TODO - return; - } - - if (process.env.RUNTIME_ADAPTER === "process") { - const res = (await this.resources.instance?.getHealth())?.processId; + const adapter = process.env.RUNTIME_ADAPTER; - if (res) { - processId = success = res; - console.log("Process is identified.", processId); - } - } else { - containerId = success = (await this.resources.instance?.getHealth())?.containerId!; - - if (containerId) { - console.log("Container is identified.", containerId); - } + while (!success && tries < 3) { + const health = await this.resources.instance?.getHealth(); + + console.log("Health", health); + + switch (adapter) { + case "kubernetes": + return; + case "docker": + + containerId = success = health?.containerId!; + + if (containerId) { + console.log("Container is identified.", containerId); + } + break; + case "process": + const res = health?.processId; + + if (res) { + processId = success = res; + console.log("Process is identified.", processId); + } + break; + default: + break; } tries++; diff --git 
a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 994b2c37c..0e8e6ac69 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -114,6 +114,8 @@ IComponent { * @returns {Promise} Promise resolved with container statistics. */ async stats(msg: MonitoringMessageData): Promise { + this.logger.debug("STATS. Container id:", this.resources.containerId); + if (this.resources.containerId) { const stats = await this.dockerHelper.stats(this.resources.containerId)!; @@ -225,7 +227,7 @@ IComponent { this.crashLogStreams = Promise.all(([streams.stdout, streams.stderr] as Readable[]).map(streamToString)); - this.resources.containerId = containerId; + this.resources.containerId = containerId; // doesnt matter this.logger.trace("Container is running", containerId); } @@ -233,6 +235,9 @@ IComponent { async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { try { const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); + + this.resources.containerId = containerId; + const { statusCode } = await this.dockerHelper.wait(containerId); this.logger.debug("Container exited", statusCode); diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 45dfb6eb3..13986f6a4 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -35,6 +35,7 @@ class ProcessInstanceAdapter implements sthConfig: STHConfiguration; processPID: number = -1; + id?: string | undefined; private runnerProcess?: ChildProcess; private crashLogStreams?: Promise; @@ -50,11 +51,11 @@ class ProcessInstanceAdapter implements this.logger = new ObjLogger(this); this.sthConfig = config; } - id?: string | undefined; async init(): Promise { // noop } + async stats(msg: MonitoringMessageData): Promise 
{ const { runnerProcess } = this; diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 0de68df81..55f6fa12e 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -174,6 +174,9 @@ export class CSIController extends TypedEmitter { ) { super(); + // eslint-disable-next-line no-console + console.log(handshakeMessage); + this.id = this.handshakeMessage.id; this.runnerSystemInfo = this.handshakeMessage.payload.system; this.sequence = this.handshakeMessage.sequenceInfo; @@ -253,6 +256,8 @@ export class CSIController extends TypedEmitter { } this.info.ended = new Date(); + this.executionTime = (this.info.ended.getTime() - this.info.started!.getTime()) / 1000; + this.emit("terminated", code); this.logger.trace("Finalizing..."); @@ -459,12 +464,17 @@ export class CSIController extends TypedEmitter { .pipe(this.upStreams[CC.CONTROL]); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { + // eslint-disable-next-line no-console + console.log("ping", message); await this.handleHandshake(message); return null; }); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PANG, async (message) => { + // eslint-disable-next-line no-console + console.log("pong", message); + const pangData = message[1]; this.provides ||= this.outputTopic || pangData.provides; @@ -484,6 +494,8 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.MONITORING, async message => { const stats = await this.instanceAdapter.stats(message[1]); + this.logger.debug("Health stats", stats); + this._lastStats = stats; this.heartBeatTick(); @@ -545,7 +557,7 @@ export class CSIController extends TypedEmitter { throw new CSIControllerError("UNINITIALIZED_STREAM", "control"); } - this.info.started = new Date(); + this.info.started = new Date(); //@TODO: set by runner? 
this.logger.info("Instance started", JSON.stringify(message, undefined)); } @@ -782,6 +794,9 @@ export class CSIController extends TypedEmitter { } getInfo(): STHRestAPI.GetInstanceResponse { + // eslint-disable-next-line no-console + this.logger.debug(this.sequence, this.info); + return { id: this.id, appConfig: this.appConfig, diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 39363bc0a..6bab08876 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -62,7 +62,7 @@ export class CSIDispatcher extends TypedEmitter { // eslint-disable-next-line complexity csiController.on("pang", async (data) => { - this.logger.trace("PANG received", data); + this.logger.trace("PANG received", [csiController.id, data]); if ((data.requires || data.provides) && !data.contentType) { this.logger.warn("Missing topic content-type"); @@ -96,6 +96,8 @@ export class CSIDispatcher extends TypedEmitter { provides: data.provides, contentType: data.contentType!, topicName: data.provides }); } + + this.emit("established", id); // after pang? 
}); csiController.on("end", async (code) => { @@ -174,8 +176,6 @@ export class CSIDispatcher extends TypedEmitter { this.instancesStore[id] = csiController; - this.emit("established", id); - return csiController; } diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index eb27da72e..9cfd4bba3 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -276,6 +276,14 @@ export class Host implements IComponent { this.csiDispatcher.on("error", (errorData) => { this.pushTelemetry("Instance error", { ...errorData }, "error"); }); + + this.csiDispatcher.on("end", () => { + + }); + + this.csiDispatcher.on("terminated", () => { + + }); } getId() { @@ -1002,7 +1010,7 @@ export class Host implements IComponent { */ private attachListeners() { this.socketServer.on("connect", async (id, streams) => { - this.logger.debug("Instance connected", id); + this.logger.debug("Instance connecting", id); // @todo need more instance info if (!this.instancesStore[id]) { From 23cda86f460c795be1c6b05f883a2edd2c928671 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 26 Sep 2023 16:16:09 +0000 Subject: [PATCH 20/62] Kill runner --- packages/host/src/lib/csi-controller.ts | 2 +- packages/runner/src/runner.ts | 30 +++++++++++++++++------- packages/symbols/src/runner-exit-code.ts | 3 ++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 55f6fa12e..4fdbf93b2 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -795,7 +795,7 @@ export class CSIController extends TypedEmitter { getInfo(): STHRestAPI.GetInstanceResponse { // eslint-disable-next-line no-console - this.logger.debug(this.sequence, this.info); + this.logger.debug("Get info [seq, info]", this.sequence, this.info); return { id: this.id, diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 676fc6b0f..b478e76ca 100644 
--- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -21,7 +21,7 @@ import { Streamable, SynchronousStreamable } from "@scramjet/types"; -import { defer } from "@scramjet/utility"; +import { defer, promiseTimeout } from "@scramjet/utility"; import { BufferStream, DataStream, StringStream } from "scramjet"; @@ -39,14 +39,24 @@ import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; import { MessageUtils } from "./message-utils"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; -process.once("beforeExit", (code) => { +let exitHandled = false; + +function onBeforeExit(code: number) { + if (exitHandled) return; + const filepath = `/tmp/runner-${process.pid.toString()}`; writeFileSync(filepath, code.toString()); - // eslint-disable-next-line no-console - console.log("Runner exit"); -}); + exitHandled = true; +} + +function onException(_error: Error) { + onBeforeExit(RunnerExitCode.UNCAUGHT_EXCEPTION); +} + +process.once("beforeExit", onBeforeExit); +process.once("uncaughtException", onException); // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; @@ -321,11 +331,11 @@ export class Runner implements IComponent { private async exit(exitCode?: number) { //TODO: we need to wait a bit for the logs to flush - we shouldn't need to as cleanup should wait. 
- await defer(200); + //await defer(200); this.cleanup() .then((code) => { process.exitCode = exitCode || code; }, (e) => console.error(e?.stack)) - .finally(() => process.exit()); + .finally(() => { onBeforeExit(process.exitCode!); process.exit(); }); } async premain(): Promise<{ appConfig: AppConfig, args: any}> { @@ -447,7 +457,9 @@ export class Runner implements IComponent { try { this.logger.info("Cleaning up streams"); - await this.hostClient.disconnect(); + await promiseTimeout( + this.hostClient.disconnect(), 5000 + ); } catch (e: any) { exitcode = RunnerExitCode.CLEANUP_FAILED; } @@ -457,8 +469,10 @@ export class Runner implements IComponent { private async revertOutputs() { this.logger.unpipe(this.hostClient.logStream); + revertStandardStream(process.stdout); revertStandardStream(process.stderr); + this.logger.addOutput(process.stderr); } diff --git a/packages/symbols/src/runner-exit-code.ts b/packages/symbols/src/runner-exit-code.ts index 3e7bcf11c..1c33d8b43 100644 --- a/packages/symbols/src/runner-exit-code.ts +++ b/packages/symbols/src/runner-exit-code.ts @@ -8,5 +8,6 @@ export enum RunnerExitCode { STOPPED = 138, SUCCESS = 0, CLEANUP_FAILED = 223, - PODS_LIMIT_REACHED = 24 + PODS_LIMIT_REACHED = 24, + UNCAUGHT_EXCEPTION = 101 } From 2cb372a460cc46b44de17c3a9a9f7b516ecff1de Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 29 Sep 2023 08:30:16 +0000 Subject: [PATCH 21/62] Fixing missing containerid --- packages/adapters/src/docker-instance-adapter.ts | 10 ++++++++++ packages/host/src/lib/csi-controller.ts | 5 ++++- packages/types/src/lifecycle-adapters.ts | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 0e8e6ac69..a7f89406c 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -171,6 +171,14 @@ IComponent { }; } + async setRunner(system: Record): Promise { + 
const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", system.id); + + this.logger.debug("Container id restored", containerId); + + this.resources.containerId = containerId; + } + async run(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { await this.dispatch(config, instancesServerPort, instanceId, sequenceInfo, payload); return this.waitUntilExit(config, instanceId, sequenceInfo); @@ -236,6 +244,8 @@ IComponent { try { const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); + this.logger.debug("Container id restored", containerId); + this.resources.containerId = containerId; const { statusCode } = await this.dockerHelper.wait(containerId); diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 4fdbf93b2..cc5d4822f 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -536,7 +536,10 @@ export class CSIController extends TypedEmitter { } if (this.instanceAdapter.setRunner) { - this.instanceAdapter.setRunner(message[1].payload.system); + await this.instanceAdapter.setRunner({ + ...message[1].payload.system, + id: this.id + }); } this.info.ports = message[1].ports; diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 919eb9483..5397f1ad6 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -36,7 +36,7 @@ export interface ILifeCycleAdapterMain { // @TODO create ISequenceAdapter interface export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { - setRunner?(system: RunnerConnectInfo["system"]): void; + setRunner?(system: RunnerConnectInfo["system"]): MaybePromise; limits: InstanceLimits; From 66ee06dba82d73668b7b2fb91e186e918d99d5ad Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 29 Sep 2023 
17:48:25 +0000 Subject: [PATCH 22/62] Get container id on stats when missing --- .../adapters/src/docker-instance-adapter.ts | 26 +++++++++---------- packages/runner/src/runner.ts | 13 +++++++--- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index a7f89406c..01683e9d6 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -116,22 +116,20 @@ IComponent { async stats(msg: MonitoringMessageData): Promise { this.logger.debug("STATS. Container id:", this.resources.containerId); - if (this.resources.containerId) { - const stats = await this.dockerHelper.stats(this.resources.containerId)!; + this.resources.containerId ||= await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", this.id); - return { - cpuTotalUsage: stats.cpu_stats?.cpu_usage?.total_usage, - healthy: msg.healthy, - limit: stats.memory_stats?.limit, - memoryMaxUsage: stats.memory_stats?.max_usage, - memoryUsage: stats.memory_stats?.usage, - networkRx: stats.networks?.eth0?.rx_bytes, - networkTx: stats.networks?.eth0?.tx_bytes, - containerId: this.resources.containerId - }; - } + const stats = await this.dockerHelper.stats(this.resources.containerId)!; - return msg; + return { + cpuTotalUsage: stats.cpu_stats?.cpu_usage?.total_usage, + healthy: msg.healthy, + limit: stats.memory_stats?.limit, + memoryMaxUsage: stats.memory_stats?.max_usage, + memoryUsage: stats.memory_stats?.usage, + networkRx: stats.networks?.eth0?.rx_bytes, + networkTx: stats.networks?.eth0?.tx_bytes, + containerId: this.resources.containerId + }; } private async getNetworkSetup(): Promise<{ network: string, host: string }> { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index b478e76ca..33c724e5e 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -21,7 +21,7 @@ import { Streamable, 
SynchronousStreamable } from "@scramjet/types"; -import { defer, promiseTimeout } from "@scramjet/utility"; +import { defer } from "@scramjet/utility"; import { BufferStream, DataStream, StringStream } from "scramjet"; @@ -452,14 +452,19 @@ export class Runner implements IComponent { this.logger.trace("Monitoring interval removed"); } + if (this.monitoringMessageReplyTimeout) { + clearTimeout(this.monitoringMessageReplyTimeout); + this.logger.trace("Monitoring reply check removed"); + } + let exitcode = 0; try { this.logger.info("Cleaning up streams"); - await promiseTimeout( - this.hostClient.disconnect(), 5000 - ); + // await promiseTimeout( + // this.hostClient.disconnect(), 5000 + // ); } catch (e: any) { exitcode = RunnerExitCode.CLEANUP_FAILED; } From bf71f9a41c4c38f9886e9e31b67360920d973c7d Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 29 Sep 2023 18:48:26 +0000 Subject: [PATCH 23/62] Kill all docker runners --- bdd/step-definitions/e2e/host-steps.ts | 51 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/bdd/step-definitions/e2e/host-steps.ts b/bdd/step-definitions/e2e/host-steps.ts index fe335b12b..2e95d4c9e 100644 --- a/bdd/step-definitions/e2e/host-steps.ts +++ b/bdd/step-definitions/e2e/host-steps.ts @@ -84,23 +84,42 @@ const waitForProcessToEnd = async (pid: number) => { } }; -const killRunner = async () => { - if (process.env.RUNTIME_ADAPTER === "kubernetes") { - // @TODO - return; - } - - if (process.env.RUNTIME_ADAPTER === "process" && processId) { - try { - process.kill(processId); - await waitForProcessToEnd(processId); - } catch (e) { - console.error("Couldn't kill runner", e); - } +// const killRunner = async () => { +// if (process.env.RUNTIME_ADAPTER === "kubernetes") { +// // @TODO +// return; +// } + +// if (process.env.RUNTIME_ADAPTER === "process" && processId) { +// try { +// process.kill(processId); +// await waitForProcessToEnd(processId); +// } catch (e) { +// console.error("Couldn't kill 
runner", e); +// } +// } + +// if (process.env.RUNTIME_ADAPTER === "docker" && containerId) { +// await dockerode.getContainer(containerId).kill(); +// } +// }; + +const killAllRunners = async () => { + if (process.env.RUNTIME_ADAPTER === "process") { + exec("killall runner"); } - if (process.env.RUNTIME_ADAPTER === "docker" && containerId) { - await dockerode.getContainer(containerId).kill(); + if (process.env.RUNTIME_ADAPTER === "docker") { + await Promise.all( + (await dockerode.listContainers()) + .map(async container => { + if (container.Labels["scramjet.instance.id"]) { + return dockerode.getContainer(container.Id).kill(); + } + + return Promise.resolve(); + }) + ); } }; @@ -162,7 +181,7 @@ Before(() => { streams = {}; }); -After({ tags: "@runner-cleanup" }, killRunner); +After({ tags: "@runner-cleanup" }, killAllRunners); const startHost = async () => { let apiUrl = process.env.SCRAMJET_HOST_BASE_URL; From bf98b4cd0fa1127857531b942d41e23e5290098c Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 2 Oct 2023 10:37:55 +0000 Subject: [PATCH 24/62] Disable reconect --- packages/runner/src/runner.ts | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 33c724e5e..0d967a3cc 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -273,18 +273,20 @@ export class Runner implements IComponent { [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream ); - this.monitoringMessageReplyTimeout = setTimeout(async () => { - await this.handleDisconnect(); - }, 500); + // this.monitoringMessageReplyTimeout = setTimeout(async () => { + // await this.handleDisconnect(); + // }, 1000); } - async handleDisconnect() { - this.logger.info("Reinitializing...."); + // async handleDisconnect() { + // await defer(5000); - this.premain().catch((e) => { - this.logger.error("Premain error", e); - }); - } + // 
this.logger.info("Reinitializing...."); + + // this.premain().catch((e) => { + // this.logger.error("Premain error", e); + // }); + // } async handleKillRequest(): Promise { this.logger.debug("Handling KILL request"); From d226c92bc074d0241c3bc6c7a0862932dcf41b64 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 2 Oct 2023 11:01:16 +0000 Subject: [PATCH 25/62] prune after scenario --- bdd/step-definitions/e2e/host-steps.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bdd/step-definitions/e2e/host-steps.ts b/bdd/step-definitions/e2e/host-steps.ts index 2e95d4c9e..5b3dffc5d 100644 --- a/bdd/step-definitions/e2e/host-steps.ts +++ b/bdd/step-definitions/e2e/host-steps.ts @@ -182,6 +182,13 @@ Before(() => { }); After({ tags: "@runner-cleanup" }, killAllRunners); +After({}, async () => { + const seqs = await hostClient.listSequences(); + + await Promise.all( + seqs.map(seq => hostClient.deleteSequence(seq.id, { force: true })) + ); +}); const startHost = async () => { let apiUrl = process.env.SCRAMJET_HOST_BASE_URL; From b9b8bfde72e621e6ad10e5df27b634c3f56eb769 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 2 Oct 2023 12:55:44 +0000 Subject: [PATCH 26/62] Kill runner will --- bdd/step-definitions/e2e/host-steps.ts | 10 ++++++++-- packages/adapters/src/process-instance-adapter.ts | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/bdd/step-definitions/e2e/host-steps.ts b/bdd/step-definitions/e2e/host-steps.ts index 5b3dffc5d..2cf10d89d 100644 --- a/bdd/step-definitions/e2e/host-steps.ts +++ b/bdd/step-definitions/e2e/host-steps.ts @@ -183,10 +183,16 @@ Before(() => { After({ tags: "@runner-cleanup" }, killAllRunners); After({}, async () => { - const seqs = await hostClient.listSequences(); + let insts = []; + + try { + insts = await hostClient.listInstances(); + } catch (_e) { + return; + } await Promise.all( - seqs.map(seq => hostClient.deleteSequence(seq.id, { force: true })) + insts.map(i => hostClient.getInstanceClient(i.id).kill({ 
removeImmediately: true }).catch(_e => {})) ); }); diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 13986f6a4..b0e428166 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -256,7 +256,11 @@ class ProcessInstanceAdapter implements * Forcefully stops Runner process. */ async remove() { - this.runnerProcess?.kill(); + if (this.runnerProcess) { + this.runnerProcess.kill(); + } else { + spawn("kill", ["-9", this.processPID.toString()]); + } } async getCrashLog(): Promise { From 7ae246ebdf4c4e6836d7053907099e47a09282ae Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 2 Oct 2023 14:06:32 +0000 Subject: [PATCH 27/62] Fix overwriting topics inout --- bdd/features/e2e/E2E-010-cli.feature | 1 + bdd/features/e2e/E2E-011-cli-topic.feature | 1 + packages/host/src/lib/csi-controller.ts | 12 +++++++++-- packages/host/src/lib/csi-dispatcher.ts | 22 +++++++------------ packages/host/src/lib/host.ts | 25 ++++++++++++++++++++-- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/bdd/features/e2e/E2E-010-cli.feature b/bdd/features/e2e/E2E-010-cli.feature index fdf3c92f3..a5498682b 100644 --- a/bdd/features/e2e/E2E-010-cli.feature +++ b/bdd/features/e2e/E2E-010-cli.feature @@ -46,6 +46,7 @@ Feature: CLI tests @ci-api @cli Scenario: E2E-010 TC-006 Test Sequence 'prune --force' option + Given I set config for local Hub When I execute CLI with "seq send ../packages/checksum-sequence.tar.gz" When I execute CLI with "seq send ../packages/csv-transform.tar.gz" When I execute CLI with "seq list" diff --git a/bdd/features/e2e/E2E-011-cli-topic.feature b/bdd/features/e2e/E2E-011-cli-topic.feature index e26b0e3ca..b39e3a9a0 100644 --- a/bdd/features/e2e/E2E-011-cli-topic.feature +++ b/bdd/features/e2e/E2E-011-cli-topic.feature @@ -19,6 +19,7 @@ This feature checks topic functionalities over CLI @ci-topic @cli Scenario: E2E-011 TC-003 API to 
Instance + # Given I set config for local Hub When I execute CLI with "topic send avengers data/data.json" without waiting for the end When I execute CLI with "seq send ../packages/hello-input-out.tar.gz" When I execute CLI with "seq start - --input-topic avengers " diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index cc5d4822f..842aba7f9 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -53,6 +53,7 @@ import { ReasonPhrases } from "http-status-codes"; const runnerExitDelay = 15000; type Events = { + ping: (pingMessage: MessageDataType) => void; pang: (payload: MessageDataType) => void; hourChime: () => void; error: (error: any) => void; @@ -175,7 +176,7 @@ export class CSIController extends TypedEmitter { super(); // eslint-disable-next-line no-console - console.log(handshakeMessage); + console.log("csic constructor handshakeMessage", handshakeMessage); this.id = this.handshakeMessage.id; this.runnerSystemInfo = this.handshakeMessage.payload.system; @@ -466,14 +467,19 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { // eslint-disable-next-line no-console console.log("ping", message); + + this.provides ||= this.outputTopic || message[1].payload?.outputTopic; + this.requires ||= this.inputTopic || message[1].payload?.inputTopic; + await this.handleHandshake(message); + this.emit("ping", message[1]); return null; }); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PANG, async (message) => { // eslint-disable-next-line no-console - console.log("pong", message); + console.log("pang", message); const pangData = message[1]; @@ -545,6 +551,8 @@ export class CSIController extends TypedEmitter { this.info.ports = message[1].ports; this.sequence = message[1].sequenceInfo; + this.inputTopic = message[1].payload?.inputTopic; + this.outputTopic = 
message[1].payload?.outputTopic; // TODO: add message to initiate the instance adapter if (this.controlDataStream) { diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 6bab08876..69381b31d 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -2,7 +2,7 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; import { RunnerMessageCode } from "@scramjet/symbols"; -import { HostProxy, ICommunicationHandler, IObjectLogger, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; +import { HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { TypedEmitter } from "@scramjet/utility"; import { CSIController, CSIControllerInfo } from "./csi-controller"; @@ -22,7 +22,7 @@ type Events = { stop: (code: number) => void; end: (data: endEventData) => void; terminated: (data: endEventData) => void; - established: (id: string) => void; + established: (instance: Instance) => void; }; export class CSIDispatcher extends TypedEmitter { @@ -69,7 +69,7 @@ export class CSIDispatcher extends TypedEmitter { } if (data.requires && !csiController.inputRouted && data.contentType) { - this.logger.trace("Routing Sequence topic to input", data.requires); + this.logger.trace("Routing topic to Sequence input", data.requires); await this.serviceDiscovery.routeTopicToStream( { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, @@ -96,8 +96,10 @@ export class CSIDispatcher extends TypedEmitter { provides: data.provides, contentType: data.contentType!, topicName: data.provides }); } + }); - this.emit("established", id); // after pang? 
+ csiController.on("ping", (pingMessage) => { + this.emit("established", { id: pingMessage.id, sequence: pingMessage.sequenceInfo }); }); csiController.on("end", async (code) => { @@ -114,12 +116,6 @@ export class CSIDispatcher extends TypedEmitter { csiController.logger.unpipe(this.logger); - delete InstanceStore[csiController.id]; - - sequenceInfo.instances = sequenceInfo.instances.filter(item => { - return item !== id; - }); - // await this.cpmConnector?.sendInstanceInfo({ // id: csiController.id, // sequence: sequence.id @@ -172,8 +168,6 @@ export class CSIDispatcher extends TypedEmitter { this.logger.trace("csiController started", id); - sequenceInfo.instances.push(id); - this.instancesStore[id] = csiController; return csiController; @@ -204,8 +198,8 @@ export class CSIDispatcher extends TypedEmitter { ); await new Promise((resolve, _reject) => { - const resolveFunction = (eventId: string) => { - if (eventId === id) { + const resolveFunction = (instance: Instance) => { + if (instance.id === id) { this.off("established", resolveFunction); resolve(); } diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 9cfd4bba3..a5478edbd 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -14,6 +14,7 @@ import { IComponent, IMonitoringServerConstructor, IObjectLogger, + Instance, LogLevel, MonitoringServerConfig, NextCallback, @@ -277,12 +278,30 @@ export class Host implements IComponent { this.pushTelemetry("Instance error", { ...errorData }, "error"); }); - this.csiDispatcher.on("end", () => { + this.csiDispatcher.on("end", (terminated) => { + const seq = this.sequenceStore.getById(terminated.sequence.id); + // eslint-disable-next-line no-console + console.log("ended", terminated); + + if (seq) { + seq.instances = seq.instances.filter(i => i !== terminated.id); + } + + delete this.instancesStore[terminated.id]; }); - this.csiDispatcher.on("terminated", () => { + this.csiDispatcher.on("established", (instance: 
Instance) => { + const seq = this.sequenceStore.getById(instance.sequence.id); + // eslint-disable-next-line no-console + console.log("established", instance); + + if (seq) { + seq.instances.push(instance.id); + } else { + this.logger.warn("Instance of not existing sequence connected"); + } }); } @@ -648,6 +667,8 @@ export class Host implements IComponent { error: `The sequence ${id} does not exist.` }; } + // eslint-disable-next-line no-console + console.log("Instances of sequence", sequenceInfo.id, sequenceInfo.instances); if (sequenceInfo.instances.length > 0) { const instances = [...sequenceInfo.instances].every((instanceId) => { From a1ec49a8b8fd6b5974d4f5092fb6a18b70a989a8 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Thu, 5 Oct 2023 12:29:54 +0000 Subject: [PATCH 28/62] Reconnect. Cleanup --- packages/host/src/lib/csi-dispatcher.ts | 81 ++++++++++------- packages/host/src/lib/host.ts | 95 ++++++++++++++------ packages/symbols/src/instance-status-code.ts | 4 +- 3 files changed, 120 insertions(+), 60 deletions(-) diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 69381b31d..ed9e02d25 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -2,7 +2,7 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; import { RunnerMessageCode } from "@scramjet/symbols"; -import { HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, STHConfiguration, STHRestAPI, SequenceInfo } from "@scramjet/types"; +import { HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; import { StartSequencePayload } from "@scramjet/types/src/rest-api-sth"; import { TypedEmitter } from "@scramjet/utility"; 
import { CSIController, CSIControllerInfo } from "./csi-controller"; @@ -11,32 +11,45 @@ import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; import { ContentType } from "./serviceDiscovery/contentType"; import TopicId from "./serviceDiscovery/topicId"; import { Readable, Writable } from "stream"; +import SequenceStore from "./sequenceStore"; -type errorEventData = {id:string, err: any } -type endEventData = { id: string, code: number, info: CSIControllerInfo & { executionTime: number }, sequence: SequenceInfo}; +export type DispatcherErrorEventData = { id:string, err: any }; +export type DispatcherInstanceEndEventData = { id: string, code: number, info: CSIControllerInfo & { executionTime: number }, sequence: SequenceInfoInstance}; +export type DispatcherInstanceTerminatedEventData = DispatcherInstanceEndEventData; +export type DispatcherInstanceEstablishedEventData = Instance; type Events = { pang: (payload: MessageDataType) => void; hourChime: () => void; - error: (data: errorEventData) => void; + error: (data: DispatcherErrorEventData) => void; stop: (code: number) => void; - end: (data: endEventData) => void; - terminated: (data: endEventData) => void; - established: (instance: Instance) => void; + end: (data: DispatcherInstanceEndEventData) => void; + terminated: (data: DispatcherInstanceEndEventData) => void; + established: (data: DispatcherInstanceEstablishedEventData) => void; }; +type CSIDispatcherOpts = { + instanceStore: typeof InstanceStore, + sequenceStore: SequenceStore, + serviceDiscovery: ServiceDiscovery, + STHConfig: STHConfiguration +} + export class CSIDispatcher extends TypedEmitter { public logger: IObjectLogger; - public instancesStore: typeof InstanceStore; + public instanceStore: typeof InstanceStore; + public sequenceStore: SequenceStore; private STHConfig: STHConfiguration; private serviceDiscovery: ServiceDiscovery; - constructor(instancesStore: typeof InstanceStore, serviceDiscovery: ServiceDiscovery, STHConfig: 
STHConfiguration) { + constructor(opts: CSIDispatcherOpts) { super(); + this.logger = new ObjLogger(this); - this.instancesStore = instancesStore; - this.STHConfig = STHConfig; - this.serviceDiscovery = serviceDiscovery; + this.instanceStore = opts.instanceStore; + this.sequenceStore = opts.sequenceStore; + this.STHConfig = opts.STHConfig; + this.serviceDiscovery = opts.serviceDiscovery; } async createCSIController( @@ -61,7 +74,7 @@ export class CSIDispatcher extends TypedEmitter { }); // eslint-disable-next-line complexity - csiController.on("pang", async (data) => { + csiController.on("pang", async (data: PangMessageData) => { this.logger.trace("PANG received", [csiController.id, data]); if ((data.requires || data.provides) && !data.contentType) { @@ -85,6 +98,7 @@ export class CSIDispatcher extends TypedEmitter { if (data.provides && !csiController.outputRouted && data.contentType) { this.logger.trace("Routing Sequence output to topic", data.provides); + await this.serviceDiscovery.routeStreamToTopic( csiController.getOutputStream(), { topic: new TopicId(data.provides), contentType: data.contentType as ContentType } @@ -98,11 +112,19 @@ export class CSIDispatcher extends TypedEmitter { } }); - csiController.on("ping", (pingMessage) => { + csiController.on("ping", (pingMessage: PingMessageData) => { + const seq = this.sequenceStore.getById(csiController.sequence.id); + + if (seq) { + seq.instances.push(csiController.id); + } else { + this.logger.warn("Instance of not existing sequence connected"); + //@TODO: ? 
+ } this.emit("established", { id: pingMessage.id, sequence: pingMessage.sequenceInfo }); }); - csiController.on("end", async (code) => { + csiController.on("end", async (code: number) => { this.logger.trace("csiControllerontrolled ended", `id: ${csiController.id}`, `Exit code: ${code}`); if (csiController.provides && csiController.provides !== "") { @@ -116,12 +138,6 @@ export class CSIDispatcher extends TypedEmitter { csiController.logger.unpipe(this.logger); - // await this.cpmConnector?.sendInstanceInfo({ - // id: csiController.id, - // sequence: sequence.id - // }, InstanceMessageCode.INSTANCE_ENDED); - - // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); this.emit("end", { id, code, @@ -130,6 +146,14 @@ export class CSIDispatcher extends TypedEmitter { }, sequence: csiController.sequence }); + + const seq = this.sequenceStore.getById(csiController.sequence.id); + + if (seq) { + seq.instances = seq.instances.filter(i => i !== csiController.id); + } + + delete this.instanceStore[csiController.id]; }); csiController.once("terminated", (code) => { @@ -141,16 +165,6 @@ export class CSIDispatcher extends TypedEmitter { ).unpipe(csiController.getInputStream()!); } - // this.auditor.auditInstance(id, InstanceMessageCode.INSTANCE_ENDED); - // this.pushTelemetry("Instance ended", { - // executionTime: csiController.info.ended && csiController.info.started - // ? 
((csiController.info.ended?.getTime() - csiController.info.started.getTime()) / 1000).toString() - // : "-1", - // id: csiController.id, - // code: code.toString(), - // seqId: csiController.sequence.id - // }); - this.emit("terminated", { id, code, @@ -168,7 +182,7 @@ export class CSIDispatcher extends TypedEmitter { this.logger.trace("csiController started", id); - this.instancesStore[id] = csiController; + this.instanceStore[id] = csiController; return csiController; } @@ -214,8 +228,7 @@ export class CSIDispatcher extends TypedEmitter { appConfig: payload.appConfig, args: payload.args, sequenceId: sequence.id, - info: { - }, + info: {}, limits, sequence }; diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index a5478edbd..8617efede 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -6,7 +6,7 @@ import { AddressInfo } from "net"; import { Duplex } from "stream"; import { CommunicationHandler, HostError, IDProvider } from "@scramjet/model"; -import { HostHeaders, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; +import { HostHeaders, InstanceMessageCode, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { APIExpose, CPMConnectorOptions, @@ -55,7 +55,7 @@ import { SocketServer } from "./socket-server"; import SequenceStore from "./sequenceStore"; import TopicRouter from "./serviceDiscovery/topicRouter"; import { loadModule, logger as loadModuleLogger } from "@scramjet/module-loader"; -import { CSIDispatcher } from "./csi-dispatcher"; +import { CSIDispatcher, DispatcherErrorEventData, DispatcherInstanceEndEventData, DispatcherInstanceTerminatedEventData } from "./csi-dispatcher"; const buildInfo = readJsonFile("build.info", __dirname, ".."); const packageFile = findPackage(__dirname).next(); @@ -240,7 +240,12 @@ export class Host implements IComponent { this.instanceBase = `${this.config.host.apiBase}/instance`; this.topicsBase = `${this.config.host.apiBase}/topic`; - 
this.csiDispatcher = new CSIDispatcher(this.instancesStore, this.serviceDiscovery, sthConfig); + this.csiDispatcher = new CSIDispatcher({ + instanceStore: this.instancesStore, + sequenceStore: this.sequenceStore, + serviceDiscovery: this.serviceDiscovery, + STHConfig: sthConfig + }); this.csiDispatcher.logger.pipe(this.logger); @@ -274,34 +279,74 @@ export class Host implements IComponent { } attachDispatcherEvents() { - this.csiDispatcher.on("error", (errorData) => { - this.pushTelemetry("Instance error", { ...errorData }, "error"); - }); - - this.csiDispatcher.on("end", (terminated) => { - const seq = this.sequenceStore.getById(terminated.sequence.id); + this.csiDispatcher + .on("end", async (eventData: DispatcherInstanceEndEventData) => { + await this.handleDispatcherEndEvent(eventData); + }) + .on("established", async (instance: Instance) => { + await this.handleDispatcherEstablishedEvent(instance); + }) + .on("terminated", async (eventData: DispatcherInstanceTerminatedEventData) => { + await this.handleDispatcherTerminatedEvent(eventData); + }) + .on("error", (errorData: DispatcherErrorEventData) => { + this.pushTelemetry("Instance error", { ...errorData }, "error"); + }); + } - // eslint-disable-next-line no-console - console.log("ended", terminated); + /** + * Pass information about connected instance to monitoring and platform services. + * + * @param {Instance} instance Instance data. 
+ */ + async handleDispatcherEstablishedEvent(instance: Instance) { + this.auditor.auditInstance(instance.id, InstanceMessageCode.INSTANCE_CONNECTED); - if (seq) { - seq.instances = seq.instances.filter(i => i !== terminated.id); - } + await this.cpmConnector?.sendInstanceInfo({ + id: instance.id, + sequence: instance.sequence + }, InstanceMessageCode.INSTANCE_CONNECTED); - delete this.instancesStore[terminated.id]; + this.pushTelemetry("Instance connected", { + id: instance.id, + seqId: instance.sequence.id }); + } - this.csiDispatcher.on("established", (instance: Instance) => { - const seq = this.sequenceStore.getById(instance.sequence.id); - - // eslint-disable-next-line no-console - console.log("established", instance); + /** + * Pass information about ended instance to monitoring and platform services. + * + * @param {DispatcherInstanceEndEventData} eventData Event details. + */ + async handleDispatcherEndEvent(eventData: DispatcherInstanceEndEventData) { + this.auditor.auditInstance(eventData.id, InstanceMessageCode.INSTANCE_ENDED); + + await this.cpmConnector?.sendInstanceInfo({ + id: eventData.id, + sequence: eventData.sequence + }, InstanceMessageCode.INSTANCE_ENDED); + + this.pushTelemetry("Instance ended", { + executionTime: eventData.info.executionTime.toString(), + id: eventData.id, + code: eventData.code.toString(), + seqId: eventData.sequence.id + }); + } - if (seq) { - seq.instances.push(instance.id); - } else { - this.logger.warn("Instance of not existing sequence connected"); - } + /** + * Pass information about terminated instance to monitoring services. + * + * @param {DispatcherInstanceTerminatedEventData} eventData Event details. 
+ */ + async handleDispatcherTerminatedEvent(eventData: DispatcherInstanceTerminatedEventData) { + this.auditor.auditInstance(eventData.id, InstanceMessageCode.INSTANCE_TERMINATED); + + this.pushTelemetry("Instance terminated", { + executionTime: eventData.info.executionTime.toString(), + id: eventData.id, + code: eventData.code.toString(), + seqId: eventData.sequence.id }); } diff --git a/packages/symbols/src/instance-status-code.ts b/packages/symbols/src/instance-status-code.ts index 74a215472..7e96d2709 100644 --- a/packages/symbols/src/instance-status-code.ts +++ b/packages/symbols/src/instance-status-code.ts @@ -1,5 +1,7 @@ export enum InstanceMessageCode { INSTANCE_STARTED, INSTANCE_STOPPED, - INSTANCE_ENDED + INSTANCE_ENDED, + INSTANCE_TERMINATED, + INSTANCE_CONNECTED } From 5dff6029b8463b1c5b577927a823496cd910e81d Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 6 Oct 2023 12:19:08 +0000 Subject: [PATCH 29/62] Reenable reconnect --- packages/runner/src/runner.ts | 31 +++++++++++++++++++---------- packages/types/src/csh-connector.ts | 3 +-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 0d967a3cc..1ee9633e6 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -148,6 +148,8 @@ export class Runner implements IComponent { private outputDataStream: DataStream; private sequenceInfo: SequenceInfo; + private connected = false; + private runnerConnectInfo: RunnerConnectInfo = { appConfig: {} }; @@ -256,7 +258,7 @@ export class Runner implements IComponent { let working = false; this.monitoringInterval = setInterval(async () => { - if (working) { + if (working || !this.connected) { return; } @@ -273,20 +275,25 @@ export class Runner implements IComponent { [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream ); - // this.monitoringMessageReplyTimeout = setTimeout(async () => { - // await this.handleDisconnect(); - // }, 1000); + 
this.monitoringMessageReplyTimeout = setTimeout(async () => { + if (!this.connected) return; + + this.connected = false; + + await this.handleDisconnect(); + }, 1000); } - // async handleDisconnect() { - // await defer(5000); + async handleDisconnect() { + this.connected = false; + //await this.hostClient.disconnect(); - // this.logger.info("Reinitializing...."); + await defer(5000); - // this.premain().catch((e) => { - // this.logger.error("Premain error", e); - // }); - // } + this.logger.info("Reinitializing...."); + + await this.premain(); + } async handleKillRequest(): Promise { this.logger.debug("Handling KILL request"); @@ -345,7 +352,9 @@ export class Runner implements IComponent { try { await this.hostClient.init(this.instanceId); + this.connected = true; } catch (e) { + this.connected = false; this.logger.error("hostClient init error", e); await defer(2000); diff --git a/packages/types/src/csh-connector.ts b/packages/types/src/csh-connector.ts index 7c6f992fb..179befcd3 100644 --- a/packages/types/src/csh-connector.ts +++ b/packages/types/src/csh-connector.ts @@ -1,4 +1,3 @@ -import { MaybePromise } from "./utils"; import { IComponent } from "./component"; import { CommunicationChannel as CC } from "@scramjet/symbols"; import { UpstreamStreamsConfig } from "./message-streams"; @@ -9,7 +8,7 @@ export interface IHostClient extends IComponent { * Interface used by Runner to communicate with Host. */ - init(id: string): MaybePromise; + init(id: string): Promise; /** * Disconnects from a host server. 
From 340df76227b7d5e5166f4da02d206bd7a73f374f Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 10 Oct 2023 09:40:04 +0000 Subject: [PATCH 30/62] Connect py runner --- packages/python-runner/runner.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index b3ae136e1..3ee7557ff 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -17,7 +17,8 @@ server_port = os.getenv('INSTANCES_SERVER_PORT') server_host = os.getenv('INSTANCES_SERVER_HOST') or 'localhost' instance_id = os.getenv('INSTANCE_ID') - +runner_connect_info = json.loads(os.getenv("RUNNER_CONNECT_INFO")) +sequence_info = json.loads(os.getenv("SEQUENCE_INFO")) def send_encoded_msg(stream, msg_code, data={}): message = json.dumps([msg_code.value, data]) @@ -116,7 +117,8 @@ async def handshake(self): control = self.streams[CC.CONTROL] self.logger.info(f'Sending PING') - send_encoded_msg(monitoring, msg_codes.PING) + payload = {**runner_connect_info, **{"system":{"processPID":str(os.getpid())}}} + send_encoded_msg(monitoring, msg_codes.PING, {"payload":payload, "sequenceInfo": sequence_info, "id": instance_id}) message = await control.readuntil(b'\n') self.logger.info(f'Got message: {message}') @@ -164,7 +166,7 @@ async def handle_stop(self, data): self.keep_alive_requested = False timeout = data.get('timeout') / 1000 can_keep_alive = data.get('canCallKeepalive') - try: + try: for handler in self.stop_handlers: await handler(timeout, can_keep_alive) except Exception as e: @@ -285,7 +287,7 @@ async def forward_output_stream(self, output): await output.write_to(self.streams[CC.OUT]) - + async def send_keep_alive(self, timeout: int = 0, can_keep_alive: bool = False): monitoring = self.streams[CC.MONITORING] send_encoded_msg(monitoring, msg_codes.ALIVE) @@ -320,7 +322,7 @@ def emit(self, event_name, message=''): msg_codes.EVENT, {'eventName': event_name, 'message': message} ) - + async 
def keep_alive(self, timeout: int = 0): await self.runner.send_keep_alive(timeout) From 6970aa7458c43a709d9d70ef97f6b3777cd410cc Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 10 Oct 2023 14:47:20 +0000 Subject: [PATCH 31/62] Store args from ping --- packages/host/src/lib/csi-controller.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 842aba7f9..39ed508e1 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -468,8 +468,12 @@ export class CSIController extends TypedEmitter { // eslint-disable-next-line no-console console.log("ping", message); - this.provides ||= this.outputTopic || message[1].payload?.outputTopic; - this.requires ||= this.inputTopic || message[1].payload?.inputTopic; + const payload = message[1].payload; + + this.args = message[1].payload.args; + + this.provides ||= this.outputTopic || payload?.outputTopic; + this.requires ||= this.inputTopic || payload?.inputTopic; await this.handleHandshake(message); From 0b9a3a29795ac03050cf07d9f3f77f7897349832 Mon Sep 17 00:00:00 2001 From: Piotr Date: Wed, 11 Oct 2023 11:41:12 +0000 Subject: [PATCH 32/62] [WIP] Py runner reconnect --- packages/python-runner/runner.py | 10 +++++++++- packages/python-runner/runnerClock.py | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 packages/python-runner/runnerClock.py diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index 3ee7557ff..f488e2b77 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -11,6 +11,7 @@ from logging_setup import LoggingSetup from hardcoded_magic_values import CommunicationChannels as CC from hardcoded_magic_values import RunnerMessageCodes as msg_codes +from runnerClock import RunnerClock sequence_path = os.getenv('SEQUENCE_PATH') @@ -49,7 +50,12 @@ def __init__(self, 
instance_id, sequence_path, log_setup) -> None: self.health_check = lambda: {'healthy': True} self.emitter = AsyncIOEventEmitter() self.keep_alive_requested = False + self.runner_clock = RunnerClock(2) + async def reconnect(self): + self.logger.debug('trying to reconnect...') + #reconnect logic here + self.runner_clock.reset(self.reconnect) async def main(self, server_host, server_port): self.logger.info('Connecting to host...') @@ -65,6 +71,7 @@ async def main(self, server_host, server_port): asyncio.create_task(self.setup_heartbeat()) self.load_sequence() + self.runner_clock.start(self.reconnect) await self.run_instance(config, args) @@ -159,7 +166,8 @@ async def connect_control_stream(self): await self.handle_stop(data) if code == msg_codes.EVENT.value: self.emitter.emit(data['eventName'], data['message'] if 'message' in data else None) - + if code == msg_codes.FORCE_CONFIRM_ALIVE.value: + self.runner_clock.reset(self.reconnect) async def handle_stop(self, data): self.logger.info(f'Gracefully shutting down...{data}') diff --git a/packages/python-runner/runnerClock.py b/packages/python-runner/runnerClock.py new file mode 100644 index 000000000..5dc81c518 --- /dev/null +++ b/packages/python-runner/runnerClock.py @@ -0,0 +1,18 @@ +import asyncio + +class RunnerClock: + def __init__(self, delay): + self.delay = delay + self.timer_task = None + + async def _timer(self, function): + await asyncio.sleep(self.delay) + await function() + + def start(self, function): + if self.timer_task: + self.timer_task.cancel() + self.timer_task = asyncio.create_task(self._timer(function)) + + def reset(self, function): + self.start(function) From 369aa68cc5321cb43badc0fc123d267fc803ab7e Mon Sep 17 00:00:00 2001 From: Piotr Date: Wed, 11 Oct 2023 12:02:27 +0000 Subject: [PATCH 33/62] update python magic values --- packages/python-runner/hardcoded_magic_values.py | 2 +- packages/python-runner/package.json | 3 ++- packages/python-runner/runner.py | 2 +- 3 files changed, 4 insertions(+), 
3 deletions(-) diff --git a/packages/python-runner/hardcoded_magic_values.py b/packages/python-runner/hardcoded_magic_values.py index 12625a6f8..25e42c7a1 100644 --- a/packages/python-runner/hardcoded_magic_values.py +++ b/packages/python-runner/hardcoded_magic_values.py @@ -30,6 +30,6 @@ class RunnerMessageCodes(Enum): STOP = 4001 KILL = 4002 MONITORING_RATE = 4003 - FORCE_CONFIRM_ALIVE = 4004 + MONITORING_REPLY = 4004 EVENT = 5001 diff --git a/packages/python-runner/package.json b/packages/python-runner/package.json index 18218254e..2172dee13 100644 --- a/packages/python-runner/package.json +++ b/packages/python-runner/package.json @@ -13,7 +13,8 @@ "assets": [ "hardcoded_magic_values.py", "logging_setup.py", - "runner.py" + "runner.py", + "runnerClock.py" ], "author": "Scramjet ", "license": "MIT", diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index f488e2b77..0a0ff6d13 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -166,7 +166,7 @@ async def connect_control_stream(self): await self.handle_stop(data) if code == msg_codes.EVENT.value: self.emitter.emit(data['eventName'], data['message'] if 'message' in data else None) - if code == msg_codes.FORCE_CONFIRM_ALIVE.value: + if code == msg_codes.MONITORING_REPLY.value: self.runner_clock.reset(self.reconnect) async def handle_stop(self, data): From 019ad27a88c2399c0f0dd0bf8731321b46bb9be2 Mon Sep 17 00:00:00 2001 From: Piotr Date: Thu, 12 Oct 2023 09:06:24 +0000 Subject: [PATCH 34/62] [WIP] py runner reconnect --- packages/python-runner/runner.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index 0a0ff6d13..c4ae17c12 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -54,22 +54,33 @@ def __init__(self, instance_id, sequence_path, log_setup) -> None: async def reconnect(self): 
self.logger.debug('trying to reconnect...') - #reconnect logic here + await self.premain() + + asyncio.sleep(1) self.runner_clock.reset(self.reconnect) - async def main(self, server_host, server_port): - self.logger.info('Connecting to host...') - await self.init_connections(server_host, server_port) + async def premain(self): + try: + await self.init_connections(server_host, server_port) + except: + self.logger.debug("hostClient init error") + asyncio.sleep(2) + return await self.premain() - # Do this early to have access to any thrown exceptions and logs. self.connect_stdio() self.connect_log_stream() - config, args = await self.handshake() self.logger.info('Communication established.') + asyncio.create_task(self.connect_control_stream()) asyncio.create_task(self.setup_heartbeat()) + return config, args + + async def main(self): + self.logger.info('Connecting to host...') + config, args = await self.premain() + self.load_sequence() self.runner_clock.start(self.reconnect) await self.run_instance(config, args) @@ -348,4 +359,4 @@ async def keep_alive(self, timeout: int = 0): sys.exit(2) runner = Runner(instance_id, sequence_path, log_setup) -asyncio.run(runner.main(server_host, server_port)) +asyncio.run(runner.main()) From 296ff2125ba1438e584b148186dabc854a023b1b Mon Sep 17 00:00:00 2001 From: patuwwy Date: Thu, 12 Oct 2023 12:00:59 +0000 Subject: [PATCH 35/62] Reconnect pyRunner. 
[no data after reconnecting --- packages/python-runner/runner.py | 63 ++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index c4ae17c12..4290850c5 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -11,8 +11,8 @@ from logging_setup import LoggingSetup from hardcoded_magic_values import CommunicationChannels as CC from hardcoded_magic_values import RunnerMessageCodes as msg_codes -from runnerClock import RunnerClock - +# from runnerClock import RunnerClock +import threading sequence_path = os.getenv('SEQUENCE_PATH') server_port = os.getenv('INSTANCES_SERVER_PORT') @@ -41,7 +41,12 @@ def flush(self): class Runner: + + + def __init__(self, instance_id, sequence_path, log_setup) -> None: + self.reconnect_interval = None + self.connected = False self.instance_id = instance_id self.seq_path = sequence_path self._logging_setup = log_setup @@ -50,23 +55,30 @@ def __init__(self, instance_id, sequence_path, log_setup) -> None: self.health_check = lambda: {'healthy': True} self.emitter = AsyncIOEventEmitter() self.keep_alive_requested = False - self.runner_clock = RunnerClock(2) + #self.runner_clock = RunnerClock(2) async def reconnect(self): self.logger.debug('trying to reconnect...') await self.premain() - asyncio.sleep(1) - self.runner_clock.reset(self.reconnect) + # await asyncio.sleep(1) + # self.runner_clock.reset(self.reconnect) + async def premain(self): + self.logger.info('Connecting to host...') + try: - await self.init_connections(server_host, server_port) + if not self.connected: + self.logger.debug(f"connected: {self.connected}") + await self.init_connections(server_host, server_port) except: self.logger.debug("hostClient init error") - asyncio.sleep(2) + await asyncio.sleep(2) return await self.premain() + self.connected = True + self.connect_stdio() self.connect_log_stream() config, args = await self.handshake() 
@@ -78,11 +90,10 @@ async def premain(self): return config, args async def main(self): - self.logger.info('Connecting to host...') config, args = await self.premain() self.load_sequence() - self.runner_clock.start(self.reconnect) + await self.run_instance(config, args) @@ -125,9 +136,9 @@ def connect_stdio(self): def connect_log_stream(self): self.logger.info('Switching to main log stream...') log_stream = codecs.getwriter('utf-8')(self.streams[CC.LOG]) - self._logging_setup.switch_target(log_stream) - self._logging_setup.flush_temp_handler() - self.logger.info('Log stream connected.') + # self._logging_setup.switch_target(log_stream) + # self._logging_setup.flush_temp_handler() + # self.logger.info('Log stream connected.') async def handshake(self): @@ -178,7 +189,12 @@ async def connect_control_stream(self): if code == msg_codes.EVENT.value: self.emitter.emit(data['eventName'], data['message'] if 'message' in data else None) if code == msg_codes.MONITORING_REPLY.value: - self.runner_clock.reset(self.reconnect) + self.logger.debug("Monitoring reply received. Canceling reconnect") + + if self.reconnect_interval: + self.logger.debug("Reconnect has been set. 
canceling") + self.reconnect_interval.cancel() + self.reconnect_interval = None async def handle_stop(self, data): self.logger.info(f'Gracefully shutting down...{data}') @@ -200,14 +216,31 @@ async def handle_stop(self, data): async def setup_heartbeat(self): - while True: + async def timeout(self): + self.logger.debug("timeout method") + + await asyncio.sleep(4) + + self.connected = False + + self.logger.debug("TIMEOUT!, going to reconnect...") + + await self.reconnect() + + while self.connected: + await asyncio.sleep(5) + + self.logger.debug(f"Sending health check {self.reconnect_interval}") + + send_encoded_msg( self.streams[CC.MONITORING], msg_codes.MONITORING, self.health_check(), ) - await asyncio.sleep(1) + if self.reconnect_interval == None: + self.reconnect_interval = asyncio.create_task(timeout(self)) def load_sequence(self): # Add sequence directory to sys.path From a7907d29ce4a0a895164ba7c6eb3034af2512bdc Mon Sep 17 00:00:00 2001 From: Piotr Date: Mon, 16 Oct 2023 08:54:13 +0000 Subject: [PATCH 36/62] Remove clock/ cleanup --- packages/python-runner/package.json | 3 +-- packages/python-runner/runner.py | 10 ---------- packages/python-runner/runnerClock.py | 18 ------------------ 3 files changed, 1 insertion(+), 30 deletions(-) delete mode 100644 packages/python-runner/runnerClock.py diff --git a/packages/python-runner/package.json b/packages/python-runner/package.json index 2172dee13..18218254e 100644 --- a/packages/python-runner/package.json +++ b/packages/python-runner/package.json @@ -13,8 +13,7 @@ "assets": [ "hardcoded_magic_values.py", "logging_setup.py", - "runner.py", - "runnerClock.py" + "runner.py" ], "author": "Scramjet ", "license": "MIT", diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index 4290850c5..ef35cb38f 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -11,8 +11,6 @@ from logging_setup import LoggingSetup from hardcoded_magic_values import CommunicationChannels 
as CC from hardcoded_magic_values import RunnerMessageCodes as msg_codes -# from runnerClock import RunnerClock -import threading sequence_path = os.getenv('SEQUENCE_PATH') server_port = os.getenv('INSTANCES_SERVER_PORT') @@ -41,9 +39,6 @@ def flush(self): class Runner: - - - def __init__(self, instance_id, sequence_path, log_setup) -> None: self.reconnect_interval = None self.connected = False @@ -55,16 +50,11 @@ def __init__(self, instance_id, sequence_path, log_setup) -> None: self.health_check = lambda: {'healthy': True} self.emitter = AsyncIOEventEmitter() self.keep_alive_requested = False - #self.runner_clock = RunnerClock(2) async def reconnect(self): self.logger.debug('trying to reconnect...') await self.premain() - # await asyncio.sleep(1) - # self.runner_clock.reset(self.reconnect) - - async def premain(self): self.logger.info('Connecting to host...') diff --git a/packages/python-runner/runnerClock.py b/packages/python-runner/runnerClock.py deleted file mode 100644 index 5dc81c518..000000000 --- a/packages/python-runner/runnerClock.py +++ /dev/null @@ -1,18 +0,0 @@ -import asyncio - -class RunnerClock: - def __init__(self, delay): - self.delay = delay - self.timer_task = None - - async def _timer(self, function): - await asyncio.sleep(self.delay) - await function() - - def start(self, function): - if self.timer_task: - self.timer_task.cancel() - self.timer_task = asyncio.create_task(self._timer(function)) - - def reset(self, function): - self.start(function) From 636914568eef0726e5783c57db5bc164c4f03c32 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 16 Oct 2023 09:42:16 +0000 Subject: [PATCH 37/62] PyRunner. 
Reuseable redirecting inout --- packages/python-runner/runner.py | 130 ++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 45 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index ef35cb38f..75114b300 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -40,6 +40,8 @@ def flush(self): class Runner: def __init__(self, instance_id, sequence_path, log_setup) -> None: + self.connection_retry_delay = 2 + self.reconnect_interval = None self.connected = False self.instance_id = instance_id @@ -51,23 +53,38 @@ def __init__(self, instance_id, sequence_path, log_setup) -> None: self.emitter = AsyncIOEventEmitter() self.keep_alive_requested = False - async def reconnect(self): - self.logger.debug('trying to reconnect...') - await self.premain() + self.sequence = None + + self.input_type = None + self.instance_input = Stream() + + self.output_content_type = None + self.instance_output = None + self.instance_direct_output = None + - async def premain(self): + async def connect_to_host(self): self.logger.info('Connecting to host...') try: if not self.connected: - self.logger.debug(f"connected: {self.connected}") await self.init_connections(server_host, server_port) + + self.connected = True + else: + self.logger.warn(f"Already connected!") except: - self.logger.debug("hostClient init error") - await asyncio.sleep(2) - return await self.premain() + self.logger.debug(f"Error connecting. 
Retrying in {self.connection_retry_delay}") + + await asyncio.sleep(self.connection_retry_delay) + return await self.connect_to_host() + - self.connected = True + async def initialize(self): + await self.connect_to_host() + + asyncio.create_task(self.connect_input_stream()) + self.forward_output_stream() self.connect_stdio() self.connect_log_stream() @@ -76,14 +93,14 @@ async def premain(self): asyncio.create_task(self.connect_control_stream()) asyncio.create_task(self.setup_heartbeat()) + await self.forward_output_stream() return config, args - async def main(self): - config, args = await self.premain() + async def main(self): + config, args = await self.initialize() self.load_sequence() - await self.run_instance(config, args) @@ -186,6 +203,7 @@ async def connect_control_stream(self): self.reconnect_interval.cancel() self.reconnect_interval = None + async def handle_stop(self, data): self.logger.info(f'Gracefully shutting down...{data}') self.keep_alive_requested = False @@ -207,22 +225,18 @@ async def handle_stop(self, data): async def setup_heartbeat(self): async def timeout(self): - self.logger.debug("timeout method") - await asyncio.sleep(4) + self.logger.warn("Monitoring reply not received!") self.connected = False - self.logger.debug("TIMEOUT!, going to reconnect...") - - await self.reconnect() + await self.initialize() while self.connected: await asyncio.sleep(5) self.logger.debug(f"Sending health check {self.reconnect_interval}") - send_encoded_msg( self.streams[CC.MONITORING], msg_codes.MONITORING, @@ -232,6 +246,7 @@ async def timeout(self): if self.reconnect_interval == None: self.reconnect_interval = asyncio.create_task(timeout(self)) + def load_sequence(self): # Add sequence directory to sys.path module_dir = os.path.dirname(self.seq_path) @@ -245,13 +260,15 @@ def load_sequence(self): # switch to sequence dir so that relative paths will work os.chdir(os.path.dirname(self.seq_path)) + async def run_instance(self, config, args): context = 
AppContext(self, config) - input_stream = Stream() - asyncio.create_task(self.connect_input_stream(input_stream)) + self.instance_input = Stream() + + asyncio.create_task(self.connect_input_stream()) self.logger.info('Running instance...') - result = self.sequence.run(context, input_stream, *args) + result = self.sequence.run(context, self.instance_input, *args) self.logger.info(f'Sending PANG') monitoring = self.streams[CC.MONITORING] @@ -266,23 +283,32 @@ async def run_instance(self, config, args): send_encoded_msg(monitoring, msg_codes.PANG, consumes) if isinstance(result, types.AsyncGeneratorType): + self.logger.info("Instance result is instance") result = Stream.read_from(result) elif asyncio.iscoroutine(result): + self.logger.info("Instance result is coroutine") result = await result if result: - await self.forward_output_stream(result) + self.logger.info("Instance result stream") + self.instance_direct_output = result + self.get_output_content_type() + await self.forward_output_stream() else: self.logger.debug('Sequence returned no output.') self.logger.info('Finished.') await self.cleanup() + async def cleanup(self): self.streams[CC.LOG].write_eof() - async def connect_input_stream(self, input_stream): + async def get_input_content_type(self): + if self.sequence is None: + return + if hasattr(self.sequence, "requires"): - input_type = self.sequence.requires.get('contentType') + self.input_type = self.sequence.requires.get('contentType') else: raw_headers = await self.streams[CC.IN].readuntil(b'\r\n\r\n') header_list = raw_headers.decode().rstrip().split('\r\n') @@ -290,44 +316,58 @@ async def connect_input_stream(self, input_stream): key.lower(): val for key, val in [el.split(': ') for el in header_list] } self.logger.info(f'Input headers: {repr(headers)}') - input_type = headers.get('content-type') + self.input_type = headers.get('content-type') + - if input_type == 'text/plain': + async def connect_input_stream(self): + if self.input_type is None: + await 
self.get_input_content_type() + + if self.input_type is None: + return + + if self.input_type == 'text/plain': input = Stream.read_from(self.streams[CC.IN]) self.logger.debug('Decoding input stream...') input = input.decode('utf-8') - elif input_type == 'application/octet-stream': + elif self.input_type == 'application/octet-stream': self.logger.debug('Opening input in binary mode...') input = Stream.read_from(self.streams[CC.IN], chunk_size=CHUNK_SIZE) else: - raise TypeError(f'Unsupported input type: {repr(input_type)}') + raise TypeError(f'Unsupported input type: {repr(self.input_type)}') - input.pipe(input_stream) + input.pipe(self.instance_input) self.logger.debug('Input stream forwarded to the instance.') - async def forward_output_stream(self, output): - - if hasattr(output, 'provides'): - attribute = getattr(self.sequence, 'provides', None) - content_type = attribute['contentType'] - else: - if hasattr(self.sequence, 'provides'): + def get_output_content_type(self): + if self.output_content_type is None: + if hasattr(self.instance_direct_output, 'provides'): attribute = getattr(self.sequence, 'provides', None) - content_type = attribute['contentType'] + self.output_content_type = attribute['contentType'] else: - self.logger.debug('Output type not set, using default') - content_type = 'text/plain' + if hasattr(self.sequence, 'provides'): + attribute = getattr(self.sequence, 'provides', None) + self.output_content_type = attribute['contentType'] + else: + self.logger.debug('Output type not set, using default') + self.output_content_type = 'text/plain' + + self.logger.info(f'Content-type: {self.output_content_type}') + + + async def forward_output_stream(self): + if self.instance_direct_output is None: + return - self.logger.info(f'Content-type: {content_type}') - if content_type == 'text/plain': + if self.output_content_type == 'text/plain': self.logger.debug('Output stream will be treated as text and encoded') - output = output.map(lambda s: s.encode()) - if 
content_type == 'application/x-ndjson': + self.instance_output = self.instance_direct_output.map(lambda s: s.encode()) + if self.output_content_type == 'application/x-ndjson': self.logger.debug('Output will be converted to JSON') - output = output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) + self.instance_output = self.instance_direct_output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) - await output.write_to(self.streams[CC.OUT]) + await self.instance_output.write_to(self.streams[CC.OUT]) async def send_keep_alive(self, timeout: int = 0, can_keep_alive: bool = False): From 360078452d592fcfcff5931f6cbfca156268b9ce Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 16 Oct 2023 12:53:31 +0000 Subject: [PATCH 38/62] Restore log stream --- packages/python-runner/runner.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index 75114b300..b6a1fc585 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -143,9 +143,9 @@ def connect_stdio(self): def connect_log_stream(self): self.logger.info('Switching to main log stream...') log_stream = codecs.getwriter('utf-8')(self.streams[CC.LOG]) - # self._logging_setup.switch_target(log_stream) - # self._logging_setup.flush_temp_handler() - # self.logger.info('Log stream connected.') + self._logging_setup.switch_target(log_stream) + self._logging_setup.flush_temp_handler() + self.logger.info('Log stream connected.') async def handshake(self): @@ -196,10 +196,8 @@ async def connect_control_stream(self): if code == msg_codes.EVENT.value: self.emitter.emit(data['eventName'], data['message'] if 'message' in data else None) if code == msg_codes.MONITORING_REPLY.value: - self.logger.debug("Monitoring reply received. Canceling reconnect") if self.reconnect_interval: - self.logger.debug("Reconnect has been set. 
canceling") self.reconnect_interval.cancel() self.reconnect_interval = None @@ -235,8 +233,6 @@ async def timeout(self): while self.connected: await asyncio.sleep(5) - self.logger.debug(f"Sending health check {self.reconnect_interval}") - send_encoded_msg( self.streams[CC.MONITORING], msg_codes.MONITORING, @@ -358,15 +354,18 @@ def get_output_content_type(self): async def forward_output_stream(self): if self.instance_direct_output is None: + self.logger.warn("Instance direct output not initialized") return - if self.output_content_type == 'text/plain': - self.logger.debug('Output stream will be treated as text and encoded') - self.instance_output = self.instance_direct_output.map(lambda s: s.encode()) - if self.output_content_type == 'application/x-ndjson': - self.logger.debug('Output will be converted to JSON') - self.instance_output = self.instance_direct_output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) + if self.instance_output is None: + if self.output_content_type == 'text/plain': + self.logger.debug('Output stream will be treated as text and encoded') + self.instance_output = self.instance_direct_output.map(lambda s: s.encode()) + if self.output_content_type == 'application/x-ndjson': + self.logger.debug('Output will be converted to JSON') + self.instance_output = self.instance_direct_output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) + self.logger.debug("Writing instance_output to CC.OUT") await self.instance_output.write_to(self.streams[CC.OUT]) From 88e8b01d62a4792c64c7046a6041354814b713e4 Mon Sep 17 00:00:00 2001 From: Piotr Date: Mon, 16 Oct 2023 13:01:39 +0000 Subject: [PATCH 39/62] console logs cleanup --- packages/host/src/lib/csi-controller.ts | 16 ---------------- packages/host/src/lib/csi-dispatcher.ts | 1 - packages/host/src/lib/host.ts | 2 -- 3 files changed, 19 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 39ed508e1..eac56ad62 100644 --- 
a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -174,10 +174,6 @@ export class CSIController extends TypedEmitter { private adapter: STHConfiguration["runtimeAdapter"] = sthConfig.runtimeAdapter ) { super(); - - // eslint-disable-next-line no-console - console.log("csic constructor handshakeMessage", handshakeMessage); - this.id = this.handshakeMessage.id; this.runnerSystemInfo = this.handshakeMessage.payload.system; this.sequence = this.handshakeMessage.sequenceInfo; @@ -268,10 +264,6 @@ export class CSIController extends TypedEmitter { this.emit("end", code); } - /** - * @todo add comment - * @todo move this to CSIDispatcher - that would be one for all sequences - */ startInstance() { this._instanceAdapter = getInstanceAdapter(this.adapter, this.sthConfig, this.id); @@ -465,9 +457,6 @@ export class CSIController extends TypedEmitter { .pipe(this.upStreams[CC.CONTROL]); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { - // eslint-disable-next-line no-console - console.log("ping", message); - const payload = message[1].payload; this.args = message[1].payload.args; @@ -482,9 +471,6 @@ export class CSIController extends TypedEmitter { }); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PANG, async (message) => { - // eslint-disable-next-line no-console - console.log("pang", message); - const pangData = message[1]; this.provides ||= this.outputTopic || pangData.provides; @@ -504,8 +490,6 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.MONITORING, async message => { const stats = await this.instanceAdapter.stats(message[1]); - this.logger.debug("Health stats", stats); - this._lastStats = stats; this.heartBeatTick(); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index ed9e02d25..56815c8a6 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ 
b/packages/host/src/lib/csi-dispatcher.ts @@ -222,7 +222,6 @@ export class CSIDispatcher extends TypedEmitter { this.on("established", resolveFunction); }); - // @todo more instance info return { id, appConfig: payload.appConfig, diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 8617efede..1dda5d327 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -525,7 +525,6 @@ export class Host implements IComponent { this.logger.warn("Sequence id not found for startup config", seqenceConfig); return; } - // @todo dispatcher await this.csiDispatcher.startRunner(sequence, { appConfig: seqenceConfig.appConfig || {}, args: seqenceConfig.args, @@ -1078,7 +1077,6 @@ export class Host implements IComponent { this.socketServer.on("connect", async (id, streams) => { this.logger.debug("Instance connecting", id); - // @todo need more instance info if (!this.instancesStore[id]) { this.logger.info("creating new CSIController for runner connecting"); From eb48a7ae3d586b9eccd050d098c249e6194080dc Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 17 Oct 2023 09:33:03 +0000 Subject: [PATCH 40/62] Reconnect inout [wip]. 
--- bdd/features/e2e/E2E-015-unified.feature | 1 + packages/python-runner/runner.py | 37 +++++++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/bdd/features/e2e/E2E-015-unified.feature b/bdd/features/e2e/E2E-015-unified.feature index addee0134..c63e32387 100644 --- a/bdd/features/e2e/E2E-015-unified.feature +++ b/bdd/features/e2e/E2E-015-unified.feature @@ -23,6 +23,7 @@ Feature: Test our shiny new Python runner Given host is running When find and upload sequence "debug-args.tar.gz" And instance started with arguments "foo 3" + And wait for "1000" ms Then "output" is "{\"first_arg\":\"foo\",\"second_arg\":\"3\"}" And host is still running diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index b6a1fc585..c6883339e 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -84,7 +84,6 @@ async def initialize(self): await self.connect_to_host() asyncio.create_task(self.connect_input_stream()) - self.forward_output_stream() self.connect_stdio() self.connect_log_stream() @@ -93,7 +92,8 @@ async def initialize(self): asyncio.create_task(self.connect_control_stream()) asyncio.create_task(self.setup_heartbeat()) - await self.forward_output_stream() + + self.forward_output_stream() return config, args @@ -274,6 +274,7 @@ async def run_instance(self, config, args): send_encoded_msg(monitoring, msg_codes.PANG, produces) consumes = getattr(result, 'requires', None) or getattr(self.sequence, 'requires', None) + if consumes: self.logger.info(f'Sending PANG with {consumes}') send_encoded_msg(monitoring, msg_codes.PANG, consumes) @@ -288,7 +289,18 @@ async def run_instance(self, config, args): self.logger.info("Instance result stream") self.instance_direct_output = result self.get_output_content_type() - await self.forward_output_stream() + self.forward_output_stream() + + end_stream = Stream() + self.instance_output.pipe(end_stream) + + + while True: + chunk = await end_stream.read() + 
if chunk is None: + self.logger.debug('Ending output') + break + else: self.logger.debug('Sequence returned no output.') @@ -332,7 +344,7 @@ async def connect_input_stream(self): else: raise TypeError(f'Unsupported input type: {repr(self.input_type)}') - input.pipe(self.instance_input) + input.pipe(self.instance_input, False) self.logger.debug('Input stream forwarded to the instance.') @@ -352,12 +364,15 @@ def get_output_content_type(self): self.logger.info(f'Content-type: {self.output_content_type}') - async def forward_output_stream(self): + def forward_output_stream(self): if self.instance_direct_output is None: self.logger.warn("Instance direct output not initialized") return - if self.instance_output is None: + if self.instance_output and len(self.instance_output._sinks) > 0: + self.instance_output.unpipe() + # self.instance_output.end() + else: if self.output_content_type == 'text/plain': self.logger.debug('Output stream will be treated as text and encoded') self.instance_output = self.instance_direct_output.map(lambda s: s.encode()) @@ -366,7 +381,15 @@ async def forward_output_stream(self): self.instance_output = self.instance_direct_output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) self.logger.debug("Writing instance_output to CC.OUT") - await self.instance_output.write_to(self.streams[CC.OUT]) + + s = Stream() + + self.instance_output.pipe(s, False) + + async def write(): + await s.write_to(self.streams[CC.OUT]) + + asyncio.create_task(write()) async def send_keep_alive(self, timeout: int = 0, can_keep_alive: bool = False): From 2df066f846ecb069a79e446e0e6b1cda14921c42 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 8 Jan 2024 16:11:51 +0000 Subject: [PATCH 41/62] Fix merge --- packages/host/src/lib/csi-controller.ts | 2 +- packages/host/src/lib/csi-dispatcher.ts | 6 +++--- packages/host/src/lib/host.ts | 11 ++++------- packages/runner/src/runner.ts | 3 --- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git 
a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 135363e6f..2b465d2a9 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -184,7 +184,7 @@ export class CSIController extends TypedEmitter { this.inputTopic = this.handshakeMessage.payload.inputTopic; this.limits = { memory: handshakeMessage.payload.limits?.memory || sthConfig.docker.runner.maxMem, - gpu: payload.limits?.gpu + gpu: handshakeMessage.payload.limits?.gpu }; this.instanceLifetimeExtensionDelay = +sthConfig.timings.instanceLifetimeExtensionDelay; diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 56815c8a6..213e907ba 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -92,7 +92,7 @@ export class CSIDispatcher extends TypedEmitter { csiController.inputRouted = true; await this.serviceDiscovery.update({ - requires: data.requires, contentType: data.contentType, topicName: data.requires + requires: data.requires, contentType: data.contentType, topicName: data.requires, status: "add" }); } @@ -107,7 +107,7 @@ export class CSIDispatcher extends TypedEmitter { csiController.outputRouted = true; await this.serviceDiscovery.update({ - provides: data.provides, contentType: data.contentType!, topicName: data.provides + provides: data.provides, contentType: data.contentType!, topicName: data.provides, status: "add" }); } }); @@ -191,7 +191,7 @@ export class CSIDispatcher extends TypedEmitter { const limits = { memory: payload.limits?.memory || this.STHConfig.docker.runner.maxMem }; - const id = IDProvider.generate(); + const id = payload.instanceId || IDProvider.generate(); const instanceAdapter = getInstanceAdapter(this.STHConfig.runtimeAdapter, this.STHConfig, id); const instanceConfig: InstanceConfig = { diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 834fcaaf7..3a04955b8 100644 --- 
a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -39,7 +39,7 @@ import { DuplexStream } from "@scramjet/api-server"; import { ConfigService, development } from "@scramjet/sth-config"; import { isStartSequenceDTO, isStartSequenceEndpointPayloadDTO, readJsonFile, defer, FileBuilder } from "@scramjet/utility"; -import { getTelemetryAdapter, ITelemetryAdapter } from "@scramjet/telemetry"; +import { ITelemetryAdapter } from "@scramjet/telemetry"; import { readFileSync } from "fs"; import { cpus, totalmem } from "os"; @@ -723,7 +723,7 @@ export class Host implements IComponent { }; } // eslint-disable-next-line no-console - console.log("Instances of sequence", sequenceInfo.id, sequenceInfo.instances); + this.logger.info("Instances of sequence", sequence.id, sequence.instances); if (sequence.instances.length > 0) { const instances = [...sequence.instances].every((instanceId) => { @@ -759,7 +759,7 @@ export class Host implements IComponent { this.logger.trace("Sequence removed:", id); - await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_DELETED, sequence as unknown as GetSequenceResponse); + await this.cpmConnector?.sendSequenceInfo(id, SequenceMessageCode.SEQUENCE_DELETED, sequence as unknown as STHRestAPI.GetSequenceResponse); this.auditor.auditSequence(id, SequenceMessageCode.SEQUENCE_DELETED); @@ -996,7 +996,6 @@ export class Host implements IComponent { */ // eslint-disable-next-line complexity async handleStartSequence(req: ParsedMessage): Promise> { - if (await this.loadCheck.overloaded()) { return { opStatus: ReasonPhrases.INSUFFICIENT_SPACE_ON_RESOURCE, @@ -1062,7 +1061,6 @@ export class Host implements IComponent { // this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); // }); - return { opStatus: ReasonPhrases.OK, message: `Sequence ${runner.id} starting`, @@ -1095,7 +1093,6 @@ export class Host implements IComponent { new 
CommunicationHandler(), this.config, this.instanceProxy); - } await this.instancesStore[id].handleInstanceConnect( @@ -1251,7 +1248,7 @@ export class Host implements IComponent { */ async setTelemetry(): Promise { if (this.config.telemetry.status) { - this.telemetryAdapter.logger.pipe(this.logger); + this.telemetryAdapter?.logger.pipe(this.logger); const ipAddress = require("ext-ip")(); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 9f14157a3..e2baf8867 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -39,9 +39,6 @@ import { ManagerClient } from "@scramjet/manager-api-client"; import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; import { writeFileSync } from "fs"; -import { mapToInputDataStream, readInputStreamHeaders } from "./input-stream"; -import { MessageUtils } from "./message-utils"; -import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; let exitHandled = false; From 9ce523b5dbc598f7d353719158c33df8dacd14bb Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 9 Jan 2024 11:54:11 +0000 Subject: [PATCH 42/62] Fix reconnecting js runner --- .vscode/launch.json | 9 +++++++++ packages/runner/src/host-client.ts | 4 ++-- packages/runner/src/runner.ts | 4 +--- packages/types/src/csh-connector.ts | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b0d386ce5..d43e43383 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -31,6 +31,15 @@ "outFiles": [ "${workspaceFolder}/**/*.js" ] + }, + { + "name": "Attach by Process ID", + "processId": "${command:PickProcess}", + "request": "attach", + "skipFiles": [ + "/**" + ], + "type": "node" } ] } diff --git a/packages/runner/src/host-client.ts b/packages/runner/src/host-client.ts index 213169a28..1bd4267d7 100644 --- a/packages/runner/src/host-client.ts +++ b/packages/runner/src/host-client.ts @@ -112,13 +112,13 @@ class HostClient implements IHostClient { 
this.logger.debug("Connected to host"); } - async disconnect() { + async disconnect(hard: boolean) { this.logger.trace("Disconnecting from host"); const streamsExitedPromised: Promise[] = this.streams.map((stream, i) => new Promise( (res) => { - if ("writable" in stream!) { + if (!hard && "writable" in stream!) { stream .on("error", (e) => { console.error("Error on stream", i, e.stack); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index e2baf8867..05136903f 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -280,15 +280,13 @@ export class Runner implements IComponent { this.monitoringMessageReplyTimeout = setTimeout(async () => { if (!this.connected) return; - this.connected = false; - await this.handleDisconnect(); }, 1000); } async handleDisconnect() { this.connected = false; - //await this.hostClient.disconnect(); + await this.hostClient.disconnect(true); await defer(5000); diff --git a/packages/types/src/csh-connector.ts b/packages/types/src/csh-connector.ts index 179befcd3..a239c8726 100644 --- a/packages/types/src/csh-connector.ts +++ b/packages/types/src/csh-connector.ts @@ -13,7 +13,7 @@ export interface IHostClient extends IComponent { /** * Disconnects from a host server. 
*/ - disconnect(): Promise; + disconnect(hard: boolean): Promise; getAgent(): Agent; stdinStream: UpstreamStreamsConfig[CC.STDIN] From 742069d6f4cb76f04ecc6847941d1f0eb047f2d9 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 10 Jan 2024 22:52:17 +0000 Subject: [PATCH 43/62] Fix reconnecting docker --- .../adapters/src/docker-instance-adapter.ts | 8 +++----- packages/runner/src/runner.ts | 20 ++++++++++--------- packages/types/src/index.ts | 1 + 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 54d24d18c..f7cea7571 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -245,13 +245,11 @@ IComponent { async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { try { - const containerId = await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); + this.resources.containerId = this.resources.containerId || await this.dockerHelper.getContainerIdByLabel("scramjet.instance.id", instanceId); - this.logger.debug("Container id restored", containerId); + this.logger.debug("Wait for container exit...", this.resources.containerId); - this.resources.containerId = containerId; - - const { statusCode } = await this.dockerHelper.wait(containerId); + const { statusCode } = await this.dockerHelper.wait(this.resources.containerId); this.logger.debug("Container exited", statusCode); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 05136903f..148f8a73e 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -16,6 +16,7 @@ import { IObjectLogger, MaybePromise, MonitoringRateMessageData, + RunnerConnectInfo, SequenceInfo, StopSequenceMessageData, Streamable, @@ -23,23 +24,20 @@ import { } from "@scramjet/types"; import { defer } from "@scramjet/utility"; +import { HostClient as 
HostApiClient } from "@scramjet/api-client"; +import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; +import { ManagerClient } from "@scramjet/manager-api-client"; + import { BufferStream, DataStream, StringStream } from "scramjet"; import { EventEmitter } from "events"; +import { writeFileSync } from "fs"; import { Readable, Writable } from "stream"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; import { mapToInputDataStream, readInputStreamHeaders, inputStreamInitLogger } from "./input-stream"; import { MessageUtils } from "./message-utils"; -import { HostClient as HostApiClient } from "@scramjet/api-client"; -import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; -import { ManagerClient } from "@scramjet/manager-api-client"; - -import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; - -import { writeFileSync } from "fs"; - let exitHandled = false; function onBeforeExit(code: number) { @@ -271,6 +269,10 @@ export class Runner implements IComponent { } private async reportHealth() { + if (this.monitoringMessageReplyTimeout) { + clearTimeout(this.monitoringMessageReplyTimeout); + } + const { healthy } = await this.context.monitor(); MessageUtils.writeMessageOnStream( @@ -281,7 +283,7 @@ export class Runner implements IComponent { if (!this.connected) return; await this.handleDisconnect(); - }, 1000); + }, 5000); } async handleDisconnect() { diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index dcfaea2ae..1bcfaf6d0 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -25,6 +25,7 @@ export * from "./module-loader"; export * from "./object-logger"; export * from "./op-response"; export * from "./runner-config"; +export * from "./runner-connect"; export * from "./runner"; export * from "./sequence"; export * from "./utils"; From 2cab29d7f47a529d4fa4323fb713084b4f41f59c Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 12 Jan 2024 20:04:03 +0000 Subject: [PATCH 
44/62] Revert py runner to connect new way but with reconnect --- packages/python-runner/runner.py | 178 +++++++------------------------ yarn.lock | 2 +- 2 files changed, 38 insertions(+), 142 deletions(-) diff --git a/packages/python-runner/runner.py b/packages/python-runner/runner.py index c6883339e..3ee7557ff 100644 --- a/packages/python-runner/runner.py +++ b/packages/python-runner/runner.py @@ -12,6 +12,7 @@ from hardcoded_magic_values import CommunicationChannels as CC from hardcoded_magic_values import RunnerMessageCodes as msg_codes + sequence_path = os.getenv('SEQUENCE_PATH') server_port = os.getenv('INSTANCES_SERVER_PORT') server_host = os.getenv('INSTANCES_SERVER_HOST') or 'localhost' @@ -40,10 +41,6 @@ def flush(self): class Runner: def __init__(self, instance_id, sequence_path, log_setup) -> None: - self.connection_retry_delay = 2 - - self.reconnect_interval = None - self.connected = False self.instance_id = instance_id self.seq_path = sequence_path self._logging_setup = log_setup @@ -53,53 +50,20 @@ def __init__(self, instance_id, sequence_path, log_setup) -> None: self.emitter = AsyncIOEventEmitter() self.keep_alive_requested = False - self.sequence = None - - self.input_type = None - self.instance_input = Stream() - self.output_content_type = None - self.instance_output = None - self.instance_direct_output = None - - - async def connect_to_host(self): + async def main(self, server_host, server_port): self.logger.info('Connecting to host...') + await self.init_connections(server_host, server_port) - try: - if not self.connected: - await self.init_connections(server_host, server_port) - - self.connected = True - else: - self.logger.warn(f"Already connected!") - except: - self.logger.debug(f"Error connecting. 
Retrying in {self.connection_retry_delay}") - - await asyncio.sleep(self.connection_retry_delay) - return await self.connect_to_host() - - - async def initialize(self): - await self.connect_to_host() - - asyncio.create_task(self.connect_input_stream()) - + # Do this early to have access to any thrown exceptions and logs. self.connect_stdio() self.connect_log_stream() + config, args = await self.handshake() self.logger.info('Communication established.') - asyncio.create_task(self.connect_control_stream()) asyncio.create_task(self.setup_heartbeat()) - self.forward_output_stream() - - return config, args - - - async def main(self): - config, args = await self.initialize() self.load_sequence() await self.run_instance(config, args) @@ -195,11 +159,6 @@ async def connect_control_stream(self): await self.handle_stop(data) if code == msg_codes.EVENT.value: self.emitter.emit(data['eventName'], data['message'] if 'message' in data else None) - if code == msg_codes.MONITORING_REPLY.value: - - if self.reconnect_interval: - self.reconnect_interval.cancel() - self.reconnect_interval = None async def handle_stop(self, data): @@ -222,25 +181,13 @@ async def handle_stop(self, data): async def setup_heartbeat(self): - async def timeout(self): - await asyncio.sleep(4) - - self.logger.warn("Monitoring reply not received!") - self.connected = False - - await self.initialize() - - while self.connected: - await asyncio.sleep(5) - + while True: send_encoded_msg( self.streams[CC.MONITORING], msg_codes.MONITORING, self.health_check(), ) - - if self.reconnect_interval == None: - self.reconnect_interval = asyncio.create_task(timeout(self)) + await asyncio.sleep(1) def load_sequence(self): @@ -256,15 +203,13 @@ def load_sequence(self): # switch to sequence dir so that relative paths will work os.chdir(os.path.dirname(self.seq_path)) - async def run_instance(self, config, args): context = AppContext(self, config) - self.instance_input = Stream() - - 
asyncio.create_task(self.connect_input_stream()) + input_stream = Stream() + asyncio.create_task(self.connect_input_stream(input_stream)) self.logger.info('Running instance...') - result = self.sequence.run(context, self.instance_input, *args) + result = self.sequence.run(context, input_stream, *args) self.logger.info(f'Sending PANG') monitoring = self.streams[CC.MONITORING] @@ -274,49 +219,28 @@ async def run_instance(self, config, args): send_encoded_msg(monitoring, msg_codes.PANG, produces) consumes = getattr(result, 'requires', None) or getattr(self.sequence, 'requires', None) - if consumes: self.logger.info(f'Sending PANG with {consumes}') send_encoded_msg(monitoring, msg_codes.PANG, consumes) if isinstance(result, types.AsyncGeneratorType): - self.logger.info("Instance result is instance") result = Stream.read_from(result) elif asyncio.iscoroutine(result): - self.logger.info("Instance result is coroutine") result = await result if result: - self.logger.info("Instance result stream") - self.instance_direct_output = result - self.get_output_content_type() - self.forward_output_stream() - - end_stream = Stream() - self.instance_output.pipe(end_stream) - - - while True: - chunk = await end_stream.read() - if chunk is None: - self.logger.debug('Ending output') - break - + await self.forward_output_stream(result) else: self.logger.debug('Sequence returned no output.') self.logger.info('Finished.') await self.cleanup() - async def cleanup(self): self.streams[CC.LOG].write_eof() - async def get_input_content_type(self): - if self.sequence is None: - return - + async def connect_input_stream(self, input_stream): if hasattr(self.sequence, "requires"): - self.input_type = self.sequence.requires.get('contentType') + input_type = self.sequence.requires.get('contentType') else: raw_headers = await self.streams[CC.IN].readuntil(b'\r\n\r\n') header_list = raw_headers.decode().rstrip().split('\r\n') @@ -324,72 +248,44 @@ async def get_input_content_type(self): key.lower(): 
val for key, val in [el.split(': ') for el in header_list] } self.logger.info(f'Input headers: {repr(headers)}') - self.input_type = headers.get('content-type') - - - async def connect_input_stream(self): - if self.input_type is None: - await self.get_input_content_type() + input_type = headers.get('content-type') - if self.input_type is None: - return - - if self.input_type == 'text/plain': + if input_type == 'text/plain': input = Stream.read_from(self.streams[CC.IN]) self.logger.debug('Decoding input stream...') input = input.decode('utf-8') - elif self.input_type == 'application/octet-stream': + elif input_type == 'application/octet-stream': self.logger.debug('Opening input in binary mode...') input = Stream.read_from(self.streams[CC.IN], chunk_size=CHUNK_SIZE) else: - raise TypeError(f'Unsupported input type: {repr(self.input_type)}') + raise TypeError(f'Unsupported input type: {repr(input_type)}') - input.pipe(self.instance_input, False) + input.pipe(input_stream) self.logger.debug('Input stream forwarded to the instance.') - def get_output_content_type(self): - if self.output_content_type is None: - if hasattr(self.instance_direct_output, 'provides'): - attribute = getattr(self.sequence, 'provides', None) - self.output_content_type = attribute['contentType'] - else: - if hasattr(self.sequence, 'provides'): - attribute = getattr(self.sequence, 'provides', None) - self.output_content_type = attribute['contentType'] - else: - self.logger.debug('Output type not set, using default') - self.output_content_type = 'text/plain' - - self.logger.info(f'Content-type: {self.output_content_type}') - - - def forward_output_stream(self): - if self.instance_direct_output is None: - self.logger.warn("Instance direct output not initialized") - return + async def forward_output_stream(self, output): - if self.instance_output and len(self.instance_output._sinks) > 0: - self.instance_output.unpipe() - # self.instance_output.end() + if hasattr(output, 'provides'): + attribute = 
getattr(self.sequence, 'provides', None) + content_type = attribute['contentType'] else: - if self.output_content_type == 'text/plain': - self.logger.debug('Output stream will be treated as text and encoded') - self.instance_output = self.instance_direct_output.map(lambda s: s.encode()) - if self.output_content_type == 'application/x-ndjson': - self.logger.debug('Output will be converted to JSON') - self.instance_output = self.instance_direct_output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) - - self.logger.debug("Writing instance_output to CC.OUT") - - s = Stream() - - self.instance_output.pipe(s, False) + if hasattr(self.sequence, 'provides'): + attribute = getattr(self.sequence, 'provides', None) + content_type = attribute['contentType'] + else: + self.logger.debug('Output type not set, using default') + content_type = 'text/plain' - async def write(): - await s.write_to(self.streams[CC.OUT]) + self.logger.info(f'Content-type: {content_type}') + if content_type == 'text/plain': + self.logger.debug('Output stream will be treated as text and encoded') + output = output.map(lambda s: s.encode()) + if content_type == 'application/x-ndjson': + self.logger.debug('Output will be converted to JSON') + output = output.map(lambda chunk: (json.dumps(chunk)+'\n').encode()) - asyncio.create_task(write()) + await output.write_to(self.streams[CC.OUT]) async def send_keep_alive(self, timeout: int = 0, can_keep_alive: bool = False): @@ -444,4 +340,4 @@ async def keep_alive(self, timeout: int = 0): sys.exit(2) runner = Runner(instance_id, sequence_path, log_setup) -asyncio.run(runner.main()) +asyncio.run(runner.main(server_host, server_port)) diff --git a/yarn.lock b/yarn.lock index 4f8c31491..d5125039d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7366,7 +7366,7 @@ scramjet-core@^4.32.12: resolved "https://registry.npmjs.org/scramjet-core/-/scramjet-core-4.32.12.tgz" integrity sha512-FkNaZqzXvzqdwrUWzMztJq2RUBcpBlm08zOYIhA69+//FzgrespLBz7DmCXdXfujjvmUIFGgq/T3aPFy1ctonw== 
-scramjet@^4.36.9: +scramjet@^4.36.6, scramjet@^4.36.9: version "4.37.0" resolved "https://registry.yarnpkg.com/scramjet/-/scramjet-4.37.0.tgz#2e89f07cbaffd1f9cdd5a3da64aba250745aac13" integrity sha512-Y6b59qGsulkr5MxiVn9CABnL9pE/sPKihCcWSUhzZc6W0YWbfLWRXc1fE1M40QKfOQUBxks81efzJ7WpEuFmlQ== From 88389056810aa20e69cdd8e2dd5e457221f63059 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 12 Jan 2024 20:37:30 +0000 Subject: [PATCH 45/62] Fix after devel merge --- packages/host/src/lib/csi-controller.ts | 2 -- packages/host/src/lib/csi-dispatcher.ts | 11 +++++---- packages/host/src/lib/host.ts | 14 +++++------ packages/runner/src/runner.ts | 32 ++++++++++++++----------- yarn.lock | 23 +++++++++--------- 5 files changed, 43 insertions(+), 39 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 36ce7f869..df1586fb9 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -170,8 +170,6 @@ export class CSIController extends TypedEmitter { public localEmitter: EventEmitter & { lastEvents: { [evname: string]: any } }; - communicationHandler: ICommunicationHandler; - constructor( private handshakeMessage: MessageDataType, public communicationHandler: ICommunicationHandler, diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 213e907ba..b1b1f3afd 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -2,13 +2,11 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; import { RunnerMessageCode } from "@scramjet/symbols"; -import { HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; -import { StartSequencePayload } from 
"@scramjet/types/src/rest-api-sth"; +import { ContentType, EventMessageData, HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; import { TypedEmitter } from "@scramjet/utility"; import { CSIController, CSIControllerInfo } from "./csi-controller"; import { InstanceStore } from "./instance-store"; import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; -import { ContentType } from "./serviceDiscovery/contentType"; import TopicId from "./serviceDiscovery/topicId"; import { Readable, Writable } from "stream"; import SequenceStore from "./sequenceStore"; @@ -26,6 +24,7 @@ type Events = { end: (data: DispatcherInstanceEndEventData) => void; terminated: (data: DispatcherInstanceEndEventData) => void; established: (data: DispatcherInstanceEstablishedEventData) => void; + event: (eventData: { event: EventMessageData, id: string }) => void; }; type CSIDispatcherOpts = { @@ -55,7 +54,7 @@ export class CSIDispatcher extends TypedEmitter { async createCSIController( id: string, sequenceInfo: SequenceInfo, - payload: StartSequencePayload, + payload: STHRestAPI.StartSequencePayload, communicationHandler: ICommunicationHandler, config: STHConfiguration, instanceProxy: HostProxy) { @@ -73,6 +72,10 @@ export class CSIDispatcher extends TypedEmitter { this.emit("error", { id, err }); }); + csiController.on("event", async (event: EventMessageData) => { + this.emit("event", { event, id: csiController.id }); + }); + // eslint-disable-next-line complexity csiController.on("pang", async (data: PangMessageData) => { this.logger.trace("PANG received", [csiController.id, data]); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index eabd40480..93d6512af 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -9,7 +9,6 @@ import { CommunicationHandler, HostError, 
IDProvider } from "@scramjet/model"; import { HostHeaders, InstanceMessageCode, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { APIExpose, - ContentType, CPMConnectorOptions, EventMessageData, HostProxy, @@ -41,8 +40,6 @@ import { DuplexStream } from "@scramjet/api-server"; import { ConfigService, development } from "@scramjet/sth-config"; import { isStartSequenceDTO, isStartSequenceEndpointPayloadDTO, readJsonFile, defer, FileBuilder } from "@scramjet/utility"; -import { readFileSync } from "fs"; -import { cpus, totalmem } from "os"; import { DataStream } from "scramjet"; import { inspect } from "util"; @@ -50,16 +47,15 @@ import { AuditedRequest, Auditor } from "./auditor"; import { auditMiddleware, logger as auditMiddlewareLogger } from "./middlewares/audit"; import { corsMiddleware } from "./middlewares/cors"; import { optionsMiddleware } from "./middlewares/options"; -import { S3Client } from "./s3-client"; + import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; import { SocketServer } from "./socket-server"; import { getTelemetryAdapter, ITelemetryAdapter } from "@scramjet/telemetry"; import { cpus, homedir, totalmem } from "os"; import { S3Client } from "./s3-client"; -import { DuplexStream } from "@scramjet/api-server"; + import { existsSync, mkdirSync, readFileSync } from "fs"; -import TopicId from "./serviceDiscovery/topicId"; import TopicRouter from "./serviceDiscovery/topicRouter"; import SequenceStore from "./sequenceStore"; @@ -313,6 +309,9 @@ export class Host implements IComponent { attachDispatcherEvents() { this.csiDispatcher + .on("event", async ({ event, id }) => { + await this.eventBus({ source: id, ...event }); + }) .on("end", async (eventData: DispatcherInstanceEndEventData) => { await this.handleDispatcherEndEvent(eventData); }) @@ -1285,7 +1284,8 @@ export class Host implements IComponent { */ async setTelemetry(): Promise { if (this.config.telemetry.status) { - 
this.telemetryAdapter?.logger.pipe(this.logger); + this.telemetryAdapter = await getTelemetryAdapter(this.config.telemetry.adapter, this.config.telemetry); + this.telemetryAdapter.logger.pipe(this.logger); const ipAddress = require("ext-ip")(); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 148f8a73e..6ec824f0e 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -22,7 +22,7 @@ import { Streamable, SynchronousStreamable } from "@scramjet/types"; -import { defer } from "@scramjet/utility"; +import { defer, promiseTimeout } from "@scramjet/utility"; import { HostClient as HostApiClient } from "@scramjet/api-client"; import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; @@ -166,6 +166,7 @@ export class Runner implements IComponent { this.runnerConnectInfo = runnerConnectInfo; this.logger = new ObjLogger(this, { id: instanceId }); + hostClient.logger.pipe(this.logger); inputStreamInitLogger.pipe(this.logger); @@ -258,7 +259,7 @@ export class Runner implements IComponent { let working = false; this.monitoringInterval = setInterval(async () => { - if (working || !this.connected) { + if (working) { return; } @@ -269,10 +270,6 @@ export class Runner implements IComponent { } private async reportHealth() { - if (this.monitoringMessageReplyTimeout) { - clearTimeout(this.monitoringMessageReplyTimeout); - } - const { healthy } = await this.context.monitor(); MessageUtils.writeMessageOnStream( @@ -280,17 +277,25 @@ export class Runner implements IComponent { ); this.monitoringMessageReplyTimeout = setTimeout(async () => { - if (!this.connected) return; + this.logger.warn("Monitoring Reply Timeout. 
Connected", this.connected); await this.handleDisconnect(); - }, 5000); + }, 500); } async handleDisconnect() { + if (this.monitoringInterval) { + clearInterval(this.monitoringInterval); + } + this.connected = false; - await this.hostClient.disconnect(true); - await defer(5000); + try { + await this.hostClient.disconnect(true); + await defer(5000); + } catch (e) { + this.logger.error("Disconnect failed"); + } this.logger.info("Reinitializing...."); @@ -353,7 +358,7 @@ export class Runner implements IComponent { this.logger.debug("premain"); try { - await this.hostClient.init(this.instanceId); + await promiseTimeout(this.hostClient.init(this.instanceId), 2000); this.connected = true; } catch (e) { this.connected = false; @@ -383,6 +388,8 @@ export class Runner implements IComponent { this.logger.debug("Handshake received", appConfig, args); + await this.handleMonitoringRequest({ monitoringRate: 1 }); + return { appConfig, args }; } @@ -391,9 +398,6 @@ export class Runner implements IComponent { this.initAppContext(appConfig as X); - await this.reportHealth(); - await this.handleMonitoringRequest({ monitoringRate: 1 }); - let sequence: any[] = []; try { diff --git a/yarn.lock b/yarn.lock index 55a3128e9..e38188e90 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6660,10 +6660,10 @@ pad-right@^0.2.2: dependencies: repeat-string "^1.5.2" -papaparse@^5.3.2: - version "5.3.2" - resolved "https://registry.npmjs.org/papaparse/-/papaparse-5.3.2.tgz" - integrity sha512-6dNZu0Ki+gyV0eBsFKJhYr+MdQYAzFUGlBMNj3GNrmHxmz1lfRa24CjFObPXtjcetlOv5Ad299MhIK0znp3afw== +papaparse@^5.4.1: + version "5.4.1" + resolved "https://registry.yarnpkg.com/papaparse/-/papaparse-5.4.1.tgz#f45c0f871853578bd3a30f92d96fdcfb6ebea127" + integrity sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw== parent-module@^1.0.0: version "1.0.1" @@ -7323,20 +7323,19 @@ safe-stable-stringify@^2.3.1: resolved "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz" 
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== -scramjet-core@^4.32.10: - version "4.32.10" - resolved "https://registry.npmjs.org/scramjet-core/-/scramjet-core-4.32.10.tgz" - integrity sha512-B4xYXl8+sT3Fy/DAKmGE1TJafHm+U46AUY3w0v4XseNwhy94C+JfzKR/9+F1gmEihn+E7tDPXnqlquGDmQ6o8Q== - +scramjet-core@^4.32.12: + version "4.32.12" + resolved "https://registry.yarnpkg.com/scramjet-core/-/scramjet-core-4.32.12.tgz#d049a2b2cb4a2635f2f30e56d54f6b0367cf21d5" + integrity sha512-FkNaZqzXvzqdwrUWzMztJq2RUBcpBlm08zOYIhA69+//FzgrespLBz7DmCXdXfujjvmUIFGgq/T3aPFy1ctonw== -scramjet@^4.36.6, scramjet@^4.36.9, scramjet@^4.37.0: +scramjet@^4.36.6, scramjet@^4.36.9: version "4.37.0" resolved "https://registry.yarnpkg.com/scramjet/-/scramjet-4.37.0.tgz#2e89f07cbaffd1f9cdd5a3da64aba250745aac13" integrity sha512-Y6b59qGsulkr5MxiVn9CABnL9pE/sPKihCcWSUhzZc6W0YWbfLWRXc1fE1M40QKfOQUBxks81efzJ7WpEuFmlQ== dependencies: - papaparse "^5.3.2" + papaparse "^5.4.1" rereadable-stream "^1.4.14" - scramjet-core "^4.32.10" + scramjet-core "^4.32.12" seed-random@~2.2.0: version "2.2.0" From e6ef9da8ee43ab2997e6f1ab621bb1d8a7e05c29 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 16 Jan 2024 22:00:16 +0000 Subject: [PATCH 46/62] Send Monitoring reply before stats --- .../adapters/src/process-instance-adapter.ts | 14 ++++-- packages/host/src/lib/csi-controller.ts | 14 +++--- packages/host/src/lib/csi-dispatcher.ts | 11 ++++- packages/host/src/lib/host.ts | 2 +- packages/runner/src/runner.ts | 45 ++++++++++++------- .../types/src/rest-api-sth/start-sequence.ts | 2 +- 6 files changed, 61 insertions(+), 27 deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 96d8be5ac..7f05f3134 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -173,9 +173,9 @@ class ProcessInstanceAdapter implements 
this.crashLogStreams = Promise.all([runnerProcess.stdout, runnerProcess.stderr].map(streamToString)); - this.logger.trace("Runner process is running", runnerProcess.pid); - this.runnerProcess = runnerProcess; + + this.logger.trace("Runner process is running", runnerProcess.pid); } getRunnerInfo(): RunnerConnectInfo["system"] { @@ -234,7 +234,15 @@ class ProcessInstanceAdapter implements res(parseInt(data!, 10)); } catch (err) { - /** file not exists */ + /** OK. file not exists. check if process is*/ + + try { + process.kill(this.processPID, 0); + } catch (e) { + this.logger.error("Runner process not exists", e); + /** process not exists */ + reject("pid not exists"); + } } }, 1000); }); diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index df1586fb9..1a78fbb69 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -236,7 +236,7 @@ export class CSIController extends TypedEmitter { async main() { this.status = InstanceStatus.RUNNING; - this.logger.trace("Instance started"); + this.logger.trace("Instance started", this.status); let code = -1; @@ -291,6 +291,8 @@ export class CSIController extends TypedEmitter { const exitcode = await this.endOfSequence; + this.logger.trace("End of sequence"); + if (exitcode > 0) { this.status = InstanceStatus.ERRORED; this.logger.error("Crashlog", await this.instanceAdapter.getCrashLog()); @@ -497,6 +499,10 @@ export class CSIController extends TypedEmitter { }); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.MONITORING, async message => { + await this.controlDataStream?.whenWrote( + MessageUtilities.serializeMessage({ msgCode: RunnerMessageCode.MONITORING_REPLY }) + ); + const stats = await this.instanceAdapter.stats(message[1]); this._lastStats = stats; @@ -505,10 +511,6 @@ export class CSIController extends TypedEmitter { message[1] = stats; - await this.controlDataStream?.whenWrote( - MessageUtilities.serializeMessage({ 
msgCode: RunnerMessageCode.MONITORING_REPLY }) - ); - return message; }, true); @@ -566,7 +568,7 @@ export class CSIController extends TypedEmitter { } this.info.started = new Date(); //@TODO: set by runner? - this.logger.info("Instance started", JSON.stringify(message, undefined)); + this.logger.info("Handshake", JSON.stringify(message, undefined)); } async handleInstanceConnect(streams: DownstreamStreamsConfig) { diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index b1b1f3afd..595cef75e 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -73,6 +73,7 @@ export class CSIDispatcher extends TypedEmitter { }); csiController.on("event", async (event: EventMessageData) => { + this.logger.info("Received event", event); this.emit("event", { event, id: csiController.id }); }); @@ -116,6 +117,14 @@ export class CSIDispatcher extends TypedEmitter { }); csiController.on("ping", (pingMessage: PingMessageData) => { + this.logger.info("Ping received", JSON.stringify(pingMessage)); + + if (pingMessage.sequenceInfo.config.type !== this.STHConfig.runtimeAdapter) { + this.logger.error("Incorrect Instance adapter"); + + return; + } + const seq = this.sequenceStore.getById(csiController.sequence.id); if (seq) { @@ -180,7 +189,7 @@ export class CSIDispatcher extends TypedEmitter { csiController.start().catch((e) => { this.logger.error("CSIC start error", csiController.id, e); - throw new Error("CSIC start error"); + this.emit("error", { id: csiController.id, err: "fatal" }); }); this.logger.trace("csiController started", id); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 93d6512af..1e11c41f6 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1110,7 +1110,7 @@ export class Host implements IComponent { this.logger.debug("Instance connecting", id); if (!this.instancesStore[id]) { - this.logger.info("creating new CSIController 
for runner connecting"); + this.logger.info("creating new CSIController unknown istance"); await this.csiDispatcher.createCSIController( id, diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 6ec824f0e..f5f12a746 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -31,7 +31,7 @@ import { ManagerClient } from "@scramjet/manager-api-client"; import { BufferStream, DataStream, StringStream } from "scramjet"; import { EventEmitter } from "events"; -import { writeFileSync } from "fs"; +import { createWriteStream, writeFileSync } from "fs"; import { Readable, Writable } from "stream"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; @@ -174,6 +174,10 @@ export class Runner implements IComponent { this.logger.addOutput(process.stdout); } + if (process.env.RUNNER_LOG_FILE) { + this.logger.addOutput(createWriteStream(process.env.RUNNER_LOG_FILE)); + } + this.inputDataStream = new DataStream().catch((e: any) => { this.logger.error("Error during input data stream", e); @@ -200,6 +204,10 @@ export class Runner implements IComponent { async controlStreamHandler([code, data]: EncodedControlMessage) { this.logger.debug("Control message received", code, data); + if (this.monitoringMessageReplyTimeout) { + clearTimeout(this.monitoringMessageReplyTimeout); + } + switch (code) { case RunnerMessageCode.MONITORING_RATE: await this.handleMonitoringRequest(data as MonitoringRateMessageData); @@ -219,9 +227,6 @@ export class Runner implements IComponent { this.emitter.emit(eventData.eventName, eventData.message); break; case RunnerMessageCode.MONITORING_REPLY: - if (this.monitoringMessageReplyTimeout) { - clearTimeout(this.monitoringMessageReplyTimeout); - } break; default: break; @@ -264,23 +269,25 @@ export class Runner implements IComponent { } working = true; - await this.reportHealth(); + await this.reportHealth(1000); working = false; }, 1000 / data.monitoringRate).unref(); } - private async 
reportHealth() { + private async reportHealth(timeout?: number) { const { healthy } = await this.context.monitor(); MessageUtils.writeMessageOnStream( [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream ); - this.monitoringMessageReplyTimeout = setTimeout(async () => { - this.logger.warn("Monitoring Reply Timeout. Connected", this.connected); + if (timeout) { + this.monitoringMessageReplyTimeout = setTimeout(async () => { + this.logger.warn("Monitoring Reply Timeout. Connected"); - await this.handleDisconnect(); - }, 500); + await this.handleDisconnect(); + }, timeout); + } } async handleDisconnect() { @@ -288,10 +295,14 @@ export class Runner implements IComponent { clearInterval(this.monitoringInterval); } + if (this.monitoringMessageReplyTimeout) { + clearTimeout(this.monitoringMessageReplyTimeout); + } + this.connected = false; try { - await this.hostClient.disconnect(true); + await this.hostClient.disconnect(!this.connected); await defer(5000); } catch (e) { this.logger.error("Disconnect failed"); @@ -358,8 +369,11 @@ export class Runner implements IComponent { this.logger.debug("premain"); try { + this.logger.debug("connecting..."); await promiseTimeout(this.hostClient.init(this.instanceId), 2000); + this.logger.debug("connected"); this.connected = true; + await this.handleMonitoringRequest({ monitoringRate: 1 }); } catch (e) { this.connected = false; this.logger.error("hostClient init error", e); @@ -369,8 +383,10 @@ export class Runner implements IComponent { return await this.premain(); } + this.logger.debug("Redirecting outputs"); this.redirectOutputs(); + this.logger.debug("Defining control stream"); this.defineControlStream(); this.hostClient.stdinStream @@ -386,10 +402,6 @@ export class Runner implements IComponent { const { args, appConfig } = this.runnerConnectInfo; - this.logger.debug("Handshake received", appConfig, args); - - await this.handleMonitoringRequest({ monitoringRate: 1 }); - return { appConfig, args }; } @@ -398,6 
+410,9 @@ export class Runner implements IComponent { this.initAppContext(appConfig as X); + await this.reportHealth(); + await this.handleMonitoringRequest({ monitoringRate: 1 }); + let sequence: any[] = []; try { diff --git a/packages/types/src/rest-api-sth/start-sequence.ts b/packages/types/src/rest-api-sth/start-sequence.ts index 7a2d4ee19..d8c17bb5a 100644 --- a/packages/types/src/rest-api-sth/start-sequence.ts +++ b/packages/types/src/rest-api-sth/start-sequence.ts @@ -2,4 +2,4 @@ import { RunnerConnectInfo } from "../runner-connect"; export type StartSequenceResponse = { id: string }; -export type StartSequencePayload = RunnerConnectInfo; +export type StartSequencePayload = Omit; From 21601ef8026253e228f53a99b33f7417d0ec3202 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 17 Jan 2024 15:48:54 +0000 Subject: [PATCH 47/62] Restore 'created' field in InstanceInfo --- packages/host/src/lib/cpm-connector.ts | 1 + packages/host/src/lib/csi-controller.ts | 7 ++++--- packages/runner/src/runner.ts | 2 ++ packages/types/src/messages/handshake.ts | 1 + 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/host/src/lib/cpm-connector.ts b/packages/host/src/lib/cpm-connector.ts index 00e1ea0fd..163fbb9f2 100644 --- a/packages/host/src/lib/cpm-connector.ts +++ b/packages/host/src/lib/cpm-connector.ts @@ -586,6 +586,7 @@ export class CPMConnector extends TypedEmitter { */ async sendInstanceInfo(instance: Instance, instanceStatus: InstanceMessageCode): Promise { this.logger.trace("Send instance status update", instanceStatus); + await this.communicationStream?.whenWrote( [CPMMessageCode.INSTANCE, { instance, status: instanceStatus }] ); diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 1a78fbb69..a3477734e 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -171,7 +171,7 @@ export class CSIController extends TypedEmitter { public localEmitter: EventEmitter 
& { lastEvents: { [evname: string]: any } }; constructor( - private handshakeMessage: MessageDataType, + private handshakeMessage: Omit, "created">, public communicationHandler: ICommunicationHandler, private sthConfig: STHConfiguration, private hostProxy: HostProxy, @@ -470,7 +470,8 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { const payload = message[1].payload; - this.args = message[1].payload.args; + this.args = payload.args; + this.info.created = new Date(message[1].created); this.provides ||= this.outputTopic || payload?.outputTopic; this.requires ||= this.inputTopic || payload?.inputTopic; @@ -478,6 +479,7 @@ export class CSIController extends TypedEmitter { await this.handleHandshake(message); this.emit("ping", message[1]); + return null; }); @@ -828,7 +830,6 @@ export class CSIController extends TypedEmitter { } getInfo(): STHRestAPI.GetInstanceResponse { - // eslint-disable-next-line no-console this.logger.debug("Get info [seq, info]", this.sequence, this.info); return { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index f5f12a746..2e46e611f 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -148,6 +148,7 @@ export class Runner implements IComponent { private sequenceInfo: SequenceInfo; private connected = false; + private created = Date.now(); private runnerConnectInfo: RunnerConnectInfo = { appConfig: {} @@ -570,6 +571,7 @@ export class Runner implements IComponent { RunnerMessageCode.PING, { id: this.instanceId, sequenceInfo: this.sequenceInfo, + created: this.created, payload: { ...this.runnerConnectInfo, system: { diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 64f33a20b..2cf5ed7e7 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -19,6 +19,7 @@ export type PingMessageData = { ports?: 
Record; payload: StartSequencePayload; sequenceInfo: SequenceInfo; + created: number; }; export type PangMessageData = { From b4d0c844abed53b053f13530cddbcea319a7c913 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Thu, 18 Jan 2024 11:01:52 +0000 Subject: [PATCH 48/62] Warning on unsuccesful connection. Fix status after reconnect --- packages/host/src/lib/csi-controller.ts | 4 +++- packages/runner/src/runner.ts | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index a3477734e..b9bff5e45 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -262,7 +262,7 @@ export class CSIController extends TypedEmitter { } this.info.ended = new Date(); - this.executionTime = (this.info.ended.getTime() - this.info.started!.getTime()) / 1000; + this.executionTime = (this.info.ended.getTime() - this.info.created!.getTime()) / 1000; this.emit("terminated", code); @@ -470,6 +470,8 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { const payload = message[1].payload; + this.status = InstanceStatus.RUNNING; + this.args = payload.args; this.info.created = new Date(message[1].created); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 2e46e611f..74ce1ffd6 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -377,7 +377,7 @@ export class Runner implements IComponent { await this.handleMonitoringRequest({ monitoringRate: 1 }); } catch (e) { this.connected = false; - this.logger.error("hostClient init error", e); + this.logger.warn("Can't connect to Host", e); await defer(2000); From ce5452bf33a6e4d8578b749e40b8cfbd39a2cb77 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 19 Jan 2024 11:05:45 +0000 Subject: [PATCH 49/62] Add logs and missing payload in k8s IA --- 
.../adapters/src/kubernetes-instance-adapter.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index 9e660d559..e3526cedc 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -94,7 +94,7 @@ IComponent { } }; } - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, _payload: RunnerConnectInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { if (config.type !== "kubernetes") { throw new Error(`Invalid config type for kubernetes adapter: ${config.type}`); } @@ -116,7 +116,8 @@ IComponent { instancesServerHost: this.adapterConfig.sthPodHost, instanceId, pipesPath: "", - sequenceInfo + sequenceInfo, + payload }, { ...this.sthConfig.runnerEnvs }).map(([name, value]) => ({ name, value })); @@ -147,8 +148,12 @@ IComponent { 2 ); + this.logger.debug("Runner Pod created"); + const startPodStatus = await this.kubeClient.waitForPodStatus(runnerName, ["Running", "Failed"]); + this.logger.debug("Runner Pod status"); + if (startPodStatus.status === "Failed") { this.logger.error("Runner unable to start", startPodStatus); @@ -168,9 +173,15 @@ IComponent { this.stdErrorStream.on("data", (data) => { this.logger.error("POD stderr", data.toString()); }); await this.kubeClient.exec(runnerName, runnerName, ["unpack.sh", "/package"], process.stdout, this.stdErrorStream, compressedStream, 2); + + this.logger.debug("Copy command done"); } - async waitUntilExit(_config: InstanceConfig, _instanceId: string, _sequenceInfo: SequenceInfo): Promise { + async waitUntilExit(_config: InstanceConfig, instanceId: string, _sequenceInfo: SequenceInfo): Promise { + this.logger.info("Waiting for pod 
exit..."); + + this._runnerName ||= `runner-${ instanceId }`; + const exitPodStatus = await this.kubeClient.waitForPodStatus(this._runnerName!, ["Succeeded", "Failed", "Unknown"]); this.stdErrorStream?.end(); From bce56bf404cdb06f36c91a32990128d6224b8be9 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 19 Jan 2024 12:29:57 +0000 Subject: [PATCH 50/62] Init kubeClient on reconnect. Fix execTime on error --- packages/adapters/src/kubernetes-instance-adapter.ts | 3 ++- packages/adapters/src/types.ts | 2 +- packages/host/src/lib/csi-controller.ts | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index e3526cedc..d68e3c001 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -62,7 +62,8 @@ IComponent { private get kubeClient() { if (!this._kubeClient) { - throw new Error("Kubernetes client not initialized"); + this._kubeClient = new KubernetesClientAdapter(this.adapterConfig.authConfigPath, this.adapterConfig.namespace); + this._kubeClient.init(); } return this._kubeClient; diff --git a/packages/adapters/src/types.ts b/packages/adapters/src/types.ts index b2a0a2862..d140c2b22 100644 --- a/packages/adapters/src/types.ts +++ b/packages/adapters/src/types.ts @@ -327,7 +327,7 @@ export type RunnerEnvConfig = { instancesServerHost: string; instanceId: InstanceId; sequenceInfo: SequenceInfo - payload?: StartSequencePayload + payload: StartSequencePayload } export type RunnerEnvironmentVariables = Partial<{ diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index b9bff5e45..5bad31244 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -220,7 +220,10 @@ export class CSIController extends TypedEmitter { i.then(() => this.main()).catch(async (e) => { this.logger.info("Instance status: 
errored", e); + this.status ||= InstanceStatus.ERRORED; + this.executionTime = (Date.now() - this.info.created!.getTime()) / 1000; + this.setExitInfo(e.exitcode, e.message); this.emit("error", e); From 15eda16dfc1d052355f93e88c5e818799af3c94e Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 22 Jan 2024 12:21:26 +0000 Subject: [PATCH 51/62] :resend PANGs after reconnect --- packages/host/src/lib/csi-controller.ts | 6 ++++ packages/host/src/lib/csi-dispatcher.ts | 1 + packages/host/src/lib/host.ts | 2 ++ packages/runner/src/runner.ts | 46 +++++++++++++++---------- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 5bad31244..59485326c 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -473,6 +473,12 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { const payload = message[1].payload; + if (!payload) { + this.emit("error", "No payload in ping!"); + + return null; + } + this.status = InstanceStatus.RUNNING; this.args = payload.args; diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 595cef75e..35fd31a56 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -133,6 +133,7 @@ export class CSIDispatcher extends TypedEmitter { this.logger.warn("Instance of not existing sequence connected"); //@TODO: ? } + this.emit("established", { id: pingMessage.id, sequence: pingMessage.sequenceInfo }); }); diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 1e11c41f6..e6da870b1 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -843,6 +843,8 @@ export class Host implements IComponent { * Used to recover Sequences information after restart. 
*/ async identifyExistingSequences() { + this.logger.trace("Identifing existing sequences"); + const adapter = await initializeRuntimeAdapters(this.config); const sequenceAdapter = getSequenceAdapter(adapter, this.config); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 74ce1ffd6..d306e0487 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -150,6 +150,11 @@ export class Runner implements IComponent { private connected = false; private created = Date.now(); + private requires?: string; + private requiresContentType?: string; + private provides?: string; + private providesContentType?: string; + private runnerConnectInfo: RunnerConnectInfo = { appConfig: {} }; @@ -312,6 +317,14 @@ export class Runner implements IComponent { this.logger.info("Reinitializing...."); await this.premain(); + + if (this.requires) { + this.sendPang({ requires: this.requires, contentType: this.requiresContentType }); + } + + if (this.provides) { + this.sendPang({ provides: this.provides, contentType: this.providesContentType }); + } } async handleKillRequest(): Promise { @@ -406,6 +419,11 @@ export class Runner implements IComponent { return { appConfig, args }; } + sendPang(args: { contentType?: string, requires?: string, provides?: string }) { + MessageUtils.writeMessageOnStream( + [RunnerMessageCode.PANG, args], this.hostClient.monitorStream); + } + async main() { const { appConfig, args } = await this.premain(); @@ -423,11 +441,10 @@ export class Runner implements IComponent { if (sequence.length && typeof sequence[0] !== "function") { this.logger.debug("First Sequence object is not a function:", sequence[0]); - MessageUtils.writeMessageOnStream( - [RunnerMessageCode.PANG, { - requires: sequence[0].requires, - contentType: sequence[0].contentType - }], this.hostClient.monitorStream); + this.requires = sequence[0].requires; + this.requiresContentType = sequence[0].contentType; + + this.sendPang({ requires: this.requires, 
contentType: this.requiresContentType }); this.logger.trace("Waiting for input stream"); @@ -694,13 +711,7 @@ export class Runner implements IComponent { this.hostClient.outputStream.end(`${intermediate}`); - MessageUtils.writeMessageOnStream( - [RunnerMessageCode.PANG, { - provides: "", - contentType: "" - }], - this.hostClient.monitorStream, - ); + this.sendPang({ provides: "", contentType: "" }); res(); } else if (stream && this.hostClient.outputStream) { @@ -722,13 +733,10 @@ export class Runner implements IComponent { : this.hostClient.outputStream ); - MessageUtils.writeMessageOnStream( - [RunnerMessageCode.PANG, { - provides: intermediate.topic || "", - contentType: intermediate.contentType || "" - }], - this.hostClient.monitorStream, - ); + this.provides = intermediate.topic || ""; + this.providesContentType = intermediate.contentType || ""; + + this.sendPang({ provides: this.provides, contentType: this.providesContentType }); } else { // TODO: this should push a PANG message with the sequence description this.logger.debug("Sequence did not output a stream"); From aa2e5d7183115eab807e122fc5de73cb313ad01d Mon Sep 17 00:00:00 2001 From: patuwwy Date: Mon, 22 Jan 2024 22:37:00 +0000 Subject: [PATCH 52/62] Send InstanceStatus in Pang --- packages/host/src/lib/csi-controller.ts | 4 +-- packages/host/src/lib/csi-dispatcher.ts | 4 +-- packages/runner/src/runner.ts | 34 +++++++++++++++++++----- packages/types/src/messages/handshake.ts | 2 ++ 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 59485326c..afe45e3fa 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -471,7 +471,7 @@ export class CSIController extends TypedEmitter { .pipe(this.upStreams[CC.CONTROL]); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { - const payload = message[1].payload; + const { status, payload 
} = message[1]; if (!payload) { this.emit("error", "No payload in ping!"); @@ -479,7 +479,7 @@ export class CSIController extends TypedEmitter { return null; } - this.status = InstanceStatus.RUNNING; + this.status = status || InstanceStatus.RUNNING; this.args = payload.args; this.info.created = new Date(message[1].created); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 35fd31a56..f08fd6b25 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -2,7 +2,7 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; import { RunnerMessageCode } from "@scramjet/symbols"; -import { ContentType, EventMessageData, HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; +import { ContentType, EventMessageData, HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, InstanceStatus, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; import { TypedEmitter } from "@scramjet/utility"; import { CSIController, CSIControllerInfo } from "./csi-controller"; import { InstanceStore } from "./instance-store"; @@ -60,7 +60,7 @@ export class CSIDispatcher extends TypedEmitter { instanceProxy: HostProxy) { sequenceInfo.instances = sequenceInfo.instances || []; - const csiController = new CSIController({ id, sequenceInfo, payload }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); + const csiController = new CSIController({ id, sequenceInfo, payload, status: InstanceStatus.INITIALIZING }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); this.logger.trace("CSIController 
created", id, sequenceInfo); diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index d306e0487..9d67924f8 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -14,8 +14,10 @@ import { IComponent, IHostClient, IObjectLogger, + InstanceStatus, MaybePromise, MonitoringRateMessageData, + PangMessageData, RunnerConnectInfo, SequenceInfo, StopSequenceMessageData, @@ -155,6 +157,8 @@ export class Runner implements IComponent { private provides?: string; private providesContentType?: string; + private status: InstanceStatus = InstanceStatus.STARTING; + private runnerConnectInfo: RunnerConnectInfo = { appConfig: {} }; @@ -334,10 +338,13 @@ export class Runner implements IComponent { if (!this.stopExpected) { this.logger.trace(`Exiting (unexpected, ${RunnerExitCode.KILLED})`); + this.status = InstanceStatus.KILLING; + return this.exit(RunnerExitCode.KILLED); } this.logger.trace("Exiting (expected)"); + this.status = InstanceStatus.STOPPING; return this.exit(RunnerExitCode.STOPPED); } @@ -358,6 +365,8 @@ export class Runner implements IComponent { } if (!data.canCallKeepalive || !this.keepAliveRequested) { + this.status = InstanceStatus.STOPPING; + MessageUtils.writeMessageOnStream( [RunnerMessageCode.SEQUENCE_STOPPED, { sequenceError }], this.hostClient.monitorStream ); @@ -419,7 +428,7 @@ export class Runner implements IComponent { return { appConfig, args }; } - sendPang(args: { contentType?: string, requires?: string, provides?: string }) { + sendPang(args: PangMessageData) { MessageUtils.writeMessageOnStream( [RunnerMessageCode.PANG, args], this.hostClient.monitorStream); } @@ -474,6 +483,8 @@ export class Runner implements IComponent { this.logger.error("Sequence error:", error.stack); } + this.status = InstanceStatus.ERRORED; + return this.exit(RunnerExitCode.SEQUENCE_FAILED_ON_START); } @@ -481,13 +492,18 @@ export class Runner implements IComponent { await this.runSequence(sequence, args); 
this.logger.trace(`Sequence completed. Waiting ${this.context.exitTimeout}ms with exit.`); + + this.status = InstanceStatus.COMPLETED; this.writeMonitoringMessage([RunnerMessageCode.SEQUENCE_COMPLETED, { timeout: this.context.exitTimeout }]); await defer(this.context.exitTimeout); + return this.exit(0); } catch (error: any) { this.logger.error("Error occurred during Sequence execution: ", error.stack); + this.status = InstanceStatus.ERRORED; + return this.exit(RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION); } } @@ -511,11 +527,9 @@ export class Runner implements IComponent { try { this.logger.info("Cleaning up streams"); - - // await promiseTimeout( - // this.hostClient.disconnect(), 5000 - // ); } catch (e: any) { + this.status = InstanceStatus.ERRORED; + exitcode = RunnerExitCode.CLEANUP_FAILED; } @@ -594,7 +608,8 @@ export class Runner implements IComponent { system: { processPID: process.pid.toString() } - } + }, + status: this.status }], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); @@ -646,6 +661,8 @@ export class Runner implements IComponent { try { this.logger.debug("Processing function on index", sequence.length - itemsLeftInSequence - 1); + this.status = InstanceStatus.RUNNING; + out = func.call( this.context, stream, @@ -656,6 +673,8 @@ export class Runner implements IComponent { } catch (error: any) { this.logger.error("Function errored", sequence.length - itemsLeftInSequence, error.stack); + this.status = InstanceStatus.ERRORED; + throw new RunnerError("SEQUENCE_RUNTIME_ERROR"); } @@ -663,9 +682,12 @@ export class Runner implements IComponent { intermediate = await out; this.logger.info("Function output type", sequence.length - itemsLeftInSequence - 1, typeof out); + if (!intermediate) { this.logger.error("Sequence ended premature"); + this.status = InstanceStatus.ERRORED; + throw new RunnerError("SEQUENCE_ENDED_PREMATURE"); } else if (typeof intermediate === "object" && intermediate instanceof DataStream) { 
this.logger.debug("Sequence function returned DataStream.", sequence.length - itemsLeftInSequence - 1); diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index 2cf5ed7e7..c454e83e6 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -1,6 +1,7 @@ import { RunnerMessageCode } from "@scramjet/symbols"; import { SequenceInfo } from "../sequence-adapter"; import { StartSequencePayload } from "../rest-api-sth"; +import { InstanceStatus } from "../instance"; /** * Runner sends a handshake message to the Cloud Server Host (CSH) after it is. @@ -20,6 +21,7 @@ export type PingMessageData = { payload: StartSequencePayload; sequenceInfo: SequenceInfo; created: number; + status: InstanceStatus; }; export type PangMessageData = { From cbc770c6f745a8a891bef5b92d7b625a9bdaa865 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 23 Jan 2024 12:26:34 +0000 Subject: [PATCH 53/62] Try to get instance from external source if not exists on Hosts --- packages/host/src/lib/host.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index e6da870b1..c9a2231e8 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -327,11 +327,26 @@ export class Host implements IComponent { } /** + * Check for Sequence. * Pass information about connected instance to monitoring and platform services. * * @param {Instance} instance Instance data. */ async handleDispatcherEstablishedEvent(instance: Instance) { + const seq = this.sequenceStore.getById(instance.sequence.id); + + this.logger.info("Checking Sequence..."); + + if (!seq) { + this.logger.info("Sequence not found. Checking Store..."); + + try { + await this.getExternalSequence(instance.sequence.id); + } catch (e) { + this.logger.warn("Sequence not found in store. 
Instance has no Sequence."); + } + } + this.auditor.auditInstance(instance.id, InstanceMessageCode.INSTANCE_CONNECTED); await this.cpmConnector?.sendInstanceInfo({ From 891fa75b6f9c779aec4c383a452d85f9d1d12093 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 23 Jan 2024 16:47:55 +0000 Subject: [PATCH 54/62] Fix sending sequences info on platform connect --- packages/host/src/lib/host.ts | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index c9a2231e8..fd6d2bd9f 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -333,17 +333,19 @@ export class Host implements IComponent { * @param {Instance} instance Instance data. */ async handleDispatcherEstablishedEvent(instance: Instance) { - const seq = this.sequenceStore.getById(instance.sequence.id); - this.logger.info("Checking Sequence..."); + const seq = this.sequenceStore.getById(instance.sequence.id); + if (!seq) { this.logger.info("Sequence not found. Checking Store..."); try { - await this.getExternalSequence(instance.sequence.id); + const extSeq = await this.getExternalSequence(instance.sequence.id); + + this.logger.info("Sequence acquired.", extSeq); } catch (e) { - this.logger.warn("Sequence not found in store. Instance has no Sequence."); + this.logger.warn("Sequence not found in Store. Instance has no Sequence."); } } @@ -594,7 +596,22 @@ export class Host implements IComponent { connector.init(); connector.on("connect", async () => { - await connector.sendSequencesInfo(this.getSequences()); + await defer(3000); + //await connector.sendSequencesInfo(this.getSequences()); + await Promise.all( + this.getSequences() + .map( + s => + connector.sendSequenceInfo( + s.id, + SequenceMessageCode.SEQUENCE_CREATED, + { + ...s.config, + location: this.getId()! 
+ } as unknown as STHRestAPI.GetSequenceResponse + ) + ) + ); await connector.sendInstancesInfo(this.getInstances()); await connector.sendTopicsInfo(this.getTopics()); @@ -998,6 +1015,8 @@ export class Host implements IComponent { } async getExternalSequence(id: string): Promise { + this.logger.info("Requesting Sequence from external source"); + let packageStream: IncomingMessage | undefined; try { @@ -1021,7 +1040,7 @@ export class Host implements IComponent { return this.sequenceStore.getById(result.id)!; } catch (e: any) { - this.logger.error("Error requesting sequence", e.message); + this.logger.warn("Can't aquire Sequence from external source", e.message); throw new Error(ReasonPhrases.NOT_FOUND); } From 10be6f2864ee88118a322b0ca44457e461bedcc3 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 23 Jan 2024 22:00:42 +0000 Subject: [PATCH 55/62] Handle failed start --- package.json | 2 +- .../adapters/src/process-instance-adapter.ts | 14 +- packages/host/src/lib/csi-controller.ts | 75 +---- packages/host/src/lib/csi-dispatcher.ts | 269 ++++++++++-------- packages/host/src/lib/host.ts | 54 ++-- packages/host/src/lib/utils.ts | 70 +++++ 6 files changed, 260 insertions(+), 224 deletions(-) create mode 100644 packages/host/src/lib/utils.ts diff --git a/package.json b/package.json index 3e7a9ea5f..9d75e0c47 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "lint": "TIMING=1 NODE_OPTIONS=\"--max-old-space-size=2048\" scripts/run-script.js -w modules -j 4 -e \"! ls .eslintrc* > /dev/null || npx eslint ./ --ext .ts --ext .js --cache --cache-strategy=content\"", "lint:uncached": "find . 
-name .eslintcache -delete && yarn lint", "start": "DEVELOPMENT=true node dist/sth/bin/hub.js", - "start:dev": "DEVELOPMENT=true ts-node packages/sth/src/bin/hub.ts", + "start:dev": "ts-node packages/sth/src/bin/hub.ts", "start:dev:cli": "DEVELOPMENT=true ts-node packages/cli/src/bin/index.ts", "install:clean": "yarn clean && yarn clean:modules && yarn install", "postinstall": "scripts/run-script.js -v -w modules install:deps", diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 7f05f3134..6679a545b 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -35,6 +35,7 @@ class ProcessInstanceAdapter implements sthConfig: STHConfiguration; processPID: number = -1; + exitCode = -1; id?: string | undefined; private runnerProcess?: ChildProcess; @@ -171,6 +172,11 @@ class ProcessInstanceAdapter implements runnerProcess.unref(); + runnerProcess.on("exit", (code) => { + this.exitCode = Number(code) || -1; + this.logger.info("Runner exit code", code); + }); + this.crashLogStreams = Promise.all([runnerProcess.stdout, runnerProcess.stderr].map(streamToString)); this.runnerProcess = runnerProcess; @@ -187,7 +193,13 @@ class ProcessInstanceAdapter implements async waitUntilExit(_config: InstanceConfig, _instanceId: string, _sequenceInfo: SequenceInfo): Promise { if (this.runnerProcess) { const [statusCode, signal] = await new Promise<[number | null, NodeJS.Signals | null]>( - (res) => this.runnerProcess?.on("exit", (code, sig) => res([code, sig])) + (res) => { + if (this.exitCode > -1) { + res([this.exitCode, null]); + } + + this.runnerProcess?.on("exit", (code, sig) => res([code, sig])); + } ); this.logger.trace("Runner process exited", this.runnerProcess?.pid); diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index afe45e3fa..ec5a9dda0 100644 --- a/packages/host/src/lib/csi-controller.ts +++ 
b/packages/host/src/lib/csi-controller.ts @@ -6,7 +6,7 @@ import { MessageUtilities } from "@scramjet/model"; import { development } from "@scramjet/sth-config"; -import { CommunicationChannel as CC, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; +import { CommunicationChannel as CC, RunnerMessageCode } from "@scramjet/symbols"; import { APIRoute, AppConfig, @@ -45,6 +45,7 @@ import { ObjLogger } from "@scramjet/obj-logger"; import { RunnerConnectInfo } from "@scramjet/types/src/runner-connect"; import { cancellableDefer, CancellablePromise, defer, promiseTimeout, TypedEmitter } from "@scramjet/utility"; import { ReasonPhrases } from "http-status-codes"; +import { mapRunnerExitCode } from "./utils"; /** * @TODO: Runner exits after 10secs and k8s client checks status every 500ms so we need to give it some time @@ -239,7 +240,7 @@ export class CSIController extends TypedEmitter { async main() { this.status = InstanceStatus.RUNNING; - this.logger.trace("Instance started", this.status); + this.logger.trace("Main. Current status:", this.status); let code = -1; @@ -315,7 +316,7 @@ export class CSIController extends TypedEmitter { }; this.instancePromise = instanceMain() - .then((exitcode) => this.mapRunnerExitCode(exitcode)) + .then((exitcode) => mapRunnerExitCode(exitcode, this.sequence)) .catch((error) => { this.logger.error("Instance promise rejected", error); this.initResolver?.rej(error); @@ -336,74 +337,6 @@ export class CSIController extends TypedEmitter { }); } - // eslint-disable-next-line complexity - private mapRunnerExitCode(exitcode: number): Promise< - { message: string, exitcode: number, status: InstanceStatus } - > { - // eslint-disable-next-line default-case - switch (exitcode) { - case RunnerExitCode.INVALID_ENV_VARS: { - return Promise.reject({ - message: "Runner was started with invalid configuration. 
This is probably a bug in STH.", - exitcode: RunnerExitCode.INVALID_ENV_VARS, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.PODS_LIMIT_REACHED: { - return Promise.reject({ - message: "Instance limit reached", - exitcode: RunnerExitCode.PODS_LIMIT_REACHED, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.INVALID_SEQUENCE_PATH: { - return Promise.reject({ - message: `Sequence entrypoint path ${this.sequence.config.entrypointPath} is invalid. ` + - "Check `main` field in Sequence package.json", - exitcode: RunnerExitCode.INVALID_SEQUENCE_PATH, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.SEQUENCE_FAILED_ON_START: { - return Promise.reject({ - message: "Sequence failed on start", - exitcode: RunnerExitCode.SEQUENCE_FAILED_ON_START, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION: { - return Promise.reject({ - message: "Sequence failed during execution", - exitcode: RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.SEQUENCE_UNPACK_FAILED: { - return Promise.reject({ - message: "Sequence unpack failed", - exitcode: RunnerExitCode.SEQUENCE_UNPACK_FAILED, - status: InstanceStatus.ERRORED - }); - } - case RunnerExitCode.KILLED: { - return Promise.resolve({ - message: "Instance killed", exitcode: RunnerExitCode.KILLED, status: InstanceStatus.COMPLETED - }); - } - case RunnerExitCode.STOPPED: { - return Promise.resolve({ - message: "Instance stopped", exitcode: RunnerExitCode.STOPPED, status: InstanceStatus.COMPLETED - }); - } - } - - if (exitcode > 0) { - return Promise.reject({ message: "Runner failed", exitcode, status: InstanceStatus.ERRORED }); - } - - return Promise.resolve({ message: "Instance completed", exitcode, status: InstanceStatus.COMPLETED }); - } - async cleanup() { await this.instanceAdapter.cleanup(); diff --git a/packages/host/src/lib/csi-dispatcher.ts 
b/packages/host/src/lib/csi-dispatcher.ts index f08fd6b25..d0f8691d4 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -10,15 +10,17 @@ import { ServiceDiscovery } from "./serviceDiscovery/sd-adapter"; import TopicId from "./serviceDiscovery/topicId"; import { Readable, Writable } from "stream"; import SequenceStore from "./sequenceStore"; +import { mapRunnerExitCode } from "./utils"; export type DispatcherErrorEventData = { id:string, err: any }; export type DispatcherInstanceEndEventData = { id: string, code: number, info: CSIControllerInfo & { executionTime: number }, sequence: SequenceInfoInstance}; export type DispatcherInstanceTerminatedEventData = DispatcherInstanceEndEventData; export type DispatcherInstanceEstablishedEventData = Instance; +export type DispatcherChimeEvent = { id: string, language: string, seqId: string }; type Events = { pang: (payload: MessageDataType) => void; - hourChime: () => void; + hourChime: (data: DispatcherChimeEvent) => void; error: (data: DispatcherErrorEventData) => void; stop: (code: number) => void; end: (data: DispatcherInstanceEndEventData) => void; @@ -60,133 +62,142 @@ export class CSIDispatcher extends TypedEmitter { instanceProxy: HostProxy) { sequenceInfo.instances = sequenceInfo.instances || []; - const csiController = new CSIController({ id, sequenceInfo, payload, status: InstanceStatus.INITIALIZING }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); + const csiController = new CSIController({ + id, + sequenceInfo, + payload, + status: InstanceStatus.INITIALIZING + }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); this.logger.trace("CSIController created", id, sequenceInfo); csiController.logger.pipe(this.logger, { end: false }); communicationHandler.logger.pipe(this.logger, { end: false }); - csiController.on("error", (err) => { - this.logger.error("CSIController errored", err.message, err.exitcode); - 
this.emit("error", { id, err }); - }); - - csiController.on("event", async (event: EventMessageData) => { - this.logger.info("Received event", event); - this.emit("event", { event, id: csiController.id }); - }); - - // eslint-disable-next-line complexity - csiController.on("pang", async (data: PangMessageData) => { - this.logger.trace("PANG received", [csiController.id, data]); - - if ((data.requires || data.provides) && !data.contentType) { - this.logger.warn("Missing topic content-type"); - } - - if (data.requires && !csiController.inputRouted && data.contentType) { - this.logger.trace("Routing topic to Sequence input", data.requires); - - await this.serviceDiscovery.routeTopicToStream( - { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, - csiController.getInputStream() - ); + csiController + .on("error", (err) => { + this.logger.error("CSIController errored", err.message, err.exitcode); + this.emit("error", { id, err }); + }) + .on("event", async (event: EventMessageData) => { + this.logger.info("Received event", event); + this.emit("event", { event, id: csiController.id }); + }) + .on("hourChime", () => { + this.emit("hourChime", { + id: csiController.id, + language: csiController.sequence.config.language, + seqId: csiController.sequence.id + }); + }) - csiController.inputRouted = true; + // eslint-disable-next-line complexity + .on("pang", async (data: PangMessageData) => { + this.logger.trace("PANG received", [csiController.id, data]); - await this.serviceDiscovery.update({ - requires: data.requires, contentType: data.contentType, topicName: data.requires, status: "add" - }); - } + if ((data.requires || data.provides) && !data.contentType) { + this.logger.warn("Missing topic content-type"); + } - if (data.provides && !csiController.outputRouted && data.contentType) { - this.logger.trace("Routing Sequence output to topic", data.provides); + if (data.requires && !csiController.inputRouted && data.contentType) { + 
this.logger.trace("Routing topic to Sequence input", data.requires); - await this.serviceDiscovery.routeStreamToTopic( - csiController.getOutputStream(), - { topic: new TopicId(data.provides), contentType: data.contentType as ContentType } - ); + await this.serviceDiscovery.routeTopicToStream( + { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, + csiController.getInputStream() + ); - csiController.outputRouted = true; + csiController.inputRouted = true; - await this.serviceDiscovery.update({ - provides: data.provides, contentType: data.contentType!, topicName: data.provides, status: "add" - }); - } - }); + await this.serviceDiscovery.update({ + requires: data.requires, contentType: data.contentType, topicName: data.requires, status: "add" + }); + } - csiController.on("ping", (pingMessage: PingMessageData) => { - this.logger.info("Ping received", JSON.stringify(pingMessage)); + if (data.provides && !csiController.outputRouted && data.contentType) { + this.logger.trace("Routing Sequence output to topic", data.provides); - if (pingMessage.sequenceInfo.config.type !== this.STHConfig.runtimeAdapter) { - this.logger.error("Incorrect Instance adapter"); + await this.serviceDiscovery.routeStreamToTopic( + csiController.getOutputStream(), + { topic: new TopicId(data.provides), contentType: data.contentType as ContentType } + ); - return; - } + csiController.outputRouted = true; - const seq = this.sequenceStore.getById(csiController.sequence.id); + await this.serviceDiscovery.update({ + provides: data.provides, contentType: data.contentType!, topicName: data.provides, status: "add" + }); + } + }) + .on("ping", (pingMessage: PingMessageData) => { + this.logger.info("Ping received", JSON.stringify(pingMessage)); - if (seq) { - seq.instances.push(csiController.id); - } else { - this.logger.warn("Instance of not existing sequence connected"); - //@TODO: ? 
- } + if (pingMessage.sequenceInfo.config.type !== this.STHConfig.runtimeAdapter) { + this.logger.error("Incorrect Instance adapter"); - this.emit("established", { id: pingMessage.id, sequence: pingMessage.sequenceInfo }); - }); + return; + } - csiController.on("end", async (code: number) => { - this.logger.trace("csiControllerontrolled ended", `id: ${csiController.id}`, `Exit code: ${code}`); + const seq = this.sequenceStore.getById(csiController.sequence.id); - if (csiController.provides && csiController.provides !== "") { - csiController.getOutputStream().unpipe(this.serviceDiscovery.getData( - { - topic: new TopicId(csiController.provides), - contentType: "" as ContentType - } - ) as Writable); - } + if (seq) { + seq.instances.push(csiController.id); + } else { + this.logger.warn("Instance of not existing sequence connected"); + //@TODO: ? + } - csiController.logger.unpipe(this.logger); + this.emit("established", { id: pingMessage.id, sequence: pingMessage.sequenceInfo }); + }) + .on("end", async (code: number) => { + this.logger.trace("csiControllerontrolled ended", `id: ${csiController.id}`, `Exit code: ${code}`); + + if (csiController.provides && csiController.provides !== "") { + csiController.getOutputStream().unpipe(this.serviceDiscovery.getData( + { + topic: new TopicId(csiController.provides), + contentType: "" as ContentType + } + ) as Writable); + } - this.emit("end", { - id, - code, - info: { - executionTime: csiController.executionTime - }, - sequence: csiController.sequence - }); + csiController.logger.unpipe(this.logger); - const seq = this.sequenceStore.getById(csiController.sequence.id); + this.emit("end", { + id, + code, + info: { + executionTime: csiController.executionTime + }, + sequence: csiController.sequence + }); - if (seq) { - seq.instances = seq.instances.filter(i => i !== csiController.id); - } + const seq = this.sequenceStore.getById(csiController.sequence.id); - delete this.instanceStore[csiController.id]; - }); + if (seq) { + 
seq.instances = seq.instances.filter(i => i !== csiController.id); + } - csiController.once("terminated", (code) => { - if (csiController.requires && csiController.requires !== "") { - (this.serviceDiscovery.getData({ - topic: new TopicId(csiController.requires), - contentType: "" as ContentType, - }) as Readable - ).unpipe(csiController.getInputStream()!); - } + delete this.instanceStore[csiController.id]; + }) + .once("terminated", (code) => { + if (csiController.requires && csiController.requires !== "") { + (this.serviceDiscovery.getData({ + topic: new TopicId(csiController.requires), + contentType: "" as ContentType, + }) as Readable + ).unpipe(csiController.getInputStream()!); + } - this.emit("terminated", { - id, - code, - info: { - executionTime: csiController.executionTime - }, - sequence: csiController.sequence + this.emit("terminated", { + id, + code, + info: { + executionTime: csiController.executionTime + }, + sequence: csiController.sequence + }); }); - }); csiController.start().catch((e) => { this.logger.error("CSIC start error", csiController.id, e); @@ -201,6 +212,8 @@ export class CSIDispatcher extends TypedEmitter { } async startRunner(sequence: SequenceInfo, payload: STHRestAPI.StartSequencePayload) { + this.logger.debug("Preparing Runner..."); + const limits = { memory: payload.limits?.memory || this.STHConfig.docker.runner.maxMem }; @@ -209,13 +222,18 @@ export class CSIDispatcher extends TypedEmitter { const instanceAdapter = getInstanceAdapter(this.STHConfig.runtimeAdapter, this.STHConfig, id); const instanceConfig: InstanceConfig = { ...sequence.config, - limits: limits, + limits, instanceAdapterExitDelay: this.STHConfig.timings.instanceAdapterExitDelay }; instanceAdapter.logger.pipe(this.logger); + this.logger.debug("Initializing Adapter..."); + await instanceAdapter.init(); + + this.logger.debug("Dispatching..."); + await instanceAdapter.dispatch( instanceConfig, this.STHConfig.host.instancesServerPort, @@ -224,25 +242,38 @@ export class 
CSIDispatcher extends TypedEmitter { payload ); - await new Promise((resolve, _reject) => { - const resolveFunction = (instance: Instance) => { - if (instance.id === id) { - this.off("established", resolveFunction); - resolve(); - } - }; + this.logger.debug("Dispatched."); + this.logger.debug("Waiting for connection..."); - this.on("established", resolveFunction); - }); + return await Promise.race([ + new Promise((resolve, _reject) => { + const resolveFunction = (instance: Instance) => { + if (instance.id === id) { + this.logger.debug("Established", id); - return { - id, - appConfig: payload.appConfig, - args: payload.args, - sequenceId: sequence.id, - info: {}, - limits, - sequence - }; + this.off("established", resolveFunction); + resolve(); + } + }; + + this.on("established", resolveFunction); + }).then(() => ({ + id, + appConfig: payload.appConfig, + args: payload.args, + sequenceId: sequence.id, + info: {}, + limits, + sequence + })), + // handle failed start + Promise.resolve() + .then(() => instanceAdapter.waitUntilExit(undefined, id, sequence)) + .then(async (exitCode) => { + this.logger.info("Exited before established", id, exitCode); + + return mapRunnerExitCode(exitCode, sequence); + }) + ]); } } diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index fd6d2bd9f..e469dcc53 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -62,7 +62,7 @@ import SequenceStore from "./sequenceStore"; import { loadModule, logger as loadModuleLogger } from "@scramjet/module-loader"; -import { CSIDispatcher, DispatcherErrorEventData, DispatcherInstanceEndEventData, DispatcherInstanceTerminatedEventData } from "./csi-dispatcher"; +import { CSIDispatcher, DispatcherChimeEvent as DispatcherChimeEventData, DispatcherErrorEventData, DispatcherInstanceEndEventData, DispatcherInstanceEstablishedEventData, DispatcherInstanceTerminatedEventData } from "./csi-dispatcher"; import { parse } from "path"; @@ -323,6 +323,9 @@ export 
class Host implements IComponent { }) .on("error", (errorData: DispatcherErrorEventData) => { this.pushTelemetry("Instance error", { ...errorData }, "error"); + }) + .on("hourChime", (data: DispatcherChimeEventData) => { + this.pushTelemetry("Instance hour chime", data); }); } @@ -330,9 +333,9 @@ export class Host implements IComponent { * Check for Sequence. * Pass information about connected instance to monitoring and platform services. * - * @param {Instance} instance Instance data. + * @param {DispatcherInstanceEstablishedEventData} instance Instance data. */ - async handleDispatcherEstablishedEvent(instance: Instance) { + async handleDispatcherEstablishedEvent(instance: DispatcherInstanceEstablishedEventData) { this.logger.info("Checking Sequence..."); const seq = this.sequenceStore.getById(instance.sequence.id); @@ -575,11 +578,13 @@ export class Host implements IComponent { this.logger.warn("Sequence id not found for startup config", seqenceConfig); return; } + await this.csiDispatcher.startRunner(sequence, { appConfig: seqenceConfig.appConfig || {}, args: seqenceConfig.args, instanceId: seqenceConfig.instanceId }); + this.logger.debug("Starting sequence based on config", seqenceConfig); }) .run(); @@ -1085,6 +1090,7 @@ export class Host implements IComponent { if (this.cpmConnector?.connected) { sequence ||= await this.getExternalSequence(sequenceId).catch((error: ReasonPhrases) => { this.logger.error("Error getting sequence from external sources", error); + return undefined; }); } @@ -1098,39 +1104,23 @@ export class Host implements IComponent { try { const runner = await this.csiDispatcher.startRunner(sequence, payload); - // @todo more info - // await this.cpmConnector?.sendInstanceInfo({ - // id: runner.id, - // appConfig: runner.appConfig, - // args: runner.args, - // sequence: (info => { - // // eslint-disable-next-line @typescript-eslint/no-unused-vars - // const { instances, ...rest } = info; - - // return rest; - // })(sequence), - // ports: 
runner.info.ports, - // created: csic.info.created, - // started: csic.info.started, - // status: csic.status, - // }, InstanceMessageCode.INSTANCE_STARTED); - - this.logger.debug("Instance limits", runner.limits); - this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); - this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); - - // csic.on("hourChime", () => { - // this.pushTelemetry("Instance hour chime", { id: csic.id, language: csic.sequence.config.language, seqId: csic.sequence.id }); - // }); + if ("id" in runner) { + this.logger.debug("Instance limits", runner.limits); + this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); + this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); - return { - opStatus: ReasonPhrases.OK, - message: `Sequence ${runner.id} starting`, - id: runner.id - }; + return { + opStatus: ReasonPhrases.OK, + message: `Sequence ${runner.id} starting`, + id: runner.id + }; + } else { + throw runner; + } } catch (error: any) { this.pushTelemetry("Instance start failed", { error: error.message }, "error"); this.logger.error(error.message); + return { opStatus: ReasonPhrases.BAD_REQUEST, error: error.message diff --git a/packages/host/src/lib/utils.ts b/packages/host/src/lib/utils.ts new file mode 100644 index 000000000..c346f9cd6 --- /dev/null +++ b/packages/host/src/lib/utils.ts @@ -0,0 +1,70 @@ +import { RunnerExitCode } from "@scramjet/symbols"; +import { InstanceStatus, SequenceInfo } from "@scramjet/types"; + +// eslint-disable-next-line complexity +export const mapRunnerExitCode = async (exitcode: number, sequence: SequenceInfo): Promise< +{ message: string, exitcode: number, status: InstanceStatus } +> => { +// eslint-disable-next-line default-case + switch (exitcode) { + case RunnerExitCode.INVALID_ENV_VARS: { + return Promise.reject({ 
+ message: "Runner was started with invalid configuration. This is probably a bug in STH.", + exitcode: RunnerExitCode.INVALID_ENV_VARS, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.PODS_LIMIT_REACHED: { + return Promise.reject({ + message: "Instance limit reached", + exitcode: RunnerExitCode.PODS_LIMIT_REACHED, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.INVALID_SEQUENCE_PATH: { + return Promise.reject({ + message: `Sequence entrypoint path ${sequence.config.entrypointPath} is invalid. ` + + "Check `main` field in Sequence package.json", + exitcode: RunnerExitCode.INVALID_SEQUENCE_PATH, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.SEQUENCE_FAILED_ON_START: { + return Promise.reject({ + message: "Sequence failed on start", + exitcode: RunnerExitCode.SEQUENCE_FAILED_ON_START, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION: { + return Promise.reject({ + message: "Sequence failed during execution", + exitcode: RunnerExitCode.SEQUENCE_FAILED_DURING_EXECUTION, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.SEQUENCE_UNPACK_FAILED: { + return Promise.reject({ + message: "Sequence unpack failed", + exitcode: RunnerExitCode.SEQUENCE_UNPACK_FAILED, + status: InstanceStatus.ERRORED + }); + } + case RunnerExitCode.KILLED: { + return Promise.resolve({ + message: "Instance killed", exitcode: RunnerExitCode.KILLED, status: InstanceStatus.COMPLETED + }); + } + case RunnerExitCode.STOPPED: { + return Promise.resolve({ + message: "Instance stopped", exitcode: RunnerExitCode.STOPPED, status: InstanceStatus.COMPLETED + }); + } + } + + if (exitcode > 0) { + return Promise.reject({ message: "Runner failed", exitcode, status: InstanceStatus.ERRORED }); + } + + return Promise.resolve({ message: "Instance completed", exitcode, status: InstanceStatus.COMPLETED }); +}; From 0afc68553bd0ba7a764e1414199fb4037acf29aa Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 
23 Jan 2024 23:23:02 +0000 Subject: [PATCH 56/62] Dispatching error code handling --- packages/adapters/src/docker-instance-adapter.ts | 4 +++- packages/adapters/src/kubernetes-instance-adapter.ts | 8 +++++--- packages/adapters/src/process-instance-adapter.ts | 4 +++- packages/host/src/lib/csi-controller.ts | 5 +++-- packages/host/src/lib/csi-dispatcher.ts | 12 ++++++++---- packages/host/src/lib/host.ts | 2 +- packages/host/src/lib/serviceDiscovery/sd-adapter.ts | 1 + packages/types/src/lifecycle-adapters.ts | 2 +- 8 files changed, 25 insertions(+), 13 deletions(-) diff --git a/packages/adapters/src/docker-instance-adapter.ts b/packages/adapters/src/docker-instance-adapter.ts index 11b868b37..1f673e6ce 100644 --- a/packages/adapters/src/docker-instance-adapter.ts +++ b/packages/adapters/src/docker-instance-adapter.ts @@ -185,7 +185,7 @@ IComponent { } // eslint-disable-next-line complexity - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { if (!(config.type === "docker" && "container" in config)) { throw new Error("Docker instance adapter run with invalid runner config"); } @@ -241,6 +241,8 @@ IComponent { this.resources.containerId = containerId; // doesnt matter this.logger.trace("Container is running", containerId); + + return 0; } async waitUntilExit(config: InstanceConfig, instanceId:string, _sequenceInfo: SequenceInfo): Promise { diff --git a/packages/adapters/src/kubernetes-instance-adapter.ts b/packages/adapters/src/kubernetes-instance-adapter.ts index d68e3c001..7be572307 100644 --- a/packages/adapters/src/kubernetes-instance-adapter.ts +++ b/packages/adapters/src/kubernetes-instance-adapter.ts @@ -95,13 +95,13 @@ IComponent { } }; } - async dispatch(config: InstanceConfig, instancesServerPort: 
number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { if (config.type !== "kubernetes") { throw new Error(`Invalid config type for kubernetes adapter: ${config.type}`); } if (this.adapterConfig.quotaName && await this.kubeClient.isPodsLimitReached(this.adapterConfig.quotaName)) { - throw Error(RunnerExitCode.PODS_LIMIT_REACHED.toString()); + return RunnerExitCode.PODS_LIMIT_REACHED; } this.limits = config.limits; @@ -163,7 +163,7 @@ IComponent { // This means runner pod was unable to start. So it went from "Pending" to "Failed" state directly. // Return 1 which is Linux exit code for "General Error" since we are not able // to determine what happened exactly. - return; + return RunnerExitCode.UNCAUGHT_EXCEPTION; } this.logger.debug("Copy sequence files to Runner"); @@ -176,6 +176,8 @@ IComponent { await this.kubeClient.exec(runnerName, runnerName, ["unpack.sh", "/package"], process.stdout, this.stdErrorStream, compressedStream, 2); this.logger.debug("Copy command done"); + + return 0; } async waitUntilExit(_config: InstanceConfig, instanceId: string, _sequenceInfo: SequenceInfo): Promise { diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 6679a545b..b4f6822e6 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -138,7 +138,7 @@ class ProcessInstanceAdapter implements return this.waitUntilExit(config, instanceId, sequenceInfo); } - async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise { + async dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): 
Promise { if (config.type !== "process") { throw new Error("Process instance adapter run with invalid runner config"); } @@ -182,6 +182,8 @@ class ProcessInstanceAdapter implements this.runnerProcess = runnerProcess; this.logger.trace("Runner process is running", runnerProcess.pid); + + return 0; } getRunnerInfo(): RunnerConnectInfo["system"] { diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index ec5a9dda0..50408d7bd 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -406,14 +406,14 @@ export class CSIController extends TypedEmitter { this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { const { status, payload } = message[1]; + this.status = status || InstanceStatus.RUNNING; + if (!payload) { this.emit("error", "No payload in ping!"); return null; } - this.status = status || InstanceStatus.RUNNING; - this.args = payload.args; this.info.created = new Date(message[1].created); @@ -517,6 +517,7 @@ export class CSIController extends TypedEmitter { this.logger.info("Handshake", JSON.stringify(message, undefined)); } + //@TODO: ! unhookup ! set proper state for reconnecting ! 
async handleInstanceConnect(streams: DownstreamStreamsConfig) { try { this.hookupStreams(streams); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index d0f8691d4..0b12db7b2 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -234,7 +234,7 @@ export class CSIDispatcher extends TypedEmitter { this.logger.debug("Dispatching..."); - await instanceAdapter.dispatch( + const dispatchResultCode = await instanceAdapter.dispatch( instanceConfig, this.STHConfig.host.instancesServerPort, id, @@ -242,8 +242,12 @@ export class CSIDispatcher extends TypedEmitter { payload ); - this.logger.debug("Dispatched."); - this.logger.debug("Waiting for connection..."); + if (dispatchResultCode !== 0) { + this.logger.warn("Dispatch result code:", dispatchResultCode); + throw await mapRunnerExitCode(dispatchResultCode, sequence); + } + + this.logger.debug("Dispatched. Waiting for connection...", id); return await Promise.race([ new Promise((resolve, _reject) => { @@ -266,7 +270,7 @@ export class CSIDispatcher extends TypedEmitter { limits, sequence })), - // handle failed start + // handle fast fail - before connection is established. 
Promise.resolve() .then(() => instanceAdapter.waitUntilExit(undefined, id, sequence)) .then(async (exitCode) => { diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index e469dcc53..6a167997e 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -1136,7 +1136,7 @@ export class Host implements IComponent { this.logger.debug("Instance connecting", id); if (!this.instancesStore[id]) { - this.logger.info("creating new CSIController unknown istance"); + this.logger.info("Creating new CSIController for unknown Instance"); await this.csiDispatcher.createCSIController( id, diff --git a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts index ddb761c69..43c54e833 100644 --- a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts +++ b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts @@ -169,6 +169,7 @@ export class ServiceDiscovery { const topic = this.createTopicIfNotExist(topicData); topic.acceptPipe(source); + await this.cpmConnector?.sendTopicInfo({ provides: topicData.topic.toString(), topicName: topicData.topic.toString(), diff --git a/packages/types/src/lifecycle-adapters.ts b/packages/types/src/lifecycle-adapters.ts index 5397f1ad6..5d5d17ac4 100644 --- a/packages/types/src/lifecycle-adapters.ts +++ b/packages/types/src/lifecycle-adapters.ts @@ -46,7 +46,7 @@ export interface ILifeCycleAdapterRun extends ILifeCycleAdapterMain { * @param {InstanceConfig} Runner configuration. * @returns {ExitCode} Runner exit code. */ - dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise; + dispatch(config: InstanceConfig, instancesServerPort: number, instanceId: string, sequenceInfo: SequenceInfo, payload: RunnerConnectInfo): Promise; /** * Starts Runner - in essence does `dispatch` and then `waitUntilExit`. 
From 0704147b61b358c1c27aba7d0e4d21d8fac5bd1f Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 24 Jan 2024 20:51:54 +0000 Subject: [PATCH 57/62] Fix sending sequences info on platform connect 2 --- packages/host/src/lib/cpm-connector.ts | 8 +++----- packages/host/src/lib/csi-dispatcher.ts | 2 +- packages/host/src/lib/host.ts | 17 +---------------- .../test/serviceDiscovery/sd-discovery.spec.ts | 2 +- 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/packages/host/src/lib/cpm-connector.ts b/packages/host/src/lib/cpm-connector.ts index 163fbb9f2..2c81d3966 100644 --- a/packages/host/src/lib/cpm-connector.ts +++ b/packages/host/src/lib/cpm-connector.ts @@ -291,7 +291,7 @@ export class CPMConnector extends TypedEmitter { return message; }).catch((e: any) => { - this.logger.error("communicationChannel error", e.message); + this.logger.warn("communicationChannel error", e.message); }); this.communicationStream = new StringStream().JSONStringify().resume(); @@ -386,7 +386,7 @@ export class CPMConnector extends TypedEmitter { }); this.verserClient.once("error", async (error: any) => { - this.logger.error("VerserClient error", error); + this.logger.warn("VerserClient error", error); try { await this.reconnect(); @@ -408,9 +408,7 @@ export class CPMConnector extends TypedEmitter { this.connection?.removeAllListeners(); this.connected = false; - this.logger.trace("Tunnel closed", this.getId()); - - this.logger.info("CPM connection closed."); + this.logger.info("CPM connection closed.", connectionStatusCode, this.getId()); if (this.loadInterval) { clearInterval(this.loadInterval); diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 0b12db7b2..46bf443a4 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -125,7 +125,7 @@ export class CSIDispatcher extends TypedEmitter { csiController.outputRouted = true; await this.serviceDiscovery.update({ - provides: data.provides, 
contentType: data.contentType!, topicName: data.provides, status: "add" + localProvider: csiController.id, provides: data.provides, contentType: data.contentType!, topicName: data.provides, status: "add" }); } }) diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index 6a167997e..f450b68df 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -601,22 +601,7 @@ export class Host implements IComponent { connector.init(); connector.on("connect", async () => { - await defer(3000); - //await connector.sendSequencesInfo(this.getSequences()); - await Promise.all( - this.getSequences() - .map( - s => - connector.sendSequenceInfo( - s.id, - SequenceMessageCode.SEQUENCE_CREATED, - { - ...s.config, - location: this.getId()! - } as unknown as STHRestAPI.GetSequenceResponse - ) - ) - ); + await connector.sendSequencesInfo(this.getSequences().map(s => ({ ...s, status: SequenceMessageCode.SEQUENCE_CREATED }))); await connector.sendInstancesInfo(this.getInstances()); await connector.sendTopicsInfo(this.getTopics()); diff --git a/packages/host/test/serviceDiscovery/sd-discovery.spec.ts b/packages/host/test/serviceDiscovery/sd-discovery.spec.ts index b4a4bc731..deeff275a 100644 --- a/packages/host/test/serviceDiscovery/sd-discovery.spec.ts +++ b/packages/host/test/serviceDiscovery/sd-discovery.spec.ts @@ -21,7 +21,7 @@ beforeEach(() => { serviceDiscovery.cpmConnector = { sendTopicInfo: (data: AddSTHTopicEventData): Promise => { topicInfo = data; - return new Promise((resolve) => resolve()); + return Promise.resolve(); } } as CPMConnector; }); From d840bcfc0e18cc7667d1d0a9e75354e51626e01d Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 24 Jan 2024 22:09:40 +0000 Subject: [PATCH 58/62] Move InstanceStatus to enums. 
Send instance event without separate status --- packages/host/src/lib/cpm-connector.ts | 10 +++----- packages/host/src/lib/csi-controller.ts | 3 +-- packages/host/src/lib/csi-dispatcher.ts | 24 ++++++++++++------ packages/host/src/lib/host.ts | 31 +++++++++++++----------- packages/host/src/lib/utils.ts | 4 +-- packages/runner/src/runner.ts | 3 +-- packages/symbols/src/index.ts | 1 + packages/symbols/src/instance-status.ts | 10 ++++++++ packages/types/src/instance-store.ts | 3 ++- packages/types/src/instance.ts | 10 -------- packages/types/src/messages/handshake.ts | 3 +-- 11 files changed, 55 insertions(+), 47 deletions(-) create mode 100644 packages/symbols/src/instance-status.ts diff --git a/packages/host/src/lib/cpm-connector.ts b/packages/host/src/lib/cpm-connector.ts index 2c81d3966..20bd14967 100644 --- a/packages/host/src/lib/cpm-connector.ts +++ b/packages/host/src/lib/cpm-connector.ts @@ -2,7 +2,7 @@ import fs from "fs"; import { Readable } from "stream"; import * as http from "http"; -import { CPMMessageCode, InstanceMessageCode, SequenceMessageCode } from "@scramjet/symbols"; +import { CPMMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { STHRestAPI, CPMConnectorOptions, @@ -582,14 +582,12 @@ export class CPMConnector extends TypedEmitter { * @param {string} instance Instance details. * @param {SequenceMessageCode} instanceStatus Instance status. 
*/ - async sendInstanceInfo(instance: Instance, instanceStatus: InstanceMessageCode): Promise { - this.logger.trace("Send instance status update", instanceStatus); + async sendInstanceInfo(instance: Instance): Promise { + this.logger.trace("Send instance status update", instance.status); await this.communicationStream?.whenWrote( - [CPMMessageCode.INSTANCE, { instance, status: instanceStatus }] + [CPMMessageCode.INSTANCE, { instance }] ); - - this.logger.trace("Instance status update sent", instanceStatus); } /** diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 50408d7bd..87fec31ca 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -6,7 +6,7 @@ import { MessageUtilities } from "@scramjet/model"; import { development } from "@scramjet/sth-config"; -import { CommunicationChannel as CC, RunnerMessageCode } from "@scramjet/symbols"; +import { CommunicationChannel as CC, InstanceStatus, RunnerMessageCode } from "@scramjet/symbols"; import { APIRoute, AppConfig, @@ -19,7 +19,6 @@ import { ILifeCycleAdapterRun, InstanceLimits, InstanceStats, - InstanceStatus, IObjectLogger, MessageDataType, MonitoringMessageData, diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index 46bf443a4..b877b1965 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -1,8 +1,8 @@ import { getInstanceAdapter } from "@scramjet/adapters"; import { IDProvider } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; -import { RunnerMessageCode } from "@scramjet/symbols"; -import { ContentType, EventMessageData, HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, InstanceStatus, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; +import { InstanceStatus, RunnerMessageCode } from 
"@scramjet/symbols"; +import { ContentType, EventMessageData, HostProxy, ICommunicationHandler, IObjectLogger, Instance, InstanceConfig, MessageDataType, PangMessageData, PingMessageData, STHConfiguration, STHRestAPI, SequenceInfo, SequenceInfoInstance } from "@scramjet/types"; import { TypedEmitter } from "@scramjet/utility"; import { CSIController, CSIControllerInfo } from "./csi-controller"; import { InstanceStore } from "./instance-store"; @@ -249,6 +249,8 @@ export class CSIDispatcher extends TypedEmitter { this.logger.debug("Dispatched. Waiting for connection...", id); + let established = false; + return await Promise.race([ new Promise((resolve, _reject) => { const resolveFunction = (instance: Instance) => { @@ -256,6 +258,7 @@ export class CSIDispatcher extends TypedEmitter { this.logger.debug("Established", id); this.off("established", resolveFunction); + established = true; resolve(); } }; @@ -271,13 +274,18 @@ export class CSIDispatcher extends TypedEmitter { sequence })), // handle fast fail - before connection is established. 
- Promise.resolve() - .then(() => instanceAdapter.waitUntilExit(undefined, id, sequence)) - .then(async (exitCode) => { - this.logger.info("Exited before established", id, exitCode); + Promise.resolve().then( + () => instanceAdapter.waitUntilExit(undefined, id, sequence) + .then(async (exitCode: number) => { + if (!established) { + this.logger.info("Exited before established", id, exitCode); + + return mapRunnerExitCode(exitCode, sequence); + } - return mapRunnerExitCode(exitCode, sequence); - }) + return undefined; + }) + ) ]); } } diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index f450b68df..a4a107d66 100644 --- a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -6,7 +6,7 @@ import { AddressInfo } from "net"; import { Duplex } from "stream"; import { CommunicationHandler, HostError, IDProvider } from "@scramjet/model"; -import { HostHeaders, InstanceMessageCode, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; +import { HostHeaders, InstanceMessageCode, InstanceStatus, RunnerMessageCode, SequenceMessageCode } from "@scramjet/symbols"; import { APIExpose, CPMConnectorOptions, @@ -357,7 +357,7 @@ export class Host implements IComponent { await this.cpmConnector?.sendInstanceInfo({ id: instance.id, sequence: instance.sequence - }, InstanceMessageCode.INSTANCE_CONNECTED); + }); this.pushTelemetry("Instance connected", { id: instance.id, @@ -368,21 +368,22 @@ export class Host implements IComponent { /** * Pass information about ended instance to monitoring and platform services. * - * @param {DispatcherInstanceEndEventData} eventData Event details. + * @param {DispatcherInstanceEndEventData} instance Event details. 
*/ - async handleDispatcherEndEvent(eventData: DispatcherInstanceEndEventData) { - this.auditor.auditInstance(eventData.id, InstanceMessageCode.INSTANCE_ENDED); + async handleDispatcherEndEvent(instance: DispatcherInstanceEndEventData) { + this.auditor.auditInstance(instance.id, InstanceMessageCode.INSTANCE_ENDED); await this.cpmConnector?.sendInstanceInfo({ - id: eventData.id, - sequence: eventData.sequence - }, InstanceMessageCode.INSTANCE_ENDED); + id: instance.id, + status: InstanceStatus.GONE, + sequence: instance.sequence + }); this.pushTelemetry("Instance ended", { - executionTime: eventData.info.executionTime.toString(), - id: eventData.id, - code: eventData.code.toString(), - seqId: eventData.sequence.id + executionTime: instance.info.executionTime.toString(), + id: instance.id, + code: instance.code.toString(), + seqId: instance.sequence.id }); } @@ -1089,7 +1090,7 @@ export class Host implements IComponent { try { const runner = await this.csiDispatcher.startRunner(sequence, payload); - if ("id" in runner) { + if (runner && "id" in runner) { this.logger.debug("Instance limits", runner.limits); this.auditor.auditInstanceStart(runner.id, req as AuditedRequest, runner.limits); this.pushTelemetry("Instance started", { id: runner.id, language: runner.sequence.config.language, seqId: runner.sequence.id }); @@ -1099,9 +1100,11 @@ export class Host implements IComponent { message: `Sequence ${runner.id} starting`, id: runner.id }; - } else { + } else if (runner) { throw runner; } + + throw Error("Unexpected startup error"); } catch (error: any) { this.pushTelemetry("Instance start failed", { error: error.message }, "error"); this.logger.error(error.message); diff --git a/packages/host/src/lib/utils.ts b/packages/host/src/lib/utils.ts index c346f9cd6..87a62d630 100644 --- a/packages/host/src/lib/utils.ts +++ b/packages/host/src/lib/utils.ts @@ -1,5 +1,5 @@ -import { RunnerExitCode } from "@scramjet/symbols"; -import { InstanceStatus, SequenceInfo } from 
"@scramjet/types"; +import { InstanceStatus, RunnerExitCode } from "@scramjet/symbols"; +import { SequenceInfo } from "@scramjet/types"; // eslint-disable-next-line complexity export const mapRunnerExitCode = async (exitcode: number, sequence: SequenceInfo): Promise< diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 9d67924f8..5543b57cd 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -1,6 +1,6 @@ import { RunnerError } from "@scramjet/model"; import { ObjLogger } from "@scramjet/obj-logger"; -import { RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; +import { InstanceStatus, RunnerExitCode, RunnerMessageCode } from "@scramjet/symbols"; import { AppConfig, ApplicationFunction, @@ -14,7 +14,6 @@ import { IComponent, IHostClient, IObjectLogger, - InstanceStatus, MaybePromise, MonitoringRateMessageData, PangMessageData, diff --git a/packages/symbols/src/index.ts b/packages/symbols/src/index.ts index e6db6a71b..83d0796ad 100644 --- a/packages/symbols/src/index.ts +++ b/packages/symbols/src/index.ts @@ -8,5 +8,6 @@ export { SequenceMessageCode } from "./sequence-status-code"; export { OpRecordCode } from "./op-record-code"; export { APIErrorCode } from "./api-error-codes"; export { DisconnectHubErrors } from "./disconnect-error-codes"; +export { InstanceStatus } from "./instance-status"; export * from "./headers"; diff --git a/packages/symbols/src/instance-status.ts b/packages/symbols/src/instance-status.ts new file mode 100644 index 000000000..4fe1f0ae3 --- /dev/null +++ b/packages/symbols/src/instance-status.ts @@ -0,0 +1,10 @@ +export const enum InstanceStatus { + INITIALIZING = "initializing", + STARTING = "starting", + RUNNING = "running", + STOPPING = "stopping", + KILLING = "killing", + COMPLETED ="completed", + ERRORED = "errored", + GONE = "gone" +} diff --git a/packages/types/src/instance-store.ts b/packages/types/src/instance-store.ts index cf7f39ec8..dbee1ccb7 100644 --- 
a/packages/types/src/instance-store.ts +++ b/packages/types/src/instance-store.ts @@ -1,5 +1,6 @@ +import { InstanceStatus } from "@scramjet/symbols"; import { AppConfig } from "./app-config"; -import { InstanceArgs, InstanceId, InstanceStatus } from "./instance"; +import { InstanceArgs, InstanceId } from "./instance"; import { SequenceInfoInstance } from "./sequence-adapter"; export type Instance = { diff --git a/packages/types/src/instance.ts b/packages/types/src/instance.ts index abef13195..d0b7eb229 100644 --- a/packages/types/src/instance.ts +++ b/packages/types/src/instance.ts @@ -2,16 +2,6 @@ export type InstanceId = string; export type InstanceArgs = any[]; -export const enum InstanceStatus { - INITIALIZING = "initializing", - STARTING = "starting", - RUNNING = "running", - STOPPING = "stopping", - KILLING = "killing", - COMPLETED ="completed", - ERRORED = "errored", -} - export type InstanceConnectionInfo = { } diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index c454e83e6..a4791c842 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -1,7 +1,6 @@ -import { RunnerMessageCode } from "@scramjet/symbols"; +import { InstanceStatus, RunnerMessageCode } from "@scramjet/symbols"; import { SequenceInfo } from "../sequence-adapter"; import { StartSequencePayload } from "../rest-api-sth"; -import { InstanceStatus } from "../instance"; /** * Runner sends a handshake message to the Cloud Server Host (CSH) after it is. 
From 1895e1afc63752eb082b96d3c9939ea6ef35f5f1 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Fri, 26 Jan 2024 01:23:08 +0000 Subject: [PATCH 59/62] Fix input & not-serialized output on reconnect --- .../adapters/src/process-instance-adapter.ts | 4 +- packages/host/src/lib/csi-controller.ts | 21 ++-- packages/host/src/lib/csi-dispatcher.ts | 10 +- packages/host/src/lib/host.ts | 2 +- .../src/lib/serviceDiscovery/sd-adapter.ts | 3 +- packages/host/src/lib/socket-server.ts | 1 + packages/runner/src/host-client.ts | 37 ++++++- packages/runner/src/runner.ts | 99 ++++++++++++------- packages/types/src/messages/handshake.ts | 7 +- .../types/src/rest-api-sth/start-sequence.ts | 2 +- 10 files changed, 130 insertions(+), 56 deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index b4f6822e6..4a286aa11 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -254,7 +254,9 @@ class ProcessInstanceAdapter implements process.kill(this.processPID, 0); } catch (e) { this.logger.error("Runner process not exists", e); - /** process not exists */ + + clearInterval(interval); + reject("pid not exists"); } } diff --git a/packages/host/src/lib/csi-controller.ts b/packages/host/src/lib/csi-controller.ts index 87fec31ca..cde9f87ea 100644 --- a/packages/host/src/lib/csi-controller.ts +++ b/packages/host/src/lib/csi-controller.ts @@ -116,6 +116,7 @@ export class CSIController extends TypedEmitter { apiInputEnabled = true; executionTime: number = -1; + inputHeadersSent = false; /** * Topic to which the output stream should be routed @@ -222,7 +223,8 @@ export class CSIController extends TypedEmitter { this.logger.info("Instance status: errored", e); this.status ||= InstanceStatus.ERRORED; - this.executionTime = (Date.now() - this.info.created!.getTime()) / 1000; + + this.executionTime = this.info.created ? 
(Date.now() - this.info.created!.getTime()) / 1000 : -1; this.setExitInfo(e.exitcode, e.message); @@ -403,9 +405,10 @@ export class CSIController extends TypedEmitter { .pipe(this.upStreams[CC.CONTROL]); this.communicationHandler.addMonitoringHandler(RunnerMessageCode.PING, async (message) => { - const { status, payload } = message[1]; + const { status, payload, inputHeadersSent } = message[1]; this.status = status || InstanceStatus.RUNNING; + this.inputHeadersSent = inputHeadersSent; if (!payload) { this.emit("error", "No payload in ping!"); @@ -485,6 +488,10 @@ export class CSIController extends TypedEmitter { this.logger.trace("Received a PING message with ports config"); } + this.inputHeadersSent = !!message[1].inputHeadersSent; + + this.logger.info("Headers already sent for input?", this.inputHeadersSent); + if (this.instanceAdapter.setRunner) { await this.instanceAdapter.setRunner({ ...message[1].payload.system, @@ -537,8 +544,6 @@ export class CSIController extends TypedEmitter { } createInstanceAPIRouter() { - let inputHeadersSent = false; - if (!this.upStreams) { throw new AppError("UNATTACHED_STREAMS"); } @@ -551,11 +556,11 @@ export class CSIController extends TypedEmitter { * @experimental */ this.router.duplex("/inout", (duplex, _headers) => { - if (!inputHeadersSent) { + if (!this.inputHeadersSent) { this.downStreams![CC.IN].write(`Content-Type: ${_headers["content-type"]}\r\n`); this.downStreams![CC.IN].write("\r\n"); - inputHeadersSent = true; + this.inputHeadersSent = true; } (duplex as unknown as DuplexStream).input.pipe(this.downStreams![CC.IN], { end: false }); @@ -597,7 +602,7 @@ export class CSIController extends TypedEmitter { const contentType = req.headers["content-type"]; // @TODO: Check if subsequent requests have the same content-type. 
- if (!inputHeadersSent) { + if (!this.inputHeadersSent) { if (contentType === undefined) { return { opStatus: ReasonPhrases.NOT_ACCEPTABLE, error: "Content-Type must be defined" }; } @@ -605,7 +610,7 @@ export class CSIController extends TypedEmitter { stream.write(`Content-Type: ${contentType}\r\n`); stream.write("\r\n"); - inputHeadersSent = true; + this.inputHeadersSent = true; } return stream; diff --git a/packages/host/src/lib/csi-dispatcher.ts b/packages/host/src/lib/csi-dispatcher.ts index b877b1965..dafc5a344 100644 --- a/packages/host/src/lib/csi-dispatcher.ts +++ b/packages/host/src/lib/csi-dispatcher.ts @@ -66,12 +66,14 @@ export class CSIDispatcher extends TypedEmitter { id, sequenceInfo, payload, - status: InstanceStatus.INITIALIZING + status: InstanceStatus.INITIALIZING, + inputHeadersSent: false }, communicationHandler, config, instanceProxy, this.STHConfig.runtimeAdapter); this.logger.trace("CSIController created", id, sequenceInfo); csiController.logger.pipe(this.logger, { end: false }); + communicationHandler.logger.pipe(this.logger, { end: false }); csiController @@ -99,15 +101,15 @@ export class CSIDispatcher extends TypedEmitter { this.logger.warn("Missing topic content-type"); } - if (data.requires && !csiController.inputRouted && data.contentType) { - this.logger.trace("Routing topic to Sequence input", data.requires); + if (data.requires && data.contentType) { + this.logger.trace("Routing topic to Instance input", data.requires); await this.serviceDiscovery.routeTopicToStream( { topic: new TopicId(data.requires), contentType: data.contentType as ContentType }, csiController.getInputStream() ); - csiController.inputRouted = true; + csiController.inputHeadersSent = true; await this.serviceDiscovery.update({ requires: data.requires, contentType: data.contentType, topicName: data.requires, status: "add" diff --git a/packages/host/src/lib/host.ts b/packages/host/src/lib/host.ts index a4a107d66..e4436d906 100644 --- 
a/packages/host/src/lib/host.ts +++ b/packages/host/src/lib/host.ts @@ -340,7 +340,7 @@ export class Host implements IComponent { const seq = this.sequenceStore.getById(instance.sequence.id); - if (!seq) { + if (!seq && this.cpmConnector?.connected) { this.logger.info("Sequence not found. Checking Store..."); try { diff --git a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts index 43c54e833..e5ea3b3de 100644 --- a/packages/host/src/lib/serviceDiscovery/sd-adapter.ts +++ b/packages/host/src/lib/serviceDiscovery/sd-adapter.ts @@ -179,9 +179,8 @@ export class ServiceDiscovery { } async update(data: STHTopicEventData) { - this.logger.trace("Topic update. Send topic info to CPM", data); - if (this.cpmConnector?.connected) { + this.logger.trace("Topic update. Send topic info to CPM", data); await this.cpmConnector?.sendTopicInfo(data); } } diff --git a/packages/host/src/lib/socket-server.ts b/packages/host/src/lib/socket-server.ts index b17017180..400d5d1c9 100644 --- a/packages/host/src/lib/socket-server.ts +++ b/packages/host/src/lib/socket-server.ts @@ -77,6 +77,7 @@ export class SocketServer extends TypedEmitter implements IComponent { this.server! 
.listen(this.port, this.hostname, () => { this.logger.info("SocketServer on", this.server?.address()); + res(); }) .on("error", rej); diff --git a/packages/runner/src/host-client.ts b/packages/runner/src/host-client.ts index 1bd4267d7..93aeb2104 100644 --- a/packages/runner/src/host-client.ts +++ b/packages/runner/src/host-client.ts @@ -2,8 +2,10 @@ import { ObjLogger } from "@scramjet/obj-logger"; import { CommunicationChannel as CC } from "@scramjet/symbols"; import { IHostClient, IObjectLogger, UpstreamStreamsConfig, } from "@scramjet/types"; +import { defer } from "@scramjet/utility"; import { Agent } from "http"; import net, { Socket, createConnection } from "net"; +import { PassThrough } from "stream"; type HostOpenConnections = [ net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket, net.Socket @@ -42,20 +44,24 @@ class HostClient implements IHostClient { async init(id: string): Promise { const openConnections = await Promise.all( Array.from(Array(9)) - .map(() => { + .map((_e: any, i: number) => { // Error handling for each connection is process crash for now let connection: Socket; try { connection = net.createConnection(this.instancesServerPort, this.instancesServerHost); - connection.on("error", () => {}); + connection.on("error", () => { + this.logger.warn(`${i} Stream error`); + }); connection.setNoDelay(true); } catch (e) { return Promise.reject(e); } return new Promise(res => { - connection.on("connect", () => res(connection)); + connection.on("connect", () => { + res(connection); + }); }); }) .map((connPromised, index) => { @@ -74,6 +80,26 @@ class HostClient implements IHostClient { this._streams = openConnections as HostOpenConnections; + const input = this._streams[CC.IN]; + + const inputTarget = new PassThrough({ emitClose: false }); + + input.on("end", async () => { + await defer(500); + + if ((this._streams![CC.CONTROL] as net.Socket).readableEnded) { + this.logger.info("Input end. 
Control is also ended... We are disconnected."); + } else { + this.logger.info("Input end. Control not ended. We are online. Desired input end."); + inputTarget.end(); + } + }); + + input.pipe(inputTarget, { end: false }); + + this._streams[CC.IN] = inputTarget; + //this._streams[CC.STDIN] = this._streams[CC.STDIN].pipe(new PassThrough({ emitClose: false }), { end: false }); + try { this.bpmux = new BPMux(this._streams[CC.PACKAGE]); } catch (e) { @@ -118,6 +144,11 @@ class HostClient implements IHostClient { const streamsExitedPromised: Promise[] = this.streams.map((stream, i) => new Promise( (res) => { + if ([CC.IN, CC.STDIN, CC.CONTROL].includes(i)) { + res(); + return; + } + if (!hard && "writable" in stream!) { stream .on("error", (e) => { diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 5543b57cd..e140c34ab 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -32,7 +32,7 @@ import { ManagerClient } from "@scramjet/manager-api-client"; import { BufferStream, DataStream, StringStream } from "scramjet"; import { EventEmitter } from "events"; -import { createWriteStream, writeFileSync } from "fs"; +import { WriteStream, createWriteStream, writeFileSync } from "fs"; import { Readable, Writable } from "stream"; import { RunnerAppContext, RunnerProxy } from "./runner-app-context"; @@ -52,6 +52,7 @@ function onBeforeExit(code: number) { } function onException(_error: Error) { + console.error(_error); onBeforeExit(RunnerExitCode.UNCAUGHT_EXCEPTION); } @@ -156,12 +157,17 @@ export class Runner implements IComponent { private provides?: string; private providesContentType?: string; + private inputContentType: string = ""; + private shouldSerialize = false; private status: InstanceStatus = InstanceStatus.STARTING; + private logFile?: WriteStream; private runnerConnectInfo: RunnerConnectInfo = { appConfig: {} }; + instanceOutput?: Readable & HasTopicInformation | void; + constructor( private sequencePath: string, 
private hostClient: IHostClient, @@ -184,7 +190,9 @@ export class Runner implements IComponent { } if (process.env.RUNNER_LOG_FILE) { - this.logger.addOutput(createWriteStream(process.env.RUNNER_LOG_FILE)); + this.logFile ||= createWriteStream(process.env.RUNNER_LOG_FILE); + this.logFile.write("\n\n------------- \n\n"); + this.logger.addOutput(this.logFile); } this.inputDataStream = new DataStream().catch((e: any) => { @@ -253,11 +261,11 @@ export class Runner implements IComponent { } async setInputContentType(headers: any) { - const contentType = headers["content-type"]; + this.inputContentType ||= headers["content-type"]; - this.logger.debug("Content-Type", contentType); + this.logger.debug("Content-Type", this.inputContentType); - mapToInputDataStream(this.hostClient.inputStream, contentType) + mapToInputDataStream(this.hostClient.inputStream, this.inputContentType) .catch((error: any) => { this.logger.error("mapToInputDataStream", error); // TODO: we should be doing some error handling here: @@ -266,6 +274,8 @@ export class Runner implements IComponent { } async handleMonitoringRequest(data: MonitoringRateMessageData): Promise { + this.logger.info("handleMonitoringRequest"); + if (this.monitoringInterval) { clearInterval(this.monitoringInterval); } @@ -273,22 +283,22 @@ export class Runner implements IComponent { let working = false; this.monitoringInterval = setInterval(async () => { + this.logger.info("working", working); + if (working) { - return; + //return; } working = true; await this.reportHealth(1000); working = false; - }, 1000 / data.monitoringRate).unref(); + }, 1000 / data.monitoringRate);//.unref(); } private async reportHealth(timeout?: number) { - const { healthy } = await this.context.monitor(); + this.logger.info("Report health"); - MessageUtils.writeMessageOnStream( - [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream - ); + const { healthy } = await this.context.monitor(); if (timeout) { 
this.monitoringMessageReplyTimeout = setTimeout(async () => { @@ -297,6 +307,10 @@ export class Runner implements IComponent { await this.handleDisconnect(); }, timeout); } + + MessageUtils.writeMessageOnStream( + [RunnerMessageCode.MONITORING, { healthy }], this.hostClient.monitorStream + ); } async handleDisconnect() { @@ -344,6 +358,7 @@ export class Runner implements IComponent { this.logger.trace("Exiting (expected)"); this.status = InstanceStatus.STOPPING; + return this.exit(RunnerExitCode.STOPPED); } @@ -392,15 +407,18 @@ export class Runner implements IComponent { try { this.logger.debug("connecting..."); - await promiseTimeout(this.hostClient.init(this.instanceId), 2000); + await promiseTimeout(this.hostClient.init(this.instanceId), 5000); this.logger.debug("connected"); this.connected = true; + + this.hostClient.inputStream.pipe(this.logFile!); + await this.handleMonitoringRequest({ monitoringRate: 1 }); } catch (e) { this.connected = false; this.logger.warn("Can't connect to Host", e); - await defer(2000); + await defer(5000); return await this.premain(); } @@ -411,6 +429,10 @@ export class Runner implements IComponent { this.logger.debug("Defining control stream"); this.defineControlStream(); + if (this.inputContentType) { + await this.setInputContentType({ headers: { "content-type": this.inputContentType } }); + } + this.hostClient.stdinStream .on("data", (chunk) => process.stdin.unshift(chunk)) .on("end", () => process.stdin.emit("end")); @@ -546,10 +568,20 @@ export class Runner implements IComponent { private redirectOutputs() { this.logger.pipe(this.hostClient.logStream, { stringified: true }); + + if (!this.shouldSerialize) { + this.instanceOutput?.pipe(this.hostClient.outputStream); + } + this.outputDataStream .JSONStringify() .pipe(this.hostClient.outputStream); + if (process.env.PRINT_TO_STDOUT) { + process.stdout.pipe(this.logFile!); + process.stderr.pipe(this.logFile!); + } + overrideStandardStream(process.stdout, 
this.hostClient.stdoutStream); overrideStandardStream(process.stderr, this.hostClient.stderrStream); } @@ -608,7 +640,8 @@ export class Runner implements IComponent { processPID: process.pid.toString() } }, - status: this.status + status: this.status, + inputHeadersSent: !!this.inputContentType }], this.hostClient.monitorStream); this.logger.trace("Handshake sent"); @@ -648,9 +681,9 @@ export class Runner implements IComponent { * * Pass the input stream to stream instead of creating new DataStream(); */ - let stream: Readable & HasTopicInformation | void = this.inputDataStream; + this.instanceOutput = this.inputDataStream; let itemsLeftInSequence = sequence.length; - let intermediate: SynchronousStreamable | void = stream; + let intermediate: SynchronousStreamable | void = this.instanceOutput; for (const func of sequence) { itemsLeftInSequence--; @@ -664,7 +697,7 @@ export class Runner implements IComponent { out = func.call( this.context, - stream, + this.instanceOutput, ...args ); @@ -691,11 +724,11 @@ export class Runner implements IComponent { } else if (typeof intermediate === "object" && intermediate instanceof DataStream) { this.logger.debug("Sequence function returned DataStream.", sequence.length - itemsLeftInSequence - 1); - stream = intermediate; + this.instanceOutput = intermediate; } else { this.logger.debug("Sequence function returned readable", sequence.length - itemsLeftInSequence - 1); // TODO: what if this is not a DataStream, but BufferStream stream!!!! 
- stream = DataStream.from(intermediate as Readable); + this.instanceOutput = DataStream.from(intermediate as Readable); } } else { this.logger.info("All Sequences processed."); @@ -703,17 +736,17 @@ export class Runner implements IComponent { intermediate = await out; if (intermediate instanceof Readable) { - stream = intermediate; + this.instanceOutput = intermediate; } else if (intermediate !== undefined && isSynchronousStreamable(intermediate)) { - stream = Object.assign(DataStream.from(intermediate as Readable, { highWaterMark: 0 }), { + this.instanceOutput = Object.assign(DataStream.from(intermediate as Readable, { highWaterMark: 0 }), { topic: intermediate.topic, contentType: intermediate.contentType }); } else { - stream = undefined; + this.instanceOutput = undefined; } - this.logger.debug("Stream type is", typeof stream); + this.logger.debug("Stream type is", typeof this.instanceOutput); } } @@ -735,27 +768,27 @@ export class Runner implements IComponent { this.sendPang({ provides: "", contentType: "" }); res(); - } else if (stream && this.hostClient.outputStream) { - this.logger.trace("Piping Sequence output", typeof stream); + } else if (this.instanceOutput && this.hostClient.outputStream) { + this.logger.trace("Piping Sequence output", typeof this.instanceOutput); - const shouldSerialize = stream.contentType && - ["application/x-ndjson", "text/x-ndjson"].includes(stream.contentType) || - stream instanceof DataStream && !( - stream instanceof StringStream || stream instanceof BufferStream + this.shouldSerialize = this.instanceOutput.contentType && + ["application/x-ndjson", "text/x-ndjson"].includes(this.instanceOutput.contentType) || + this.instanceOutput instanceof DataStream && !( + this.instanceOutput instanceof StringStream || this.instanceOutput instanceof BufferStream ); - stream + this.instanceOutput .once("end", () => { this.logger.debug("Sequence stream ended"); res(); }) - .pipe(shouldSerialize + .pipe(this.shouldSerialize ? 
this.outputDataStream : this.hostClient.outputStream ); - this.provides = intermediate.topic || ""; - this.providesContentType = intermediate.contentType || ""; + this.provides = intermediate.topic || ""; + this.providesContentType = intermediate.contentType || ""; this.sendPang({ provides: this.provides, contentType: this.providesContentType }); } else { diff --git a/packages/types/src/messages/handshake.ts b/packages/types/src/messages/handshake.ts index a4791c842..338d9c4af 100644 --- a/packages/types/src/messages/handshake.ts +++ b/packages/types/src/messages/handshake.ts @@ -21,10 +21,11 @@ export type PingMessageData = { sequenceInfo: SequenceInfo; created: number; status: InstanceStatus; + inputHeadersSent: boolean; }; export type PangMessageData = { - requires?: string, - contentType?: string, - provides?: string + requires?: string; + contentType?: string; + provides?: string; }; diff --git a/packages/types/src/rest-api-sth/start-sequence.ts b/packages/types/src/rest-api-sth/start-sequence.ts index d8c17bb5a..fc9454200 100644 --- a/packages/types/src/rest-api-sth/start-sequence.ts +++ b/packages/types/src/rest-api-sth/start-sequence.ts @@ -2,4 +2,4 @@ import { RunnerConnectInfo } from "../runner-connect"; export type StartSequenceResponse = { id: string }; -export type StartSequencePayload = Omit; +export type StartSequencePayload = Omit, "inputContentType">; From e341423479f828baf0c0331cea721a897129f370 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Tue, 6 Feb 2024 10:48:56 +0000 Subject: [PATCH 60/62] Fix imports after merge --- packages/runner/src/runner.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index aaf198247..f5244674e 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -10,7 +10,6 @@ import { EventMessageData, HandshakeAcknowledgeMessageData, HasTopicInformation, - HostClient, IComponent, IHostClient, IObjectLogger, @@ -27,7 +26,6 @@ import { 
defer, promiseTimeout } from "@scramjet/utility"; import { HostClient as HostApiClient } from "@scramjet/api-client"; import { ClientUtilsCustomAgent } from "@scramjet/client-utils"; -import { ManagerClient } from "@scramjet/manager-api-client"; import { BufferStream, DataStream, StringStream } from "scramjet"; @@ -59,7 +57,6 @@ function onException(_error: Error) { process.once("beforeExit", onBeforeExit); process.once("uncaughtException", onException); - // async function flushStream(source: Readable | undefined, target: Writable) { // if (!source) return; From a05c64f385c1e18c3a2ec161d28140ca190bfb30 Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 21 Feb 2024 00:28:54 +0000 Subject: [PATCH 61/62] Remove debugging code --- packages/runner/src/runner.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index f5244674e..62a5b1304 100644 --- a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -300,7 +300,7 @@ export class Runner implements IComponent { if (timeout) { this.monitoringMessageReplyTimeout = setTimeout(async () => { - this.logger.warn("Monitoring Reply Timeout. Connected"); + this.logger.warn("Monitoring Reply Timeout"); await this.handleDisconnect(); }, timeout); @@ -324,7 +324,7 @@ export class Runner implements IComponent { try { await this.hostClient.disconnect(!this.connected); - await defer(5000); + await defer(10000); } catch (e) { this.logger.error("Disconnect failed"); } @@ -392,8 +392,7 @@ export class Runner implements IComponent { } private async exit(exitCode?: number) { - //TODO: we need to wait a bit for the logs to flush - we shouldn't need to as cleanup should wait. 
- //await defer(200); + await defer(200); this.cleanup() .then((code) => { process.exitCode = exitCode || code; }, (e) => console.error(e?.stack)) @@ -405,18 +404,16 @@ export class Runner implements IComponent { try { this.logger.debug("connecting..."); - await promiseTimeout(this.hostClient.init(this.instanceId), 5000); + await promiseTimeout(this.hostClient.init(this.instanceId), 10000); this.logger.debug("connected"); this.connected = true; - this.hostClient.inputStream.pipe(this.logFile!); - await this.handleMonitoringRequest({ monitoringRate: 1 }); } catch (e) { this.connected = false; this.logger.warn("Can't connect to Host", e); - await defer(5000); + await defer(10000); return await this.premain(); } @@ -464,7 +461,6 @@ export class Runner implements IComponent { try { sequence = this.getSequence(); - // this.logger.debug("Sequence", sequence); if (sequence.length && typeof sequence[0] !== "function") { this.logger.debug("First Sequence object is not a function:", sequence[0]); From cc146bf603b3014cc5ea12021879e774c8104e6a Mon Sep 17 00:00:00 2001 From: patuwwy Date: Wed, 6 Mar 2024 12:02:44 +0000 Subject: [PATCH 62/62] Remove dev characters from logs --- packages/adapters/src/process-instance-adapter.ts | 2 +- packages/runner/src/runner.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/adapters/src/process-instance-adapter.ts b/packages/adapters/src/process-instance-adapter.ts index 4a286aa11..9574c8d29 100644 --- a/packages/adapters/src/process-instance-adapter.ts +++ b/packages/adapters/src/process-instance-adapter.ts @@ -129,7 +129,7 @@ class ProcessInstanceAdapter implements } setRunner(system: Record): void { - this.logger.info("--------- Setting system from runner", system); + this.logger.info("Setting system from runner", system); this.processPID = parseInt(system.processPID, 10); } diff --git a/packages/runner/src/runner.ts b/packages/runner/src/runner.ts index 62a5b1304..8aefd0b7e 100644 --- 
a/packages/runner/src/runner.ts +++ b/packages/runner/src/runner.ts @@ -189,7 +189,7 @@ export class Runner implements IComponent { if (process.env.RUNNER_LOG_FILE) { this.logFile ||= createWriteStream(process.env.RUNNER_LOG_FILE); - this.logFile.write("\n\n------------- \n\n"); + this.logFile.write("\n\n"); this.logger.addOutput(this.logFile); }