From 9ee467b9cd28d665905c03bbe47d9aa62ec978c5 Mon Sep 17 00:00:00 2001 From: Chocobozzz Date: Tue, 2 Jul 2024 09:01:41 +0200 Subject: [PATCH] Better whisper logging --- package.json | 2 +- packages/tests/src/shared/common.ts | 5 +++++ .../transcription/transcriber-factory.spec.ts | 5 ++--- .../whisper/openai-transcriber.spec.ts | 4 ++-- .../whisper/whisper-ctranslate2.spec.ts | 6 +++--- .../transcription-devtools/src/benchmark.ts | 2 +- .../transcription/src/abstract-transcriber.ts | 21 +++++++++++++++++++ .../transcription/src/transcription-run.ts | 3 +-- .../transcriber/ctranslate2-transcriber.ts | 5 ++--- .../whisper/transcriber/openai-transcriber.ts | 6 +++--- yarn.lock | 8 +++---- 11 files changed, 45 insertions(+), 22 deletions(-) create mode 100644 packages/tests/src/shared/common.ts diff --git a/package.json b/package.json index e04151785..361baea2f 100644 --- a/package.json +++ b/package.json @@ -127,7 +127,7 @@ "create-torrent": "^6.0.15", "deep-object-diff": "^1.1.0", "email-templates": "^11.0.3", - "execa": "^9.2.0", + "execa": "^9.3.0", "express": "^4.18.1", "express-rate-limit": "^7.1.1", "express-validator": "^7.0.1", diff --git a/packages/tests/src/shared/common.ts b/packages/tests/src/shared/common.ts new file mode 100644 index 000000000..b8ba13d58 --- /dev/null +++ b/packages/tests/src/shared/common.ts @@ -0,0 +1,5 @@ +import { createLogger, transports } from 'winston' + +export function createConsoleLogger () { + return createLogger({ transports: [ new transports.Console() ] }) +} diff --git a/packages/tests/src/transcription/transcriber-factory.spec.ts b/packages/tests/src/transcription/transcriber-factory.spec.ts index 139b557f8..0df6fbc06 100644 --- a/packages/tests/src/transcription/transcriber-factory.spec.ts +++ b/packages/tests/src/transcription/transcriber-factory.spec.ts @@ -1,5 +1,5 @@ import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription' -import { createLogger } from 'winston' +import { createConsoleLogger } from '@tests/shared/common.js' describe('Transcriber factory', function () { const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ] @@ -8,9 +8,8 @@ describe('Transcriber factory', function () { for (const transcriberName of transcribers) { it(`Should be able to create a(n) ${transcriberName} transcriber`, function () { - transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createLogger() }) + transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createConsoleLogger() }) }) } - }) }) diff --git a/packages/tests/src/transcription/whisper/openai-transcriber.spec.ts b/packages/tests/src/transcription/whisper/openai-transcriber.spec.ts index 20b2b56d5..c0a01da62 100644 --- a/packages/tests/src/transcription/whisper/openai-transcriber.spec.ts +++ b/packages/tests/src/transcription/whisper/openai-transcriber.spec.ts @@ -7,12 +7,12 @@ import { WhisperBuiltinModel } from '@peertube/peertube-transcription' import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools' +import { createConsoleLogger } from '@tests/shared/common.js' import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js' import { config, expect } from 'chai' import { ensureDir, remove } from 'fs-extra/esm' import { tmpdir } from 'node:os' import { join } from 'node:path' -import { createLogger } from 'winston' config.truncateThreshold = 0 @@ -38,7 +38,7 @@ describe('Open AI Whisper transcriber', function () { languageDetection: true, version: '' }, - logger: createLogger() + logger: createConsoleLogger() }) const model = new TranscriptionModel('tiny') diff --git a/packages/tests/src/transcription/whisper/whisper-ctranslate2.spec.ts b/packages/tests/src/transcription/whisper/whisper-ctranslate2.spec.ts index cd4f30a39..289372bae 100644 --- a/packages/tests/src/transcription/whisper/whisper-ctranslate2.spec.ts +++ b/packages/tests/src/transcription/whisper/whisper-ctranslate2.spec.ts @@ -7,12 +7,12 @@ import { TranscriptionModel } from '@peertube/peertube-transcription' import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools' +import { createConsoleLogger } from '@tests/shared/common.js' import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js' import { config, expect } from 'chai' import { ensureDir, remove } from 'fs-extra/esm' import { tmpdir } from 'node:os' import { join } from 'node:path' -import { createLogger } from 'winston' config.truncateThreshold = 0 @@ -32,7 +32,7 @@ describe('Whisper CTranslate2 transcriber', function () { languageDetection: true, version: '0.4.4' }, - logger: createLogger() + logger: createConsoleLogger() }) const model = new TranscriptionModel('tiny') @@ -156,7 +156,7 @@ describe('Whisper CTranslate2 transcriber', function () { supportedModelFormats: [ 'PyTorch' ], version: '0.4.4' }, - logger: createLogger() + logger: createConsoleLogger() }) const openaiTranscript = await openaiTranscriber.transcribe({ ...transcribeArgs, diff --git a/packages/transcription-devtools/src/benchmark.ts b/packages/transcription-devtools/src/benchmark.ts index 884af03c0..13f3bb6c4 100644 --- a/packages/transcription-devtools/src/benchmark.ts +++ b/packages/transcription-devtools/src/benchmark.ts @@ -99,7 +99,7 @@ void (async () => { const transcriber = transcriberFactory.createFromEngineName({ engineName: transcriberName, - logger: createLogger(), + logger: createLogger({ transports: [ new transports.Console() ] }), binDirectory: join(pipDirectory, 'bin') }) diff --git a/packages/transcription/src/abstract-transcriber.ts b/packages/transcription/src/abstract-transcriber.ts index b3d998b72..efc9a2479 100644 --- a/packages/transcription/src/abstract-transcriber.ts +++ b/packages/transcription/src/abstract-transcriber.ts @@ -1,5 +1,6 @@ import { SimpleLogger } from '@peertube/peertube-models' import { buildSUUID, SUUID } from '@peertube/peertube-node-utils' +import { $ } from 'execa' import { PerformanceObserver } from 'node:perf_hooks' import { join } from 'path' import { TranscriptFile, TranscriptFormat } from './transcript-file.js' @@ -75,6 +76,26 @@ export abstract class AbstractTranscriber { return this.engine.command } + protected getExec (env?: { [ id: string ]: string }) { + const logLevels = { + command: 'debug', + output: 'debug', + ipc: 'debug', + error: 'error', + duration: 'debug' + } + + return $({ + verbose: (_verboseLine, { message, ...verboseObject }) => { + const level = logLevels[verboseObject.type] + + this.logger[level](message, verboseObject) + }, + + env + }) + } + abstract transcribe (options: TranscribeArgs): Promise abstract install (path: string): Promise diff --git a/packages/transcription/src/transcription-run.ts b/packages/transcription/src/transcription-run.ts index 570027a93..07dbdcd2b 100644 --- a/packages/transcription/src/transcription-run.ts +++ b/packages/transcription/src/transcription-run.ts @@ -1,12 +1,11 @@ import { SimpleLogger } from '@peertube/peertube-models' import { buildSUUID, SUUID } from '@peertube/peertube-node-utils' -import { createLogger } from 'winston' export class TranscriptionRun { uuid: SUUID logger: SimpleLogger - constructor (logger: SimpleLogger = createLogger(), uuid: SUUID = buildSUUID()) { + constructor (logger: SimpleLogger, uuid: SUUID = buildSUUID()) { this.uuid = uuid this.logger = logger } diff --git a/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts b/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts index 14b92f18b..87b424f46 100644 --- a/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts +++ b/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts @@ -1,5 +1,4 @@ import { buildSUUID } from '@peertube/peertube-node-utils' -import { $ } from 'execa' import assert from 'node:assert' import { lstat } from 'node:fs/promises' import { TranscribeArgs } from '../../abstract-transcriber.js' @@ -20,7 +19,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber { }: TranscribeArgs): Promise { this.assertLanguageDetectionAvailable(language) - const $$ = $({ env: this.getExecEnv() }) + const $$ = this.getExec(this.getExecEnv()) if (model.path) { assert(await lstat(model.path).then(stats => stats.isDirectory()), 'Model path must be a path to a directory.') @@ -56,7 +55,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber { } async install (directory: string) { - const $$ = $({ verbose: 'full' }) + const $$ = this.getExec() await $$`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}` } diff --git a/packages/transcription/src/whisper/transcriber/openai-transcriber.ts b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts index 839f51a3a..afad29c48 100644 --- a/packages/transcription/src/whisper/transcriber/openai-transcriber.ts +++ b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts @@ -1,5 +1,4 @@ import { buildSUUID } from '@peertube/peertube-node-utils' -import { $ } from 'execa' import { readJSON } from 'fs-extra/esm' import { parse } from 'node:path' import { join, resolve } from 'path' @@ -18,7 +17,8 @@ export class OpenaiTranscriber extends AbstractTranscriber { }: TranscribeArgs): Promise { this.assertLanguageDetectionAvailable(language) - const $$ = $({ env: this.getExecEnv() }) + const $$ = this.getExec(this.getExecEnv()) + const languageArgs = language ? [ '--language', language ] : [] this.createRun(runId) @@ -64,7 +64,7 @@ export class OpenaiTranscriber extends AbstractTranscriber { // --------------------------------------------------------------------------- async install (directory: string) { - const $$ = $({ verbose: 'full' }) + const $$ = this.getExec() await $$`pip3 install -U -t ${[ directory ]} openai-whisper==${this.engine.version}` } diff --git a/yarn.lock b/yarn.lock index 5616ebac1..b541c3341 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5346,10 +5346,10 @@ execa@^5.0.0: signal-exit "^3.0.3" strip-final-newline "^2.0.0" -execa@^9.2.0: - version "9.2.0" - resolved "https://registry.yarnpkg.com/execa/-/execa-9.2.0.tgz#ec5e9de67a714d0f47ce073d37a851fbf0c2f688" - integrity sha512-vpOyYg7UAVKLAWWtRS2gAdgkT7oJbCn0me3gmUmxZih4kd3MF/oo8kNTBTIbkO3yuuF5uB4ZCZfn8BOolITYhg== +execa@^9.3.0: + version "9.3.0" + resolved "https://registry.yarnpkg.com/execa/-/execa-9.3.0.tgz#b10b70f52c1a978985e8492cc1fa74795c59963c" + integrity sha512-l6JFbqnHEadBoVAVpN5dl2yCyfX28WoBAGaoQcNmLLSedOxTxcn2Qa83s8I/PA5i56vWru2OHOtrwF7Om2vqlg== dependencies: "@sindresorhus/merge-streams" "^4.0.0" cross-spawn "^7.0.3"