Better whisper logging

This commit is contained in:
Chocobozzz 2024-07-02 09:01:41 +02:00
parent ebcf3329f5
commit 9ee467b9cd
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
11 changed files with 45 additions and 22 deletions

View File

@ -127,7 +127,7 @@
"create-torrent": "^6.0.15",
"deep-object-diff": "^1.1.0",
"email-templates": "^11.0.3",
"execa": "^9.2.0",
"execa": "^9.3.0",
"express": "^4.18.1",
"express-rate-limit": "^7.1.1",
"express-validator": "^7.0.1",

View File

@ -0,0 +1,5 @@
import { createLogger, transports } from 'winston'
/**
 * Build a winston logger configured with a single Console transport.
 *
 * @returns the logger returned by winston's `createLogger`
 */
export function createConsoleLogger () {
  const consoleTransport = new transports.Console()

  return createLogger({ transports: [ consoleTransport ] })
}

View File

@ -1,5 +1,5 @@
import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription'
import { createLogger } from 'winston'
import { createConsoleLogger } from '@tests/shared/common.js'
describe('Transcriber factory', function () {
const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]
@ -8,9 +8,8 @@ describe('Transcriber factory', function () {
for (const transcriberName of transcribers) {
it(`Should be able to create a(n) ${transcriberName} transcriber`, function () {
transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createLogger() })
transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createConsoleLogger() })
})
}
})
})

View File

@ -7,12 +7,12 @@ import {
WhisperBuiltinModel
} from '@peertube/peertube-transcription'
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
import { createConsoleLogger } from '@tests/shared/common.js'
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
import { config, expect } from 'chai'
import { ensureDir, remove } from 'fs-extra/esm'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { createLogger } from 'winston'
config.truncateThreshold = 0
@ -38,7 +38,7 @@ describe('Open AI Whisper transcriber', function () {
languageDetection: true,
version: ''
},
logger: createLogger()
logger: createConsoleLogger()
})
const model = new TranscriptionModel('tiny')

View File

@ -7,12 +7,12 @@ import {
TranscriptionModel
} from '@peertube/peertube-transcription'
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
import { createConsoleLogger } from '@tests/shared/common.js'
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
import { config, expect } from 'chai'
import { ensureDir, remove } from 'fs-extra/esm'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { createLogger } from 'winston'
config.truncateThreshold = 0
@ -32,7 +32,7 @@ describe('Whisper CTranslate2 transcriber', function () {
languageDetection: true,
version: '0.4.4'
},
logger: createLogger()
logger: createConsoleLogger()
})
const model = new TranscriptionModel('tiny')
@ -156,7 +156,7 @@ describe('Whisper CTranslate2 transcriber', function () {
supportedModelFormats: [ 'PyTorch' ],
version: '0.4.4'
},
logger: createLogger()
logger: createConsoleLogger()
})
const openaiTranscript = await openaiTranscriber.transcribe({
...transcribeArgs,

View File

@ -99,7 +99,7 @@ void (async () => {
const transcriber = transcriberFactory.createFromEngineName({
engineName: transcriberName,
logger: createLogger(),
logger: createLogger({ transports: [ new transports.Console() ] }),
binDirectory: join(pipDirectory, 'bin')
})

View File

@ -1,5 +1,6 @@
import { SimpleLogger } from '@peertube/peertube-models'
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
import { $ } from 'execa'
import { PerformanceObserver } from 'node:perf_hooks'
import { join } from 'path'
import { TranscriptFile, TranscriptFormat } from './transcript-file.js'
@ -75,6 +76,26 @@ export abstract class AbstractTranscriber {
return this.engine.command
}
/**
 * Build an execa `$` runner whose verbose events are forwarded to this transcriber's logger.
 *
 * @param env - optional environment variables, passed as-is to execa's `env` option
 * @returns the configured execa `$` instance
 */
protected getExec (env?: { [ id: string ]: string }) {
  // Logger method used for each verbose event type: `error` events go to `logger.error`, the others to `logger.debug`
  const logLevels: { [ type: string ]: 'debug' | 'error' } = {
    command: 'debug',
    output: 'debug',
    ipc: 'debug',
    error: 'error',
    duration: 'debug'
  }

  return $({
    verbose: (_verboseLine, { message, ...verboseObject }) => {
      // Fall back to `debug` so an event type missing from the map never resolves to
      // `this.logger[undefined]`, which would throw from inside the verbose callback
      const level = logLevels[verboseObject.type] ?? 'debug'

      this.logger[level](message, verboseObject)
    },

    env
  })
}
abstract transcribe (options: TranscribeArgs): Promise<TranscriptFile>
abstract install (path: string): Promise<void>

View File

@ -1,12 +1,11 @@
import { SimpleLogger } from '@peertube/peertube-models'
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
import { createLogger } from 'winston'
export class TranscriptionRun {
uuid: SUUID
logger: SimpleLogger
constructor (logger: SimpleLogger = createLogger(), uuid: SUUID = buildSUUID()) {
constructor (logger: SimpleLogger, uuid: SUUID = buildSUUID()) {
this.uuid = uuid
this.logger = logger
}

View File

@ -1,5 +1,4 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import { $ } from 'execa'
import assert from 'node:assert'
import { lstat } from 'node:fs/promises'
import { TranscribeArgs } from '../../abstract-transcriber.js'
@ -20,7 +19,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
}: TranscribeArgs): Promise<TranscriptFile> {
this.assertLanguageDetectionAvailable(language)
const $$ = $({ env: this.getExecEnv() })
const $$ = this.getExec(this.getExecEnv())
if (model.path) {
assert(await lstat(model.path).then(stats => stats.isDirectory()), 'Model path must be a path to a directory.')
@ -56,7 +55,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
}
async install (directory: string) {
const $$ = $({ verbose: 'full' })
const $$ = this.getExec()
await $$`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}`
}

View File

@ -1,5 +1,4 @@
import { buildSUUID } from '@peertube/peertube-node-utils'
import { $ } from 'execa'
import { readJSON } from 'fs-extra/esm'
import { parse } from 'node:path'
import { join, resolve } from 'path'
@ -18,7 +17,8 @@ export class OpenaiTranscriber extends AbstractTranscriber {
}: TranscribeArgs): Promise<TranscriptFile> {
this.assertLanguageDetectionAvailable(language)
const $$ = $({ env: this.getExecEnv() })
const $$ = this.getExec(this.getExecEnv())
const languageArgs = language ? [ '--language', language ] : []
this.createRun(runId)
@ -64,7 +64,7 @@ export class OpenaiTranscriber extends AbstractTranscriber {
// ---------------------------------------------------------------------------
async install (directory: string) {
const $$ = $({ verbose: 'full' })
const $$ = this.getExec()
await $$`pip3 install -U -t ${[ directory ]} openai-whisper==${this.engine.version}`
}

View File

@ -5346,10 +5346,10 @@ execa@^5.0.0:
signal-exit "^3.0.3"
strip-final-newline "^2.0.0"
execa@^9.2.0:
version "9.2.0"
resolved "https://registry.yarnpkg.com/execa/-/execa-9.2.0.tgz#ec5e9de67a714d0f47ce073d37a851fbf0c2f688"
integrity sha512-vpOyYg7UAVKLAWWtRS2gAdgkT7oJbCn0me3gmUmxZih4kd3MF/oo8kNTBTIbkO3yuuF5uB4ZCZfn8BOolITYhg==
execa@^9.3.0:
version "9.3.0"
resolved "https://registry.yarnpkg.com/execa/-/execa-9.3.0.tgz#b10b70f52c1a978985e8492cc1fa74795c59963c"
integrity sha512-l6JFbqnHEadBoVAVpN5dl2yCyfX28WoBAGaoQcNmLLSedOxTxcn2Qa83s8I/PA5i56vWru2OHOtrwF7Om2vqlg==
dependencies:
"@sindresorhus/merge-streams" "^4.0.0"
cross-spawn "^7.0.3"