fix: refactor + unit tests

This commit is contained in:
Mohamed Marrouchi 2024-12-25 07:36:50 +01:00
parent f60b59aa54
commit 328c5cefb3
4 changed files with 255 additions and 236 deletions

View File

@ -6,17 +6,12 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import fs from 'fs';
import { CACHE_MANAGER } from '@nestjs/cache-manager';
import { BadRequestException, NotFoundException } from '@nestjs/common';
import { EventEmitter2 } from '@nestjs/event-emitter';
import { MongooseModule } from '@nestjs/mongoose';
import { Test, TestingModule } from '@nestjs/testing';
import { AttachmentRepository } from '@/attachment/repositories/attachment.repository';
import { AttachmentModel } from '@/attachment/schemas/attachment.schema';
import { AttachmentService } from '@/attachment/services/attachment.service';
import { HelperService } from '@/helper/helper.service';
import { LanguageRepository } from '@/i18n/repositories/language.repository';
import { Language, LanguageModel } from '@/i18n/schemas/language.schema';
@ -50,7 +45,6 @@ import { NlpEntityService } from '../services/nlp-entity.service';
import { NlpSampleEntityService } from '../services/nlp-sample-entity.service';
import { NlpSampleService } from '../services/nlp-sample.service';
import { NlpValueService } from '../services/nlp-value.service';
import { NlpService } from '../services/nlp.service';
import { NlpSampleController } from './nlp-sample.controller';
@ -60,7 +54,6 @@ describe('NlpSampleController', () => {
let nlpSampleService: NlpSampleService;
let nlpEntityService: NlpEntityService;
let nlpValueService: NlpValueService;
let attachmentService: AttachmentService;
let languageService: LanguageService;
let byeJhonSampleId: string;
let languages: Language[];
@ -76,7 +69,6 @@ describe('NlpSampleController', () => {
MongooseModule.forFeature([
NlpSampleModel,
NlpSampleEntityModel,
AttachmentModel,
NlpEntityModel,
NlpValueModel,
SettingModel,
@ -87,9 +79,7 @@ describe('NlpSampleController', () => {
LoggerService,
NlpSampleRepository,
NlpSampleEntityRepository,
AttachmentService,
NlpEntityService,
AttachmentRepository,
NlpEntityRepository,
NlpValueService,
NlpValueRepository,
@ -98,7 +88,6 @@ describe('NlpSampleController', () => {
LanguageRepository,
LanguageService,
EventEmitter2,
NlpService,
HelperService,
SettingRepository,
SettingService,
@ -131,7 +120,6 @@ describe('NlpSampleController', () => {
text: 'Bye Jhon',
})
).id;
attachmentService = module.get<AttachmentService>(AttachmentService);
languageService = module.get<LanguageService>(LanguageService);
languages = await languageService.findAll();
});
@ -315,83 +303,44 @@ describe('NlpSampleController', () => {
});
});
describe('import', () => {
it('should throw exception when attachment is not found', async () => {
const invalidattachmentId = (
await attachmentService.findOne({
name: 'store2.jpg',
})
).id;
await attachmentService.deleteOne({ name: 'store2.jpg' });
await expect(
nlpSampleController.import(invalidattachmentId),
).rejects.toThrow(NotFoundException);
});
it('should throw exception when file location is not present', async () => {
const attachmentId = (
await attachmentService.findOne({
name: 'store1.jpg',
})
).id;
jest.spyOn(fs, 'existsSync').mockReturnValueOnce(false);
await expect(nlpSampleController.import(attachmentId)).rejects.toThrow(
NotFoundException,
describe('importFile', () => {
it('should throw exception when something is wrong with the upload', async () => {
const file = {
buffer: Buffer.from('', 'utf-8'),
size: 0,
mimetype: 'text/csv',
} as Express.Multer.File;
await expect(nlpSampleController.importFile(file)).rejects.toThrow(
'Bad Request Exception',
);
});
it('should return a failure if an error occurs when parsing csv file ', async () => {
const mockCsvDataWithErrors: string = `intent,entities,lang,question
greeting,person,en`;
jest.spyOn(fs, 'existsSync').mockReturnValueOnce(true);
jest.spyOn(fs, 'readFileSync').mockReturnValueOnce(mockCsvDataWithErrors);
const attachmentId = (
await attachmentService.findOne({
name: 'store1.jpg',
})
).id;
const mockParsedCsvDataWithErrors = {
data: [{ intent: 'greeting', entities: 'person', lang: 'en' }],
errors: [
{
type: 'FieldMismatch',
code: 'TooFewFields',
message: 'Too few fields: expected 4 fields but parsed 3',
row: 0,
},
],
meta: {
delimiter: ',',
linebreak: '\n',
aborted: false,
truncated: false,
cursor: 49,
fields: ['intent', 'entities', 'lang', 'question'],
},
};
await expect(nlpSampleController.import(attachmentId)).rejects.toThrow(
new BadRequestException({
cause: mockParsedCsvDataWithErrors.errors,
description: 'Error while parsing CSV',
}),
);
const buffer = Buffer.from(mockCsvDataWithErrors, 'utf-8');
const file = {
buffer,
size: buffer.length,
mimetype: 'text/csv',
} as Express.Multer.File;
await expect(nlpSampleController.importFile(file)).rejects.toThrow();
});
it('should import data from a CSV file', async () => {
const attachmentId = (
await attachmentService.findOne({
name: 'store1.jpg',
})
).id;
const mockCsvData: string = [
`text,intent,language`,
`How much does a BMW cost?,price,en`,
].join('\n');
jest.spyOn(fs, 'existsSync').mockReturnValueOnce(true);
jest.spyOn(fs, 'readFileSync').mockReturnValueOnce(mockCsvData);
const result = await nlpSampleController.import(attachmentId);
const buffer = Buffer.from(mockCsvData, 'utf-8');
const file = {
buffer,
size: buffer.length,
mimetype: 'text/csv',
} as Express.Multer.File;
const result = await nlpSampleController.importFile(file);
const intentEntityResult = await nlpEntityService.findOne({
name: 'intent',
});
@ -429,9 +378,10 @@ describe('NlpSampleController', () => {
expect(intentEntityResult).toEqualPayload(intentEntity);
expect(priceValueResult).toEqualPayload(priceValue);
expect(textSampleResult).toEqualPayload(textSample);
expect(result).toEqual({ success: true });
expect(result).toEqualPayload([textSample]);
});
});
describe('deleteMany', () => {
it('should delete multiple nlp samples', async () => {
const samplesToDelete = [

View File

@ -6,8 +6,6 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import fs from 'fs';
import { join } from 'path';
import { Readable } from 'stream';
import {
@ -31,10 +29,7 @@ import {
import { FileInterceptor } from '@nestjs/platform-express';
import { CsrfCheck } from '@tekuconcept/nestjs-csrf';
import { Response } from 'express';
import Papa from 'papaparse';
import { AttachmentService } from '@/attachment/services/attachment.service';
import { config } from '@/config';
import { HelperService } from '@/helper/helper.service';
import { LanguageService } from '@/i18n/services/language.service';
import { CsrfInterceptor } from '@/interceptors/csrf.interceptor';
@ -47,18 +42,17 @@ import { PopulatePipe } from '@/utils/pipes/populate.pipe';
import { SearchFilterPipe } from '@/utils/pipes/search-filter.pipe';
import { TFilterQuery } from '@/utils/types/filter.types';
import { NlpSampleCreateDto, NlpSampleDto } from '../dto/nlp-sample.dto';
import { NlpSampleDto } from '../dto/nlp-sample.dto';
import {
NlpSample,
NlpSampleFull,
NlpSamplePopulate,
NlpSampleStub,
} from '../schemas/nlp-sample.schema';
import { NlpSampleEntityValue, NlpSampleState } from '../schemas/types';
import { NlpSampleState } from '../schemas/types';
import { NlpEntityService } from '../services/nlp-entity.service';
import { NlpSampleEntityService } from '../services/nlp-sample-entity.service';
import { NlpSampleService } from '../services/nlp-sample.service';
import { NlpService } from '../services/nlp.service';
@UseInterceptors(CsrfInterceptor)
@Controller('nlpsample')
@ -70,11 +64,9 @@ export class NlpSampleController extends BaseController<
> {
constructor(
private readonly nlpSampleService: NlpSampleService,
private readonly attachmentService: AttachmentService,
private readonly nlpSampleEntityService: NlpSampleEntityService,
private readonly nlpEntityService: NlpEntityService,
private readonly logger: LoggerService,
private readonly nlpService: NlpService,
private readonly languageService: LanguageService,
private readonly helperService: HelperService,
) {
@ -371,157 +363,11 @@ export class NlpSampleController extends BaseController<
return deleteResult;
}
private async parseAndSaveDataset(data: string) {
const allEntities = await this.nlpEntityService.findAll();
// Check if file location is present
if (allEntities.length === 0) {
throw new NotFoundException(
'No entities found, please create them first.',
);
}
// Parse local CSV file
const result: {
errors: any[];
data: Array<Record<string, string>>;
} = Papa.parse(data, {
header: true,
skipEmptyLines: true,
});
if (result.errors && result.errors.length > 0) {
this.logger.warn(
`Errors parsing the file: ${JSON.stringify(result.errors)}`,
);
throw new BadRequestException(result.errors, {
cause: result.errors,
description: 'Error while parsing CSV',
});
}
// Remove data with no intent
const filteredData = result.data.filter((d) => d.intent !== 'none');
const languages = await this.languageService.getLanguages();
const defaultLanguage = await this.languageService.getDefaultLanguage();
const nlpSamples: NlpSample[] = [];
// Reduce function to ensure executing promises one by one
for (const d of filteredData) {
try {
// Check if a sample with the same text already exists
const existingSamples = await this.nlpSampleService.find({
text: d.text,
});
// Skip if sample already exists
if (Array.isArray(existingSamples) && existingSamples.length > 0) {
continue;
}
// Fallback to default language if 'language' is missing or invalid
if (!d.language || !(d.language in languages)) {
if (d.language) {
this.logger.warn(
`Language "${d.language}" does not exist, falling back to default.`,
);
}
d.language = defaultLanguage.code;
}
// Create a new sample dto
const sample: NlpSampleCreateDto = {
text: d.text,
trained: false,
language: languages[d.language].id,
};
// Create a new sample entity dto
const entities: NlpSampleEntityValue[] = allEntities
.filter(({ name }) => name in d)
.map(({ name }) => ({
entity: name,
value: d[name],
}));
// Store any new entity/value
const storedEntities = await this.nlpEntityService.storeNewEntities(
sample.text,
entities,
['trait'],
);
// Store sample
const createdSample = await this.nlpSampleService.create(sample);
nlpSamples.push(createdSample);
// Map and assign the sample ID to each stored entity
const sampleEntities = storedEntities.map((storedEntity) => ({
...storedEntity,
sample: createdSample?.id,
}));
// Store sample entities
await this.nlpSampleEntityService.createMany(sampleEntities);
} catch (err) {
this.logger.error('Error occurred when extracting data. ', err);
}
}
return nlpSamples;
}
@CsrfCheck(true)
@Post('import')
@UseInterceptors(FileInterceptor('file'))
async importFile(@UploadedFile() file: Express.Multer.File) {
try {
const datasetContent = file.buffer.toString('utf-8');
return await this.parseAndSaveDataset(datasetContent);
} catch (err) {
this.logger.error('Error processing file:', err);
}
}
/**
* @deprecated
* Imports NLP samples from a CSV file.
*
* @param file - The file path or ID of the CSV file to import.
*
* @returns A success message after the import process is completed.
*/
@CsrfCheck(true)
@Post('import/:file')
async import(
@Param('file')
file: string,
) {
// Check if file is present
const importedFile = await this.attachmentService.findOne(file);
if (!importedFile) {
throw new NotFoundException('Missing file!');
}
const filePath = importedFile
? join(config.parameters.uploadDir, importedFile.location)
: undefined;
// Check if file location is present
if (!fs.existsSync(filePath)) {
throw new NotFoundException('File does not exist');
}
const allEntities = await this.nlpEntityService.findAll();
// Check if file location is present
if (allEntities.length === 0) {
throw new NotFoundException(
'No entities found, please create them first.',
);
}
// Read file content
const data = fs.readFileSync(filePath, 'utf8');
await this.parseAndSaveDataset(data);
this.logger.log('Import process completed successfully.');
return { success: true };
const datasetContent = file.buffer.toString('utf-8');
return await this.nlpSampleService.parseAndSaveDataset(datasetContent);
}
}

View File

@ -7,6 +7,7 @@
*/
import { CACHE_MANAGER } from '@nestjs/cache-manager';
import { BadRequestException, NotFoundException } from '@nestjs/common';
import { EventEmitter2 } from '@nestjs/event-emitter';
import { MongooseModule } from '@nestjs/mongoose';
import { Test, TestingModule } from '@nestjs/testing';
@ -27,7 +28,7 @@ import { NlpEntityRepository } from '../repositories/nlp-entity.repository';
import { NlpSampleEntityRepository } from '../repositories/nlp-sample-entity.repository';
import { NlpSampleRepository } from '../repositories/nlp-sample.repository';
import { NlpValueRepository } from '../repositories/nlp-value.repository';
import { NlpEntityModel } from '../schemas/nlp-entity.schema';
import { NlpEntity, NlpEntityModel } from '../schemas/nlp-entity.schema';
import {
NlpSampleEntity,
NlpSampleEntityModel,
@ -41,7 +42,10 @@ import { NlpSampleService } from './nlp-sample.service';
import { NlpValueService } from './nlp-value.service';
describe('NlpSampleService', () => {
let nlpEntityService: NlpEntityService;
let nlpSampleService: NlpSampleService;
let nlpSampleEntityService: NlpSampleEntityService;
let languageService: LanguageService;
let nlpSampleEntityRepository: NlpSampleEntityRepository;
let nlpSampleRepository: NlpSampleRepository;
let languageRepository: LanguageRepository;
@ -84,7 +88,11 @@ describe('NlpSampleService', () => {
},
],
}).compile();
nlpEntityService = module.get<NlpEntityService>(NlpEntityService);
nlpSampleService = module.get<NlpSampleService>(NlpSampleService);
nlpSampleEntityService = module.get<NlpSampleEntityService>(
NlpSampleEntityService,
);
nlpSampleRepository = module.get<NlpSampleRepository>(NlpSampleRepository);
nlpSampleEntityRepository = module.get<NlpSampleEntityRepository>(
NlpSampleEntityRepository,
@ -92,6 +100,7 @@ describe('NlpSampleService', () => {
nlpSampleEntityRepository = module.get<NlpSampleEntityRepository>(
NlpSampleEntityRepository,
);
languageService = module.get<LanguageService>(LanguageService);
languageRepository = module.get<LanguageRepository>(LanguageRepository);
noNlpSample = await nlpSampleService.findOne({ text: 'No' });
nlpSampleEntity = await nlpSampleEntityRepository.findOne({
@ -162,4 +171,104 @@ describe('NlpSampleService', () => {
expect(result.deletedCount).toEqual(1);
});
});
describe('parseAndSaveDataset', () => {
it('should throw NotFoundException if no entities are found', async () => {
jest.spyOn(nlpEntityService, 'findAll').mockResolvedValue([]);
await expect(
nlpSampleService.parseAndSaveDataset(
'text,intent,language\nHello,none,en',
),
).rejects.toThrow(NotFoundException);
expect(nlpEntityService.findAll).toHaveBeenCalled();
});
it('should throw BadRequestException if CSV parsing fails', async () => {
const invalidCSV = 'text,intent,language\n"Hello,none'; // Malformed CSV
jest
.spyOn(nlpEntityService, 'findAll')
.mockResolvedValue([{ name: 'intent' } as NlpEntity]);
jest.spyOn(languageService, 'getLanguages').mockResolvedValue({});
jest
.spyOn(languageService, 'getDefaultLanguage')
.mockResolvedValue({ code: 'en' } as Language);
await expect(
nlpSampleService.parseAndSaveDataset(invalidCSV),
).rejects.toThrow(BadRequestException);
});
it('should filter out rows with "none" as intent', async () => {
const mockData = 'text,intent,language\nHello,none,en\nHi,greet,en';
jest
.spyOn(nlpEntityService, 'findAll')
.mockResolvedValue([{ name: 'intent' } as NlpEntity]);
jest
.spyOn(languageService, 'getLanguages')
.mockResolvedValue({ en: { id: '1' } });
jest
.spyOn(languageService, 'getDefaultLanguage')
.mockResolvedValue({ code: 'en' } as Language);
jest.spyOn(nlpSampleService, 'find').mockResolvedValue([]);
jest
.spyOn(nlpSampleService, 'create')
.mockResolvedValue({ id: '1', text: 'Hi' } as NlpSample);
jest.spyOn(nlpSampleEntityService, 'createMany').mockResolvedValue([]);
const result = await nlpSampleService.parseAndSaveDataset(mockData);
expect(result).toHaveLength(1);
expect(result[0].text).toEqual('Hi');
});
it('should fallback to the default language if the language is invalid', async () => {
const mockData = 'text,intent,language\nHi,greet,invalidLang';
jest
.spyOn(nlpEntityService, 'findAll')
.mockResolvedValue([{ name: 'intent' } as NlpEntity]);
jest
.spyOn(languageService, 'getLanguages')
.mockResolvedValue({ en: { id: '1' } });
jest
.spyOn(languageService, 'getDefaultLanguage')
.mockResolvedValue({ code: 'en' } as Language);
jest.spyOn(nlpSampleService, 'find').mockResolvedValue([]);
jest
.spyOn(nlpSampleService, 'create')
.mockResolvedValue({ id: '1', text: 'Hi' } as NlpSample);
jest.spyOn(nlpSampleEntityService, 'createMany').mockResolvedValue([]);
const result = await nlpSampleService.parseAndSaveDataset(mockData);
expect(result).toHaveLength(1);
expect(result[0].text).toEqual('Hi');
});
it('should successfully process and save valid dataset rows', async () => {
const mockData = 'text,intent,language\nHi,greet,en\nBye,bye,en';
const mockLanguages = { en: { id: '1' } };
jest
.spyOn(languageService, 'getLanguages')
.mockResolvedValue(mockLanguages);
jest
.spyOn(languageService, 'getDefaultLanguage')
.mockResolvedValue({ code: 'en' } as Language);
jest.spyOn(nlpSampleService, 'find').mockResolvedValue([]);
let id = 0;
jest.spyOn(nlpSampleService, 'create').mockImplementation((s) => {
return Promise.resolve({ id: (++id).toString(), ...s } as NlpSample);
});
jest.spyOn(nlpSampleEntityService, 'createMany').mockResolvedValue([]);
const result = await nlpSampleService.parseAndSaveDataset(mockData);
expect(nlpSampleEntityService.createMany).toHaveBeenCalledTimes(2);
expect(result).toHaveLength(2);
expect(result[0].text).toEqual('Hi');
expect(result[1].text).toEqual('Bye');
});
});
});

View File

@ -6,8 +6,13 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Injectable } from '@nestjs/common';
import {
BadRequestException,
Injectable,
NotFoundException,
} from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import Papa from 'papaparse';
import { Message } from '@/chat/schemas/message.schema';
import { Language } from '@/i18n/schemas/language.schema';
@ -23,7 +28,10 @@ import {
NlpSampleFull,
NlpSamplePopulate,
} from '../schemas/nlp-sample.schema';
import { NlpSampleState } from '../schemas/types';
import { NlpSampleEntityValue, NlpSampleState } from '../schemas/types';
import { NlpEntityService } from './nlp-entity.service';
import { NlpSampleEntityService } from './nlp-sample-entity.service';
@Injectable()
export class NlpSampleService extends BaseService<
@ -33,6 +41,8 @@ export class NlpSampleService extends BaseService<
> {
constructor(
readonly repository: NlpSampleRepository,
private readonly nlpSampleEntityService: NlpSampleEntityService,
private readonly nlpEntityService: NlpEntityService,
private readonly languageService: LanguageService,
private readonly logger: LoggerService,
) {
@ -50,6 +60,110 @@ export class NlpSampleService extends BaseService<
return await this.repository.deleteOne(id);
}
/**
* This function is responsible for parsing a CSV dataset string and saving the parsed data into the database.
* It ensures that all necessary entities and languages exist, validates the dataset, and processes it row by row
* to create NLP samples and associated entities in the system.
*
* @param data - The raw CSV dataset as a string.
* @returns A promise that resolves to an array of created NLP samples.
*/
async parseAndSaveDataset(data: string) {
const allEntities = await this.nlpEntityService.findAll();
// Check if file location is present
if (allEntities.length === 0) {
throw new NotFoundException(
'No entities found, please create them first.',
);
}
// Parse local CSV file
const result: {
errors: any[];
data: Array<Record<string, string>>;
} = Papa.parse(data, {
header: true,
skipEmptyLines: true,
});
if (result.errors && result.errors.length > 0) {
this.logger.warn(
`Errors parsing the file: ${JSON.stringify(result.errors)}`,
);
throw new BadRequestException(result.errors, {
cause: result.errors,
description: 'Error while parsing CSV',
});
}
// Remove data with no intent
const filteredData = result.data.filter((d) => d.intent !== 'none');
const languages = await this.languageService.getLanguages();
const defaultLanguage = await this.languageService.getDefaultLanguage();
const nlpSamples: NlpSample[] = [];
// Reduce function to ensure executing promises one by one
for (const d of filteredData) {
try {
// Check if a sample with the same text already exists
const existingSamples = await this.find({
text: d.text,
});
// Skip if sample already exists
if (Array.isArray(existingSamples) && existingSamples.length > 0) {
continue;
}
// Fallback to default language if 'language' is missing or invalid
if (!d.language || !(d.language in languages)) {
if (d.language) {
this.logger.warn(
`Language "${d.language}" does not exist, falling back to default.`,
);
}
d.language = defaultLanguage.code;
}
// Create a new sample dto
const sample: NlpSampleCreateDto = {
text: d.text,
trained: false,
language: languages[d.language].id,
};
// Create a new sample entity dto
const entities: NlpSampleEntityValue[] = allEntities
.filter(({ name }) => name in d)
.map(({ name }) => ({
entity: name,
value: d[name],
}));
// Store any new entity/value
const storedEntities = await this.nlpEntityService.storeNewEntities(
sample.text,
entities,
['trait'],
);
// Store sample
const createdSample = await this.create(sample);
nlpSamples.push(createdSample);
// Map and assign the sample ID to each stored entity
const sampleEntities = storedEntities.map((storedEntity) => ({
...storedEntity,
sample: createdSample?.id,
}));
// Store sample entities
await this.nlpSampleEntityService.createMany(sampleEntities);
} catch (err) {
this.logger.error('Error occurred when extracting data. ', err);
}
}
return nlpSamples;
}
/**
* When a language gets deleted, we need to set related samples to null
*