Merge pull request #1006 from Hexastack/feat/nlp-pattern-lookup-strategy

Feat/nlp pattern lookup strategy
This commit is contained in:
Med Marrouchi 2025-05-14 08:35:51 +01:00 committed by GitHub
commit 336c296456
30 changed files with 978 additions and 257 deletions

View File

@ -59,6 +59,7 @@ import {
blockProductListMock,
blocks,
mockNlpAffirmationPatterns,
mockNlpFirstNamePatterns,
mockNlpGreetingAnyNamePatterns,
mockNlpGreetingNamePatterns,
mockNlpGreetingPatterns,
@ -69,6 +70,7 @@ import {
subscriberContextBlankInstance,
} from '@/utils/test/mocks/conversation';
import {
mockNlpFirstNameEntities,
mockNlpGreetingFullNameEntities,
mockNlpGreetingNameEntities,
} from '@/utils/test/mocks/nlp';
@ -353,6 +355,25 @@ describe('BlockService', () => {
]);
});
it('should return matching nlp patterns on a synonym match (canonical value)', () => {
const result = blockService.getMatchingNluPatterns(
mockNlpFirstNameEntities,
{
...blockGetStarted,
patterns: [...blockGetStarted.patterns, mockNlpFirstNamePatterns],
},
);
expect(result).toEqual([
[
{
entity: 'firstname',
match: 'value',
value: 'jhon',
},
],
]);
});
it('should return empty array when it does not match nlp patterns', () => {
const result = blockService.getMatchingNluPatterns(
mockNlpGreetingFullNameEntities,

View File

@ -294,11 +294,11 @@ export class BlockService extends BaseService<
* @returns The NLU patterns that match the predicted entities
*/
getMatchingNluPatterns<E extends NLU.ParseEntities, B extends BlockStub>(
nlp: E,
{ entities }: E,
block: B,
): NlpPattern[][] {
// No nlp entities to check against
if (nlp.entities.length === 0) {
if (entities.length === 0) {
return [];
}
@ -312,18 +312,21 @@ export class BlockService extends BaseService<
}
// Filter NLP patterns match based on best guessed entities
return nlpPatterns.filter((entities: NlpPattern[]) => {
return entities.every((ev: NlpPattern) => {
if (ev.match === 'value') {
return nlp.entities.find((e) => {
return e.entity === ev.entity && e.value === ev.value;
return nlpPatterns.filter((patterns: NlpPattern[]) => {
return patterns.every((p: NlpPattern) => {
if (p.match === 'value') {
return entities.find((e) => {
return (
e.entity === p.entity &&
(e.value === p.value || e.canonicalValue === p.value)
);
});
} else if (ev.match === 'entity') {
return nlp.entities.find((e) => {
return e.entity === ev.entity;
} else if (p.match === 'entity') {
return entities.find((e) => {
return e.entity === p.entity;
});
} else {
this.logger.warn('Unknown NLP match type', ev);
this.logger.warn('Unknown NLP match type', p);
return false;
}
});
@ -429,12 +432,14 @@ export class BlockService extends BaseService<
* - Returns `true` if all conditions are met, otherwise `false`.
*/
private matchesNluEntity<E extends NLU.ParseEntity>(
{ entity, value }: E,
{ entity, value, canonicalValue }: E,
pattern: NlpPattern,
): boolean {
return (
entity === pattern.entity &&
(pattern.match !== 'value' || value === pattern.value)
(pattern.match !== 'value' ||
value === pattern.value ||
canonicalValue === pattern.value)
);
}
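For readers skimming the diff, here is a minimal standalone sketch of the new fallback (simplified types, not the actual NLU.ParseEntity / NlpPattern definitions from the codebase): a pattern with match type 'value' now also accepts the entity's canonical value.

type ParseEntity = { entity: string; value: string; canonicalValue?: string; confidence: number };
type NlpPattern = { entity: string; match: 'entity' | 'value'; value?: string };

// Same predicate as matchesNluEntity above, written against the simplified types
const matchesNluEntity = (e: ParseEntity, p: NlpPattern): boolean =>
  e.entity === p.entity &&
  (p.match !== 'value' || e.value === p.value || e.canonicalValue === p.value);

// 'jhonny' is the raw match, but its canonical value 'jhon' satisfies the block pattern:
matchesNluEntity(
  { entity: 'firstname', value: 'jhonny', canonicalValue: 'jhon', confidence: 0.75 },
  { entity: 'firstname', match: 'value', value: 'jhon' },
); // => true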

View File

@ -12,7 +12,7 @@ import Handlebars from 'handlebars';
import { HelperService } from '@/helper/helper.service';
import BaseNlpHelper from '@/helper/lib/base-nlp-helper';
import { LLM, NLU } from '@/helper/types';
import { HelperType, LLM, NLU } from '@/helper/types';
import { LanguageService } from '@/i18n/services/language.service';
import { LoggerService } from '@/logger/logger.service';
import { NlpEntityFull } from '@/nlp/schemas/nlp-entity.schema';
@ -66,12 +66,9 @@ export default class LlmNluHelper
async buildClassifiersPrompt() {
const settings = await this.getSettings();
if (settings) {
const entities = await this.nlpEntityService.findAndPopulate({
const traitEntities = await this.nlpEntityService.findAndPopulate({
lookups: 'trait',
});
const traitEntities = entities.filter(({ lookups }) =>
lookups.includes('trait'),
);
this.traitClassifierPrompts = traitEntities.map((entity) => ({
...entity,
prompt: Handlebars.compile(settings.trait_classifier_prompt_template)({
@ -88,48 +85,9 @@ export default class LlmNluHelper
await this.buildClassifiersPrompt();
}
/**
* Finds entities in a given text based on their values and synonyms.
*
* This function takes a string of text and an array of entities, where each entity contains a value
* and a list of synonyms. It returns an array of objects, each representing an entity found in the text
* along with its start and end positions.
*
* @param text - The input text to search for entities.
* @param entities - An array of entities to search for, each containing a `value` and a list of `synonyms`.
*
* @returns An array of objects representing the found entities, with their `value`, `start`, and `end` positions.
*/
private findKeywordEntities(text: string, entity: NlpEntityFull) {
return (
entity.values
.flatMap(({ value, expressions }) => {
const allValues = [value, ...expressions];
// Filter the terms that are found in the text
return allValues
.flatMap((term) => {
const regex = new RegExp(`\\b${term}\\b`, 'g');
const matches = [...text.matchAll(regex)];
// Map matches to FoundEntity format
return matches.map((match) => ({
entity: entity.name,
value: term,
start: match.index!,
end: match.index! + term.length,
confidence: 1,
}));
})
.shift();
})
.filter((v) => !!v) || []
);
}
async predict(text: string): Promise<NLU.ParseEntities> {
const settings = await this.getSettings();
const helper = await this.helperService.getDefaultLlmHelper();
const helper = await this.helperService.getDefaultHelper(HelperType.LLM);
const defaultLanguage = await this.languageService.getDefaultLanguage();
// Detect language
const language = await helper.generateStructuredResponse<string>?.(
@ -174,13 +132,12 @@ export default class LlmNluHelper
// Perform slot filling in a deterministic way since
// it's currently a challenging task for the LLMs.
const keywordEntities = await this.nlpEntityService.findAndPopulate({
lookups: 'keywords',
const entities = await this.nlpEntityService.findAndPopulate({
lookups: { $in: ['keywords', 'pattern'] },
});
const entities = keywordEntities.flatMap((keywordEntity) =>
this.findKeywordEntities(text, keywordEntity),
) as NLU.ParseEntity[];
return { entities: traits.concat(entities) };
const slotEntities = this.runDeterministicSlotFilling(text, entities);
return { entities: traits.concat(slotEntities) };
}
}
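For illustration only (the entity names and values below are hypothetical, not part of this PR): since predict() now concatenates LLM-classified traits with deterministic keyword/pattern slots, a prediction for the text "order pizza at 10pm" could look like this, assuming an 'intent' trait entity, a 'product' keyword entity and a 'time' pattern entity are defined.

const prediction /* : NLU.ParseEntities */ = {
  entities: [
    // Trait classification produced by the LLM prompt
    { entity: 'intent', value: 'order', confidence: 0.9 },
    // Deterministic keyword slot (exact term or synonym match)
    { entity: 'product', value: 'pizza', start: 6, end: 11, confidence: 1 },
    // Deterministic pattern slot: preprocessed matched text plus the canonical NlpValue.value
    { entity: 'time', value: '10pm', canonicalValue: 'time_of_day', start: 15, end: 19, confidence: 1 },
  ],
};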

View File

@ -30,6 +30,7 @@ import BaseNlpHelper from '../base-nlp-helper';
const mockLoggerService = {
log: jest.fn(),
error: jest.fn(),
warn: jest.fn(),
} as unknown as LoggerService;
const mockSettingService = {
@ -160,7 +161,7 @@ describe('BaseNlpHelper', () => {
updatedAt: new Date(),
builtin: false,
expressions: [],
metadata: [],
metadata: {},
},
value2: {
id: new ObjectId().toString(),
@ -170,7 +171,7 @@ describe('BaseNlpHelper', () => {
updatedAt: new Date(),
builtin: false,
expressions: [],
metadata: [],
metadata: {},
},
});
@ -218,4 +219,253 @@ describe('BaseNlpHelper', () => {
);
});
});
describe('extractKeywordBasedSlots', () => {
it('should return matches for exact keywords and synonyms', () => {
const entity: NlpEntityFull = {
name: 'color',
values: [
{ value: 'blue', expressions: ['azure', 'navy'] },
{ value: 'green', expressions: ['emerald', 'lime'] },
],
} as any;
const result = helper.extractKeywordBasedSlots(
'The sky is azure and emerald',
entity,
);
expect(result).toEqual([
{
entity: 'color',
value: 'blue',
start: 11,
end: 16,
confidence: 1,
},
{
entity: 'color',
value: 'green',
start: 21,
end: 28,
confidence: 1,
},
]);
});
it('should return empty array if no values present', () => {
const result = helper.extractKeywordBasedSlots('anything', {
name: 'empty',
values: [],
} as any);
expect(result).toEqual([]);
});
});
describe('extractPatternBasedSlots', () => {
it('should match using a valid regex pattern', () => {
const entity: NlpEntityFull = {
name: 'infos',
values: [
{
value: 'number',
metadata: { pattern: '\\d+', wordBoundary: true },
},
],
} as NlpEntityFull;
const result = helper.extractPatternBasedSlots(
'Order 123 and 456 now!',
entity,
);
expect(result).toEqual([
{
entity: 'infos',
canonicalValue: 'number',
value: '123',
start: 6,
end: 9,
confidence: 1,
},
{
entity: 'infos',
canonicalValue: 'number',
value: '456',
start: 14,
end: 17,
confidence: 1,
},
]);
});
it('should respect metadata like toLowerCase and removeSpaces', () => {
const entity: NlpEntityFull = {
name: 'name',
values: [
{
value: 'brand',
metadata: {
pattern: 'HEX BOT',
toLowerCase: true,
removeSpaces: true,
},
},
],
} as NlpEntityFull;
const result = helper.extractPatternBasedSlots(
'My CODE is HEX BOT!',
entity,
);
expect(result).toEqual([
{
entity: 'name',
canonicalValue: 'brand',
value: 'hexbot',
start: 11,
end: 18,
confidence: 1,
},
]);
});
it('should respect metadata stripDiacritics', () => {
const entity: NlpEntityFull = {
name: 'keyword',
values: [
{
value: 'word',
metadata: {
pattern: '".+"',
toLowerCase: true,
removeSpaces: true,
stripDiacritics: true,
},
},
],
} as NlpEntityFull;
const result = helper.extractPatternBasedSlots(
'The word "où" (where)',
entity,
);
expect(result).toEqual([
{
entity: 'keyword',
canonicalValue: 'word',
value: '"ou"',
start: 9,
end: 13,
confidence: 1,
},
]);
});
it('should return empty array if no values', () => {
const result = helper.extractPatternBasedSlots('test', {
name: 'noop',
values: [],
} as any);
expect(result).toEqual([]);
});
it('should handle invalid regex pattern gracefully', () => {
const entity: NlpEntityFull = {
name: 'fail',
values: [
{
value: 'Invalid',
metadata: { pattern: '[a-', wordBoundary: true },
},
],
} as any;
const result = helper.extractPatternBasedSlots('test', entity);
expect(result).toEqual([]);
});
});
describe('runDeterministicSlotFilling', () => {
it('should call keyword-based extractor for keyword lookup strategy', () => {
const mockEntities: NlpEntityFull[] = [
{
name: 'product',
lookups: ['keywords'],
values: [
{
value: 'tshirt',
expressions: [],
},
{
value: 'pizza',
expressions: [],
},
],
} as unknown as NlpEntityFull,
];
jest.spyOn(helper, 'extractKeywordBasedSlots');
jest.spyOn(helper, 'extractPatternBasedSlots');
const result = helper.runDeterministicSlotFilling(
'order pizza',
mockEntities,
);
expect(helper.extractKeywordBasedSlots).toHaveBeenCalledTimes(1);
expect(helper.extractPatternBasedSlots).not.toHaveBeenCalled();
expect(result).toHaveLength(1);
expect(result[0].entity).toBe('product');
});
it('should call pattern-based extractor for pattern lookup strategy', () => {
const mockEntities: NlpEntityFull[] = [
{
name: 'number',
lookups: ['pattern'],
values: [
{
value: 'phone',
metadata: { pattern: '\\d+' },
expressions: [],
},
],
} as unknown as NlpEntityFull,
];
jest.spyOn(helper, 'extractKeywordBasedSlots');
jest.spyOn(helper, 'extractPatternBasedSlots');
const result = helper.runDeterministicSlotFilling(
'call me at 1234567890',
mockEntities,
);
expect(helper.extractPatternBasedSlots).toHaveBeenCalledTimes(1);
expect(helper.extractKeywordBasedSlots).not.toHaveBeenCalled();
expect(result).toHaveLength(1);
expect(result[0].entity).toBe('number');
});
it('should skip entities that do not support the selected lookup strategy', () => {
const mockEntities: NlpEntityFull[] = [
{
name: 'irrelevant',
lookups: ['trait'],
values: [],
} as unknown as NlpEntityFull,
];
jest.spyOn(helper, 'extractKeywordBasedSlots');
jest.spyOn(helper, 'extractPatternBasedSlots');
const result = helper.runDeterministicSlotFilling(
'any text',
mockEntities,
);
expect(helper.extractKeywordBasedSlots).not.toHaveBeenCalled();
expect(helper.extractPatternBasedSlots).not.toHaveBeenCalled();
expect(result).toHaveLength(0);
});
});
});

View File

@ -225,4 +225,144 @@ export default abstract class BaseNlpHelper<
threshold?: boolean,
project?: string,
): Promise<NLU.ParseEntities>;
/**
* Finds keyword slots in a given text based on an entity's values and synonyms.
*
* This function takes a string of text and a single entity whose values each carry a canonical
* `value` and a list of synonym `expressions`. It returns an array of objects, each representing
* a match found in the text along with its start and end positions.
*
* @param text - The input text to search for keyword matches.
* @param entity - The entity to search for, whose values contain a `value` and a list of `expressions`.
*
* @returns An array of matched entities with their `value`, `start`, and `end` positions.
*/
public extractKeywordBasedSlots(
text: string,
entity: NlpEntityFull,
): NLU.ParseEntity[] {
if (!entity.values?.length) {
this.logger.warn('NLP entity has no values');
return [];
}
return (entity.values
.flatMap(({ value, expressions }) => {
const allValues = [value, ...expressions];
// Filter the terms that are found in the text
return allValues
.flatMap((term) => {
const regex = new RegExp(`\\b${term}\\b`, 'g');
const matches = [...text.matchAll(regex)];
// Map matches to FoundEntity format
return matches.map((match) => ({
entity: entity.name,
value,
start: match.index!,
end: match.index! + term.length,
confidence: 1,
}));
})
.shift();
})
.filter((v) => !!v) || []) as NLU.ParseEntity[];
}
/**
* Finds entities in a given text based on regex patterns (stored in each value's `metadata.pattern`).
*
* @param text - Input text to evaluate.
* @param entity - NlpEntityFull whose values carry a regex in `metadata.pattern` plus optional preprocessing flags.
* @returns An array of matched entities with value, canonical value, position, and confidence.
*/
public extractPatternBasedSlots(
text: string,
entity: NlpEntityFull,
): NLU.ParseEntity[] {
if (!entity.values?.length) {
this.logger.warn('NLP entity has no values');
return [];
}
return (entity.values
.flatMap((nlpValue) => {
const pattern = nlpValue.metadata?.pattern;
if (!pattern) {
this.logger.error('Missing NLP regex pattern');
return [];
}
let regex: RegExp;
try {
const shouldWrap = nlpValue.metadata?.wordBoundary;
regex = new RegExp(shouldWrap ? `\\b${pattern}\\b` : pattern, 'gi');
} catch {
this.logger.error('Invalid NLP regex pattern');
return [];
}
const matches = [...text.matchAll(regex)];
return matches.map((match) => {
let value = match[0];
// Apply preprocessing if needed
if (nlpValue.metadata?.removeSpaces) {
value = value.replace(/\s+/g, '');
}
if (nlpValue.metadata?.toLowerCase) {
value = value.toLowerCase();
}
if (nlpValue.metadata?.stripDiacritics) {
value = value.normalize('NFD').replace(/\p{Diacritic}/gu, '');
}
return {
entity: entity.name,
value,
canonicalValue: nlpValue.value,
start: match.index!,
end: match.index! + match[0].length,
confidence: 1,
};
});
})
.filter((v) => !!v) || []) as NLU.ParseEntity[];
}
/**
* Extracts slot values from text based on the specified lookup strategy.
*
* This function supports deterministic slot filling by scanning the input text using either
* keyword-based or pattern-based entity recognition, depending on the provided lookup strategy.
*
* - For `keywords`: It uses exact term and synonym matching with word boundaries.
* - For `pattern`: It uses the regular expression defined in each entity value (stored in `metadata.pattern`),
* optionally applying preprocessing such as `removeSpaces`, `toLowerCase`, and `stripDiacritics`.
*
* @param text - The input text from which to extract slot values.
* @param entities - An array of NlpEntityFull objects, each containing slot values and metadata.
*
* @returns An array of `ParseEntity` objects containing the entity name, matched value, position, and confidence.
*/
public runDeterministicSlotFilling(
text: string,
entities: NlpEntityFull[],
): NLU.ParseEntity[] {
return entities.flatMap((e) => {
if (e.lookups.includes('keywords')) {
return this.extractKeywordBasedSlots(text, e);
} else if (e.lookups.includes('pattern')) {
return this.extractPatternBasedSlots(text, e);
} else {
return [];
}
});
}
}

View File

@ -21,6 +21,9 @@ export namespace NLU {
confidence: number;
start?: number;
end?: number;
// When the lookup strategy is either 'keywords' or 'pattern', the canonical value
// holds the underlying NlpValue.value, since the matched text may be a synonym (expression) or a regex match
canonicalValue?: string;
}
export interface ParseEntities {

View File

@ -95,7 +95,7 @@ describe('NlpValueController', () => {
entity: nlpEntities[0].id,
value: 'valuetest',
expressions: ['synonym1', 'synonym2'],
metadata: { firstkey: 'firstvalue', secondKey: 1995 },
metadata: {},
builtin: false,
doc: '',
};

View File

@ -71,14 +71,17 @@ export class NlpValueController extends BaseController<
async create(
@Body() createNlpValueDto: NlpValueCreateDto,
): Promise<NlpValue> {
const nlpEntity = createNlpValueDto.entity
? await this.nlpEntityService.findOne(createNlpValueDto.entity!)
: null;
this.validate({
dto: createNlpValueDto,
allowedIds: {
entity: createNlpValueDto.entity
? (await this.nlpEntityService.findOne(createNlpValueDto.entity))?.id
: null,
entity: nlpEntity?.id,
},
});
return await this.nlpValueService.create(createNlpValueDto);
}
@ -171,6 +174,17 @@ export class NlpValueController extends BaseController<
@Param('id') id: string,
@Body() updateNlpValueDto: NlpValueUpdateDto,
): Promise<NlpValue> {
const nlpEntity = updateNlpValueDto.entity
? await this.nlpEntityService.findOne(updateNlpValueDto.entity!)
: null;
this.validate({
dto: updateNlpValueDto,
allowedIds: {
entity: nlpEntity?.id,
},
});
return await this.nlpValueService.updateOne(id, updateNlpValueDto);
}

View File

@ -21,7 +21,7 @@ import {
import { DtoConfig } from '@/utils/types/dto.types';
export type Lookup = 'keywords' | 'trait' | 'free-text';
import { Lookup, LookupStrategy } from '../schemas/types';
export class NlpEntityCreateDto {
@ApiProperty({ description: 'Name of the nlp entity', type: String })
@ -33,10 +33,10 @@ export class NlpEntityCreateDto {
@ApiPropertyOptional({
isArray: true,
enum: ['keywords', 'trait', 'free-text'],
enum: Object.values(LookupStrategy),
})
@IsArray()
@IsIn(['keywords', 'trait', 'free-text'], { each: true })
@IsIn(Object.values(LookupStrategy), { each: true })
@IsOptional()
lookups?: Lookup[];

View File

@ -19,6 +19,8 @@ import {
import { DtoConfig } from '@/utils/types/dto.types';
import { IsObjectId } from '@/utils/validation-rules/is-object-id';
import { NlpMetadata } from '../schemas/types';
export class NlpValueCreateDto {
@ApiProperty({ description: 'Nlp value', type: String })
@IsString()
@ -37,7 +39,7 @@ export class NlpValueCreateDto {
@ApiPropertyOptional({ description: 'Nlp value metadata', type: Object })
@IsOptional()
@IsObject()
metadata?: Record<string, any>;
metadata?: NlpMetadata;
@ApiPropertyOptional({ description: 'Nlp Value Description', type: String })
@IsString()
@ -82,6 +84,11 @@ export class NlpValueUpdateDto {
@IsObjectId({ message: 'Entity must be a valid ObjectId' })
entity?: string | null;
@ApiPropertyOptional({ description: 'Nlp Metadata', type: Object })
@IsObject()
@IsOptional()
metadata?: NlpMetadata;
@ApiPropertyOptional({ description: 'Nlp Value Description', type: String })
@IsString()
@IsOptional()

View File

@ -16,10 +16,8 @@ import {
THydratedDocument,
} from '@/utils/types/filter.types';
import { Lookup } from '../dto/nlp-entity.dto';
import { NlpValue } from './nlp-value.schema';
import { NlpEntityMap } from './types';
import { Lookup, LookupStrategy, NlpEntityMap } from './types';
@Schema({ timestamps: true })
export class NlpEntityStub extends BaseSchema {
@ -41,9 +39,18 @@ export class NlpEntityStub extends BaseSchema {
name: string;
/**
* Lookup strategy can contain : keywords, trait, free-text
* Lookup strategy
*/
@Prop({ type: [String], default: ['keywords'] })
@Prop({
type: [String],
default: ['keywords'],
validate: {
validator: (lookups: string[]) =>
lookups.every((lookup) =>
Object.values(LookupStrategy).includes(lookup as LookupStrategy),
),
},
})
lookups: Lookup[];
/**

View File

@ -19,9 +19,9 @@ import {
import { TStubOrFull } from '@/utils/types/format.types';
import { NlpEntity, NlpEntityFull } from './nlp-entity.schema';
import { NlpValueMap } from './types';
import { NlpMetadata, NlpValueMap } from './types';
@Schema({ timestamps: true })
@Schema({ timestamps: true, minimize: false })
export class NlpValueStub extends BaseSchema {
/**
* This value content.
@ -44,8 +44,8 @@ export class NlpValueStub extends BaseSchema {
/**
* Metadata is additional data that can be associated with these values; most of the time it contains system values or ids (e.g. value: "coffee", metadata: "item_11").
*/
@Prop({ type: JSON, default: {} })
metadata: Record<string, any>;
@Prop({ type: JSON, default: () => ({}) })
metadata?: NlpMetadata;
/**
* Description of the entity's value purpose.

View File

@ -9,6 +9,15 @@
import { NlpEntityFull, NlpEntityStub } from './nlp-entity.schema';
import { NlpValueStub } from './nlp-value.schema';
export enum LookupStrategy {
keywords = 'keywords',
trait = 'trait',
free_text = 'free-text',
pattern = 'pattern',
}
export type Lookup = `${LookupStrategy}`;
export interface NlpSampleEntityValue {
entity: string; // entity name
value: string; // entity value
@ -27,3 +36,12 @@ export enum NlpSampleState {
}
export type NlpCacheMap = Map<string, NlpEntityFull>;
export type NlpMetadata = {
// Required when the entity's lookups include "pattern"
pattern?: string;
wordBoundary?: boolean;
removeSpaces?: boolean;
toLowerCase?: boolean;
stripDiacritics?: boolean;
};
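As an illustration (the entity and value names are made up), a value attached to a 'pattern'-lookup entity would typically carry its regex and preprocessing flags in metadata, matching the NlpValueCreateDto fields shown earlier in this PR:

// Hypothetical create payload for a pattern-based NLP value
const phoneNumberValue = {
  entity: '<nlp-entity-id>', // id of an entity whose lookups include 'pattern'
  value: 'phone_number', // canonical value, reported as `canonicalValue` on matches
  expressions: [],
  doc: 'Phone numbers extracted deterministically',
  metadata: {
    pattern: '\\+?\\d{8,15}',
    wordBoundary: true,
    removeSpaces: true,
    toLowerCase: false,
    stripDiacritics: false,
  },
};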

View File

@ -15,14 +15,14 @@ import { NLP_MAP_CACHE_KEY } from '@/utils/constants/cache';
import { Cacheable } from '@/utils/decorators/cacheable.decorator';
import { BaseService } from '@/utils/generics/base-service';
import { Lookup, NlpEntityDto } from '../dto/nlp-entity.dto';
import { NlpEntityDto } from '../dto/nlp-entity.dto';
import { NlpEntityRepository } from '../repositories/nlp-entity.repository';
import {
NlpEntity,
NlpEntityFull,
NlpEntityPopulate,
} from '../schemas/nlp-entity.schema';
import { NlpCacheMap, NlpSampleEntityValue } from '../schemas/types';
import { Lookup, NlpCacheMap, NlpSampleEntityValue } from '../schemas/types';
import { NlpValueService } from './nlp-value.service';

View File

@ -51,6 +51,7 @@ export class NlpService {
.filter(({ entity }) => nlpMap.has(entity))
.map((e) => {
const entity = nlpMap.get(e.entity)!;
return {
...e,
score: e.confidence * (entity.weight || 1),

View File

@ -294,6 +294,14 @@ export const mockNlpGreetingAnyNamePatterns: NlpPattern[] = [
},
];
export const mockNlpFirstNamePatterns: NlpPattern[] = [
{
entity: 'firstname',
match: 'value',
value: 'jhon',
},
];
export const mockModifiedNlpBlock: BlockFull = {
...baseBlockInstance,
name: 'Modified Mock Nlp',

View File

@ -44,3 +44,14 @@ export const mockNlpGreetingFullNameEntities: NLU.ParseEntities = {
},
],
};
export const mockNlpFirstNameEntities: NLU.ParseEntities = {
entities: [
{
entity: 'firstname',
value: 'jhonny',
canonicalValue: 'jhon',
confidence: 0.75,
},
],
};

View File

@ -351,6 +351,10 @@
"doc": "Documentation",
"builtin": "Built-in?",
"weight": "Weight",
"word_boundary": "Word boundary",
"remove_spaces": "Remove spaces",
"to_lower_case": "Lowercase",
"strip_diacritics": "Strip diacritics",
"dataset": "Dataset",
"yes": "Yes",
"no": "No",

View File

@ -350,6 +350,10 @@
"synonyms": "Synonymes",
"doc": "Documentation",
"weight": "Poids",
"word_boundary": "Délimiter (Mot)",
"remove_spaces": "Supprimer les espaces",
"to_lower_case": "Mettre en minucules",
"strip_diacritics": "Supprimer les accents",
"builtin": "Intégré?",
"dataset": "Données",
"yes": "Oui",

View File

@ -1,5 +1,5 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
* Copyright © 2025 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -7,17 +7,18 @@
*/
import { InputAdornment, TextFieldProps } from "@mui/material";
import React, { ForwardedRef, forwardRef } from "react";
import { ForwardedRef, forwardRef } from "react";
import { Input } from "./Input";
export const RegexInput = forwardRef(
(
{
onChange,
value,
flags = ["g", "i"],
...props
}: TextFieldProps & { value: string; onChange: (value: string) => void },
}: TextFieldProps & {
flags?: string[];
},
ref: ForwardedRef<HTMLDivElement>,
) => {
return (
@ -26,15 +27,13 @@ export const RegexInput = forwardRef(
{...props}
InputProps={{
startAdornment: <InputAdornment position="start">/</InputAdornment>,
endAdornment: <InputAdornment position="end">/gi</InputAdornment>,
}}
value={value}
onChange={(e) => {
onChange(`/${e.target.value}/`);
endAdornment: (
<InputAdornment position="end">/{flags.join("")}</InputAdornment>
),
}}
/>
);
},
);
RegexInput.displayName = "Input";
RegexInput.displayName = "RegexInput";

View File

@ -6,11 +6,22 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Box, CircularProgress, Input, styled } from "@mui/material";
import { Box, CircularProgress, Input, styled, Tooltip } from "@mui/material";
import randomSeed from "random-seed";
import { FC, useCallback, useEffect, useMemo, useRef, useState } from "react";
import {
CSSProperties,
FC,
useCallback,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import { INlpDatasetKeywordEntity } from "../../types/nlp-sample.types";
import {
INlpDatasetKeywordEntity,
INlpDatasetPatternEntity,
} from "../../types/nlp-sample.types";
const SelectableBox = styled(Box)({
position: "relative",
@ -40,22 +51,62 @@ const COLORS = [
{ name: "orange", bg: "#E6A23C" },
];
const UNKNOWN_COLOR = { name: "grey", bg: "#aaaaaa" };
const TODAY = new Date().toDateString();
const getColor = (no: number) => {
const rand = randomSeed.create(TODAY);
const NOW = (+new Date()).toString();
const getColor = (no: number, seedPrefix: string = "") => {
const rand = randomSeed.create(seedPrefix + NOW);
const startIndex = rand(COLORS.length);
const color =
no < 0 ? UNKNOWN_COLOR : COLORS[(startIndex + no) % COLORS.length];
return {
backgroundColor: color.bg,
opacity: 0.3,
opacity: 0.2,
};
};
interface INlpSelectionEntity {
start: string;
entity: string;
value: string;
end: string;
style: CSSProperties;
}
const SelectionEntityBackground: React.FC<{
selectionEntity: INlpSelectionEntity;
}> = ({ selectionEntity: e }) => {
return (
<div className="highlight">
<span>{e.start}</span>
<Tooltip
open={true}
placement="top"
title={e.entity}
arrow
componentsProps={{
tooltip: {
sx: {
color: "#FFF",
backgroundColor: e.style.backgroundColor,
},
},
arrow: {
sx: {
color: e.style.backgroundColor,
},
},
}}
>
<span style={e.style}>{e.value}</span>
</Tooltip>
<span>{e.end}</span>
</div>
);
};
type SelectableProps = {
defaultValue?: string;
entities?: INlpDatasetKeywordEntity[];
keywordEntities?: INlpDatasetKeywordEntity[];
patternEntities?: INlpDatasetPatternEntity[];
placeholder?: string;
onSelect: (str: string, start: number, end: number) => void;
onChange: (sample: {
@ -65,9 +116,27 @@ type SelectableProps = {
loading?: boolean;
};
const buildSelectionEntities = (
text: string,
entities: INlpDatasetKeywordEntity[] | INlpDatasetPatternEntity[],
): INlpSelectionEntity[] => {
return entities?.map((e, index) => {
const start = e.start ? e.start : text.indexOf(e.value);
const end = e.end ? e.end : start + e.value.length;
return {
start: text.substring(0, start),
entity: e.entity,
value: text.substring(start, end),
end: text.substring(end),
style: getColor(e.entity ? index : -1, e.entity),
};
});
};
const Selectable: FC<SelectableProps> = ({
defaultValue,
entities = [],
keywordEntities = [],
patternEntities = [],
placeholder = "",
onChange,
onSelect,
@ -76,20 +145,13 @@ const Selectable: FC<SelectableProps> = ({
const [text, setText] = useState(defaultValue || "");
const editableRef = useRef<HTMLDivElement>(null);
const selectableRef = useRef(null);
const selectedEntities = useMemo(
() =>
entities?.map((e, index) => {
const start = e.start ? e.start : text.indexOf(e.value);
const end = e.end ? e.end : start + e.value.length;
return {
start: text.substring(0, start),
value: text.substring(start, end),
end: text.substring(end),
style: getColor(e.entity ? index : -1),
};
}),
[entities, text],
const selectedKeywordEntities = useMemo(
() => buildSelectionEntities(text, keywordEntities),
[keywordEntities, text],
);
const selectedPatternEntities = useMemo(
() => buildSelectionEntities(text, patternEntities),
[patternEntities, text],
);
useEffect(() => {
@ -143,7 +205,7 @@ const Selectable: FC<SelectableProps> = ({
const handleTextChange = useCallback(
(newText: string) => {
const oldText = text;
const oldEntities = [...entities];
const oldEntities = [...keywordEntities];
const newEntities: INlpDatasetKeywordEntity[] = [];
const findCharDiff = (oldStr: string, newStr: string): number => {
const minLength = Math.min(oldStr.length, newStr.length);
@ -187,17 +249,22 @@ const Selectable: FC<SelectableProps> = ({
onChange({ text: newText, entities: newEntities });
},
[text, onChange, entities],
[text, onChange, keywordEntities],
);
return (
<SelectableBox ref={selectableRef}>
{selectedEntities?.map((e, idx) => (
<div key={idx} className="highlight">
<span>{e.start}</span>
<span style={e.style}>{e.value}</span>
<span>{e.end}</span>
</div>
{selectedPatternEntities?.map((e, idx) => (
<SelectionEntityBackground
key={`${e.entity}_${e.value}_${idx}`}
selectionEntity={e}
/>
))}
{selectedKeywordEntities?.map((e, idx) => (
<SelectionEntityBackground
key={`${e.entity}_${e.value}_${idx}`}
selectionEntity={e}
/>
))}
<Input
ref={editableRef}

View File

@ -27,7 +27,7 @@ import { ComponentFormProps } from "@/types/common/dialogs.types";
import {
INlpEntity,
INlpEntityAttributes,
NlpLookups,
LookupStrategy,
} from "@/types/nlp-entity.types";
export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
@ -94,27 +94,30 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
<Wrapper onSubmit={handleSubmit(onSubmitForm)} {...WrapperProps}>
<form onSubmit={handleSubmit(onSubmitForm)}>
<ContentContainer>
{!nlpEntity ? (
<ContentItem>
<FormControl>
<FormLabel>{t("label.lookup_strategies")}</FormLabel>
<RadioGroup
row
{...register("lookups")}
defaultValue="keywords"
>
{Object.values(NlpLookups).map((nlpLookup, index) => (
<FormControlLabel
key={index}
value={nlpLookup}
control={<Radio {...register("lookups.0")} />}
label={nlpLookup}
/>
))}
</RadioGroup>
</FormControl>
</ContentItem>
) : null}
<ContentItem>
<FormControl>
<FormLabel>{t("label.lookup_strategies")}</FormLabel>
<RadioGroup
row
{...register("lookups")}
defaultValue={nlpEntity ? nlpEntity.lookups[0] : "keywords"}
>
{Object.values(LookupStrategy).map((nlpLookup, index) => (
<FormControlLabel
key={index}
value={nlpLookup}
control={
<Radio
disabled={!!nlpEntity}
{...register("lookups.0")}
/>
}
label={nlpLookup}
/>
))}
</RadioGroup>
</FormControl>
</ContentItem>
<ContentItem>
<Input
label={t("label.name")}
@ -131,10 +134,11 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
label={t("label.doc")}
{...register("doc")}
multiline={true}
rows={3}
disabled={nlpEntity?.builtin}
/>
</ContentItem>
<ContentItem>
<ContentItem maxWidth="25%">
<Input
label={t("label.weight")}
{...register("weight", {
@ -157,8 +161,8 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
}}
error={!!errors.weight}
helperText={errors.weight?.message}
/>
</ContentItem>
/>
</ContentItem>
</ContentContainer>
</form>
</Wrapper>

View File

@ -30,15 +30,16 @@ import { ContentContainer, ContentItem } from "@/app-components/dialogs";
import AutoCompleteEntitySelect from "@/app-components/inputs/AutoCompleteEntitySelect";
import AutoCompleteSelect from "@/app-components/inputs/AutoCompleteSelect";
import Selectable from "@/app-components/inputs/Selectable";
import { useFind } from "@/hooks/crud/useFind";
import { useGetFromCache } from "@/hooks/crud/useGet";
import { useApiClient } from "@/hooks/useApiClient";
import { useNlp } from "@/hooks/useNlp";
import { useTranslate } from "@/hooks/useTranslate";
import { EntityType, Format } from "@/services/types";
import { ILanguage } from "@/types/language.types";
import { INlpEntity } from "@/types/nlp-entity.types";
import {
INlpDatasetKeywordEntity,
INlpDatasetPatternEntity,
INlpDatasetSample,
INlpDatasetTraitEntity,
INlpSampleFormAttributes,
@ -56,39 +57,32 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
submitForm,
}) => {
const { t } = useTranslate();
const { data: entities, refetch: refetchEntities } = useFind(
{
entity: EntityType.NLP_ENTITY,
format: Format.FULL,
},
{
hasCount: false,
},
);
const {
allTraitEntities,
allKeywordEntities,
allPatternEntities,
refetchAllEntities,
} = useNlp();
const getNlpValueFromCache = useGetFromCache(EntityType.NLP_VALUE);
// eslint-disable-next-line react-hooks/exhaustive-deps
const defaultValues: INlpSampleFormAttributes = useMemo(
() => ({
type: sample?.type || NlpSampleType.train,
text: sample?.text || "",
language: sample?.language || null,
traitEntities: (entities || [])
.filter(({ lookups }) => {
return lookups.includes("trait");
})
.map((e) => {
return {
entity: e.name,
value: sample
? sample.entities.find(({ entity }) => entity === e.name)?.value
: "",
} as INlpDatasetTraitEntity;
}),
keywordEntities: (sample?.entities || []).filter(
(e) => "start" in e && typeof e.start === "number",
traitEntities: [...allTraitEntities.values()].map((e) => {
return {
entity: e.name,
value:
(sample?.entities || []).find((se) => se.entity === e.name)
?.value || "",
};
}) as INlpDatasetTraitEntity[],
keywordEntities: (sample?.entities || []).filter((e) =>
allKeywordEntities.has(e.entity),
) as INlpDatasetKeywordEntity[],
}),
[sample, entities],
// eslint-disable-next-line react-hooks/exhaustive-deps
[allKeywordEntities, allTraitEntities, JSON.stringify(sample)],
);
const { handleSubmit, control, register, reset, setValue, watch } =
useForm<INlpSampleFormAttributes>({
@ -97,6 +91,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
const currentText = watch("text");
const currentType = watch("type");
const { apiClient } = useApiClient();
const [patternEntities, setPatternEntities] = useState<
INlpDatasetPatternEntity[]
>([]);
const { fields: traitEntities, update: updateTraitEntity } = useFieldArray({
control,
name: "traitEntities",
@ -122,22 +119,29 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
queryFn: async () => {
return await apiClient.predictNlp(currentText);
},
onSuccess: (result) => {
const traitEntities: INlpDatasetTraitEntity[] = result.entities.filter(
(e) => !("start" in e && "end" in e) && e.entity !== "language",
);
const keywordEntities = result.entities.filter(
(e) => "start" in e && "end" in e,
onSuccess: (prediction) => {
const predictedTraitEntities: INlpDatasetTraitEntity[] =
prediction.entities.filter((e) => allTraitEntities.has(e.entity));
const predictedKeywordEntities = prediction.entities.filter((e) =>
allKeywordEntities.has(e.entity),
) as INlpDatasetKeywordEntity[];
const language = result.entities.find(
const predictedPatternEntities = prediction.entities.filter((e) =>
allPatternEntities.has(e.entity),
) as INlpDatasetKeywordEntity[];
const language = prediction.entities.find(
({ entity }) => entity === "language",
);
setValue("language", language?.value || "");
setValue("traitEntities", traitEntities);
setValue("keywordEntities", keywordEntities);
setValue("traitEntities", predictedTraitEntities);
setValue("keywordEntities", predictedKeywordEntities);
setPatternEntities(predictedPatternEntities);
},
enabled: !sample && !!currentText,
enabled:
// Inbox sample update
sample?.type === "inbox" ||
// New sample
(!sample && !!currentText),
});
const findInsertIndex = (newItem: INlpDatasetKeywordEntity): number => {
const index = keywordEntities.findIndex(
@ -153,7 +157,7 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
} | null>(null);
const onSubmitForm = (form: INlpSampleFormAttributes) => {
submitForm(form);
refetchEntities();
refetchAllEntities();
reset({
...defaultValues,
text: "",
@ -203,7 +207,8 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
<ContentItem>
<Selectable
defaultValue={currentText}
entities={keywordEntities}
keywordEntities={keywordEntities}
patternEntities={patternEntities}
placeholder={t("placeholder.nlp_sample_text")}
onSelect={(selection, start, end) => {
setSelection({
@ -223,11 +228,13 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
end,
})),
);
setPatternEntities([]);
}}
loading={isLoading}
/>
</ContentItem>
<Box display="flex" flexDirection="column">
{/* Language selection */}
<ContentItem
display="flex"
flexDirection="row"
@ -261,6 +268,7 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
}}
/>
</ContentItem>
{/* Trait entities */}
{traitEntities.map((traitEntity, index) => (
<ContentItem
key={traitEntity.id}
@ -275,13 +283,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
control={control}
render={({ field }) => {
const { onChange: _, value, ...rest } = field;
const entity = entities?.find(
({ name }) => name === traitEntity.entity,
);
const options =
entity?.values.map(
(v) => getNlpValueFromCache(v) as INlpValue,
) || [];
const options = (
allTraitEntities.get(traitEntity.entity)?.values || []
).map((v) => getNlpValueFromCache(v)!);
return (
<>
@ -318,7 +322,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
</ContentItem>
))}
</Box>
{/* Keyword entities */}
<Box display="flex" flexDirection="column">
{keywordEntities.map((keywordEntity, index) => (
<ContentItem
@ -335,22 +341,16 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
control={control}
render={({ field }) => {
const { onChange: _, ...rest } = field;
const options = [...allKeywordEntities.values()];
return (
<AutoCompleteEntitySelect<INlpEntity, "name", false>
<AutoCompleteSelect<INlpEntity, "name", false>
fullWidth={true}
searchFields={["name"]}
entity={EntityType.NLP_ENTITY}
format={Format.FULL}
options={options}
idKey="name"
labelKey="name"
label={t("label.nlp_entity")}
multiple={false}
preprocess={(options) => {
return options.filter(({ lookups }) =>
lookups.includes("keywords"),
);
}}
onChange={(_e, selected, ..._) => {
updateKeywordEntity(index, {
...keywordEntities[index],
@ -367,13 +367,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
control={control}
render={({ field }) => {
const { onChange: _, value, ...rest } = field;
const entity = entities?.find(
({ name }) => name === keywordEntity.entity,
);
const options =
entity?.values.map(
(v) => getNlpValueFromCache(v) as INlpValue,
) || [];
const options = (
allKeywordEntities.get(keywordEntity.entity)?.values || []
).map((v) => getNlpValueFromCache(v)!);
return (
<AutoCompleteSelect<

View File

@ -6,6 +6,7 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { FormControlLabel, Switch } from "@mui/material";
import { useRouter } from "next/router";
import { FC, Fragment, useEffect } from "react";
import { Controller, useForm } from "react-hook-form";
@ -13,6 +14,7 @@ import { Controller, useForm } from "react-hook-form";
import { ContentContainer, ContentItem } from "@/app-components/dialogs";
import { Input } from "@/app-components/inputs/Input";
import MultipleInput from "@/app-components/inputs/MultipleInput";
import { RegexInput } from "@/app-components/inputs/RegexInput";
import { useCreate } from "@/hooks/crud/useCreate";
import { useGet } from "@/hooks/crud/useGet";
import { useUpdate } from "@/hooks/crud/useUpdate";
@ -20,8 +22,29 @@ import { useToast } from "@/hooks/useToast";
import { useTranslate } from "@/hooks/useTranslate";
import { EntityType, Format } from "@/services/types";
import { ComponentFormProps } from "@/types/common/dialogs.types";
import { INlpEntity, NlpLookups } from "@/types/nlp-entity.types";
import {
INlpEntity,
INlpMetadata,
LookupStrategy,
} from "@/types/nlp-entity.types";
import { INlpValue, INlpValueAttributes } from "@/types/nlp-value.types";
import { isRegex } from "@/utils/string";
const getDefaultNlpMetadata = (
nlpEntity: INlpEntity | undefined,
): INlpMetadata => {
if (nlpEntity?.lookups.includes(LookupStrategy.pattern)) {
return {
pattern: "",
wordBoundary: true,
removeSpaces: false,
toLowerCase: false,
stripDiacritics: false,
};
} else {
return {};
}
};
export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
data: { defaultValues: nlpValue, presetValues: nlpEntity },
@ -36,7 +59,8 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
entity: EntityType.NLP_ENTITY,
format: Format.FULL,
});
const canHaveSynonyms = nlpEntity?.lookups.includes(NlpLookups.keywords);
const canHaveSynonyms = nlpEntity?.lookups.includes(LookupStrategy.keywords);
const isPattern = nlpEntity?.lookups.includes(LookupStrategy.pattern);
const { mutate: createNlpValue } = useCreate(EntityType.NLP_VALUE, {
onError: () => {
rest.onError?.();
@ -73,15 +97,9 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
value: nlpValue?.value || "",
doc: nlpValue?.doc || "",
expressions: nlpValue?.expressions || [],
metadata: nlpValue?.metadata || getDefaultNlpMetadata(nlpEntity),
},
});
const validationRules = {
value: {
required: t("message.value_is_required"),
},
name: {},
description: {},
};
const onSubmitForm = async (params: INlpValueAttributes) => {
if (nlpValue) {
updateNlpValue({ id: nlpValue.id, params });
@ -96,11 +114,17 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
value: nlpValue.value,
expressions: nlpValue.expressions,
doc: nlpValue.doc,
metadata: nlpValue.metadata,
});
} else {
reset();
reset({
value: "",
expressions: [],
doc: "",
metadata: getDefaultNlpMetadata(nlpEntity),
});
}
}, [nlpValue, reset]);
}, [nlpValue, nlpEntity, reset]);
return (
<Wrapper onSubmit={handleSubmit(onSubmitForm)} {...WrapperProps}>
@ -112,15 +136,87 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
error={!!errors.value}
required
autoFocus
helperText={errors.value ? errors.value.message : null}
{...register("value", validationRules.value)}
helperText={errors.value?.message}
{...register("value", {
required: t("message.value_is_required"),
})}
/>
</ContentItem>
{isPattern && (
<>
<ContentItem>
<RegexInput
{...register("metadata.pattern", {
required: t("message.regex_is_invalid"),
validate: (pattern: string | undefined) => {
return isRegex(pattern)
? true
: t("message.regex_is_invalid");
},
})}
helperText={errors.metadata?.pattern?.message}
error={!!errors.metadata?.pattern}
label={t("label.regex")}
placeholder={t("placeholder.pattern")}
flags={["i"]}
/>
</ContentItem>
<ContentItem>
<Controller
name="metadata.wordBoundary"
control={control}
render={({ field }) => (
<FormControlLabel
control={<Switch {...field} checked={field.value} />}
label={t("label.word_boundary")}
/>
)}
/>
</ContentItem>
<ContentItem>
<Controller
name="metadata.removeSpaces"
control={control}
render={({ field }) => (
<FormControlLabel
control={<Switch {...field} checked={field.value} />}
label={t("label.remove_spaces")}
/>
)}
/>
</ContentItem>
<ContentItem>
<Controller
name="metadata.toLowerCase"
control={control}
render={({ field }) => (
<FormControlLabel
control={<Switch {...field} checked={field.value} />}
label={t("label.to_lower_case")}
/>
)}
/>
</ContentItem>
<ContentItem>
<Controller
name="metadata.stripDiacritics"
control={control}
render={({ field }) => (
<FormControlLabel
control={<Switch {...field} checked={field.value} />}
label={t("label.strip_diacritics")}
/>
)}
/>
</ContentItem>
</>
)}
<ContentItem>
<Input
label={t("label.doc")}
{...register("doc")}
multiline={true}
rows={3}
/>
</ContentItem>

View File

@ -22,15 +22,18 @@ import {
PatternType,
PayloadPattern,
} from "@/types/block.types";
import {
extractRegexBody,
formatWithSlashes,
isRegex,
isRegexString,
} from "@/utils/string";
import { OutcomeInput } from "./OutcomeInput";
import { PostbackInput } from "./PostbackInput";
const isRegex = (str: Pattern) => {
return typeof str === "string" && str.startsWith("/") && str.endsWith("/");
};
const getType = (pattern: Pattern): PatternType => {
if (isRegex(pattern)) {
const getPatternType = (pattern: Pattern): PatternType => {
if (isRegexString(pattern)) {
return "regex";
} else if (Array.isArray(pattern)) {
return "nlp";
@ -69,7 +72,7 @@ const PatternInput: FC<PatternInputProps> = ({
formState: { errors },
} = useFormContext<IBlockAttributes>();
const [pattern, setPattern] = useState<Pattern>(value);
const patternType = getType(value);
const patternType = getPatternType(value);
const registerInput = (
errorMessage: string,
idx: number,
@ -122,23 +125,15 @@ const PatternInput: FC<PatternInputProps> = ({
<RegexInput
{...registerInput(t("message.regex_is_empty"), idx, {
validate: (pattern) => {
try {
const parsedPattern = new RegExp(pattern.slice(1, -1));
if (String(parsedPattern) !== pattern) {
throw t("message.regex_is_invalid");
}
return true;
} catch (_e) {
return t("message.regex_is_invalid");
}
return isRegex(extractRegexBody(pattern))
? true
: t("message.regex_is_invalid");
},
setValueAs: (v) => (isRegex(v) ? v : `/${v}/`),
setValueAs: (v) => (isRegexString(v) ? v : formatWithSlashes(v)),
})}
value={extractRegexBody(value)}
label={t("label.regex")}
value={value.slice(1, -1)}
onChange={(v) => onChange(v)}
onChange={(e) => onChange(formatWithSlashes(e.target.value))}
required
/>
) : null}

View File

@ -0,0 +1,55 @@
/*
* Copyright © 2025 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { useMemo } from "react";
import { EntityType, Format } from "@/services/types";
import { INlpEntity, Lookup } from "@/types/nlp-entity.types";
import { useFind } from "./crud/useFind";
const buildNlpEntityMap = (entities: INlpEntity[], lookup: Lookup) => {
const initialMap = new Map<string, INlpEntity>();
return entities
.filter(({ lookups }) => lookups.includes(lookup))
.reduce((acc, curr) => {
acc.set(curr.name, curr);
return acc;
}, initialMap);
};
export const useNlp = () => {
const { data: allEntities, refetch: refetchAllEntities } = useFind(
{
entity: EntityType.NLP_ENTITY,
format: Format.FULL,
},
{
hasCount: false,
},
);
const allTraitEntities = useMemo(() => {
return buildNlpEntityMap(allEntities || [], "trait");
}, [allEntities]);
const allKeywordEntities = useMemo(() => {
return buildNlpEntityMap(allEntities || [], "keywords");
}, [allEntities]);
const allPatternEntities = useMemo(() => {
return buildNlpEntityMap(allEntities || [], "pattern");
}, [allEntities]);
return {
allTraitEntities,
allKeywordEntities,
allPatternEntities,
refetchAllEntities,
};
};
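A short usage sketch (the consumer below is hypothetical; the .has() checks mirror how NlpDatasetSample uses the hook elsewhere in this PR):

import { useNlp } from "@/hooks/useNlp";

// Hypothetical consumer: bucket predicted entities by their lookup strategy.
const useEntityBuckets = (predicted: { entity: string; value: string }[]) => {
  const { allTraitEntities, allKeywordEntities, allPatternEntities } = useNlp();

  return {
    traits: predicted.filter((e) => allTraitEntities.has(e.entity)),
    keywords: predicted.filter((e) => allKeywordEntities.has(e.entity)),
    patterns: predicted.filter((e) => allPatternEntities.has(e.entity)),
  };
};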

View File

@ -11,7 +11,23 @@ import { EntityType, Format } from "@/services/types";
import { IBaseSchema, IFormat, OmitPopulate } from "./base.types";
import { INlpValue } from "./nlp-value.types";
export type Lookup = "keywords" | "trait" | "free-text";
export enum LookupStrategy {
keywords = "keywords",
trait = "trait",
// free_text = "free-text",
pattern = "pattern",
}
export type Lookup = `${LookupStrategy}`;
export interface INlpMetadata {
// Required when the entity's lookups include "pattern"
pattern?: string;
wordBoundary?: boolean;
removeSpaces?: boolean;
toLowerCase?: boolean;
stripDiacritics?: boolean;
}
export interface INlpEntityAttributes {
foreign_id?: string;
@ -22,11 +38,6 @@ export interface INlpEntityAttributes {
weight?: number;
}
export enum NlpLookups {
keywords = "keywords",
trait = "trait",
}
export interface INlpEntityStub
extends IBaseSchema,
OmitPopulate<INlpEntityAttributes, EntityType.NLP_ENTITY> {}

View File

@ -1,5 +1,5 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
* Copyright © 2025 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -52,6 +52,8 @@ export interface INlpDatasetKeywordEntity extends INlpDatasetTraitEntity {
end: number;
}
export interface INlpDatasetPatternEntity extends INlpDatasetKeywordEntity {}
export interface INlpSampleFormAttributes
extends Omit<INlpSampleAttributes, "entities"> {
traitEntities: INlpDatasetTraitEntity[];

View File

@ -9,7 +9,7 @@
import { Format } from "@/services/types";
import { IBaseSchema, IFormat } from "./base.types";
import { INlpEntity } from "./nlp-entity.types";
import { INlpEntity, INlpMetadata } from "./nlp-entity.types";
export interface INlpValueAttributes {
entity: string;
@ -17,7 +17,7 @@ export interface INlpValueAttributes {
value: string;
doc?: string;
expressions?: string[];
metadata?: Record<string, any>;
metadata?: INlpMetadata;
builtin?: boolean;
nlpSamplesCount?: number;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
* Copyright © 2025 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -18,3 +18,49 @@ export const slugify = (str: string) => {
export const getNamespace = (extensionName: string) => {
return extensionName.replaceAll("-", "_");
};
/**
* Checks if the string starts/ends with slashes
*/
export const isRegexString = (str: any) => {
return typeof str === "string" && str.startsWith("/") && str.endsWith("/");
};
/**
* Ensures value is wrapped in slashes: /value/
*/
export const formatWithSlashes = (value: string): string => {
if (!value) return "/";
if (!value.startsWith("/")) value = "/" + value;
if (!value.endsWith("/")) value = value + "/";
return value;
};
/**
* Extracts the inner regex from /.../
*/
export const extractRegexBody = (value: string | undefined): string => {
if (value && value.startsWith("/") && value.endsWith("/")) {
return value.slice(1, -1);
}
return '';
};
/**
* Checks if the regex pattern compiles correctly
*/
export const isRegex = (pattern: string | undefined) => {
try {
if (!pattern) {
throw new Error("Pattern was not provided!");
}
new RegExp(pattern);
return true;
} catch {
return false;
}
};
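A quick round-trip of these helpers, as PatternInput and NlpValueForm use them (illustrative values only):

import { extractRegexBody, formatWithSlashes, isRegex, isRegexString } from "@/utils/string";

formatWithSlashes("\\d+"); // => "/\\d+/"
isRegexString("/\\d+/"); // => true
extractRegexBody("/\\d+/"); // => "\\d+"
isRegex(extractRegexBody("/\\d+/")); // => true, the body compiles as a RegExp
isRegex("[a-"); // => false, invalid patterns are caught and rejected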