Merge pull request #1006 from Hexastack/feat/nlp-pattern-lookup-strategy

Feat/nlp pattern lookup strategy
2025-06-26 18:27:28 +00:00 · 2025-05-14 08:35:51 +01:00 · 2025-05-14 08:35:51 +01:00 · 336c296456
commit 336c296456
parent e2d6d15215 6de740f683
30 changed files with 978 additions and 257 deletions
--- a/api/src/chat/services/block.service.spec.ts
+++ b/api/src/chat/services/block.service.spec.ts
@ -59,6 +59,7 @@ import {
  blockProductListMock,
  blocks,
  mockNlpAffirmationPatterns,
+  mockNlpFirstNamePatterns,
  mockNlpGreetingAnyNamePatterns,
  mockNlpGreetingNamePatterns,
  mockNlpGreetingPatterns,
@ -69,6 +70,7 @@ import {
  subscriberContextBlankInstance,
 } from '@/utils/test/mocks/conversation';
 import {
+  mockNlpFirstNameEntities,
  mockNlpGreetingFullNameEntities,
  mockNlpGreetingNameEntities,
 } from '@/utils/test/mocks/nlp';
@ -353,6 +355,25 @@ describe('BlockService', () => {
      ]);
    });

+    // Covers the canonical-value fallback of 'value' patterns: the mocked
+    // `firstname` entity carries the raw value 'jhonny' and the canonical value
+    // 'jhon', and the pattern asks for 'jhon'.
+    it('should return match nlp patterns with synonyms match (canonical value)', () => {
+      const result = blockService.getMatchingNluPatterns(
+        mockNlpFirstNameEntities,
+        {
+          ...blockGetStarted,
+          // Append the first-name pattern group to the block's own patterns
+          patterns: [...blockGetStarted.patterns, mockNlpFirstNamePatterns],
+        },
+      );
+      expect(result).toEqual([
+        [
+          {
+            entity: 'firstname',
+            match: 'value',
+            value: 'jhon',
+          },
+        ],
+      ]);
+    });
+
    it('should return empty array when it does not match nlp patterns', () => {
      const result = blockService.getMatchingNluPatterns(
        mockNlpGreetingFullNameEntities,
--- a/api/src/chat/services/block.service.ts
+++ b/api/src/chat/services/block.service.ts
@ -294,11 +294,11 @@ export class BlockService extends BaseService<
   * @returns The NLU patterns that matches the predicted entities
   */
  getMatchingNluPatterns<E extends NLU.ParseEntities, B extends BlockStub>(
-    nlp: E,
+    { entities }: E,
    block: B,
  ): NlpPattern[][] {
    // No nlp entities to check against
-    if (nlp.entities.length === 0) {
+    if (entities.length === 0) {
      return [];
    }

@ -312,18 +312,21 @@ export class BlockService extends BaseService<
    }

    // Filter NLP patterns match based on best guessed entities
-    return nlpPatterns.filter((entities: NlpPattern[]) => {
-      return entities.every((ev: NlpPattern) => {
-        if (ev.match === 'value') {
-          return nlp.entities.find((e) => {
-            return e.entity === ev.entity && e.value === ev.value;
+    return nlpPatterns.filter((patterns: NlpPattern[]) => {
+      return patterns.every((p: NlpPattern) => {
+        if (p.match === 'value') {
+          return entities.find((e) => {
+            return (
+              e.entity === p.entity &&
+              (e.value === p.value || e.canonicalValue === p.value)
+            );
          });
-        } else if (ev.match === 'entity') {
-          return nlp.entities.find((e) => {
-            return e.entity === ev.entity;
+        } else if (p.match === 'entity') {
+          return entities.find((e) => {
+            return e.entity === p.entity;
          });
        } else {
-          this.logger.warn('Unknown NLP match type', ev);
+          this.logger.warn('Unknown NLP match type', p);
          return false;
        }
      });
@ -429,12 +432,14 @@ export class BlockService extends BaseService<
   * - Returns `true` if all conditions are met, otherwise `false`.
   */
  private matchesNluEntity<E extends NLU.ParseEntity>(
-    { entity, value }: E,
+    { entity, value, canonicalValue }: E,
    pattern: NlpPattern,
  ): boolean {
    return (
      entity === pattern.entity &&
-      (pattern.match !== 'value' || value === pattern.value)
+      (pattern.match !== 'value' ||
+        value === pattern.value ||
+        canonicalValue === pattern.value)
    );
  }

--- a/api/src/extensions/helpers/llm-nlu/index.helper.ts
+++ b/api/src/extensions/helpers/llm-nlu/index.helper.ts
@ -12,7 +12,7 @@ import Handlebars from 'handlebars';

 import { HelperService } from '@/helper/helper.service';
 import BaseNlpHelper from '@/helper/lib/base-nlp-helper';
-import { LLM, NLU } from '@/helper/types';
+import { HelperType, LLM, NLU } from '@/helper/types';
 import { LanguageService } from '@/i18n/services/language.service';
 import { LoggerService } from '@/logger/logger.service';
 import { NlpEntityFull } from '@/nlp/schemas/nlp-entity.schema';
@ -66,12 +66,9 @@ export default class LlmNluHelper
  async buildClassifiersPrompt() {
    const settings = await this.getSettings();
    if (settings) {
-      const entities = await this.nlpEntityService.findAndPopulate({
+      const traitEntities = await this.nlpEntityService.findAndPopulate({
        lookups: 'trait',
      });
-      const traitEntities = entities.filter(({ lookups }) =>
-        lookups.includes('trait'),
-      );
      this.traitClassifierPrompts = traitEntities.map((entity) => ({
        ...entity,
        prompt: Handlebars.compile(settings.trait_classifier_prompt_template)({
@ -88,48 +85,9 @@ export default class LlmNluHelper
    await this.buildClassifiersPrompt();
  }

-  /**
-   * Finds entities in a given text based on their values and synonyms.
-   *
-   * This function takes a string of text and an array of entities, where each entity contains a value
-   * and a list of synonyms. It returns an array of objects, each representing an entity found in the text
-   * along with its start and end positions.
-   *
-   * @param text - The input text to search for entities.
-   * @param entities - An array of entities to search for, each containing a `value` and a list of `synonyms`.
-   *
-   * @returns An array of objects representing the found entities, with their `value`, `start`, and `end` positions.
-   */
-  private findKeywordEntities(text: string, entity: NlpEntityFull) {
-    return (
-      entity.values
-        .flatMap(({ value, expressions }) => {
-          const allValues = [value, ...expressions];
-
-          // Filter the terms that are found in the text
-          return allValues
-            .flatMap((term) => {
-              const regex = new RegExp(`\\b${term}\\b`, 'g');
-              const matches = [...text.matchAll(regex)];
-
-              // Map matches to FoundEntity format
-              return matches.map((match) => ({
-                entity: entity.name,
-                value: term,
-                start: match.index!,
-                end: match.index! + term.length,
-                confidence: 1,
-              }));
-            })
-            .shift();
-        })
-        .filter((v) => !!v) || []
-    );
-  }
-
  async predict(text: string): Promise<NLU.ParseEntities> {
    const settings = await this.getSettings();
-    const helper = await this.helperService.getDefaultLlmHelper();
+    const helper = await this.helperService.getDefaultHelper(HelperType.LLM);
    const defaultLanguage = await this.languageService.getDefaultLanguage();
    // Detect language
    const language = await helper.generateStructuredResponse<string>?.(
@ -174,13 +132,12 @@ export default class LlmNluHelper

    // Perform slot filling in a deterministic way since
    // it's currently a challenging task for the LLMs.
-    const keywordEntities = await this.nlpEntityService.findAndPopulate({
-      lookups: 'keywords',
+    const entities = await this.nlpEntityService.findAndPopulate({
+      lookups: { $in: ['keywords', 'pattern'] },
    });
-    const entities = keywordEntities.flatMap((keywordEntity) =>
-      this.findKeywordEntities(text, keywordEntity),
-    ) as NLU.ParseEntity[];

-    return { entities: traits.concat(entities) };
+    const slotEntities = this.runDeterministicSlotFilling(text, entities);
+
+    return { entities: traits.concat(slotEntities) };
  }
 }
--- a/api/src/helper/lib/test/base-nlp-helper.spec.ts
+++ b/api/src/helper/lib/test/base-nlp-helper.spec.ts
@ -30,6 +30,7 @@ import BaseNlpHelper from '../base-nlp-helper';
 const mockLoggerService = {
  log: jest.fn(),
  error: jest.fn(),
+  warn: jest.fn(),
 } as unknown as LoggerService;

 const mockSettingService = {
@ -160,7 +161,7 @@ describe('BaseNlpHelper', () => {
          updatedAt: new Date(),
          builtin: false,
          expressions: [],
-          metadata: [],
+          metadata: {},
        },
        value2: {
          id: new ObjectId().toString(),
@ -170,7 +171,7 @@ describe('BaseNlpHelper', () => {
          updatedAt: new Date(),
          builtin: false,
          expressions: [],
-          metadata: [],
+          metadata: {},
        },
      });

@ -218,4 +219,253 @@ describe('BaseNlpHelper', () => {
      );
    });
  });
+
+  // Keyword lookup: values and their synonyms (`expressions`) are searched in
+  // the text as whole words.
+  describe('extractKeywordBasedSlots', () => {
+    it('should return matches for exact keywords and synonyms', () => {
+      const entity: NlpEntityFull = {
+        name: 'color',
+        values: [
+          { value: 'blue', expressions: ['azure', 'navy'] },
+          { value: 'green', expressions: ['emerald', 'lime'] },
+        ],
+      } as any;
+
+      const result = helper.extractKeywordBasedSlots(
+        'The sky is azure and emerald',
+        entity,
+      );
+      // The reported `value` is the canonical one ('blue' / 'green') while
+      // `start`/`end` delimit the synonym found in the text ('azure' / 'emerald').
+      expect(result).toEqual([
+        {
+          entity: 'color',
+          value: 'blue',
+          start: 11,
+          end: 16,
+          confidence: 1,
+        },
+        {
+          entity: 'color',
+          value: 'green',
+          start: 21,
+          end: 28,
+          confidence: 1,
+        },
+      ]);
+    });
+
+    it('should return empty array if no values present', () => {
+      const result = helper.extractKeywordBasedSlots('anything', {
+        name: 'empty',
+        values: [],
+      } as any);
+
+      expect(result).toEqual([]);
+    });
+  });
+
+  // Pattern lookup: the regex of each value is read from `metadata.pattern`;
+  // the other metadata flags post-process the matched text.
+  describe('extractPatternBasedSlots', () => {
+    it('should match using a valid regex pattern', () => {
+      const entity: NlpEntityFull = {
+        name: 'infos',
+        values: [
+          {
+            value: 'number',
+            metadata: { pattern: '\\d+', wordBoundary: true },
+          },
+        ],
+      } as NlpEntityFull;
+
+      const result = helper.extractPatternBasedSlots(
+        'Order 123 and 456 now!',
+        entity,
+      );
+      // One slot per occurrence: `value` is the matched text, `canonicalValue`
+      // is the NLP value the pattern belongs to.
+      expect(result).toEqual([
+        {
+          entity: 'infos',
+          canonicalValue: 'number',
+          value: '123',
+          start: 6,
+          end: 9,
+          confidence: 1,
+        },
+        {
+          entity: 'infos',
+          canonicalValue: 'number',
+          value: '456',
+          start: 14,
+          end: 17,
+          confidence: 1,
+        },
+      ]);
+    });
+
+    it('should respect metadata like toLowerCase and removeSpaces', () => {
+      const entity: NlpEntityFull = {
+        name: 'name',
+        values: [
+          {
+            value: 'brand',
+            metadata: {
+              pattern: 'HEX BOT',
+              toLowerCase: true,
+              removeSpaces: true,
+            },
+          },
+        ],
+      } as NlpEntityFull;
+
+      const result = helper.extractPatternBasedSlots(
+        'My CODE is HEX BOT!',
+        entity,
+      );
+      // `value` is normalized ('hexbot') but `start`/`end` still span the
+      // original 7-character match 'HEX BOT'.
+      expect(result).toEqual([
+        {
+          entity: 'name',
+          canonicalValue: 'brand',
+          value: 'hexbot',
+          start: 11,
+          end: 18,
+          confidence: 1,
+        },
+      ]);
+    });
+
+    it('should respect metadata stripDiacritics', () => {
+      const entity: NlpEntityFull = {
+        name: 'keyword',
+        values: [
+          {
+            value: 'word',
+            metadata: {
+              pattern: '".+"',
+              toLowerCase: true,
+              removeSpaces: true,
+              stripDiacritics: true,
+            },
+          },
+        ],
+      } as NlpEntityFull;
+
+      const result = helper.extractPatternBasedSlots(
+        'The word "où" (where)',
+        entity,
+      );
+      // The accent of "où" is dropped from `value`; the quotes are part of the
+      // pattern and therefore of the match.
+      expect(result).toEqual([
+        {
+          entity: 'keyword',
+          canonicalValue: 'word',
+          value: '"ou"',
+          start: 9,
+          end: 13,
+          confidence: 1,
+        },
+      ]);
+    });
+
+    it('should return empty array if no values', () => {
+      const result = helper.extractPatternBasedSlots('test', {
+        name: 'noop',
+        values: [],
+      } as any);
+
+      expect(result).toEqual([]);
+    });
+
+    it('should handle invalid regex pattern gracefully', () => {
+      const entity: NlpEntityFull = {
+        name: 'fail',
+        values: [
+          {
+            value: 'Invalid',
+            // '[a-' is an unterminated character class: building the RegExp throws
+            metadata: { pattern: '[a-', wordBoundary: true },
+          },
+        ],
+      } as any;
+
+      const result = helper.extractPatternBasedSlots('test', entity);
+      expect(result).toEqual([]);
+    });
+  });
+
+  // Checks that each entity is routed to the extractor matching its lookup
+  // strategy.
+  describe('runDeterministicSlotFilling', () => {
+    // The spies are installed before and restored after every test, so call
+    // counts never leak from one test into the next, whatever mock-reset
+    // policy the surrounding suite applies.
+    let keywordSpy: jest.SpyInstance;
+    let patternSpy: jest.SpyInstance;
+
+    beforeEach(() => {
+      keywordSpy = jest.spyOn(helper, 'extractKeywordBasedSlots');
+      patternSpy = jest.spyOn(helper, 'extractPatternBasedSlots');
+    });
+
+    afterEach(() => {
+      keywordSpy.mockRestore();
+      patternSpy.mockRestore();
+    });
+
+    it('should call keyword-based extractor for keyword lookup strategy', () => {
+      const mockEntities: NlpEntityFull[] = [
+        {
+          name: 'product',
+          lookups: ['keywords'],
+          values: [
+            {
+              value: 'tshirt',
+              expressions: [],
+            },
+            {
+              value: 'pizza',
+              expressions: [],
+            },
+          ],
+        } as unknown as NlpEntityFull,
+      ];
+
+      const result = helper.runDeterministicSlotFilling(
+        'order pizza',
+        mockEntities,
+      );
+
+      expect(keywordSpy).toHaveBeenCalledTimes(1);
+      expect(patternSpy).not.toHaveBeenCalled();
+      expect(result).toHaveLength(1);
+      expect(result[0].entity).toBe('product');
+    });
+
+    it('should call pattern-based extractor for pattern lookup strategy', () => {
+      const mockEntities: NlpEntityFull[] = [
+        {
+          name: 'number',
+          lookups: ['pattern'],
+          values: [
+            {
+              value: 'phone',
+              metadata: { pattern: '\\d+' },
+              expressions: [],
+            },
+          ],
+        } as unknown as NlpEntityFull,
+      ];
+
+      const result = helper.runDeterministicSlotFilling(
+        'call me at 1234567890',
+        mockEntities,
+      );
+
+      expect(patternSpy).toHaveBeenCalledTimes(1);
+      expect(keywordSpy).not.toHaveBeenCalled();
+      expect(result).toHaveLength(1);
+      expect(result[0].entity).toBe('number');
+    });
+
+    it('should skip entities that do not support the selected lookup strategy', () => {
+      const mockEntities: NlpEntityFull[] = [
+        {
+          name: 'irrelevant',
+          lookups: ['trait'],
+          values: [],
+        } as unknown as NlpEntityFull,
+      ];
+
+      const result = helper.runDeterministicSlotFilling(
+        'any text',
+        mockEntities,
+      );
+
+      expect(keywordSpy).not.toHaveBeenCalled();
+      expect(patternSpy).not.toHaveBeenCalled();
+      expect(result).toHaveLength(0);
+    });
+  });
 });
--- a/api/src/helper/lib/base-nlp-helper.ts
+++ b/api/src/helper/lib/base-nlp-helper.ts
@ -225,4 +225,144 @@ export default abstract class BaseNlpHelper<
    threshold?: boolean,
    project?: string,
  ): Promise<NLU.ParseEntities>;
+
+  /**
+   * Finds occurrences of an entity's keyword values and synonyms in a text.
+   *
+   * Each value of the entity is searched, together with its synonyms
+   * (`expressions`), as a whole word. At most one slot is produced per value:
+   * the first occurrence of the first term (the value itself, then its synonyms
+   * in order) that appears in the text. The reported `value` is always the
+   * canonical value, while `start`/`end` delimit the term actually found.
+   *
+   * @param text - The input text to search for entities.
+   * @param entity - The entity to search for; each of its `values` holds a `value` and a list of `expressions` (synonyms).
+   *
+   * @returns The found slots with their `entity`, `value`, `start` and `end` positions and a `confidence` of 1.
+   */
+  public extractKeywordBasedSlots(
+    text: string,
+    entity: NlpEntityFull,
+  ): NLU.ParseEntity[] {
+    if (!entity.values?.length) {
+      this.logger.warn('NLP entity has no values');
+      return [];
+    }
+
+    return entity.values.flatMap(({ value, expressions }): NLU.ParseEntity[] => {
+      // Tolerate values that carry no synonym list at all
+      const terms = [value, ...(expressions ?? [])];
+
+      for (const term of terms) {
+        // An empty term would "match" at every word boundary
+        if (!term) {
+          continue;
+        }
+
+        // Keywords are literal text, not regular expressions: escape the regex
+        // metacharacters so that terms such as "c++" or "(beta)" neither throw
+        // nor match unintended text.
+        const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+        const match = new RegExp(`\\b${escapedTerm}\\b`).exec(text);
+
+        if (match) {
+          // Only the first occurrence of the first matching term is reported
+          return [
+            {
+              entity: entity.name,
+              value,
+              start: match.index,
+              end: match.index + term.length,
+              confidence: 1,
+            },
+          ];
+        }
+      }
+
+      return [];
+    });
+  }
+
+  /**
+   * Finds entities in a given text using the regex pattern attached to each value.
+   *
+   * For every value of the entity, the regular expression source stored in
+   * `metadata.pattern` is evaluated globally and case-insensitively against the
+   * text. When `metadata.wordBoundary` is set, the pattern is enclosed between
+   * `\b` word boundaries. The matched text is then normalized according to
+   * `metadata.removeSpaces`, `metadata.toLowerCase` and
+   * `metadata.stripDiacritics` (applied in that order) before being reported.
+   *
+   * Values with a missing or invalid pattern are logged and skipped.
+   *
+   * @param text - Input text to evaluate.
+   * @param entity - NlpEntityFull whose values carry a regex in `metadata.pattern` plus optional normalization flags.
+   * @returns The matched slots: normalized `value`, `canonicalValue` (the NLP value owning the pattern), `start`/`end` of the raw match and a `confidence` of 1.
+   */
+  public extractPatternBasedSlots(
+    text: string,
+    entity: NlpEntityFull,
+  ): NLU.ParseEntity[] {
+    if (!entity.values?.length) {
+      this.logger.warn('NLP entity has no values');
+      return [];
+    }
+
+    return entity.values.flatMap((nlpValue): NLU.ParseEntity[] => {
+      const metadata = nlpValue.metadata;
+      const pattern = metadata?.pattern;
+
+      if (!pattern) {
+        this.logger.error('Missing NLP regex pattern');
+        return [];
+      }
+
+      let regex: RegExp;
+      try {
+        // Group the pattern before adding the boundaries: without the group an
+        // alternation such as `foo|bar` would become `\bfoo|bar\b`, leaving
+        // each branch with a single boundary only.
+        regex = new RegExp(
+          metadata?.wordBoundary ? `\\b(?:${pattern})\\b` : pattern,
+          'gi',
+        );
+      } catch {
+        this.logger.error('Invalid NLP regex pattern');
+        return [];
+      }
+
+      return (
+        [...text.matchAll(regex)]
+          // A pattern able to match the empty string (e.g. `\d*`) would
+          // otherwise emit an empty slot at every position of the text.
+          .filter((match) => match[0].length > 0)
+          .map((match) => {
+            const start = match.index ?? 0;
+            let value = match[0];
+
+            // Apply preprocessing if needed
+            if (metadata?.removeSpaces) {
+              value = value.replace(/\s+/g, '');
+            }
+
+            if (metadata?.toLowerCase) {
+              value = value.toLowerCase();
+            }
+
+            if (metadata?.stripDiacritics) {
+              // Decompose accented characters, then drop the combining marks
+              value = value.normalize('NFD').replace(/\p{Diacritic}/gu, '');
+            }
+
+            return {
+              entity: entity.name,
+              value,
+              canonicalValue: nlpValue.value,
+              start,
+              // Positions always refer to the raw match, not the normalized value
+              end: start + match[0].length,
+              confidence: 1,
+            };
+          })
+      );
+    });
+  }
+
+  /**
+   * Runs deterministic slot filling over a text for a list of entities.
+   *
+   * Each entity is dispatched to an extractor according to its own `lookups`:
+   *
+   * - `keywords`: exact term and synonym matching with word boundaries.
+   * - `pattern`: regular expressions attached to each entity value (read from
+   *   `metadata.pattern`), with optional post-processing such as `removeSpaces`,
+   *   `toLowerCase` and `stripDiacritics`.
+   *
+   * `keywords` is checked first, so an entity declaring both strategies is only
+   * processed by the keyword extractor. Entities declaring neither strategy
+   * contribute nothing.
+   *
+   * @param text - The input text from which to extract slot values.
+   * @param entities - The NlpEntityFull objects to look for, each containing slot values and metadata.
+   *
+   * @returns The `ParseEntity` objects found, in the order of the given entities.
+   */
+  public runDeterministicSlotFilling(
+    text: string,
+    entities: NlpEntityFull[],
+  ): NLU.ParseEntity[] {
+    return entities.flatMap((nlpEntity) => {
+      const { lookups } = nlpEntity;
+
+      if (lookups.includes('keywords')) {
+        return this.extractKeywordBasedSlots(text, nlpEntity);
+      }
+
+      if (lookups.includes('pattern')) {
+        return this.extractPatternBasedSlots(text, nlpEntity);
+      }
+
+      // Neither deterministic strategy applies to this entity
+      return [];
+    });
+  }
 }
--- a/api/src/helper/types.ts
+++ b/api/src/helper/types.ts
@ -21,6 +21,9 @@ export namespace NLU {
    confidence: number;
    start?: number;
    end?: number;
+    // When lookup strategy is either 'keywords' or 'pattern', the canonical value
+    // is the actual NlpValue.value, given the match is either a synonym (expression) or a pattern match
+    canonicalValue?: string;
  }

  export interface ParseEntities {
--- a/api/src/nlp/controllers/nlp-value.controller.spec.ts
+++ b/api/src/nlp/controllers/nlp-value.controller.spec.ts
@ -95,7 +95,7 @@ describe('NlpValueController', () => {
        entity: nlpEntities[0].id,
        value: 'valuetest',
        expressions: ['synonym1', 'synonym2'],
-        metadata: { firstkey: 'firstvalue', secondKey: 1995 },
+        metadata: {},
        builtin: false,
        doc: '',
      };
--- a/api/src/nlp/controllers/nlp-value.controller.ts
+++ b/api/src/nlp/controllers/nlp-value.controller.ts
@ -71,14 +71,17 @@ export class NlpValueController extends BaseController<
  async create(
    @Body() createNlpValueDto: NlpValueCreateDto,
  ): Promise<NlpValue> {
+    const nlpEntity = createNlpValueDto.entity
+      ? await this.nlpEntityService.findOne(createNlpValueDto.entity!)
+      : null;
+
    this.validate({
      dto: createNlpValueDto,
      allowedIds: {
-        entity: createNlpValueDto.entity
-          ? (await this.nlpEntityService.findOne(createNlpValueDto.entity))?.id
-          : null,
+        entity: nlpEntity?.id,
      },
    });
+
    return await this.nlpValueService.create(createNlpValueDto);
  }

@ -171,6 +174,17 @@ export class NlpValueController extends BaseController<
    @Param('id') id: string,
    @Body() updateNlpValueDto: NlpValueUpdateDto,
  ): Promise<NlpValue> {
+    const nlpEntity = updateNlpValueDto.entity
+      ? await this.nlpEntityService.findOne(updateNlpValueDto.entity!)
+      : null;
+
+    this.validate({
+      dto: updateNlpValueDto,
+      allowedIds: {
+        entity: nlpEntity?.id,
+      },
+    });
+
    return await this.nlpValueService.updateOne(id, updateNlpValueDto);
  }

--- a/api/src/nlp/dto/nlp-entity.dto.ts
+++ b/api/src/nlp/dto/nlp-entity.dto.ts
@ -21,7 +21,7 @@ import {

 import { DtoConfig } from '@/utils/types/dto.types';

-export type Lookup = 'keywords' | 'trait' | 'free-text';
+import { Lookup, LookupStrategy } from '../schemas/types';

 export class NlpEntityCreateDto {
  @ApiProperty({ description: 'Name of the nlp entity', type: String })
@ -33,10 +33,10 @@ export class NlpEntityCreateDto {

  @ApiPropertyOptional({
    isArray: true,
-    enum: ['keywords', 'trait', 'free-text'],
+    enum: Object.values(LookupStrategy),
  })
  @IsArray()
-  @IsIn(['keywords', 'trait', 'free-text'], { each: true })
+  @IsIn(Object.values(LookupStrategy), { each: true })
  @IsOptional()
  lookups?: Lookup[];

--- a/api/src/nlp/dto/nlp-value.dto.ts
+++ b/api/src/nlp/dto/nlp-value.dto.ts
@ -19,6 +19,8 @@ import {
 import { DtoConfig } from '@/utils/types/dto.types';
 import { IsObjectId } from '@/utils/validation-rules/is-object-id';

+import { NlpMetadata } from '../schemas/types';
+
 export class NlpValueCreateDto {
  @ApiProperty({ description: 'Nlp value', type: String })
  @IsString()
@ -37,7 +39,7 @@ export class NlpValueCreateDto {
  @ApiPropertyOptional({ description: 'Nlp value metadata', type: Object })
  @IsOptional()
  @IsObject()
-  metadata?: Record<string, any>;
+  metadata?: NlpMetadata;

  @ApiPropertyOptional({ description: 'Nlp Value Description', type: String })
  @IsString()
@ -82,6 +84,11 @@ export class NlpValueUpdateDto {
  @IsObjectId({ message: 'Entity must be a valid ObjectId' })
  entity?: string | null;

+  @ApiPropertyOptional({ description: 'Nlp Metadata', type: Object })
+  @IsObject()
+  @IsOptional()
+  metadata?: NlpMetadata;
+
  @ApiPropertyOptional({ description: 'Nlp Value Description', type: String })
  @IsString()
  @IsOptional()
--- a/api/src/nlp/schemas/nlp-entity.schema.ts
+++ b/api/src/nlp/schemas/nlp-entity.schema.ts
@ -16,10 +16,8 @@ import {
  THydratedDocument,
 } from '@/utils/types/filter.types';

-import { Lookup } from '../dto/nlp-entity.dto';
-
 import { NlpValue } from './nlp-value.schema';
-import { NlpEntityMap } from './types';
+import { Lookup, LookupStrategy, NlpEntityMap } from './types';

@Schema({ timestamps: true })
 export class NlpEntityStub extends BaseSchema {
@ -41,9 +39,18 @@ export class NlpEntityStub extends BaseSchema {
  name: string;

  /**
-   * Lookup strategy can contain : keywords, trait, free-text
+   * Lookup strategy
   */
-  @Prop({ type: [String], default: ['keywords'] })
+  @Prop({
+    type: [String],
+    default: ['keywords'],
+    validate: {
+      validator: (lookups: string[]) =>
+        lookups.every((lookup) =>
+          Object.values(LookupStrategy).includes(lookup as LookupStrategy),
+        ),
+    },
+  })
  lookups: Lookup[];

  /**
--- a/api/src/nlp/schemas/nlp-value.schema.ts
+++ b/api/src/nlp/schemas/nlp-value.schema.ts
@ -19,9 +19,9 @@ import {
 import { TStubOrFull } from '@/utils/types/format.types';

 import { NlpEntity, NlpEntityFull } from './nlp-entity.schema';
-import { NlpValueMap } from './types';
+import { NlpMetadata, NlpValueMap } from './types';

-@Schema({ timestamps: true })
+@Schema({ timestamps: true, minimize: false })
 export class NlpValueStub extends BaseSchema {
  /**
   * This value content.
@ -44,8 +44,8 @@ export class NlpValueStub extends BaseSchema {
  /**
   * Metadata are additional data that can be associated to this values, most of the time, the metadata contains system values or ids (e.g: value: "coffee", metadata: "item_11") .
   */
-  @Prop({ type: JSON, default: {} })
-  metadata: Record<string, any>;
+  // The default is a factory so every document gets its own object. It must
+  // return an object literal wrapped in parentheses: `() => {}` is an arrow
+  // function with an empty body and would yield `undefined`.
+  @Prop({ type: JSON, default: () => ({}) })
+  metadata?: NlpMetadata;

  /**
   * Description of the entity's value purpose.
--- a/api/src/nlp/schemas/types.ts
+++ b/api/src/nlp/schemas/types.ts
@ -9,6 +9,15 @@
 import { NlpEntityFull, NlpEntityStub } from './nlp-entity.schema';
 import { NlpValueStub } from './nlp-value.schema';

+/**
+ * Lookup strategies accepted in the `lookups` list of an NLP entity.
+ */
+export enum LookupStrategy {
+  keywords = 'keywords',
+  trait = 'trait',
+  free_text = 'free-text',
+  pattern = 'pattern',
+}
+
+/**
+ * String-literal union of the `LookupStrategy` values
+ * ('keywords' | 'trait' | 'free-text' | 'pattern').
+ */
+export type Lookup = `${LookupStrategy}`;
+
 export interface NlpSampleEntityValue {
  entity: string; // entity name
  value: string; // entity value
@ -27,3 +36,12 @@ export enum NlpSampleState {
 }

 export type NlpCacheMap = Map<string, NlpEntityFull>;
+
+/**
+ * Metadata attached to an NLP value; these fields drive pattern-based slot
+ * extraction.
+ */
+export type NlpMetadata = {
+  // Required when lookups is "pattern": source of the regular expression
+  // evaluated against the text
+  pattern?: string;
+  // When set, the pattern is enclosed between `\b` word boundaries
+  wordBoundary?: boolean;
+  // When set, whitespace is removed from the matched text
+  removeSpaces?: boolean;
+  // When set, the matched text is lower-cased
+  toLowerCase?: boolean;
+  // When set, diacritics are stripped from the matched text
+  stripDiacritics?: boolean;
+};
--- a/api/src/nlp/services/nlp-entity.service.ts
+++ b/api/src/nlp/services/nlp-entity.service.ts
@ -15,14 +15,14 @@ import { NLP_MAP_CACHE_KEY } from '@/utils/constants/cache';
 import { Cacheable } from '@/utils/decorators/cacheable.decorator';
 import { BaseService } from '@/utils/generics/base-service';

-import { Lookup, NlpEntityDto } from '../dto/nlp-entity.dto';
+import { NlpEntityDto } from '../dto/nlp-entity.dto';
 import { NlpEntityRepository } from '../repositories/nlp-entity.repository';
 import {
  NlpEntity,
  NlpEntityFull,
  NlpEntityPopulate,
 } from '../schemas/nlp-entity.schema';
-import { NlpCacheMap, NlpSampleEntityValue } from '../schemas/types';
+import { Lookup, NlpCacheMap, NlpSampleEntityValue } from '../schemas/types';

 import { NlpValueService } from './nlp-value.service';

--- a/api/src/nlp/services/nlp.service.ts
+++ b/api/src/nlp/services/nlp.service.ts
@ -51,6 +51,7 @@ export class NlpService {
      .filter(({ entity }) => nlpMap.has(entity))
      .map((e) => {
        const entity = nlpMap.get(e.entity)!;
+
        return {
          ...e,
          score: e.confidence * (entity.weight || 1),
--- a/api/src/utils/test/mocks/block.ts
+++ b/api/src/utils/test/mocks/block.ts
@ -294,6 +294,14 @@ export const mockNlpGreetingAnyNamePatterns: NlpPattern[] = [
  },
 ];

+/**
+ * NLP pattern group requiring the `firstname` entity to carry the value 'jhon'.
+ */
+export const mockNlpFirstNamePatterns: NlpPattern[] = [
+  {
+    entity: 'firstname',
+    match: 'value',
+    value: 'jhon',
+  },
+];
+
 export const mockModifiedNlpBlock: BlockFull = {
  ...baseBlockInstance,
  name: 'Modified Mock Nlp',
--- a/api/src/utils/test/mocks/nlp.ts
+++ b/api/src/utils/test/mocks/nlp.ts
@ -44,3 +44,14 @@ export const mockNlpGreetingFullNameEntities: NLU.ParseEntities = {
    },
  ],
 };
+
+/**
+ * Parse result holding a single `firstname` entity whose matched value
+ * ('jhonny') differs from its canonical value ('jhon').
+ */
+export const mockNlpFirstNameEntities: NLU.ParseEntities = {
+  entities: [
+    {
+      entity: 'firstname',
+      value: 'jhonny',
+      canonicalValue: 'jhon',
+      confidence: 0.75,
+    },
+  ],
+};
+};
--- a/frontend/public/locales/en/translation.json
+++ b/frontend/public/locales/en/translation.json
@ -351,6 +351,10 @@
    "doc": "Documentation",
    "builtin": "Built-in?",
    "weight": "Weight",
+    "word_boundary": "Word boundary",
+    "remove_spaces": "Remove spaces",
+    "to_lower_case": "Lowercase",
+    "strip_diacritics": "Strip diacritics",
    "dataset": "Dataset",
    "yes": "Yes",
    "no": "No",
--- a/frontend/public/locales/fr/translation.json
+++ b/frontend/public/locales/fr/translation.json
@ -350,6 +350,10 @@
    "synonyms": "Synonymes",
    "doc": "Documentation",
    "weight": "Poids",
+    "word_boundary": "Délimiter (Mot)",
+    "remove_spaces": "Supprimer les espaces",
+    "to_lower_case": "Mettre en minuscules",
+    "strip_diacritics": "Supprimer les accents",
    "builtin": "Intégré?",
    "dataset": "Données",
    "yes": "Oui",
--- a/frontend/src/app-components/inputs/RegexInput.tsx
+++ b/frontend/src/app-components/inputs/RegexInput.tsx
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -7,17 +7,18 @@
 */

 import { InputAdornment, TextFieldProps } from "@mui/material";
-import React, { ForwardedRef, forwardRef } from "react";
+import { ForwardedRef, forwardRef } from "react";

 import { Input } from "./Input";

 export const RegexInput = forwardRef(
  (
    {
-      onChange,
-      value,
+      flags = ["g", "i"],
      ...props
-    }: TextFieldProps & { value: string; onChange: (value: string) => void },
+    }: TextFieldProps & {
+      flags?: string[];
+    },
    ref: ForwardedRef<HTMLDivElement>,
  ) => {
    return (
@ -26,15 +27,13 @@ export const RegexInput = forwardRef(
        {...props}
        InputProps={{
          startAdornment: <InputAdornment position="start">/</InputAdornment>,
-          endAdornment: <InputAdornment position="end">/gi</InputAdornment>,
-        }}
-        value={value}
-        onChange={(e) => {
-          onChange(`/${e.target.value}/`);
+          endAdornment: (
+            <InputAdornment position="end">/{flags.join("")}</InputAdornment>
+          ),
        }}
      />
    );
  },
 );

-RegexInput.displayName = "Input";
+RegexInput.displayName = "RegexInput";
--- a/frontend/src/app-components/inputs/Selectable.tsx
+++ b/frontend/src/app-components/inputs/Selectable.tsx
@ -6,11 +6,22 @@
 * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
 */

-import { Box, CircularProgress, Input, styled } from "@mui/material";
+import { Box, CircularProgress, Input, styled, Tooltip } from "@mui/material";
 import randomSeed from "random-seed";
-import { FC, useCallback, useEffect, useMemo, useRef, useState } from "react";
+import {
+  CSSProperties,
+  FC,
+  useCallback,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+} from "react";

-import { INlpDatasetKeywordEntity } from "../../types/nlp-sample.types";
+import {
+  INlpDatasetKeywordEntity,
+  INlpDatasetPatternEntity,
+} from "../../types/nlp-sample.types";

 const SelectableBox = styled(Box)({
  position: "relative",
@ -40,22 +51,62 @@ const COLORS = [
  { name: "orange", bg: "#E6A23C" },
 ];
 const UNKNOWN_COLOR = { name: "grey", bg: "#aaaaaa" };
-const TODAY = new Date().toDateString();
-const getColor = (no: number) => {
-  const rand = randomSeed.create(TODAY);
+// Seed suffix captured once when this module is evaluated: the current epoch
+// time in milliseconds, as a string.
+const NOW = (+new Date()).toString();
+/**
+ * Picks the highlight style for an entity.
+ *
+ * @param no - Offset into `COLORS`, applied on top of a seeded starting index
+ * and wrapped around the palette. A negative value selects `UNKNOWN_COLOR`.
+ * @param seedPrefix - Prepended to `NOW` to build the generator seed, so
+ * different prefixes may yield different starting indexes.
+ * @returns An object with the chosen `backgroundColor` and a fixed `opacity`.
+ */
+const getColor = (no: number, seedPrefix: string = "") => {
+  const rand = randomSeed.create(seedPrefix + NOW);
  const startIndex = rand(COLORS.length);
  const color =
    no < 0 ? UNKNOWN_COLOR : COLORS[(startIndex + no) % COLORS.length];

  return {
    backgroundColor: color.bg,
-    opacity: 0.3,
+    opacity: 0.2,
  };
 };

+/**
+ * One entity highlight, expressed as the three consecutive segments of the
+ * sample text (before / highlighted / after) plus the style of the highlight.
+ */
+interface INlpSelectionEntity {
+  // Text located before the highlighted span
+  start: string;
+  // Name of the entity (displayed as the tooltip title)
+  entity: string;
+  // Highlighted portion of the text
+  value: string;
+  // Text located after the highlighted span
+  end: string;
+  // Inline style applied to the highlighted span
+  style: CSSProperties;
+}
+/**
+ * Renders one highlight layer for an entity: the text before the entity, the
+ * entity span wrapped in an always-open tooltip showing the entity name, and
+ * the text after it.
+ *
+ * Typed with the `FC` helper imported from "react", like `Selectable`, rather
+ * than through the `React` namespace, which this file does not import.
+ */
+const SelectionEntityBackground: FC<{
+  selectionEntity: INlpSelectionEntity;
+}> = ({ selectionEntity: e }) => {
+  // The tooltip bubble and its arrow reuse the highlight color so the label is
+  // visually tied to the span it annotates.
+  const highlightColor = e.style.backgroundColor;
+
+  return (
+    <div className="highlight">
+      <span>{e.start}</span>
+      <Tooltip
+        open={true}
+        placement="top"
+        title={e.entity}
+        arrow
+        componentsProps={{
+          tooltip: {
+            sx: {
+              color: "#FFF",
+              backgroundColor: highlightColor,
+            },
+          },
+          arrow: {
+            sx: {
+              color: highlightColor,
+            },
+          },
+        }}
+      >
+        <span style={e.style}>{e.value}</span>
+      </Tooltip>
+      <span>{e.end}</span>
+    </div>
+  );
+};
+
 type SelectableProps = {
  defaultValue?: string;
-  entities?: INlpDatasetKeywordEntity[];
+  keywordEntities?: INlpDatasetKeywordEntity[];
+  patternEntities?: INlpDatasetPatternEntity[];
  placeholder?: string;
  onSelect: (str: string, start: number, end: number) => void;
  onChange: (sample: {
@ -65,9 +116,27 @@ type SelectableProps = {
  loading?: boolean;
 };

+/**
+ * Splits `text` around each entity so that every entity can be rendered as its
+ * own highlight layer.
+ *
+ * @param text - The sample text currently displayed.
+ * @param entities - Keyword or pattern entities to highlight.
+ * @returns One selection entity per input entity, in the same order. Returns
+ * an empty array when no entities are provided.
+ */
+const buildSelectionEntities = (
+  text: string,
+  entities: INlpDatasetKeywordEntity[] | INlpDatasetPatternEntity[],
+): INlpSelectionEntity[] => {
+  if (!entities) {
+    return [];
+  }
+
+  return entities.map((e, index) => {
+    // An offset of 0 is valid (entity at the very beginning of the text), so
+    // only fall back to searching for the value when no numeric offset exists.
+    const start =
+      typeof e.start === "number" ? e.start : text.indexOf(e.value);
+    // An end offset of 0 cannot delimit a non-empty span, so it is treated as
+    // missing and derived from the start offset instead.
+    const end = e.end ? e.end : start + e.value.length;
+
+    return {
+      start: text.substring(0, start),
+      entity: e.entity,
+      value: text.substring(start, end),
+      end: text.substring(end),
+      // Entities without a name get the "unknown" color (negative index).
+      style: getColor(e.entity ? index : -1, e.entity),
+    };
+  });
+};
 const Selectable: FC<SelectableProps> = ({
  defaultValue,
-  entities = [],
+  keywordEntities = [],
+  patternEntities = [],
  placeholder = "",
  onChange,
  onSelect,
@ -76,20 +145,13 @@ const Selectable: FC<SelectableProps> = ({
  const [text, setText] = useState(defaultValue || "");
  const editableRef = useRef<HTMLDivElement>(null);
  const selectableRef = useRef(null);
-  const selectedEntities = useMemo(
-    () =>
-      entities?.map((e, index) => {
-        const start = e.start ? e.start : text.indexOf(e.value);
-        const end = e.end ? e.end : start + e.value.length;
-
-        return {
-          start: text.substring(0, start),
-          value: text.substring(start, end),
-          end: text.substring(end),
-          style: getColor(e.entity ? index : -1),
-        };
-      }),
-    [entities, text],
+  const selectedKeywordEntities = useMemo(
+    () => buildSelectionEntities(text, keywordEntities),
+    [keywordEntities, text],
+  );
+  const selectedPatternEntities = useMemo(
+    () => buildSelectionEntities(text, patternEntities),
+    [patternEntities, text],
  );

  useEffect(() => {
@ -143,7 +205,7 @@ const Selectable: FC<SelectableProps> = ({
  const handleTextChange = useCallback(
    (newText: string) => {
      const oldText = text;
-      const oldEntities = [...entities];
+      const oldEntities = [...keywordEntities];
      const newEntities: INlpDatasetKeywordEntity[] = [];
      const findCharDiff = (oldStr: string, newStr: string): number => {
        const minLength = Math.min(oldStr.length, newStr.length);
@ -187,17 +249,22 @@ const Selectable: FC<SelectableProps> = ({

      onChange({ text: newText, entities: newEntities });
    },
-    [text, onChange, entities],
+    [text, onChange, keywordEntities],
  );

  return (
    <SelectableBox ref={selectableRef}>
-      {selectedEntities?.map((e, idx) => (
-        <div key={idx} className="highlight">
-          <span>{e.start}</span>
-          <span style={e.style}>{e.value}</span>
-          <span>{e.end}</span>
-        </div>
+      {selectedPatternEntities?.map((e, idx) => (
+        <SelectionEntityBackground
+          key={`${e.entity}_${e.value}_${idx}`}
+          selectionEntity={e}
+        />
+      ))}
+      {selectedKeywordEntities?.map((e, idx) => (
+        <SelectionEntityBackground
+          key={`${e.entity}_${e.value}_${idx}`}
+          selectionEntity={e}
+        />
      ))}
      <Input
        ref={editableRef}
--- a/frontend/src/components/nlp/components/NlpEntityForm.tsx
+++ b/frontend/src/components/nlp/components/NlpEntityForm.tsx
@ -27,7 +27,7 @@ import { ComponentFormProps } from "@/types/common/dialogs.types";
 import {
  INlpEntity,
  INlpEntityAttributes,
-  NlpLookups,
+  LookupStrategy,
 } from "@/types/nlp-entity.types";

 export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
@ -94,27 +94,30 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
    <Wrapper onSubmit={handleSubmit(onSubmitForm)} {...WrapperProps}>
      <form onSubmit={handleSubmit(onSubmitForm)}>
        <ContentContainer>
-          {!nlpEntity ? (
-            <ContentItem>
-              <FormControl>
-                <FormLabel>{t("label.lookup_strategies")}</FormLabel>
-                <RadioGroup
-                  row
-                  {...register("lookups")}
-                  defaultValue="keywords"
-                >
-                  {Object.values(NlpLookups).map((nlpLookup, index) => (
-                    <FormControlLabel
-                      key={index}
-                      value={nlpLookup}
-                      control={<Radio {...register("lookups.0")} />}
-                      label={nlpLookup}
-                    />
-                  ))}
-                </RadioGroup>
-              </FormControl>
-            </ContentItem>
-          ) : null}
+          <ContentItem>
+            <FormControl>
+              <FormLabel>{t("label.lookup_strategies")}</FormLabel>
+              <RadioGroup
+                row
+                {...register("lookups")}
+                defaultValue={nlpEntity ? nlpEntity.lookups[0] : "keywords"}
+              >
+                {Object.values(LookupStrategy).map((nlpLookup, index) => (
+                  <FormControlLabel
+                    key={index}
+                    value={nlpLookup}
+                    control={
+                      <Radio
+                        disabled={!!nlpEntity}
+                        {...register("lookups.0")}
+                      />
+                    }
+                    label={nlpLookup}
+                  />
+                ))}
+              </RadioGroup>
+            </FormControl>
+          </ContentItem>
          <ContentItem>
            <Input
              label={t("label.name")}
@ -131,10 +134,11 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
              label={t("label.doc")}
              {...register("doc")}
              multiline={true}
+              rows={3}
              disabled={nlpEntity?.builtin}
            />
          </ContentItem>
-          <ContentItem>
+          <ContentItem maxWidth="25%">
            <Input
              label={t("label.weight")}
              {...register("weight", {
@ -157,8 +161,8 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
              }}
              error={!!errors.weight}
              helperText={errors.weight?.message}
-          />
-        </ContentItem>
+            />
+          </ContentItem>
        </ContentContainer>
      </form>
    </Wrapper>
--- a/frontend/src/components/nlp/components/NlpTrainForm.tsx
+++ b/frontend/src/components/nlp/components/NlpTrainForm.tsx
@ -30,15 +30,16 @@ import { ContentContainer, ContentItem } from "@/app-components/dialogs";
 import AutoCompleteEntitySelect from "@/app-components/inputs/AutoCompleteEntitySelect";
 import AutoCompleteSelect from "@/app-components/inputs/AutoCompleteSelect";
 import Selectable from "@/app-components/inputs/Selectable";
-import { useFind } from "@/hooks/crud/useFind";
 import { useGetFromCache } from "@/hooks/crud/useGet";
 import { useApiClient } from "@/hooks/useApiClient";
+import { useNlp } from "@/hooks/useNlp";
 import { useTranslate } from "@/hooks/useTranslate";
 import { EntityType, Format } from "@/services/types";
 import { ILanguage } from "@/types/language.types";
 import { INlpEntity } from "@/types/nlp-entity.types";
 import {
  INlpDatasetKeywordEntity,
+  INlpDatasetPatternEntity,
  INlpDatasetSample,
  INlpDatasetTraitEntity,
  INlpSampleFormAttributes,
@ -56,39 +57,32 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
  submitForm,
 }) => {
  const { t } = useTranslate();
-  const { data: entities, refetch: refetchEntities } = useFind(
-    {
-      entity: EntityType.NLP_ENTITY,
-      format: Format.FULL,
-    },
-    {
-      hasCount: false,
-    },
-  );
+  const {
+    allTraitEntities,
+    allKeywordEntities,
+    allPatternEntities,
+    refetchAllEntities,
+  } = useNlp();
  const getNlpValueFromCache = useGetFromCache(EntityType.NLP_VALUE);
-  // eslint-disable-next-line react-hooks/exhaustive-deps
  const defaultValues: INlpSampleFormAttributes = useMemo(
    () => ({
      type: sample?.type || NlpSampleType.train,
      text: sample?.text || "",
      language: sample?.language || null,
-      traitEntities: (entities || [])
-        .filter(({ lookups }) => {
-          return lookups.includes("trait");
-        })
-        .map((e) => {
-          return {
-            entity: e.name,
-            value: sample
-              ? sample.entities.find(({ entity }) => entity === e.name)?.value
-              : "",
-          } as INlpDatasetTraitEntity;
-        }),
-      keywordEntities: (sample?.entities || []).filter(
-        (e) => "start" in e && typeof e.start === "number",
+      traitEntities: [...allTraitEntities.values()].map((e) => {
+        return {
+          entity: e.name,
+          value:
+            (sample?.entities || []).find((se) => se.entity === e.name)
+              ?.value || "",
+        };
+      }) as INlpDatasetTraitEntity[],
+      keywordEntities: (sample?.entities || []).filter((e) =>
+        allKeywordEntities.has(e.entity),
      ) as INlpDatasetKeywordEntity[],
    }),
-    [sample, entities],
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [allKeywordEntities, allTraitEntities, JSON.stringify(sample)],
  );
  const { handleSubmit, control, register, reset, setValue, watch } =
    useForm<INlpSampleFormAttributes>({
@ -97,6 +91,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
  const currentText = watch("text");
  const currentType = watch("type");
  const { apiClient } = useApiClient();
+  const [patternEntities, setPatternEntities] = useState<
+    INlpDatasetPatternEntity[]
+  >([]);
  const { fields: traitEntities, update: updateTraitEntity } = useFieldArray({
    control,
    name: "traitEntities",
@ -122,22 +119,29 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
    queryFn: async () => {
      return await apiClient.predictNlp(currentText);
    },
-    onSuccess: (result) => {
-      const traitEntities: INlpDatasetTraitEntity[] = result.entities.filter(
-        (e) => !("start" in e && "end" in e) && e.entity !== "language",
-      );
-      const keywordEntities = result.entities.filter(
-        (e) => "start" in e && "end" in e,
+    onSuccess: (prediction) => {
+      const predictedTraitEntities: INlpDatasetTraitEntity[] =
+        prediction.entities.filter((e) => allTraitEntities.has(e.entity));
+      const predictedKeywordEntities = prediction.entities.filter((e) =>
+        allKeywordEntities.has(e.entity),
      ) as INlpDatasetKeywordEntity[];
-      const language = result.entities.find(
+      const predictedPatternEntities = prediction.entities.filter((e) =>
+        allPatternEntities.has(e.entity),
+      ) as INlpDatasetKeywordEntity[];
+      const language = prediction.entities.find(
        ({ entity }) => entity === "language",
      );

      setValue("language", language?.value || "");
-      setValue("traitEntities", traitEntities);
-      setValue("keywordEntities", keywordEntities);
+      setValue("traitEntities", predictedTraitEntities);
+      setValue("keywordEntities", predictedKeywordEntities);
+      setPatternEntities(predictedPatternEntities);
    },
-    enabled: !sample && !!currentText,
+    enabled:
+      // Inbox sample update
+      sample?.type === "inbox" ||
+      // New sample
+      (!sample && !!currentText),
  });
  const findInsertIndex = (newItem: INlpDatasetKeywordEntity): number => {
    const index = keywordEntities.findIndex(
@ -153,7 +157,7 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
  } | null>(null);
  const onSubmitForm = (form: INlpSampleFormAttributes) => {
    submitForm(form);
-    refetchEntities();
+    refetchAllEntities();
    reset({
      ...defaultValues,
      text: "",
@ -203,7 +207,8 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
          <ContentItem>
            <Selectable
              defaultValue={currentText}
-              entities={keywordEntities}
+              keywordEntities={keywordEntities}
+              patternEntities={patternEntities}
              placeholder={t("placeholder.nlp_sample_text")}
              onSelect={(selection, start, end) => {
                setSelection({
@ -223,11 +228,13 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
                    end,
                  })),
                );
+                setPatternEntities([]);
              }}
              loading={isLoading}
            />
          </ContentItem>
          <Box display="flex" flexDirection="column">
+            {/* Language selection */}
            <ContentItem
              display="flex"
              flexDirection="row"
@ -261,6 +268,7 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
                }}
              />
            </ContentItem>
+            {/* Trait entities */}
            {traitEntities.map((traitEntity, index) => (
              <ContentItem
                key={traitEntity.id}
@ -275,13 +283,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
                  control={control}
                  render={({ field }) => {
                    const { onChange: _, value, ...rest } = field;
-                    const entity = entities?.find(
-                      ({ name }) => name === traitEntity.entity,
-                    );
-                    const options =
-                      entity?.values.map(
-                        (v) => getNlpValueFromCache(v) as INlpValue,
-                      ) || [];
+                    const options = (
+                      allTraitEntities.get(traitEntity.entity)?.values || []
+                    ).map((v) => getNlpValueFromCache(v)!);

                    return (
                      <>
@ -318,7 +322,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
              </ContentItem>
            ))}
          </Box>
-
+          {
+            /* Keyword entities */
+          }
          <Box display="flex" flexDirection="column">
            {keywordEntities.map((keywordEntity, index) => (
              <ContentItem
@ -335,22 +341,16 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
                  control={control}
                  render={({ field }) => {
                    const { onChange: _, ...rest } = field;
+                    const options = [...allKeywordEntities.values()];

                    return (
-                      <AutoCompleteEntitySelect<INlpEntity, "name", false>
+                      <AutoCompleteSelect<INlpEntity, "name", false>
                        fullWidth={true}
-                        searchFields={["name"]}
-                        entity={EntityType.NLP_ENTITY}
-                        format={Format.FULL}
+                        options={options}
                        idKey="name"
                        labelKey="name"
                        label={t("label.nlp_entity")}
                        multiple={false}
-                        preprocess={(options) => {
-                          return options.filter(({ lookups }) =>
-                            lookups.includes("keywords"),
-                          );
-                        }}
                        onChange={(_e, selected, ..._) => {
                          updateKeywordEntity(index, {
                            ...keywordEntities[index],
@ -367,13 +367,9 @@ const NlpDatasetSample: FC<NlpDatasetSampleProps> = ({
                  control={control}
                  render={({ field }) => {
                    const { onChange: _, value, ...rest } = field;
-                    const entity = entities?.find(
-                      ({ name }) => name === keywordEntity.entity,
-                    );
-                    const options =
-                      entity?.values.map(
-                        (v) => getNlpValueFromCache(v) as INlpValue,
-                      ) || [];
+                    const options = (
+                      allKeywordEntities.get(keywordEntity.entity)?.values || []
+                    ).map((v) => getNlpValueFromCache(v)!);

                    return (
                      <AutoCompleteSelect<
--- a/frontend/src/components/nlp/components/NlpValueForm.tsx
+++ b/frontend/src/components/nlp/components/NlpValueForm.tsx
@ -6,6 +6,7 @@
 * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
 */

+import { FormControlLabel, Switch } from "@mui/material";
 import { useRouter } from "next/router";
 import { FC, Fragment, useEffect } from "react";
 import { Controller, useForm } from "react-hook-form";
@ -13,6 +14,7 @@ import { Controller, useForm } from "react-hook-form";
 import { ContentContainer, ContentItem } from "@/app-components/dialogs";
 import { Input } from "@/app-components/inputs/Input";
 import MultipleInput from "@/app-components/inputs/MultipleInput";
+import { RegexInput } from "@/app-components/inputs/RegexInput";
 import { useCreate } from "@/hooks/crud/useCreate";
 import { useGet } from "@/hooks/crud/useGet";
 import { useUpdate } from "@/hooks/crud/useUpdate";
@ -20,8 +22,29 @@ import { useToast } from "@/hooks/useToast";
 import { useTranslate } from "@/hooks/useTranslate";
 import { EntityType, Format } from "@/services/types";
 import { ComponentFormProps } from "@/types/common/dialogs.types";
-import { INlpEntity, NlpLookups } from "@/types/nlp-entity.types";
+import {
+  INlpEntity,
+  INlpMetadata,
+  LookupStrategy,
+} from "@/types/nlp-entity.types";
 import { INlpValue, INlpValueAttributes } from "@/types/nlp-value.types";
+import { isRegex } from "@/utils/string";
+
+/**
+ * Builds the initial `metadata` of the value form.
+ *
+ * @param nlpEntity - The entity the value belongs to, if known.
+ * @returns Pattern defaults (empty pattern, word boundary enabled, every other
+ * flag disabled) when the entity uses the pattern lookup strategy; an empty
+ * object otherwise.
+ */
+const getDefaultNlpMetadata = (
+  nlpEntity: INlpEntity | undefined,
+): INlpMetadata => {
+  const usesPatternLookup = nlpEntity?.lookups.includes(
+    LookupStrategy.pattern,
+  );
+
+  if (!usesPatternLookup) {
+    return {};
+  }
+
+  return {
+    pattern: "",
+    wordBoundary: true,
+    removeSpaces: false,
+    toLowerCase: false,
+    stripDiacritics: false,
+  };
+};

 export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
  data: { defaultValues: nlpValue, presetValues: nlpEntity },
@ -36,7 +59,8 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
    entity: EntityType.NLP_ENTITY,
    format: Format.FULL,
  });
-  const canHaveSynonyms = nlpEntity?.lookups.includes(NlpLookups.keywords);
+  const canHaveSynonyms = nlpEntity?.lookups.includes(LookupStrategy.keywords);
+  const isPattern = nlpEntity?.lookups.includes(LookupStrategy.pattern);
  const { mutate: createNlpValue } = useCreate(EntityType.NLP_VALUE, {
    onError: () => {
      rest.onError?.();
@ -73,15 +97,9 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
      value: nlpValue?.value || "",
      doc: nlpValue?.doc || "",
      expressions: nlpValue?.expressions || [],
+      metadata: nlpValue?.metadata || getDefaultNlpMetadata(nlpEntity),
    },
  });
-  const validationRules = {
-    value: {
-      required: t("message.value_is_required"),
-    },
-    name: {},
-    description: {},
-  };
  const onSubmitForm = async (params: INlpValueAttributes) => {
    if (nlpValue) {
      updateNlpValue({ id: nlpValue.id, params });
@ -96,11 +114,17 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
        value: nlpValue.value,
        expressions: nlpValue.expressions,
        doc: nlpValue.doc,
+        metadata: nlpValue.metadata,
      });
    } else {
-      reset();
+      reset({
+        value: "",
+        expressions: [],
+        doc: "",
+        metadata: getDefaultNlpMetadata(nlpEntity),
+      });
    }
-  }, [nlpValue, reset]);
+  }, [nlpValue, nlpEntity, reset]);

  return (
    <Wrapper onSubmit={handleSubmit(onSubmitForm)} {...WrapperProps}>
@ -112,15 +136,87 @@ export const NlpValueForm: FC<ComponentFormProps<INlpValue, INlpEntity>> = ({
              error={!!errors.value}
              required
              autoFocus
-              helperText={errors.value ? errors.value.message : null}
-              {...register("value", validationRules.value)}
+              helperText={errors.value?.message}
+              {...register("value", {
+                required: t("message.value_is_required"),
+              })}
            />
          </ContentItem>
+          {isPattern && (
+            <>
+              <ContentItem>
+                <RegexInput
+                  {...register("metadata.pattern", {
+                    required: t("message.regex_is_invalid"),
+                    validate: (pattern: string | undefined) => {
+                      return isRegex(pattern)
+                        ? true
+                        : t("message.regex_is_invalid");
+                    },
+                  })}
+                  helperText={errors.metadata?.pattern?.message}
+                  error={!!errors.metadata?.pattern}
+                  label={t("label.regex")}
+                  placeholder={t("placeholder.pattern")}
+                  flags={["i"]}
+                />
+              </ContentItem>
+              <ContentItem>
+                <Controller
+                  name="metadata.wordBoundary"
+                  control={control}
+                  render={({ field }) => (
+                    <FormControlLabel
+                      control={<Switch {...field} checked={field.value} />}
+                      label={t("label.word_boundary")}
+                    />
+                  )}
+                />
+              </ContentItem>
+              <ContentItem>
+                <Controller
+                  name="metadata.removeSpaces"
+                  control={control}
+                  render={({ field }) => (
+                    <FormControlLabel
+                      control={<Switch {...field} checked={field.value} />}
+                      label={t("label.remove_spaces")}
+                    />
+                  )}
+                />
+              </ContentItem>
+              <ContentItem>
+                <Controller
+                  name="metadata.toLowerCase"
+                  control={control}
+                  render={({ field }) => (
+                    <FormControlLabel
+                      control={<Switch {...field} checked={field.value} />}
+                      label={t("label.to_lower_case")}
+                    />
+                  )}
+                />
+              </ContentItem>
+              <ContentItem>
+                <Controller
+                  name="metadata.stripDiacritics"
+                  control={control}
+                  render={({ field }) => (
+                    <FormControlLabel
+                      control={<Switch {...field} checked={field.value} />}
+                      label={t("label.strip_diacritics")}
+                    />
+                  )}
+                />
+              </ContentItem>
+            </>
+          )}
          <ContentItem>
            <Input
              label={t("label.doc")}
              {...register("doc")}
              multiline={true}
+              rows={3}
            />
          </ContentItem>

--- a/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx
+++ b/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx
@ -22,15 +22,18 @@ import {
  PatternType,
  PayloadPattern,
 } from "@/types/block.types";
+import {
+  extractRegexBody,
+  formatWithSlashes,
+  isRegex,
+  isRegexString,
+} from "@/utils/string";

 import { OutcomeInput } from "./OutcomeInput";
 import { PostbackInput } from "./PostbackInput";

-const isRegex = (str: Pattern) => {
-  return typeof str === "string" && str.startsWith("/") && str.endsWith("/");
-};
-const getType = (pattern: Pattern): PatternType => {
-  if (isRegex(pattern)) {
+const getPatternType = (pattern: Pattern): PatternType => {
+  if (isRegexString(pattern)) {
    return "regex";
  } else if (Array.isArray(pattern)) {
    return "nlp";
@ -69,7 +72,7 @@ const PatternInput: FC<PatternInputProps> = ({
    formState: { errors },
  } = useFormContext<IBlockAttributes>();
  const [pattern, setPattern] = useState<Pattern>(value);
-  const patternType = getType(value);
+  const patternType = getPatternType(value);
  const registerInput = (
    errorMessage: string,
    idx: number,
@ -122,23 +125,15 @@ const PatternInput: FC<PatternInputProps> = ({
        <RegexInput
          {...registerInput(t("message.regex_is_empty"), idx, {
            validate: (pattern) => {
-              try {
-                const parsedPattern = new RegExp(pattern.slice(1, -1));
-
-                if (String(parsedPattern) !== pattern) {
-                  throw t("message.regex_is_invalid");
-                }
-
-                return true;
-              } catch (_e) {
-                return t("message.regex_is_invalid");
-              }
+              return isRegex(extractRegexBody(pattern))
+                ? true
+                : t("message.regex_is_invalid");
            },
-            setValueAs: (v) => (isRegex(v) ? v : `/${v}/`),
+            setValueAs: (v) => (isRegexString(v) ? v : formatWithSlashes(v)),
          })}
+          value={extractRegexBody(value)}
          label={t("label.regex")}
-          value={value.slice(1, -1)}
-          onChange={(v) => onChange(v)}
+          onChange={(e) => onChange(formatWithSlashes(e.target.value))}
          required
        />
      ) : null}
--- a/frontend/src/hooks/useNlp.tsx
+++ b/frontend/src/hooks/useNlp.tsx
@ -0,0 +1,55 @@
+/*
+ * Copyright © 2025 Hexastack. All rights reserved.
+ *
+ * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
+ * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
+ * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
+ */
+
+import { useMemo } from "react";
+
+import { EntityType, Format } from "@/services/types";
+import { INlpEntity, Lookup } from "@/types/nlp-entity.types";
+
+import { useFind } from "./crud/useFind";
+
+/**
+ * Indexes by name the entities that declare the given lookup strategy.
+ * When several matching entities share a name, the last one wins.
+ */
+const buildNlpEntityMap = (entities: INlpEntity[], lookup: Lookup) => {
+  const entityByName = new Map<string, INlpEntity>();
+
+  for (const entity of entities) {
+    if (entity.lookups.includes(lookup)) {
+      entityByName.set(entity.name, entity);
+    }
+  }
+
+  return entityByName;
+};
+
+/**
+ * Fetches every NLP entity (full format) and exposes them as name-indexed
+ * maps, one per lookup strategy, along with the query's refetch function.
+ */
+export const useNlp = () => {
+  const { data: allEntities, refetch: refetchAllEntities } = useFind(
+    { entity: EntityType.NLP_ENTITY, format: Format.FULL },
+    { hasCount: false },
+  );
+  // Each map is memoized on the fetched list; an empty list is used until the
+  // data is available.
+  const allTraitEntities = useMemo(
+    () => buildNlpEntityMap(allEntities || [], "trait"),
+    [allEntities],
+  );
+  const allKeywordEntities = useMemo(
+    () => buildNlpEntityMap(allEntities || [], "keywords"),
+    [allEntities],
+  );
+  const allPatternEntities = useMemo(
+    () => buildNlpEntityMap(allEntities || [], "pattern"),
+    [allEntities],
+  );
+
+  return {
+    allTraitEntities,
+    allKeywordEntities,
+    allPatternEntities,
+    refetchAllEntities,
+  };
+};
--- a/frontend/src/types/nlp-entity.types.ts
+++ b/frontend/src/types/nlp-entity.types.ts
@ -11,7 +11,23 @@ import { EntityType, Format } from "@/services/types";
 import { IBaseSchema, IFormat, OmitPopulate } from "./base.types";
 import { INlpValue } from "./nlp-value.types";

-export type Lookup = "keywords" | "trait" | "free-text";
+/**
+ * Lookup strategies available for an NLP entity. The `Lookup` string-literal
+ * type is derived from the values of this enum.
+ */
+export enum LookupStrategy {
+  keywords = "keywords",
+  trait = "trait",
+  // Currently disabled: while commented out, "free-text" is not a member.
+  // free_text = "free-text",
+  pattern = "pattern",
+}
+
+export type Lookup = `${LookupStrategy}`;
+
+/**
+ * Shape of the `metadata` attached to an NLP value. Every field is optional at
+ * the type level.
+ */
+export interface INlpMetadata {
+  // Required when lookups is "pattern"
+  pattern?: string;
+  // NOTE(review): the flags below are presumably pre-processing options applied
+  // when matching the pattern; their exact semantics are implemented outside
+  // this file, confirm against the API before relying on them.
+  wordBoundary?: boolean;
+  removeSpaces?: boolean;
+  toLowerCase?: boolean;
+  stripDiacritics?: boolean;
+}

 export interface INlpEntityAttributes {
  foreign_id?: string;
@ -22,11 +38,6 @@ export interface INlpEntityAttributes {
  weight?: number;
 }

-export enum NlpLookups {
-  keywords = "keywords",
-  trait = "trait",
-}
-
 export interface INlpEntityStub
  extends IBaseSchema,
    OmitPopulate<INlpEntityAttributes, EntityType.NLP_ENTITY> {}
--- a/frontend/src/types/nlp-sample.types.ts
+++ b/frontend/src/types/nlp-sample.types.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -52,6 +52,8 @@ export interface INlpDatasetKeywordEntity extends INlpDatasetTraitEntity {
  end: number;
 }

+export interface INlpDatasetPatternEntity extends INlpDatasetKeywordEntity {}
+
 export interface INlpSampleFormAttributes
  extends Omit<INlpSampleAttributes, "entities"> {
  traitEntities: INlpDatasetTraitEntity[];
--- a/frontend/src/types/nlp-value.types.ts
+++ b/frontend/src/types/nlp-value.types.ts
@ -9,7 +9,7 @@
 import { Format } from "@/services/types";

 import { IBaseSchema, IFormat } from "./base.types";
-import { INlpEntity } from "./nlp-entity.types";
+import { INlpEntity, INlpMetadata } from "./nlp-entity.types";

 export interface INlpValueAttributes {
  entity: string;
@ -17,7 +17,7 @@ export interface INlpValueAttributes {
  value: string;
  doc?: string;
  expressions?: string[];
-  metadata?: Record<string, any>;
+  metadata?: INlpMetadata;
  builtin?: boolean;
  nlpSamplesCount?: number;
 }
--- a/frontend/src/utils/string.ts
+++ b/frontend/src/utils/string.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -18,3 +18,49 @@ export const slugify = (str: string) => {
 export const getNamespace = (extensionName: string) => {
  return extensionName.replaceAll("-", "_");
 };
+
+/**
+ * Checks whether the value is a string that both starts and ends with "/" (delimiters only; a lone "/" passes, and the body is not validated)
+ */
+export const isRegexString = (str: any) => {
+  return typeof str === "string" && str.startsWith("/") && str.endsWith("/");
+};
+
+/**
+ * Ensures value is wrapped in slashes: /value/ (an empty value, or a lone "/", yields a single "/")
+ */
+export const formatWithSlashes = (value: string): string => {
+  if (!value) return "/";
+  if (!value.startsWith("/")) value = "/" + value;
+  if (!value.endsWith("/")) value = value + "/";
+
+  return value;
+};
+
+/**
+ * Extracts the inner regex from /.../, returning "" when the value is missing or not wrapped in slashes
+ */
+export const extractRegexBody = (value: string | undefined): string => {
+  if (value && value.startsWith("/") && value.endsWith("/")) {
+    return value.slice(1, -1);
+  }
+
+  return '';
+};
+
+/**
+ * Checks if the regex pattern compiles correctly; an empty or missing pattern is reported as invalid (false)
+ */
+export const isRegex = (pattern: string | undefined) => {
+  try {
+    if (!pattern) {
+      throw new Error("Pattern was not provided!"); // caught by the catch below, so the result is false
+    }
+
+    new RegExp(pattern);
+
+    return true;
+  } catch {
+    return false;
+  }
+};