Merge pull request #792 from Hexastack/feat/annotate-sample-with-keyword-entities

feat: nlu keyword entity annotation
2025-04-07 06:24:23 +00:00 · 2025-03-06 14:08:19 +01:00 · 2025-03-06 14:08:19 +01:00 · affcd12b52
commit affcd12b52
parent 7acf637273 67314a6440
10 changed files with 519 additions and 36 deletions
--- a/api/src/nlp/controllers/nlp-sample.controller.ts
+++ b/api/src/nlp/controllers/nlp-sample.controller.ts
@ -31,6 +31,7 @@ import { CsrfCheck } from '@tekuconcept/nestjs-csrf';
 import { Response } from 'express';

 import { HelperService } from '@/helper/helper.service';
+import { HelperType } from '@/helper/types';
 import { LanguageService } from '@/i18n/services/language.service';
 import { CsrfInterceptor } from '@/interceptors/csrf.interceptor';
 import { LoggerService } from '@/logger/logger.service';
@ -74,6 +75,28 @@ export class NlpSampleController extends BaseController<
    super(nlpSampleService);
  }

+  /**
+   * Runs the keyword annotation of samples for the entity identified by `entityId`.
+   * Responds with NotFound for an unknown id and BadRequest for a non-keyword entity.
+   *
+   * @returns `{ success: true }` once the annotation call has resolved.
+   */
+  @CsrfCheck(true)
+  @Post('annotate/:entityId')
+  async annotateWithKeywordEntity(@Param('entityId') entityId: string) {
+    const keywordEntity =
+      await this.nlpEntityService.findOneAndPopulate(entityId);
+    if (!keywordEntity) {
+      throw new NotFoundException('Unable to find the keyword entity.');
+    }
+    const isKeywordEntity = keywordEntity.lookups.includes('keywords');
+    if (!isKeywordEntity) {
+      throw new BadRequestException('Cannot annotate samples with a non-keyword entity');
+    }
+    await this.nlpSampleService.annotateWithKeywordEntity(keywordEntity);
+    return { success: true };
+  }
+
  /**
   * Exports the NLP samples in a formatted JSON file, using the Rasa NLU format.
   *
@ -91,7 +114,7 @@ export class NlpSampleController extends BaseController<
      type ? { type } : {},
    );
    const entities = await this.nlpEntityService.findAllAndPopulate();
-    const helper = await this.helperService.getDefaultNluHelper();
+    const helper = await this.helperService.getDefaultHelper(HelperType.NLU);
    const result = await helper.format(samples, entities);

    // Sending the JSON data as a file
@ -173,27 +196,10 @@ export class NlpSampleController extends BaseController<
   */
  @Get('message')
  async message(@Query('text') text: string) {
-    const helper = await this.helperService.getDefaultNluHelper();
+    const helper = await this.helperService.getDefaultHelper(HelperType.NLU);
    return helper.predict(text);
  }

-  /**
-   * Fetches the samples and entities for a given sample type.
-   *
-   * @param type - The sample type (e.g., 'train', 'test').
-   * @returns An object containing the samples and entities.
-   * @private
-   */
-  private async getSamplesAndEntitiesByType(type: NlpSample['type']) {
-    const samples = await this.nlpSampleService.findAndPopulate({
-      type,
-    });
-
-    const entities = await this.nlpEntityService.findAllAndPopulate();
-
-    return { samples, entities };
-  }
-
  /**
   * Initiates the training process for the NLP service using the 'train' sample type.
   *
@ -202,10 +208,10 @@ export class NlpSampleController extends BaseController<
  @Get('train')
  async train() {
    const { samples, entities } =
-      await this.getSamplesAndEntitiesByType('train');
+      await this.nlpSampleService.getAllSamplesAndEntitiesByType('train');

    try {
-      const helper = await this.helperService.getDefaultNluHelper();
+      const helper = await this.helperService.getDefaultHelper(HelperType.NLU);
      const response = await helper.train?.(samples, entities);
      // Mark samples as trained
      await this.nlpSampleService.updateMany(
@ -229,9 +235,9 @@ export class NlpSampleController extends BaseController<
  @Get('evaluate')
  async evaluate() {
    const { samples, entities } =
-      await this.getSamplesAndEntitiesByType('test');
+      await this.nlpSampleService.getAllSamplesAndEntitiesByType('test');

-    const helper = await this.helperService.getDefaultNluHelper();
+    const helper = await this.helperService.getDefaultHelper(HelperType.NLU);
    return await helper.evaluate?.(samples, entities);
  }

--- a/api/src/nlp/services/nlp-sample-entity.service.spec.ts
+++ b/api/src/nlp/services/nlp-sample-entity.service.spec.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -25,6 +25,7 @@ import {
 } from '@/utils/test/test';
 import { TFixtures } from '@/utils/test/types';

+import { NlpSampleEntityCreateDto } from '../dto/nlp-sample-entity.dto';
 import { NlpEntityRepository } from '../repositories/nlp-entity.repository';
 import { NlpSampleEntityRepository } from '../repositories/nlp-sample-entity.repository';
 import { NlpValueRepository } from '../repositories/nlp-value.repository';
@ -201,7 +202,15 @@ describe('NlpSampleEntityService', () => {
    });

    it('should throw an error if stored entity or value cannot be found', async () => {
-      const sample = { id: 1, text: 'Hello world' } as any as NlpSample;
+      const sample: NlpSample = {
+        id: 's1',
+        text: 'Hello world',
+        language: null,
+        trained: false,
+        type: 'train',
+        createdAt: new Date(),
+        updatedAt: new Date(),
+      };
      const entities = [
        { entity: 'greeting', value: 'Hello', start: 0, end: 5 },
      ];
@ -214,4 +223,235 @@ describe('NlpSampleEntityService', () => {
      ).rejects.toThrow('Unable to find the stored entity or value');
    });
  });
+
+  describe('extractKeywordEntities', () => {
+    it('should extract entities when keywords are found', () => {
+      const sample = {
+        id: 's1',
+        text: 'Hello world, AI is amazing!',
+      } as NlpSample;
+      const value = {
+        id: 'v1',
+        entity: 'e1',
+        value: 'AI',
+        expressions: ['amazing'],
+      } as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's1',
+          entity: 'e1',
+          value: 'v1',
+          start: 13,
+          end: 15,
+        },
+        {
+          sample: 's1',
+          entity: 'e1',
+          value: 'v1',
+          start: 19,
+          end: 26,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should be case-insensitive', () => {
+      const sample = {
+        id: 's2',
+        text: 'I love ai and artificial intelligence.',
+      } as NlpSample;
+      const value = {
+        id: 'v2',
+        entity: 'e2',
+        value: 'AI',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's2',
+          entity: 'e2',
+          value: 'v2',
+          start: 7,
+          end: 9,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should extract multiple occurrences of the same keyword', () => {
+      const sample = {
+        id: 's3',
+        text: 'AI AI AI is everywhere.',
+      } as NlpSample;
+      const value = {
+        id: 'v3',
+        entity: 'e3',
+        value: 'AI',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's3',
+          entity: 'e3',
+          value: 'v3',
+          start: 0,
+          end: 2,
+        },
+        {
+          sample: 's3',
+          entity: 'e3',
+          value: 'v3',
+          start: 3,
+          end: 5,
+        },
+        {
+          sample: 's3',
+          entity: 'e3',
+          value: 'v3',
+          start: 6,
+          end: 8,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should handle empty expressions array correctly', () => {
+      const sample = {
+        id: 's4',
+        text: 'Data science is great.',
+      } as NlpSample;
+      const value = {
+        id: 'v4',
+        entity: 'e4',
+        value: 'science',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's4',
+          entity: 'e4',
+          value: 'v4',
+          start: 5,
+          end: 12,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should return an empty array if no matches are found', () => {
+      const sample = { id: 'sample5', text: 'Hello world!' } as NlpSample;
+      const value = {
+        id: 'v5',
+        entity: 'e5',
+        value: 'Python',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual([]);
+    });
+
+    it('should match keywords as whole words only', () => {
+      const sample = {
+        id: 'sample6',
+        text: 'Technical claim.',
+      } as NlpSample;
+      const value = {
+        id: 'v6',
+        entity: 'e6',
+        value: 'AI',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      // Should not match the "ai" inside "claim" since it's not a standalone word
+      const expected: NlpSampleEntityCreateDto[] = [];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should handle special characters in the text correctly', () => {
+      const sample = { id: 's7', text: 'Hello, AI. AI? AI!' } as NlpSample;
+      const value = {
+        id: 'v7',
+        entity: 'e7',
+        value: 'AI',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's7',
+          entity: 'e7',
+          value: 'v7',
+          start: 7,
+          end: 9,
+        },
+        {
+          sample: 's7',
+          entity: 'e7',
+          value: 'v7',
+          start: 11,
+          end: 13,
+        },
+        {
+          sample: 's7',
+          entity: 'e7',
+          value: 'v7',
+          start: 15,
+          end: 17,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+
+    it('should handle regex special characters in keyword values correctly', () => {
+      const sample = {
+        id: 's10',
+        text: 'Find the,AI, in this text.',
+      } as NlpSample;
+
+      const value = {
+        id: 'v10',
+        entity: 'e10',
+        value: 'AI',
+        expressions: [],
+      } as unknown as NlpValue;
+
+      const expected: NlpSampleEntityCreateDto[] = [
+        {
+          sample: 's10',
+          entity: 'e10',
+          value: 'v10',
+          start: 9,
+          end: 11,
+        },
+      ];
+
+      expect(
+        nlpSampleEntityService.extractKeywordEntities(sample, value),
+      ).toEqual(expected);
+    });
+  });
 });
--- a/api/src/nlp/services/nlp-sample-entity.service.ts
+++ b/api/src/nlp/services/nlp-sample-entity.service.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -10,13 +10,15 @@ import { Injectable } from '@nestjs/common';

 import { BaseService } from '@/utils/generics/base-service';

+import { NlpSampleEntityCreateDto } from '../dto/nlp-sample-entity.dto';
 import { NlpSampleEntityRepository } from '../repositories/nlp-sample-entity.repository';
 import {
  NlpSampleEntity,
  NlpSampleEntityFull,
  NlpSampleEntityPopulate,
 } from '../schemas/nlp-sample-entity.schema';
-import { NlpSample } from '../schemas/nlp-sample.schema';
+import { NlpSample, NlpSampleStub } from '../schemas/nlp-sample.schema';
+import { NlpValue } from '../schemas/nlp-value.schema';
 import { NlpSampleEntityValue } from '../schemas/types';

 import { NlpEntityService } from './nlp-entity.service';
@ -76,4 +78,41 @@ export class NlpSampleEntityService extends BaseService<

    return await this.createMany(sampleEntities);
  }
+
+  /**
+   * Extracts entities from a given text sample by matching keywords defined in `NlpValue`.
+   * Keywords are regex-escaped and grouped so that every one of them is matched literally, as a whole word.
+   *
+   * @param sample - The text sample from which entities should be extracted.
+   * @param value - The entity value containing the primary keyword and its expressions.
+   * @returns - An array of extracted entity matches, including their positions (`end` is exclusive).
+   */
+  extractKeywordEntities<S extends NlpSampleStub>(
+    sample: S,
+    value: NlpValue,
+  ): NlpSampleEntityCreateDto[] {
+    // Escape regex metacharacters (e.g. "C++") and drop empty keywords, which would match everywhere
+    const keywords = [value.value, ...value.expressions]
+      .filter((keyword) => keyword.length > 0)
+      .map((keyword) => keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
+    const matches: NlpSampleEntityCreateDto[] = [];
+    if (!keywords.length) return matches;
+
+    // The non-capturing group applies both letter boundaries to every alternative, not just the outer ones
+    const regex = `(?<!\\p{L})(?:${keywords.join('|')})(?!\\p{L})`;
+    const regexPattern = new RegExp(regex, 'giu');
+    let match: RegExpExecArray | null;
+
+    while ((match = regexPattern.exec(sample.text)) !== null) {
+      matches.push({
+        sample: sample.id,
+        entity: value.entity,
+        value: value.id,
+        start: match.index,
+        end: match.index + match[0].length,
+      });
+    }
+
+    return matches;
+  }
 }
--- a/api/src/nlp/services/nlp-sample.service.spec.ts
+++ b/api/src/nlp/services/nlp-sample.service.spec.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -24,11 +24,16 @@ import {
  rootMongooseTestModule,
 } from '@/utils/test/test';

+import { NlpSampleEntityCreateDto } from '../dto/nlp-sample-entity.dto';
 import { NlpEntityRepository } from '../repositories/nlp-entity.repository';
 import { NlpSampleEntityRepository } from '../repositories/nlp-sample-entity.repository';
 import { NlpSampleRepository } from '../repositories/nlp-sample.repository';
 import { NlpValueRepository } from '../repositories/nlp-value.repository';
-import { NlpEntity, NlpEntityModel } from '../schemas/nlp-entity.schema';
+import {
+  NlpEntity,
+  NlpEntityFull,
+  NlpEntityModel,
+} from '../schemas/nlp-entity.schema';
 import {
  NlpSampleEntity,
  NlpSampleEntityModel,
@ -276,4 +281,74 @@ describe('NlpSampleService', () => {
      expect(result[1].text).toEqual('Bye');
    });
  });
+
+  describe('annotateWithKeywordEntity', () => {
+    it('should annotate samples when matching samples exist', async () => {
+      const entity = {
+        id: 'entity-id',
+        name: 'entity_name',
+        values: [
+          {
+            id: 'value-id',
+            value: 'keyword',
+            expressions: ['synonym1', 'synonym2'],
+          },
+        ],
+      } as NlpEntityFull;
+
+      const sampleText = 'This is a test sample with keyword in it.';
+      const samples = [{ id: 'sample-id', text: sampleText }] as NlpSample[];
+
+      const extractedMatches = [
+        { sample: 'sample-id', entity: 'test_entity', value: 'keyword' },
+      ] as NlpSampleEntityCreateDto[];
+
+      const findSpy = jest
+        .spyOn(nlpSampleService, 'find')
+        .mockResolvedValue(samples);
+      const extractSpy = jest
+        .spyOn(nlpSampleEntityService, 'extractKeywordEntities')
+        .mockReturnValue(extractedMatches);
+
+      const findOrCreateSpy = jest
+        .spyOn(nlpSampleEntityService, 'findOneOrCreate')
+        .mockResolvedValue({} as NlpSampleEntity);
+
+      await nlpSampleService.annotateWithKeywordEntity(entity);
+
+      expect(findSpy).toHaveBeenCalledWith({
+        text: { $regex: '\\b(keyword|synonym1|synonym2)\\b', $options: 'i' },
+        type: ['train', 'test'],
+      });
+
+      expect(extractSpy).toHaveBeenCalledWith(samples[0], entity.values[0]);
+      expect(findOrCreateSpy).toHaveBeenCalledWith(
+        extractedMatches[0],
+        extractedMatches[0],
+      );
+    });
+
+    it('should not annotate when no matching samples are found', async () => {
+      const entity = {
+        id: 'entity-id',
+        name: 'test_entity',
+        values: [
+          {
+            value: 'keyword',
+            expressions: ['synonym1', 'synonym2'],
+          },
+        ],
+      } as NlpEntityFull;
+
+      jest.spyOn(nlpSampleService, 'find').mockResolvedValue([]);
+      const extractSpy = jest.spyOn(
+        nlpSampleEntityService,
+        'extractKeywordEntities',
+      );
+
+      await nlpSampleService.annotateWithKeywordEntity(entity);
+
+      expect(extractSpy).not.toHaveBeenCalled();
+    });
+  });
 });
--- a/api/src/nlp/services/nlp-sample.service.ts
+++ b/api/src/nlp/services/nlp-sample.service.ts
@ -1,5 +1,5 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@ -21,8 +21,10 @@ import { LoggerService } from '@/logger/logger.service';
 import { BaseService } from '@/utils/generics/base-service';
 import { THydratedDocument } from '@/utils/types/filter.types';

+import { NlpSampleEntityCreateDto } from '../dto/nlp-sample-entity.dto';
 import { NlpSampleCreateDto, TNlpSampleDto } from '../dto/nlp-sample.dto';
 import { NlpSampleRepository } from '../repositories/nlp-sample.repository';
+import { NlpEntityFull } from '../schemas/nlp-entity.schema';
 import {
  NlpSample,
  NlpSampleFull,
@ -50,6 +52,22 @@ export class NlpSampleService extends BaseService<
    super(repository);
  }

+  /**
+   * Loads the populated samples of the given type, then every populated entity.
+   *
+   * @param type - The sample type to load (e.g., 'train', 'test').
+   * @returns An object holding the matching `samples` and the `entities`.
+   */
+  public async getAllSamplesAndEntitiesByType(type: NlpSample['type']) {
+    const samples = await this.findAndPopulate({ type });
+    const entities = await this.nlpEntityService.findAllAndPopulate();
+
+    return {
+      samples,
+      entities,
+    };
+  }
+
  /**
   * Deletes an NLP sample by its ID and cascades the operation if needed.
   *
@ -165,6 +183,53 @@ export class NlpSampleService extends BaseService<
    return nlpSamples;
  }

+  /**
+   * Looks up the train/test samples whose text contains a keyword (or synonym) of the given entity
+   * and stores every occurrence found in them as a sample entity.
+   * Matches go through `findOneOrCreate`; a sample that fails is logged and skipped, never rethrown.
+   *
+   * @param entity The keyword entity, populated with its values
+   */
+  async annotateWithKeywordEntity(entity: NlpEntityFull) {
+    for (const value of entity.values) {
+      // Escape regex metacharacters so the keyword and its synonyms are searched for literally
+      const keywords = [value.value, ...value.expressions].map((keyword) =>
+        keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
+      );
+      const samples = await this.find({
+        text: { $regex: `\\b(${keywords.join('|')})\\b`, $options: 'i' },
+        type: ['train', 'test'],
+      });
+
+      if (samples.length > 0) {
+        this.logger.debug(
+          `Annotating ${entity.name} - ${value.value} in ${samples.length} sample(s) ...`,
+        );
+
+        for (const sample of samples) {
+          try {
+            const matches: NlpSampleEntityCreateDto[] =
+              this.nlpSampleEntityService.extractKeywordEntities(sample, value);
+            if (!matches.length) {
+              throw new Error('Something went wrong, unable to match keywords');
+            }
+
+            const updates = matches.map((dto) =>
+              this.nlpSampleEntityService.findOneOrCreate(dto, dto),
+            );
+            await Promise.all(updates);
+            this.logger.debug(
+              `Successfully annotate sample with ${updates.length} matches: ${sample.text}`,
+            );
+          } catch (err) {
+            // Best effort per sample: log the cause and move on to the next one
+            this.logger.error(`Failed to annotate sample: ${sample.text}`, err);
+          }
+        }
+      }
+    }
+  }
+
  /**
   * When a language gets deleted, we need to set related samples to null
   *
--- a/frontend/public/locales/en/translation.json
+++ b/frontend/public/locales/en/translation.json
@ -67,6 +67,8 @@
    "nlp_entity_name_is_invalid": "NLU Entity name format is invalid! Only `A-z`, `0-9` and `_` are allowed.",
    "nlp_unable_to_guess": "Unable to predict any meaning from the sentence.",
    "nlp_success_trained": "NLU Model has been successfully trained!",
+    "nlp_sample_annotation_success": "Successfully updated NLU samples by adding the entity values",
+    "nlp_sample_annotation_failure": "An error occurred while updating samples.",
    "no_user": "There are no subscribers at the moment.",
    "no_user_assigned": "No one is assigned to you.",
    "no_user_handledby_chatbot": "No one is handled currently by the chatbot",
@ -584,7 +586,8 @@
    "text": "Text",
    "location": "Location",
    "mark_as_default": "Mark as Default",
-    "toggle": "Toggle button"
+    "toggle": "Toggle",
+    "annotate": "Annotate Dataset"
  },
  "input": {
    "search": "Search"
--- a/frontend/public/locales/fr/translation.json
+++ b/frontend/public/locales/fr/translation.json
@ -67,6 +67,8 @@
    "nlp_entity_name_is_invalid": "Le nom d'entité NLU n'est pas valide! Seuls `A-z`,` 0-9` et `_` sont autorisés.",
    "nlp_unable_to_guess": "Impossible de prédire le sens de la phrase.",
    "nlp_success_trained": "L'apprentissage du modèle NLU a été éffectué avec succès!",
+    "nlp_sample_annotation_success": "Mise à jour réussie des échantillons NLU en ajoutant les valeurs d'entité.",
+    "nlp_sample_annotation_failure": "Une erreur est survenue lors de la mise à jour des échantillons.",
    "no_user": "Il n'y a aucun abonné pour le moment",
    "no_user_assigned": "Aucun abonné n'est assigné à vous.",
    "no_user_handledby_chatbot": "Aucun abonné n'est géré actuellement par le chatbot",
@ -585,7 +587,8 @@
    "text": "Texte",
    "location": "Emplacement",
    "mark_as_default": "Par Défaut",
-    "toggle": "Bouton de bascule"
+    "toggle": "Basculer",
+    "annotate": "Annoter les données"
  },
  "input": {
    "search": "Recherche"
--- a/frontend/src/app-components/tables/columns/getColumns.tsx
+++ b/frontend/src/app-components/tables/columns/getColumns.tsx
@ -1,13 +1,14 @@
 /*
- * Copyright © 2024 Hexastack. All rights reserved.
+ * Copyright © 2025 Hexastack. All rights reserved.
 *
 * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
 * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
 * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
 */

-import { CheckCircle } from "@mui/icons-material";
 import AdminPanelSettingsIcon from "@mui/icons-material/AdminPanelSettingsOutlined";
+import CachedIcon from "@mui/icons-material/Cached";
+import CheckCircleIcon from "@mui/icons-material/CheckCircle";
 import DeleteIcon from "@mui/icons-material/DeleteOutlined";
 import EditIcon from "@mui/icons-material/EditOutlined";
 import ListAltOutlinedIcon from "@mui/icons-material/ListAltOutlined";
@ -41,6 +42,7 @@ export enum ActionColumnLabel {
  Fields = "Fields",
  Manage_Labels = "Manage_Labels",
  Toggle = "Toggle",
+  Annotate = "Annotate",
 }

 const ACTION_COLUMN_LABEL_MAP: Record<ActionColumnLabel, TTranslationKeys> = {
@ -53,6 +55,7 @@ const ACTION_COLUMN_LABEL_MAP: Record<ActionColumnLabel, TTranslationKeys> = {
  [ActionColumnLabel.Fields]: "button.fields",
  [ActionColumnLabel.Manage_Labels]: "title.manage_labels",
  [ActionColumnLabel.Toggle]: "button.toggle",
+  [ActionColumnLabel.Annotate]: "button.annotate",
 } as const;

 export interface ActionColumn<T extends GridValidRowModel> {
@ -88,7 +91,9 @@ function getIcon(label: ActionColumnLabel) {
    case ActionColumnLabel.Manage_Labels:
      return <LocalOfferIcon />;
    case ActionColumnLabel.Toggle:
-      return <CheckCircle />;
+      return <CheckCircleIcon />;
+    case ActionColumnLabel.Annotate:
+      return <CachedIcon />;
    default:
      return <></>;
  }
--- a/frontend/src/components/nlp/components/NlpEntity.tsx
+++ b/frontend/src/components/nlp/components/NlpEntity.tsx
@ -12,6 +12,7 @@ import { Button, Chip, Grid } from "@mui/material";
 import { GridColDef, GridRowSelectionModel } from "@mui/x-data-grid";
 import { useRouter } from "next/router";
 import { useState } from "react";
+import { useMutation, useQueryClient } from "react-query";

 import { ConfirmDialogBody } from "@/app-components/dialogs";
 import { FilterTextfield } from "@/app-components/inputs/FilterTextfield";
@ -24,12 +25,13 @@ import { DataGrid } from "@/app-components/tables/DataGrid";
 import { useDelete } from "@/hooks/crud/useDelete";
 import { useDeleteMany } from "@/hooks/crud/useDeleteMany";
 import { useFind } from "@/hooks/crud/useFind";
+import { useApiClient } from "@/hooks/useApiClient";
 import { useDialogs } from "@/hooks/useDialogs";
 import { useHasPermission } from "@/hooks/useHasPermission";
 import { useSearch } from "@/hooks/useSearch";
 import { useToast } from "@/hooks/useToast";
 import { useTranslate } from "@/hooks/useTranslate";
-import { EntityType, Format } from "@/services/types";
+import { EntityType, Format, QueryType } from "@/services/types";
 import { INlpEntity } from "@/types/nlp-entity.types";
 import { PermissionAction } from "@/types/permission.types";
 import { getDateTimeFormatter } from "@/utils/date";
@ -41,6 +43,7 @@ const NlpEntity = () => {
  const { toast } = useToast();
  const dialogs = useDialogs();
  const router = useRouter();
+  const queryClient = useQueryClient();
  const hasPermission = useHasPermission();
  const { mutate: deleteNlpEntity } = useDelete(EntityType.NLP_ENTITY, {
    onError: () => {
@ -59,6 +62,23 @@ const NlpEntity = () => {
      toast.success(t("message.item_delete_success"));
    },
  });
+  const { apiClient } = useApiClient();
+  const { mutate: annotateSamples } = useMutation({
+    mutationFn: async (entityId: string) => {
+      await apiClient.annotateNlpSamples(entityId);
+    },
+    onError: () => {
+      toast.error(t("message.nlp_sample_annotation_failure"));
+    },
+    onSuccess: () => {
+      queryClient.invalidateQueries([
+        QueryType.collection,
+        EntityType.NLP_SAMPLE,
+      ]);
+      setSelectedNlpEntities([]);
+      toast.success(t("message.nlp_sample_annotation_success"));
+    },
+  });
  const [selectedNlpEntities, setSelectedNlpEntities] = useState<string[]>([]);
  const { onSearch, searchPayload } = useSearch<INlpEntity>({
    $or: ["name", "doc"],
@ -91,6 +111,14 @@ const NlpEntity = () => {
          ),
        requires: [PermissionAction.READ],
      },
+      {
+        label: ActionColumnLabel.Annotate,
+        action: (row) => {
+          annotateSamples(row.id);
+        },
+        requires: [PermissionAction.CREATE],
+        isDisabled: (row) => !row.lookups.includes("keywords"),
+      },
      {
        label: ActionColumnLabel.Edit,
        action: (row) => dialogs.open(NlpEntityFormDialog, row),
--- a/frontend/src/services/api.class.ts
+++ b/frontend/src/services/api.class.ts
@ -41,6 +41,7 @@ export const ROUTES = {
  FETCH_REMOTE_I18N: "/i18n",
  RESET: "/user/reset",
  NLP_SAMPLE_IMPORT: "/nlpsample/import",
+  NLP_SAMPLE_ANNOTATE: "/nlpsample/annotate",
  NLP_SAMPLE_PREDICT: "/nlpsample/message",
  CONTENT_IMPORT: "/content/import",
  // Entities
@ -239,6 +240,24 @@ export class ApiClient {
    return data;
  }

+  /** POSTs to the sample-annotation route for `entityId` and returns the response data. */
+  async annotateNlpSamples(entityId: string) {
+    // The POST body carries the CSRF token fetched just before the request
+    const csrf = await this.getCsrf();
+    const url = `${ROUTES.NLP_SAMPLE_ANNOTATE}/${entityId}`;
+    const response = await this.request.post(url, { _csrf: csrf._csrf });
+
+    return response.data;
+  }
+
+  /** GETs the content-import route for the given content type and attachment ids and returns the response data. */
+  async importContent(contentTypeId: string, attachmentId: string) {
+    const url = `${ROUTES.CONTENT_IMPORT}/${contentTypeId}/${attachmentId}`;
+    const response = await this.request.get(url);
+
+    return response.data;
+  }
+
  async predictNlp(text: string) {
    const { data } = await this.request.get<INlpDatasetSampleAttributes>(
      `${ROUTES.NLP_SAMPLE_PREDICT}`,