Mirror of https://github.com/hexastack/hexabot (synced 2024-11-28 06:52:20 +00:00)

Commit a999604472: Merge branch 'main' into 40-issue-prevent-users-from-deleting-their-own-roles
@@ -45,7 +45,8 @@ AUTH_TOKEN=token123
 LANGUAGE_CLASSIFIER=language-classifier
 INTENT_CLASSIFIERS=en,fr
 TFLC_REPO_ID=Hexastack/tflc
-JISF_REPO_ID=Hexastack/jisf
+INTENT_CLASSIFIER_REPO_ID=Hexastack/intent-classifier
+SLOT_FILLER_REPO_ID=Hexastack/slot-filler
 NLP_PORT=5000
 
 # Frontend (Next.js)
@@ -22,7 +22,6 @@ import {
   Tab,
   Tabs,
   Tooltip,
-  debounce,
   tabsClasses,
 } from "@mui/material";
 import {

@@ -32,7 +31,13 @@ import {
   DiagramModel,
   DiagramModelGenerics,
 } from "@projectstorm/react-diagrams";
-import { SyntheticEvent, useEffect, useRef, useState } from "react";
+import {
+  SyntheticEvent,
+  useCallback,
+  useEffect,
+  useRef,
+  useState,
+} from "react";
 import { useTranslation } from "react-i18next";
 
 import { DeleteDialog } from "@/app-components/dialogs";

@@ -41,6 +46,7 @@ import { useDelete, useDeleteFromCache } from "@/hooks/crud/useDelete";
 import { useFind } from "@/hooks/crud/useFind";
 import { useGetFromCache } from "@/hooks/crud/useGet";
 import { useUpdate, useUpdateCache } from "@/hooks/crud/useUpdate";
+import useDebouncedUpdate from "@/hooks/useDebouncedUpdate";
 import { getDisplayDialogs, useDialog } from "@/hooks/useDialog";
 import { useSearch } from "@/hooks/useSearch";
 import { EntityType, Format } from "@/services/types";

@@ -108,10 +114,12 @@ const Diagrams = () => {
   const { mutateAsync: updateBlock } = useUpdate(EntityType.BLOCK, {
     invalidate: false,
   });
-  const debouncedZoomEvent = debounce((event) => {
-    if (selectedCategoryId) {
-      engine?.repaintCanvas();
-      updateCategory({
-        id: selectedCategoryId,
-        params: {
-          zoom: event.zoom,
+  const debouncedUpdateCategory = useDebouncedUpdate(updateCategory, 300);
+  const debouncedZoomEvent = useCallback(
+    (event: any) => {
+      if (selectedCategoryId) {
+        engine?.repaintCanvas();
+        debouncedUpdateCategory({
+          id: selectedCategoryId,
+          params: {
+            zoom: event.zoom,

@@ -119,10 +127,13 @@ const Diagrams = () => {
         });
       }
       event.stopPropagation();
-  }, 200);
-  const debouncedOffsetEvent = debounce((event) => {
-    if (selectedCategoryId) {
-      updateCategory({
-        id: selectedCategoryId,
-        params: {
-          offset: [event.offsetX, event.offsetY],
+    },
+    [selectedCategoryId, engine, debouncedUpdateCategory],
+  );
+  const debouncedOffsetEvent = useCallback(
+    (event: any) => {
+      if (selectedCategoryId) {
+        debouncedUpdateCategory({
+          id: selectedCategoryId,
+          params: {
+            offset: [event.offsetX, event.offsetY],

@@ -130,7 +141,9 @@ const Diagrams = () => {
         });
       }
       event.stopPropagation();
-  }, 200);
+    },
+    [selectedCategoryId, debouncedUpdateCategory],
+  );
   const getBlockFromCache = useGetFromCache(EntityType.BLOCK);
   const updateCachedBlock = useUpdateCache(EntityType.BLOCK);
   const deleteCachedBlock = useDeleteFromCache(EntityType.BLOCK);
frontend/src/hooks/useDebouncedUpdate.tsx (new file)

@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2024 Hexastack. All rights reserved.
+ *
+ * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
+ * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
+ * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
+ * 3. SaaS Restriction: This software, or any derivative of it, may not be used to offer a competing product or service (SaaS) without prior written consent from Hexastack. Offering the software as a service or using it in a commercial cloud environment without express permission is strictly prohibited.
+ */
+
+import { debounce } from "@mui/material";
+import { useCallback, useEffect, useRef } from "react";
+
+type DebouncedUpdateParams = {
+  id: string;
+  params: Record<string, any>;
+};
+
+function useDebouncedUpdate(
+  apiUpdate: (params: DebouncedUpdateParams) => void,
+  delay: number = 300,
+) {
+  const accumulatedUpdates = useRef<DebouncedUpdateParams | null>(null);
+  const processUpdates = useRef(
+    debounce(() => {
+      if (accumulatedUpdates.current) {
+        apiUpdate(accumulatedUpdates.current);
+        accumulatedUpdates.current = null;
+      }
+    }, delay),
+  ).current;
+  const handleUpdate = useCallback(
+    (params: DebouncedUpdateParams) => {
+      accumulatedUpdates.current = {
+        id: params.id,
+        params: {
+          ...(accumulatedUpdates.current?.params || {}),
+          ...params.params,
+        },
+      };
+      processUpdates();
+    },
+    [processUpdates],
+  );
+
+  useEffect(() => {
+    return () => {
+      processUpdates.clear();
+    };
+  }, [processUpdates]);
+
+  return handleUpdate;
+}
+
+export default useDebouncedUpdate;
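For reference, a minimal usage sketch of the new hook, mirroring how the Diagrams component consumes it in the hunks above. The `useZoomPersistence` wrapper name is purely illustrative; `updateCategory` and `selectedCategoryId` are assumed to be provided by the calling component, as they are in the Diagrams component.

```tsx
import { useCallback } from "react";

import useDebouncedUpdate from "@/hooks/useDebouncedUpdate";

// Illustrative wrapper: persist the diagram zoom level without flooding the API.
function useZoomPersistence(
  selectedCategoryId: string | undefined,
  updateCategory: (args: { id: string; params: Record<string, any> }) => void,
) {
  // Calls made within the 300 ms window are merged into a single update;
  // their `params` objects are shallow-merged by the hook before apiUpdate runs.
  const debouncedUpdateCategory = useDebouncedUpdate(updateCategory, 300);

  return useCallback(
    (event: any) => {
      if (selectedCategoryId) {
        debouncedUpdateCategory({
          id: selectedCategoryId,
          params: { zoom: event.zoom },
        });
      }
      event.stopPropagation();
    },
    [selectedCategoryId, debouncedUpdateCategory],
  );
}
```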
@@ -2,4 +2,5 @@ AUTH_TOKEN=123
 LANGUAGE_CLASSIFIER=language-classifier
 INTENT_CLASSIFIERS=ar,fr,tn
 TFLC_REPO_ID=Hexastack/tflc
-JISF_REPO_ID=Hexastack/jisf
+INTENT_CLASSIFIER_REPO_ID=Hexastack/intent-classifier
+SLOT_FILLER_REPO_ID=Hexastack/slot-filler
@@ -1,5 +1,5 @@
 AUTH_TOKEN=
 LANGUAGE_CLASSIFIER=
 INTENT_CLASSIFIERS=
-TFLC_REPO_ID=
-JISF_REPO_ID=
+INTENT_CLASSIFIER_REPO_ID=
+SLOT_FILLER_REPO_ID=
@@ -40,7 +40,7 @@ pip install -r requirements.txt
 You should run `source env.sh` on each new shell session. This activates the virtualenv and creates a nice alias for `run.py`:
 ```bash
 $ cat env.sh
-source env/bin/activate
+source venv/bin/activate
 alias run='python run.py'
 ```
 

@@ -53,7 +53,7 @@ run fit myexperiment1 mlp mnist --batch_size=32 --learning_rate=0.1
 Examples :
 ```bash
 # Intent classification
-run fit intent-classifier-en-30072024 jisf --intent_num_labels=88 --slot_num_labels=17 --language=en
+run fit intent-classifier-en-30072024 intent_classifier --intent_num_labels=88 --slot_num_labels=17 --language=en
 run predict intent-classifier-fr-30072024 --intent_num_labels=7 --slot_num_labels=2 --language=fr
 
 # Language classification
@ -4,8 +4,8 @@ import json
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from transformers import PreTrainedTokenizerFast, PreTrainedTokenizer
|
from transformers import PreTrainedTokenizerFast, PreTrainedTokenizer
|
||||||
|
|
||||||
|
|
||||||
import boilerplate as tfbp
|
import boilerplate as tfbp
|
||||||
from utils.jisf_data_mapper import JisfDataMapper
|
|
||||||
from utils.json_helper import JsonHelper
|
from utils.json_helper import JsonHelper
|
||||||
|
|
||||||
|
|
||||||
@ -101,8 +101,11 @@ class JISFDL(tfbp.DataLoader):
|
|||||||
# Filter examples by language
|
# Filter examples by language
|
||||||
lang = self.hparams.language
|
lang = self.hparams.language
|
||||||
all_examples = data["common_examples"]
|
all_examples = data["common_examples"]
|
||||||
examples = filter(lambda exp: any(
|
|
||||||
e['entity'] == 'language' and e['value'] == lang for e in exp['entities']), all_examples)
|
if not bool(lang):
|
||||||
|
examples = all_examples
|
||||||
|
else:
|
||||||
|
examples = filter(lambda exp: any(e['entity'] == 'language' and e['value'] == lang for e in exp['entities']), all_examples)
|
||||||
|
|
||||||
# Parse raw data
|
# Parse raw data
|
||||||
for exp in examples:
|
for exp in examples:
|
||||||
@ -145,7 +148,6 @@ class JISFDL(tfbp.DataLoader):
|
|||||||
# the classifier.
|
# the classifier.
|
||||||
texts = [d.text for d in dataset]
|
texts = [d.text for d in dataset]
|
||||||
encoded_texts = self.encode_texts(texts, tokenizer)
|
encoded_texts = self.encode_texts(texts, tokenizer)
|
||||||
|
|
||||||
# Map intents, load from the model (evaluate), recompute from dataset otherwise (train)
|
# Map intents, load from the model (evaluate), recompute from dataset otherwise (train)
|
||||||
intents = [d.intent for d in dataset]
|
intents = [d.intent for d in dataset]
|
||||||
if not model_params:
|
if not model_params:
|
||||||
@ -161,19 +163,35 @@ class JISFDL(tfbp.DataLoader):
|
|||||||
# To handle those we need to add <PAD> to slots_names. It can be some other symbol as well.
|
# To handle those we need to add <PAD> to slots_names. It can be some other symbol as well.
|
||||||
slot_names.insert(0, "<PAD>")
|
slot_names.insert(0, "<PAD>")
|
||||||
else:
|
else:
|
||||||
intent_names = model_params.intent_names
|
if "intent_names" in model_params:
|
||||||
slot_names = model_params.slot_names
|
intent_names = model_params["intent_names"]
|
||||||
|
else:
|
||||||
|
intent_names = None
|
||||||
|
|
||||||
|
if "slot_names" in model_params:
|
||||||
|
slot_names = model_params["slot_names"]
|
||||||
|
else:
|
||||||
|
slot_names = None
|
||||||
|
|
||||||
|
if intent_names:
|
||||||
intent_map = dict() # Dict : intent -> index
|
intent_map = dict() # Dict : intent -> index
|
||||||
for idx, ui in enumerate(intent_names):
|
for idx, ui in enumerate(intent_names):
|
||||||
intent_map[ui] = idx
|
intent_map[ui] = idx
|
||||||
|
else:
|
||||||
|
intent_map = None
|
||||||
|
|
||||||
# Encode intents
|
# Encode intents
|
||||||
|
if intent_map:
|
||||||
encoded_intents = self.encode_intents(intents, intent_map)
|
encoded_intents = self.encode_intents(intents, intent_map)
|
||||||
|
else:
|
||||||
|
encoded_intents = None
|
||||||
|
|
||||||
|
if slot_names:
|
||||||
slot_map: Dict[str, int] = dict() # slot -> index
|
slot_map: Dict[str, int] = dict() # slot -> index
|
||||||
for idx, us in enumerate(slot_names):
|
for idx, us in enumerate(slot_names):
|
||||||
slot_map[us] = idx
|
slot_map[us] = idx
|
||||||
|
else:
|
||||||
|
slot_map = None
|
||||||
|
|
||||||
# Encode slots
|
# Encode slots
|
||||||
# Text : Add a tune to my elrow Guest List
|
# Text : Add a tune to my elrow Guest List
|
||||||
@ -183,8 +201,12 @@ class JISFDL(tfbp.DataLoader):
|
|||||||
max_len = len(encoded_texts["input_ids"][0]) # type: ignore
|
max_len = len(encoded_texts["input_ids"][0]) # type: ignore
|
||||||
all_slots = [td.slots for td in dataset]
|
all_slots = [td.slots for td in dataset]
|
||||||
all_texts = [td.text for td in dataset]
|
all_texts = [td.text for td in dataset]
|
||||||
|
|
||||||
|
if slot_map:
|
||||||
encoded_slots = self.encode_slots(tokenizer,
|
encoded_slots = self.encode_slots(tokenizer,
|
||||||
all_slots, all_texts, slot_map, max_len)
|
all_slots, all_texts, slot_map, max_len)
|
||||||
|
else:
|
||||||
|
encoded_slots = None
|
||||||
|
|
||||||
return encoded_texts, encoded_intents, encoded_slots, intent_names, slot_names
|
return encoded_texts, encoded_intents, encoded_slots, intent_names, slot_names
|
||||||
|
|
||||||
|
@@ -29,7 +29,7 @@ class TFLCDL(tfbp.DataLoader):
 
         self.json_helper = JsonHelper("tflc")
         self._save_dir = save_dir
-        print(hparams)
         # We will opt for a TF-IDF representation of the data as the frequency of word
         # roots should give us a good idea about which language we're dealing with.
         if method == "fit":
nlu/main.py

@@ -15,8 +15,8 @@ AUTH_TOKEN = os.getenv("AUTH_TOKEN", "TOKEN_MUST_BE_DEFINED")
 
 AVAILABLE_LANGUAGES = os.getenv("AVAILABLE_LANGUAGES", "en,fr").split(',')
 TFLC_REPO_ID = os.getenv("TFLC_REPO_ID")
-JISF_REPO_ID = os.getenv("JISF_REPO_ID")
+INTENT_CLASSIFIER_REPO_ID = os.getenv("INTENT_CLASSIFIER_REPO_ID")
+SLOT_FILLER_REPO_ID = os.getenv("SLOT_FILLER_REPO_ID")
 
 def load_language_classifier():
     # Init language classifier model

@@ -27,21 +27,31 @@ def load_language_classifier():
     logging.info(f'Successfully loaded the language classifier model')
     return model
 
 
 def load_intent_classifiers():
-    Model = tfbp.get_model("jisf")
-    models = {}
+    Model = tfbp.get_model("intent_classifier")
+    intent_classifiers = {}
     for language in AVAILABLE_LANGUAGES:
         kwargs = {}
-        models[language] = Model(save_dir=language, method="predict", repo_id=JISF_REPO_ID, **kwargs)
-        models[language].load_model()
+        intent_classifiers[language] = Model(save_dir=language, method="predict", repo_id=INTENT_CLASSIFIER_REPO_ID, **kwargs)
+        intent_classifiers[language].load_model()
         logging.info(f'Successfully loaded the intent classifier {language} model')
-    return models
+    return intent_classifiers
 
+def load_slot_classifiers():
+    Model = tfbp.get_model("slot_classifier")
+    slot_fillers = {}
+    for language in AVAILABLE_LANGUAGES:
+        kwargs = {}
+        slot_fillers[language] = Model(save_dir=language, method="predict", repo_id=SLOT_FILLER_REPO_ID, **kwargs)
+        slot_fillers[language].load_model()
+        logging.info(f'Successfully loaded the slot filler {language} model')
+    return slot_fillers
 
 
 def load_models():
     app.language_classifier = load_language_classifier()  # type: ignore
     app.intent_classifiers = load_intent_classifiers()  # type: ignore
+    app.slot_fillers = load_intent_classifiers()  # type: ignore
 
 app = FastAPI()
 

@@ -74,13 +84,20 @@ async def check_health():
 
 @app.post("/parse")
 def parse(input: ParseInput, is_authenticated: Annotated[str, Depends(authenticate)]):
-    if not hasattr(app, 'language_classifier') or not hasattr(app, 'intent_classifiers'):
+    if not hasattr(app, 'language_classifier') or not hasattr(app, 'intent_classifiers') or not hasattr(app, 'slot_fillers'):
         headers = {"Retry-After": "120"}  # Suggest retrying after 2 minutes
-        return JSONResponse(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, content={"message": "Models are loading, please retry later."}, headers=headers)
+        return JSONResponse(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, content={"message": "Models are still loading, please retry later."}, headers=headers)
 
     language = app.language_classifier.get_prediction(input.q)  # type: ignore
     lang = language.get("value")
-    prediction = app.intent_classifiers[lang].get_prediction(
+    intent_prediction = app.intent_classifiers[lang].get_prediction(
         input.q)  # type: ignore
-    prediction.get("entities").append(language)
-    return prediction
+    slot_prediction = app.slot_fillers[lang].get_prediction(
+        input.q)  # type: ignore
+    slot_prediction.get("entities").append(language)
+
+    return {
+        "text": input.q,
+        "intent": intent_prediction.get("intent"),
+        "entities": slot_prediction.get("entities"),
+    }
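To make the new response contract concrete, here is a minimal TypeScript sketch of a client for the reworked /parse endpoint. The `q` field, the 503 "still loading" behaviour, and the `{ text, intent, entities }` shape are taken from the hunks above and from the model code later in this diff; the base URL and port (NLP_PORT=5000 in the env hunks), the request body shape, and the authentication header are assumptions and may differ from the actual API.

```ts
// Hypothetical client sketch; base URL, request body shape and auth header are
// assumptions. Only the response shape is taken from this commit's /parse handler.
type NluEntity = {
  entity: string;
  value: string;
  start?: number;
  end?: number;
  confidence: number;
};

type NluParseResponse = {
  text: string;
  intent: { name: string; confidence: number };
  entities: NluEntity[];
};

async function parseUtterance(q: string): Promise<NluParseResponse> {
  const res = await fetch("http://localhost:5000/parse", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Assumed scheme; the endpoint only declares a Depends(authenticate) guard.
      Authorization: `Bearer ${process.env.AUTH_TOKEN}`,
    },
    body: JSON.stringify({ q }),
  });

  if (res.status === 503) {
    // Models are still loading; the server also sets a Retry-After header.
    throw new Error("NLU models are still loading, please retry later.");
  }

  return res.json();
}
```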
@@ -1,4 +1,3 @@
-import functools
 import json
 import math
 from typing import Tuple, Dict, List

@@ -22,8 +21,8 @@ from data_loaders.jisfdl import JISFDL
 import boilerplate as tfbp
 
 ##
-# JISF : Joint Intent Classification and Slot filling with BERT
-# This notebook is based on the paper BERT for Joint Intent Classification and Slot Filling by Chen et al. (2019),
+# Intent Classification with BERT
+# This code is based on the paper BERT for Joint Intent Classification and Slot Filling by Chen et al. (2019),
 # https://arxiv.org/abs/1902.10909 but on a different dataset made for a class project.
 #
 # Ideas were also taken from https://github.com/monologg/JointBERT, which is a PyTorch implementation of

@@ -33,19 +32,16 @@ import boilerplate as tfbp
 BERT_MODEL_BY_LANGUAGE = {
     'en': "bert-base-cased",
     'fr': "dbmdz/bert-base-french-europeana-cased",
-    'ar': 'asafaya/bert-base-arabic',
-    'tn': 'dbmdz/bert-base-french-europeana-cased'
 }
 
 
 @tfbp.default_export
-class JISF(tfbp.Model):
+class IntentClassifier(tfbp.Model):
     default_hparams = {
-        "language": "fr",
+        "language": "",
         "num_epochs": 2,
         "dropout_prob": 0.1,
         "intent_num_labels": 7,
-        "slot_num_labels": 40
     }
     data_loader: JISFDL
 

@@ -57,8 +53,8 @@ class JISF(tfbp.Model):
 
         # Load Tokenizer from transformers
         # We will use a pretrained bert model bert-base-cased for both Tokenizer and our classifier.
-        bert_model_name = BERT_MODEL_BY_LANGUAGE[self.hparams.language]
-        # bert_model_name = typing.cast(str, self.hparams.bert_model_name)
+        bert_model_name = BERT_MODEL_BY_LANGUAGE[self.hparams.language or "en"]
         self.tokenizer = AutoTokenizer.from_pretrained(
             bert_model_name, use_fast=False)
         self.bert = TFBertModel.from_pretrained(bert_model_name)

@@ -66,27 +62,18 @@ class JISF(tfbp.Model):
         self.dropout = Dropout(self.hparams.dropout_prob)
         self.intent_classifier = Dense(self.hparams.intent_num_labels,
                                        name="intent_classifier", activation="softmax")
-        self.slot_classifier = Dense(self.hparams.slot_num_labels,
-                                     name="slot_classifier", activation="softmax")
 
 
     def call(self, inputs, **kwargs):
-        # two outputs from BERT
         trained_bert = self.bert(inputs, **kwargs)
         pooled_output = trained_bert.pooler_output
-        sequence_output = trained_bert.last_hidden_state
-
-        # sequence_output will be used for slot_filling / classification
-        sequence_output = self.dropout(sequence_output,
-                                       training=kwargs.get("training", False))
-        slot_probas = self.slot_classifier(sequence_output)
 
         # pooled_output for intent classification
         pooled_output = self.dropout(pooled_output,
                                      training=kwargs.get("training", False))
         intent_probas = self.intent_classifier(pooled_output)
 
-        return slot_probas, intent_probas
+        return intent_probas
 
     def load_data(self, data_loader) -> Tuple[BatchEncoding, tf.Tensor, ndarray, int, int]:
         return data_loader(self.tokenizer)

@@ -137,18 +124,11 @@ class JISF(tfbp.Model):
             raise ValueError(
                 f"Hyperparam intent_num_labels mismatch, should be : {len(intent_names)}"
             )
-        if self.hparams.slot_num_labels != len(slot_names):
-            raise ValueError(
-                f"Hyperparam slot_num_labels mismatch, should be : {len(slot_names)}"
-            )
 
         # Hyperparams, Optimizer and Loss function
         opt = Adam(learning_rate=3e-5, epsilon=1e-08)
 
-        # two outputs, one for slots, another for intents
-        # we have to fine tune for both
-        losses = [SparseCategoricalCrossentropy(),
-                  SparseCategoricalCrossentropy()]
+        losses = SparseCategoricalCrossentropy()
 
         metrics = [SparseCategoricalAccuracy("accuracy")]
 

@@ -159,11 +139,10 @@ class JISF(tfbp.Model):
              "attention_mask": encoded_texts["attention_mask"]}
 
         super().fit(
-            x, (encoded_slots, encoded_intents), epochs=self.hparams.num_epochs, batch_size=32, shuffle=True)
+            x, encoded_intents, epochs=self.hparams.num_epochs, batch_size=32, shuffle=True)
 
         # Persist the model
         self.extra_params["intent_names"] = intent_names
-        self.extra_params["slot_names"] = slot_names
 
         self.save()
 

@@ -175,7 +154,7 @@ class JISF(tfbp.Model):
         metrics = [SparseCategoricalAccuracy("accuracy")]
         self.compile(metrics=metrics)
 
-        _, intent_probas = self(encoded_texts)  # type: ignore
+        intent_probas = self(encoded_texts)  # type: ignore
 
         scores = self.get_metrics_by_intent(intent_probas, encoded_intents)
 

@@ -205,84 +184,9 @@ class JISF(tfbp.Model):
 
         return json.dumps(info, indent=2)
 
-    def get_slots_prediction(self, text: str, inputs, slot_probas):
-        slot_probas_np = slot_probas.numpy()
-        # Get the indices of the maximum values
-        slot_ids = slot_probas_np.argmax(axis=-1)[0, :]
-
-        # get all slot names and add to out_dict as keys
-        out_dict = {}
-        predicted_slots = set([self.extra_params["slot_names"][s]
-                               for s in slot_ids if s != 0])
-        for ps in predicted_slots:
-            out_dict[ps] = []
-
-        # retrieving the tokenization that was used in the predictions
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-
-        # We'd like to eliminate all special tokens from our output
-        special_tokens = self.tokenizer.special_tokens_map.values()
-
-        for token, slot_id in zip(tokens, slot_ids):
-            if token in special_tokens:
-                continue
-            # add all to out_dict
-            slot_name = self.extra_params["slot_names"][slot_id]
-
-            if slot_name == "<PAD>":
-                continue
-
-            # collect tokens
-            collected_tokens = [token]
-            idx = tokens.index(token)
-
-            # see if it starts with ##
-            # then it belongs to the previous token
-            if token.startswith("##"):
-                # check if the token already exists or not
-                if tokens[idx - 1] not in out_dict[slot_name]:
-                    collected_tokens.insert(0, tokens[idx - 1])
-
-            # add collected tokens to slots
-            out_dict[slot_name].extend(collected_tokens)
-
-        slot_names_to_ids = {value: key for key, value in enumerate(
-            self.extra_params["slot_names"])}
-
-        entities = []
-        # process out_dict
-        for slot_name in out_dict:
-            slot_id = slot_names_to_ids[slot_name]
-            slot_tokens = out_dict[slot_name]
-
-            slot_value = self.tokenizer.convert_tokens_to_string(
-                slot_tokens).strip()
-
-            entity = {
-                "entity": slot_name,
-                "value": slot_value,
-                "start": text.find(slot_value),
-                "end": text.find(slot_value) + len(slot_value),
-                "confidence": 0,
-            }
-
-            # The confidence of a slot is the average confidence of tokens in that slot.
-            indices = [tokens.index(token) for token in slot_tokens]
-            if len(slot_tokens) > 0:
-                total = functools.reduce(
-                    lambda proba1, proba2: proba1+proba2, slot_probas_np[0, indices, slot_id], 0)
-                entity["confidence"] = total / len(slot_tokens)
-            else:
-                entity["confidence"] = 0
-
-            entities.append(entity)
-
-        return entities
-
     def get_prediction(self, text: str):
         inputs = self.data_loader.encode_text(text, self.tokenizer)
-        slot_probas, intent_probas = self(inputs)  # type: ignore
+        intent_probas = self(inputs)  # type: ignore
 
         intent_probas_np = intent_probas.numpy()
 

@@ -292,15 +196,8 @@ class JISF(tfbp.Model):
         # get the confidences for each intent
         intent_confidences = intent_probas_np[0]
 
-        entities = []
-        if slot_probas is not None:
-            entities = self.get_slots_prediction(text, inputs, slot_probas)
-
         return {
             "text": text,
             "intent": {"name": self.extra_params["intent_names"][intent_id],
                        "confidence": float(intent_confidences[intent_id])},
-            "entities": entities,
         }
nlu/models/slot_filler.py (new file)

@@ -0,0 +1,250 @@
+import functools
+import json
+from transformers import TFBertModel, AutoTokenizer
+from keras.layers import Dropout, Dense
+from sys import platform
+
+if platform == "darwin":
+    from keras.optimizers.legacy import Adam
+else:
+    from keras.optimizers import Adam
+
+from keras.losses import SparseCategoricalCrossentropy
+from keras.metrics import SparseCategoricalAccuracy
+import numpy as np
+
+from data_loaders.jisfdl import JISFDL
+
+from sklearn.metrics import classification_report
+
+
+import boilerplate as tfbp
+
+##
+# Slot filling with BERT
+# This notebook is based on the paper BERT for Joint Intent Classification and Slot Filling by Chen et al. (2019),
+# https://arxiv.org/abs/1902.10909 but on a different dataset made for a class project.
+#
+# Ideas were also taken from https://github.com/monologg/JointBERT, which is a PyTorch implementation of
+# the paper with the original dataset.
+##
+
+BERT_MODEL_BY_LANGUAGE = {
+    'en': "bert-base-cased",
+    'fr': "dbmdz/bert-base-french-europeana-cased",
+}
+
+
+@tfbp.default_export
+class SlotFiller(tfbp.Model):
+    default_hparams = {
+        "language": "",
+        "num_epochs": 2,
+        "dropout_prob": 0.1,
+        "slot_num_labels": 40
+    }
+    data_loader: JISFDL
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Init data loader
+        self.data_loader = JISFDL(**kwargs)
+
+        # Load Tokenizer from transformers
+        # We will use a pretrained bert model bert-base-cased for both Tokenizer and our classifier.
+        bert_model_name = BERT_MODEL_BY_LANGUAGE[self.hparams.language or "en"]
+
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            bert_model_name, use_fast=False)
+        self.bert = TFBertModel.from_pretrained(bert_model_name)
+
+        self.dropout = Dropout(self.hparams.dropout_prob)
+        self.slot_classifier = Dense(self.hparams.slot_num_labels,
+                                     name="slot_classifier", activation="softmax")
+
+
+    def call(self, inputs, **kwargs):
+        trained_bert = self.bert(inputs, **kwargs)
+        sequence_output = trained_bert.last_hidden_state
+
+        # sequence_output will be used for slot_filling
+        sequence_output = self.dropout(sequence_output,
+                                       training=kwargs.get("training", False))
+        slot_probas = self.slot_classifier(sequence_output)
+
+        return slot_probas
+
+    @tfbp.runnable
+    def fit(self):
+        """Training"""
+        encoded_texts, encoded_intents, encoded_slots, intent_names, slot_names = self.data_loader(
+            self.tokenizer)
+
+        if self.hparams.slot_num_labels != len(slot_names):
+            raise ValueError(
+                f"Hyperparam slot_num_labels mismatch, should be : {len(slot_names)}"
+            )
+
+        # Hyperparams, Optimizer and Loss function
+        opt = Adam(learning_rate=3e-5, epsilon=1e-08)
+
+        # two outputs, one for slots, another for intents
+        # we have to fine tune for both
+        losses = SparseCategoricalCrossentropy()
+
+        metrics = [SparseCategoricalAccuracy("accuracy")]
+
+        # Compile model
+        self.compile(optimizer=opt, loss=losses, metrics=metrics)
+
+        x = {"input_ids": encoded_texts["input_ids"], "token_type_ids": encoded_texts["token_type_ids"],
+             "attention_mask": encoded_texts["attention_mask"]}
+
+        super().fit(
+            x, encoded_slots, epochs=self.hparams.num_epochs, batch_size=32, shuffle=True)
+
+        # Persist the model
+        self.extra_params["slot_names"] = slot_names
+
+        self.save()
+
+    @tfbp.runnable
+    def evaluate(self):
+        """Evaluation"""
+        # Load test data
+        # Assuming your data loader can return test data when mode='test' is specified
+        encoded_texts, _, encoded_slots, _, slot_names = self.data_loader(
+            self.tokenizer, self.extra_params)
+
+        # Get predictions
+        predictions = self(encoded_texts)
+        predicted_slot_ids = np.argmax(predictions, axis=-1)  # Shape: (batch_size, sequence_length)
+
+        true_labels = encoded_slots.flatten()
+        pred_labels = predicted_slot_ids.flatten()
+
+        # Filter out padding tokens (assuming padding label id is 0)
+        mask = true_labels != 0
+        filtered_true_labels = true_labels[mask]
+        filtered_pred_labels = pred_labels[mask]
+
+        # Adjust labels to start from 0 (since padding label 0 is removed)
+        filtered_true_labels -= 1
+        filtered_pred_labels -= 1
+
+        # Get slot names excluding padding
+        slot_names_no_pad = self.extra_params["slot_names"][1:]  # Exclude padding label
+
+
+        report = classification_report(
+            filtered_true_labels,
+            filtered_pred_labels,
+            target_names=slot_names_no_pad,
+            zero_division=0
+        )
+
+        print(report)
+
+        # Optionally, you can return the report as a string or dictionary
+        return report
+
+    @tfbp.runnable
+    def predict(self):
+        text = self.data_loader.get_prediction_data()
+
+        info = self.get_prediction(text)
+
+        print(self.summary())
+        print("Text : " + text)
+        print(json.dumps(info, indent=2))
+
+        return json.dumps(info, indent=2)
+
+    def get_slots_prediction(self, text: str, inputs, slot_probas):
+        slot_probas_np = slot_probas.numpy()
+        # Get the indices of the maximum values
+        slot_ids = slot_probas_np.argmax(axis=-1)[0, :]
+
+        # get all slot names and add to out_dict as keys
+        out_dict = {}
+        predicted_slots = set([self.extra_params["slot_names"][s]
+                               for s in slot_ids if s != 0])
+        for ps in predicted_slots:
+            out_dict[ps] = []
+
+        # retrieving the tokenization that was used in the predictions
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+
+        # We'd like to eliminate all special tokens from our output
+        special_tokens = self.tokenizer.special_tokens_map.values()
+
+        for token, slot_id in zip(tokens, slot_ids):
+            if token in special_tokens:
+                continue
+            # add all to out_dict
+            slot_name = self.extra_params["slot_names"][slot_id]
+
+            if slot_name == "<PAD>":
+                continue
+
+            # collect tokens
+            collected_tokens = [token]
+            idx = tokens.index(token)
+
+            # see if it starts with ##
+            # then it belongs to the previous token
+            if token.startswith("##"):
+                # check if the token already exists or not
+                if tokens[idx - 1] not in out_dict[slot_name]:
+                    collected_tokens.insert(0, tokens[idx - 1])
+
+            # add collected tokens to slots
+            out_dict[slot_name].extend(collected_tokens)
+
+        slot_names_to_ids = {value: key for key, value in enumerate(
+            self.extra_params["slot_names"])}
+
+        entities = []
+        # process out_dict
+        for slot_name in out_dict:
+            slot_id = slot_names_to_ids[slot_name]
+            slot_tokens = out_dict[slot_name]
+
+            slot_value = self.tokenizer.convert_tokens_to_string(
+                slot_tokens).strip()
+
+            entity = {
+                "entity": slot_name,
+                "value": slot_value,
+                "start": text.find(slot_value),
+                "end": text.find(slot_value) + len(slot_value),
+                "confidence": 0,
+            }
+
+            # The confidence of a slot is the average confidence of tokens in that slot.
+            indices = [tokens.index(token) for token in slot_tokens]
+            if len(slot_tokens) > 0:
+                total = functools.reduce(
+                    lambda proba1, proba2: proba1+proba2, slot_probas_np[0, indices, slot_id], 0)
+                entity["confidence"] = total / len(slot_tokens)
+            else:
+                entity["confidence"] = 0
+
+            entities.append(entity)
+
+        return entities
+
+
+    def get_prediction(self, text: str):
+        inputs = self.data_loader.encode_text(text, self.tokenizer)
+        slot_probas = self(inputs)  # type: ignore
+
+        entities = []
+        if slot_probas is not None:
+            entities = self.get_slots_prediction(text, inputs, slot_probas)
+
+        return {
+            "text": text,
+            "entities": entities,
+        }
@@ -4,7 +4,7 @@ import json
 class JsonHelper:
     data_folder: str
 
-    def __init__(self, model:str="jisf"):
+    def __init__(self, model:str = "intent_classifier"):
        self.data_folder=os.path.join("data",model)
 
     def read_dataset_json_file(self, filename):