from .json_helper import JsonHelper """ Transform data set from Rasa structure to a compliant one How to use: from utils.jisf_data_mapper import JisfDataMapper mapper = JisfDataMapper() #mapper.transform_to_new("train.json") mapper.transform_to_new("test.json") """ class JisfDataMapper(object): def transform_to_new(self, filename: str, reverse: bool = False): """this method allows for changing a file's data format.""" helper=JsonHelper() data = helper.read_dataset_json_file(filename) copy_file = "copy of "+filename # we create a copy of the old data format helper.write_dataset_json_file(data, copy_file) # alternatively, we could use this method in the opposite direction if not reverse: data = self.old_to_new(data) else: data = self.new_to_old(data) helper.write_dataset_json_file(data, filename) def old_to_new(self,data:dict): converted_data=dict() converted_data["common_examples"]=[] all_intents=set() all_slots=dict() for k in data.keys(): common_example=dict() #text and intent are the same in both formats common_example["text"]=data[k]["text"] common_example["intent"]=data[k]["intent"] common_example["entities"]=[] all_intents.add(common_example["intent"]) #for every entity, we get its corresponding value as well as the index of its #start and finish for slot in data[k]["slots"].keys(): all_slots[slot]=all_slots.get(slot,set()) entity=dict() entity["entity"]=slot entity["value"]=data[k]["slots"][slot] all_slots[slot].add(entity["value"]) entity["start"],entity["end"]=tuple(data[k]["positions"][slot]) common_example["entities"].append(entity) converted_data["common_examples"].append(common_example) #lookup tables store all the intents as well as all the slot values seen in the dataset converted_data["lookup_tables"]=[] all_slots["intent"]=all_intents for name,value in all_slots.items(): converted_data["lookup_tables"].append({"name":name,"elements":list(value)}) #regex features and entity synonyms will remain empty for now converted_data["regex_features"]=[] converted_data["entity_synonyms"]=[] return converted_data def new_to_old(self,data:dict): old_data=dict() dataset=data["common_examples"] #for each piece of text, we make a JSON object. for i in range(len(dataset)): item=dict() item["text"]=dataset[i]["text"] item["intent"]=dataset[i]["intent"] item["slots"]=dict() item["positions"]=dict() for entity in dataset[i]["entities"]: item["slots"][entity["entity"]]=entity["value"] item["positions"][entity["entity"]]=[entity["start"],entity["end"]] old_data[i]=item return old_data