The preprocessing output still reports a numerical array (with zero columns), although there are no numerical features:
Numerical (18000, 0)
Categorical (18000, 24)
{
"name": "insurance",
"task_type": "binclass",
"header": "infer",
"column_names": [
"GoodStudent",
"Age",
"SocioEcon",
"RiskAversion",
"VehicleYear",
"RuggedAuto",
"MakeModel",
"DrivQuality",
"Mileage",
"Antilock",
"DrivingSkill",
"SeniorTrain",
"ThisCarCost",
"Theft",
"CarValue",
"HomeBase",
"AntiTheft",
"PropCost",
"OtherCarCost",
"OtherCar",
"MedCost",
"Cushioning",
"Airbag",
"ILiCost",
"DrivHist"
],
"num_col_idx": [],
"cat_col_idx": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24
],
"target_col_idx": [
0
],
"file_type": "csv",
"data_path": "data/insurance/insurance.csv",
"test_path": null,
"column_info": {
"1": {},
"type": "categorical",
"categorizes": [
0,
1
],
"2": {},
"3": {},
"4": {},
"5": {},
"6": {},
"7": {},
"8": {},
"9": {},
"10": {},
"11": {},
"12": {},
"13": {},
"14": {},
"15": {},
"16": {},
"17": {},
"18": {},
"19": {},
"20": {},
"21": {},
"22": {},
"23": {},
"24": {},
"0": {}
},
"train_num": 18000,
"test_num": 2000,
"idx_mapping": {
"0": 24,
"1": 0,
"2": 1,
"3": 2,
"4": 3,
"5": 4,
"6": 5,
"7": 6,
"8": 7,
"9": 8,
"10": 9,
"11": 10,
"12": 11,
"13": 12,
"14": 13,
"15": 14,
"16": 15,
"17": 16,
"18": 17,
"19": 18,
"20": 19,
"21": 20,
"22": 21,
"23": 22,
"24": 23
},
"inverse_idx_mapping": {
"24": 0,
"0": 1,
"1": 2,
"2": 3,
"3": 4,
"4": 5,
"5": 6,
"6": 7,
"7": 8,
"8": 9,
"9": 10,
"10": 11,
"11": 12,
"12": 13,
"13": 14,
"14": 15,
"15": 16,
"16": 17,
"17": 18,
"18": 19,
"19": 20,
"20": 21,
"21": 22,
"22": 23,
"23": 24
},
"idx_name_mapping": {
"0": "GoodStudent",
"1": "Age",
"2": "SocioEcon",
"3": "RiskAversion",
"4": "VehicleYear",
"5": "RuggedAuto",
"6": "MakeModel",
"7": "DrivQuality",
"8": "Mileage",
"9": "Antilock",
"10": "DrivingSkill",
"11": "SeniorTrain",
"12": "ThisCarCost",
"13": "Theft",
"14": "CarValue",
"15": "HomeBase",
"16": "AntiTheft",
"17": "PropCost",
"18": "OtherCarCost",
"19": "OtherCar",
"20": "MedCost",
"21": "Cushioning",
"22": "Airbag",
"23": "ILiCost",
"24": "DrivHist"
},
"metadata": {
"columns": {
"1": {
"sdtype": "categorical"
},
"2": {
"sdtype": "categorical"
},
"3": {
"sdtype": "categorical"
},
"4": {
"sdtype": "categorical"
},
"5": {
"sdtype": "categorical"
},
"6": {
"sdtype": "categorical"
},
"7": {
"sdtype": "categorical"
},
"8": {
"sdtype": "categorical"
},
"9": {
"sdtype": "categorical"
},
"10": {
"sdtype": "categorical"
},
"11": {
"sdtype": "categorical"
},
"12": {
"sdtype": "categorical"
},
"13": {
"sdtype": "categorical"
},
"14": {
"sdtype": "categorical"
},
"15": {
"sdtype": "categorical"
},
"16": {
"sdtype": "categorical"
},
"17": {
"sdtype": "categorical"
},
"18": {
"sdtype": "categorical"
},
"19": {
"sdtype": "categorical"
},
"20": {
"sdtype": "categorical"
},
"21": {
"sdtype": "categorical"
},
"22": {
"sdtype": "categorical"
},
"23": {
"sdtype": "categorical"
},
"24": {
"sdtype": "categorical"
},
"0": {
"sdtype": "categorical"
}
}
}
}
Description:
I encountered an error during the sampling phase using the
tabsyn method. The issue seems to be related to missing numerical features during preprocessing.
Steps to Reproduce:
Use the following
insurance.json configuration. It has 25 categorical columns and no numerical columns; the first column is the target column, which takes the value 0 or 1.
{ "name": "insurance", "task_type": "binclass", "header": "infer", "column_names": [ "GoodStudent", "Age", "SocioEcon", "RiskAversion", "VehicleYear", "RuggedAuto", "MakeModel", "DrivQuality", "Mileage", "Antilock", "DrivingSkill", "SeniorTrain", "ThisCarCost", "Theft", "CarValue", "HomeBase", "AntiTheft", "PropCost", "OtherCarCost", "OtherCar", "MedCost", "Cushioning", "Airbag", "ILiCost", "DrivHist" ], "num_col_idx": [], "cat_col_idx": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ], "target_col_idx": [0], "file_type": "csv", "data_path": "data/insurance/insurance.csv", "test_path": null }
Preprocess and train the model using VAE and tabsyn; both steps complete successfully.
The preprocessing output still reports a numerical array (with zero columns), although there are no numerical features:
Numerical (18000, 0)
Categorical (18000, 24)
python main.py --dataname insurance --method tabsyn --mode sample
During the sampling step, the following error is thrown:
ValueError: Found array with 0 feature(s) (shape=(18000, 0)) while a minimum of 1 is required by QuantileTransformer.
This is the processed JSON in tabsyn/data/insurance/info.json:
{ "name": "insurance", "task_type": "binclass", "header": "infer", "column_names": [ "GoodStudent", "Age", "SocioEcon", "RiskAversion", "VehicleYear", "RuggedAuto", "MakeModel", "DrivQuality", "Mileage", "Antilock", "DrivingSkill", "SeniorTrain", "ThisCarCost", "Theft", "CarValue", "HomeBase", "AntiTheft", "PropCost", "OtherCarCost", "OtherCar", "MedCost", "Cushioning", "Airbag", "ILiCost", "DrivHist" ], "num_col_idx": [], "cat_col_idx": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ], "target_col_idx": [ 0 ], "file_type": "csv", "data_path": "data/insurance/insurance.csv", "test_path": null, "column_info": { "1": {}, "type": "categorical", "categorizes": [ 0, 1 ], "2": {}, "3": {}, "4": {}, "5": {}, "6": {}, "7": {}, "8": {}, "9": {}, "10": {}, "11": {}, "12": {}, "13": {}, "14": {}, "15": {}, "16": {}, "17": {}, "18": {}, "19": {}, "20": {}, "21": {}, "22": {}, "23": {}, "24": {}, "0": {} }, "train_num": 18000, "test_num": 2000, "idx_mapping": { "0": 24, "1": 0, "2": 1, "3": 2, "4": 3, "5": 4, "6": 5, "7": 6, "8": 7, "9": 8, "10": 9, "11": 10, "12": 11, "13": 12, "14": 13, "15": 14, "16": 15, "17": 16, "18": 17, "19": 18, "20": 19, "21": 20, "22": 21, "23": 22, "24": 23 }, "inverse_idx_mapping": { "24": 0, "0": 1, "1": 2, "2": 3, "3": 4, "4": 5, "5": 6, "6": 7, "7": 8, "8": 9, "9": 10, "10": 11, "11": 12, "12": 13, "13": 14, "14": 15, "15": 16, "16": 17, "17": 18, "18": 19, "19": 20, "20": 21, "21": 22, "22": 23, "23": 24 }, "idx_name_mapping": { "0": "GoodStudent", "1": "Age", "2": "SocioEcon", "3": "RiskAversion", "4": "VehicleYear", "5": "RuggedAuto", "6": "MakeModel", "7": "DrivQuality", "8": "Mileage", "9": "Antilock", "10": "DrivingSkill", "11": "SeniorTrain", "12": "ThisCarCost", "13": "Theft", "14": "CarValue", "15": "HomeBase", "16": "AntiTheft", "17": "PropCost", "18": "OtherCarCost", "19": "OtherCar", "20": "MedCost", "21": "Cushioning", "22": "Airbag", "23": "ILiCost", "24": "DrivHist" }, "metadata": { 
"columns": { "1": { "sdtype": "categorical" }, "2": { "sdtype": "categorical" }, "3": { "sdtype": "categorical" }, "4": { "sdtype": "categorical" }, "5": { "sdtype": "categorical" }, "6": { "sdtype": "categorical" }, "7": { "sdtype": "categorical" }, "8": { "sdtype": "categorical" }, "9": { "sdtype": "categorical" }, "10": { "sdtype": "categorical" }, "11": { "sdtype": "categorical" }, "12": { "sdtype": "categorical" }, "13": { "sdtype": "categorical" }, "14": { "sdtype": "categorical" }, "15": { "sdtype": "categorical" }, "16": { "sdtype": "categorical" }, "17": { "sdtype": "categorical" }, "18": { "sdtype": "categorical" }, "19": { "sdtype": "categorical" }, "20": { "sdtype": "categorical" }, "21": { "sdtype": "categorical" }, "22": { "sdtype": "categorical" }, "23": { "sdtype": "categorical" }, "24": { "sdtype": "categorical" }, "0": { "sdtype": "categorical" } } } }