Skip to content

Commit f33c66e

Browse files
author
Saurav Agarwal
committed
Add basic CNN training
1 parent a6d70bc commit f33c66e

7 files changed

Lines changed: 352 additions & 1 deletion

File tree

cppsrc/main/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ target_compile_options(compiler_flags INTERFACE
3030
"$<${gcc_like_cxx}:$<BUILD_INTERFACE:-Wall;-Wextra;-Wshadow;-Wformat=2;-Wunused;-pedantic>>"
3131
"$<${msvc_cxx}:$<BUILD_INTERFACE:-W3>>"
3232
)
# Data-generation executable: builds datasets consumed by the CNN trainer.
add_executable(data_generation data_generation.cpp)
target_link_libraries(data_generation PRIVATE compiler_flags CoverageControlCore CoverageControlTorch)
install(TARGETS data_generation DESTINATION ${CMAKE_INSTALL_BINDIR})


# CNN training executable (cppsrc/main/train_cnn.cpp); links the same
# core/torch libraries and shared warning flags as data_generation.
add_executable(train_cnn train_cnn.cpp)
target_link_libraries(train_cnn PRIVATE compiler_flags CoverageControlCore CoverageControlTorch)
install(TARGETS train_cnn DESTINATION ${CMAKE_INSTALL_BINDIR})

cppsrc/main/train_cnn.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
/** Main program for training a CNN for image classification.
 *
 * @file: train_cnn.cpp
 */

#include <iostream>
#include <string>
#include <CoverageControlTorch/train_cnn.h>

/** Entry point.
 *
 * Expects exactly one argument: the path to a YAML config file (parsed by
 * TrainCNN::LoadConfigs). Runs training to completion.
 * @return 0 on success, 1 on bad usage.
 */
int main(int argc, char* argv[]) {

	if (argc < 2) {
		// BUG FIX: the argument is a config file (handed to TrainCNN's
		// constructor), not a dataset directory as the old message claimed.
		std::cout << "Usage: ./train_cnn <config_file>" << std::endl;
		return 1;
	}

	std::string config_file = std::string(argv[1]);
	CoverageControlTorch::TrainCNN train_cnn(config_file);
	train_cnn.Train();

	return 0;
}

cppsrc/setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ done
1515
BUILD_DIR=${COVERAGECONTROL_WS}/build
1616
INSTALL_DIR=${COVERAGECONTROL_WS}/install
1717

18-
CMAKE_END_FLAGS="-DCMAKE_BUILD_TYPE=Release -G Ninja"
18+
CMAKE_END_FLAGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -G Ninja"
1919

2020
CleanBuild () {
2121
rm -rf ${BUILD_DIR}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
#ifndef COVERAGECONTROL_CNN_BACKBONE_H_
#define COVERAGECONTROL_CNN_BACKBONE_H_

#include <string>
#include <vector>

#include <torch/torch.h>

namespace CoverageControlTorch {

	/** CNN backbone for coverage-feature prediction.
	 *
	 * Architecture: num_layers_ stages of (Conv2d -> BatchNorm2d -> tanh),
	 * then flatten and two Linear layers, each followed by tanh.
	 * Convolutions are unpadded, so each stage shrinks the spatial size by
	 * (kernel_size_ - 1).
	 *
	 * Input:  (batch, input_dim_, image_size_, image_size_)
	 * Output: (batch, output_dim_), values in (-1, 1) from the final tanh.
	 */
	struct CoverageControlCNNImpl : torch::nn::Module {
		int input_dim_ = 4;    ///< number of input channels
		int output_dim_ = 7;   ///< number of predicted features
		int num_layers_ = 2;   ///< number of conv + batch-norm stages
		int latent_size_ = 8;  ///< conv channel width and FC hidden size
		int kernel_size_ = 3;  ///< square convolution kernel size
		int image_size_ = 32;  ///< square input spatial size

		torch::nn::ModuleList conv_layers_;
		torch::nn::ModuleList batch_norm_layers_;
		torch::nn::Linear linear_1_;
		torch::nn::Linear linear_2_;

		CoverageControlCNNImpl(int input_dim, int output_dim, int num_layers, int latent_size, int kernel_size, int image_size) :
			input_dim_(input_dim),
			output_dim_(output_dim),
			num_layers_(num_layers),
			latent_size_(latent_size),
			kernel_size_(kernel_size),
			image_size_(image_size),
			conv_layers_(torch::nn::ModuleList()),
			batch_norm_layers_(torch::nn::ModuleList()),
			linear_1_(nullptr),
			linear_2_(nullptr) {

			// Per-stage channel counts: input_dim -> latent -> ... -> latent.
			std::vector<int> layers_;
			layers_.push_back(input_dim_);
			for(int i = 0; i < num_layers_; ++i) {
				layers_.push_back(latent_size_);
			}

			for(int i = 0; i < num_layers_; ++i) {
				// BUG FIX: kernel size was hardcoded to 3 here while the
				// flatten_size computation below uses kernel_size_; any
				// config with KernelSize != 3 would fail at the first Linear.
				conv_layers_->push_back(register_module("conv" + std::to_string(i),
							torch::nn::Conv2d(torch::nn::Conv2dOptions(layers_[i], layers_[i + 1], kernel_size_))));
				batch_norm_layers_->push_back(register_module("batch_norm" + std::to_string(i),
							torch::nn::BatchNorm2d(layers_[i + 1])));
			}

			// Each unpadded conv shrinks each spatial dim by (kernel_size_ - 1).
			size_t flatten_size = latent_size_ * (image_size_ - num_layers_ * (kernel_size_ - 1)) * (image_size_ - num_layers_ * (kernel_size_ - 1));
			linear_1_ = register_module("linear_1", torch::nn::Linear(flatten_size, latent_size_));
			linear_2_ = register_module("linear_2", torch::nn::Linear(latent_size_, output_dim_));
		}

		/** Forward pass; see class comment for tensor shapes. */
		torch::Tensor forward(torch::Tensor x) {
			for(size_t i = 0; i < conv_layers_->size(); ++i) {
				auto batch_norm = (batch_norm_layers_[i].get())->as<torch::nn::BatchNorm2d>();
				auto conv = (conv_layers_[i].get())->as<torch::nn::Conv2d>();
				x = torch::tanh(batch_norm->forward(conv->forward(x)));
			}
			x = x.flatten(1);  // (batch, latent * H' * W')
			x = torch::tanh(linear_1_->forward(x));
			// BUG FIX: tanh was applied twice after the final Linear
			// (tanh(tanh(x))); a single activation is intended.
			x = torch::tanh(linear_2_->forward(x));
			return x;
		}
	};

	TORCH_MODULE(CoverageControlCNN);

} // namespace CoverageControlTorch

#endif // COVERAGECONTROL_CNN_BACKBONE_H_
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
/** Generator for edge weights and communication maps from robot positions. **/

#ifndef COVERAGECONTROLTORCH_COMMUNICATION_MAP_GENERATOR_H_
#define COVERAGECONTROLTORCH_COMMUNICATION_MAP_GENERATOR_H_

#include <cmath>
#include <vector>

#include <torch/torch.h>

namespace F = torch::nn::functional;
using namespace torch::indexing;

namespace CoverageControlTorch {

	/** Computes, for a set of robot positions:
	 *  (a) a pairwise edge-weight matrix exp(-d^2 / r^2), thresholded at
	 *      exp(-1) so weights vanish beyond the communication range r, and
	 *  (b) per-robot size_ x size_ communication maps marking neighbors.
	 *
	 * NOTE(review): `eval` hides torch::nn::Module::eval(); consider
	 * renaming it (e.g. Generate) in a follow-up to avoid confusion.
	 */
	struct EdgeWtsCommMapGenerator : torch::nn::Module {

		EdgeWtsCommMapGenerator (int size, double communication_range, double resolution) : size_(size), communication_range_(communication_range), resolution_(resolution) {
		}

		/** Returns {edge_weights, comm_map}.
		 * assumes robot_positions has a trailing (x, y) coordinate axis, yet
		 * num_robots is read from size(-1) — TODO(review): confirm layout. */
		auto eval(torch::Tensor robot_positions) {
			torch::Tensor edge_weights;
			auto num_robots = robot_positions.size(-1);
			auto pairwise_dist_matrices = torch::cdist(robot_positions, robot_positions, 2);
			// Gaussian weighting of pairwise distances; entries below exp(-1)
			// (distance beyond communication_range_) are zeroed in place.
			edge_weights = torch::exp(-(pairwise_dist_matrices.square())/(communication_range_*communication_range_));
			F::threshold(edge_weights, F::ThresholdFuncOptions(std::exp(-1.0), 0).inplace(true));

			// BUG FIX: was torch::empty — unmarked cells would hold
			// uninitialized memory instead of 0.
			torch::Tensor comm_map = torch::zeros({num_robots, size_, size_});
			auto relative_pos = robot_positions.unsqueeze(2) - robot_positions.unsqueeze(1);

			// One map cell spans comm_scale * resolution_ world units; the
			// translation recenters relative positions onto the map origin.
			double comm_scale = (communication_range_ * 2.) / size_;
			torch::Tensor map_translation = torch::empty({2});
			map_translation.index_put_({0}, size_ * comm_scale * resolution_/2.);
			map_translation.index_put_({1}, size_ * comm_scale * resolution_/2.);
			for(int i = 0; i < num_robots; ++i) {
				for(int j = 0; j < num_robots; ++j) {
					if(i == j) { continue; }
					// BUG FIX: removed unconditional .to(torch::kCUDA) — it
					// crashed CPU-only builds and mixed devices with the CPU
					// tensors map_translation and comm_map.
					auto neighbor_pos = relative_pos.index({Slice(), i, j, Slice()});
					auto map_pos = neighbor_pos + map_translation;
					auto indices = torch::round(map_pos / (resolution_ * comm_scale));
					// TODO(review): `indices` is a floating-point 2-vector;
					// index_put_ likely needs integer (row, col) indices —
					// confirm intent and fix the scatter.
					comm_map.index_put_({i, indices}, 1);
				}
			}
			std::vector <torch::Tensor> edge_wts_comm_map{edge_weights, comm_map};
			// Return edge weights and communication maps
			return edge_wts_comm_map;
		}

		int size_;                                 ///< map side length in cells
		double communication_range_, resolution_;  ///< world units
	};

} // namespace CoverageControlTorch

#endif // COVERAGECONTROLTORCH_COMMUNICATION_MAP_GENERATOR_H_
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
/** This file contains the declaration of the class TrainCNN using Torch C++ API.
 * The class TrainCNN takes local maps, communication maps, and obstacles maps as input, and
 * predicts the voronoi coverage features.
 *
 **/

#ifndef COVERAGECONTROL_TRAIN_CNN_H_
#define COVERAGECONTROL_TRAIN_CNN_H_


#include <iostream>
#include <string>
#include <vector>
#include <filesystem>
#include <yaml-cpp/yaml.h>
#include <torch/torch.h>

#include "cnn_backbone.h"

using namespace torch::indexing;
namespace F = torch::nn::functional;

namespace CoverageControlTorch {

	class TrainCNN {
		private:
			torch::Tensor maps_;      // (N, 4, image, image) stacked input maps
			torch::Tensor features_;  // (N, OutputDim) target coverage features
			torch::Device device_ = torch::kCPU;  // switched to CUDA if available
			YAML::Node config_;
			YAML::Node cnn_config_;   // the "CNN" subtree of config_
			std::string data_dir_;
			size_t batch_size_ = 64;
			size_t num_epochs_ = 10;
			float learning_rate_ = 0.001;
			float weight_decay_ = 0.0001;
			int image_size_ = 32;

			std::shared_ptr<torch::optim::Adam> optimizer_;
		public:

			/** Construct from a YAML config file path.
			 * Selects CUDA when available; throws via LoadConfigs if the
			 * config file does not exist. */
			TrainCNN(std::string const &config_file) {
				if (torch::cuda::is_available()) {
					device_ = torch::kCUDA;
					std::cout << "Using CUDA" << std::endl;
				}
				LoadConfigs(config_file);
			}

			/** Train the CNN on the loaded dataset, then report the final
			 * loss and per-feature error over the full dataset.
			 * All hyperparameters (layers, epochs, learning rate, batch
			 * size, ...) come from the config file loaded at construction. */
			void Train() {
				LoadDataset();

				CoverageControlCNN model(
						cnn_config_["InputDim"].as<int>(),
						cnn_config_["OutputDim"].as<int>(),
						cnn_config_["NumLayers"].as<int>(),
						cnn_config_["LatentSize"].as<int>(),
						cnn_config_["KernelSize"].as<int>(),
						image_size_);

				model->to(device_);

				optimizer_ = std::make_shared<torch::optim::Adam>(
						model->parameters(),
						torch::optim::AdamOptions(learning_rate_).weight_decay(weight_decay_));

				size_t dataset_size = maps_.size(0);
				for (size_t epoch = 1; epoch < num_epochs_ + 1; ++epoch) {
					for (size_t i = 0; i < dataset_size; i += batch_size_) {
						auto loss = TrainOneBatch(model, i);
						std::cout << "Epoch: " << epoch << ", Batch: " << i << ", Loss: " << loss << std::endl;
					}
				}

				// BUG FIX: run the final evaluation in inference mode —
				// switch BatchNorm to its running statistics and disable
				// gradient tracking (the old code evaluated in train mode
				// and built a graph over the whole dataset).
				model->eval();
				torch::NoGradGuard no_grad;
				maps_ = maps_.to(device_);
				auto pred = model->forward(maps_).to(torch::kCPU);
				features_ = features_.to(torch::kCPU);
				// Compute loss individually for each feature in features
				auto loss = torch::mse_loss(pred, features_);
				std::cout << "Final loss: " << loss.item<float>() << std::endl;
				auto loss_vec = torch::norm(pred - features_, 2, 0).to(torch::kCPU);
				std::cout << "Loss vector: " << loss_vec << std::endl;
				std::cout << "Max of feature 0 true: " << features_.index({Slice(), 0}).max() << std::endl;
				std::cout << "Max of feature 0 pred: " << pred.index({Slice(), 0}).max() << std::endl;
				std::cout << "Max of feature 1 true: " << features_.index({Slice(), 1}).max() << std::endl;
				std::cout << "Max of feature 1 pred: " << pred.index({Slice(), 1}).max() << std::endl;

			}

			/** Run one optimization step on the batch starting at batch_idx.
			 * A Slice end past the dataset is clamped by torch, so the last
			 * (possibly short) batch is handled correctly.
			 * @return the batch MSE loss. */
			float TrainOneBatch(CoverageControlCNN &model, size_t batch_idx) {
				torch::Tensor batch = maps_.index({Slice(batch_idx, batch_idx + batch_size_)});
				batch = batch.to(device_);
				auto x = model->forward(batch);

				// Backward and optimize
				optimizer_->zero_grad();
				torch::Tensor batch_features = features_.index({Slice(batch_idx, batch_idx + batch_size_)}).to(device_);
				auto loss = torch::mse_loss(x, batch_features);
				loss.backward();
				optimizer_->step();

				return loss.item<float>();

			}


			/** Load the dataset tensors from data_dir_: local, communication
			 * and obstacle maps plus normalized coverage features. Each map
			 * is reshaped to (N, C, image, image) and they are concatenated
			 * along the channel dimension into maps_ (C = 1 + 2 + 1 = 4). */
			void LoadDataset() {
				std::string local_maps_file = data_dir_ + "/local_maps.pt";
				std::string comm_maps_file = data_dir_ + "/comm_maps.pt";
				std::string obstacle_maps_file = data_dir_ + "/obstacle_maps.pt";
				std::string features_file = data_dir_ + "/normalized_coverage_features.pt";

				torch::Tensor local_maps;
				torch::load(local_maps, local_maps_file);
				local_maps = local_maps.unsqueeze(2).view({-1, 1, image_size_, image_size_});
				torch::Tensor comm_maps;
				torch::load(comm_maps, comm_maps_file);
				// comm maps are stored sparse; densify before reshaping
				comm_maps = comm_maps.to_dense().view({-1, 2, image_size_, image_size_});
				torch::Tensor obstacle_maps;
				torch::load(obstacle_maps, obstacle_maps_file);
				obstacle_maps = obstacle_maps.to_dense().unsqueeze(2).view({-1, 1, image_size_, image_size_});

				torch::load(features_, features_file);
				features_ = features_.view({-1, features_.size(2)});
				// Keep only the first OutputDim features as training targets.
				int output_dim = config_["CNN"]["OutputDim"].as<int>();
				features_ = features_.index({Slice(), Slice(0, output_dim)});

				maps_ = torch::cat({local_maps, comm_maps, obstacle_maps}, 1);
				std::cout << "maps shape: " << maps_.sizes() << std::endl;

			}

			/** Parse the YAML config file and cache hyperparameters.
			 * @throws std::runtime_error if the file does not exist. */
			void LoadConfigs(std::string const &config_file) {
				std::cout << "Using config file: " << config_file << std::endl;
				// Check if config_file exists
				if(not std::filesystem::exists(config_file)) {
					throw std::runtime_error("Could not open config file: " + config_file);
				}
				config_ = YAML::LoadFile(config_file);
				data_dir_ = config_["pDataDir"].as<std::string>();
				batch_size_ = config_["BatchSize"].as<size_t>();
				num_epochs_ = config_["NumEpochs"].as<size_t>();
				learning_rate_ = config_["LearningRate"].as<float>();
				weight_decay_ = config_["WeightDecay"].as<float>();

				cnn_config_ = config_["CNN"];
				image_size_ = cnn_config_["ImageSize"].as<int>();
			}

	};

} // namespace CoverageControlTorch

#endif //COVERAGECONTROL_TRAIN_CNN_H_

params/learning_params.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
pDataDir: "/root/CoverageControl_ws/data/pure_coverage/" # Absolute location

GPUs: [4, 5]

LearningRate: 0.001
WeightDecay: 0.0001
BatchSize: 10
NumEpochs: 50

CNN:
  InputDim: 4    # channels: 1 local map + 2 comm maps + 1 obstacle map
  OutputDim: 7   # number of coverage features predicted (and kept as targets)
  NumLayers: 2   # conv + batch-norm stages
  LatentSize: 8  # conv channel width and FC hidden size
  KernelSize: 3  # square, unpadded convolutions
  ImageSize: 32  # side length of the square input maps

0 commit comments

Comments
 (0)