Commit b1765ce

Merge pull request #120 from sguada/images_layer
Images layer: A data provider layer directly from images
2 parents: c965bc1 + 587eeab
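
The new layer reads its input from a plain-text index file: one image path followed by an integer label per line (see the infile >> filename >> label loop in SetUp below). A hypothetical index file, with made-up paths:

    /data/train/cat_001.jpg 0
    /data/train/dog_001.jpg 1
    /data/train/cat_002.jpg 0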

6 files changed

Lines changed: 454 additions & 0 deletions

File tree

data/cat.jpg

627 KB

include/caffe/vision_layers.hpp

Lines changed: 38 additions & 0 deletions
@@ -377,6 +377,44 @@ class DataLayer : public Layer<Dtype> {
   Blob<Dtype> data_mean_;
 };
 
+// This function is used to create a pthread that prefetches the data.
+template <typename Dtype>
+void* ImagesLayerPrefetch(void* layer_pointer);
+
+template <typename Dtype>
+class ImagesLayer : public Layer<Dtype> {
+  // The function used to perform prefetching.
+  friend void* ImagesLayerPrefetch<Dtype>(void* layer_pointer);
+
+ public:
+  explicit ImagesLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual ~ImagesLayer();
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  vector<std::pair<std::string, int> > lines_;
+  int lines_id_;
+  int datum_channels_;
+  int datum_height_;
+  int datum_width_;
+  int datum_size_;
+  pthread_t thread_;
+  shared_ptr<Blob<Dtype> > prefetch_data_;
+  shared_ptr<Blob<Dtype> > prefetch_label_;
+  Blob<Dtype> data_mean_;
+};
+
 template <typename Dtype>
 class SoftmaxLayer : public Layer<Dtype> {
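
The header mirrors DataLayer's double-buffered design: ImagesLayerPrefetch runs on its own pthread and fills prefetch_data_ / prefetch_label_ while the network consumes the previous batch. A minimal standalone sketch of that lifecycle, with simplified names (an illustration, not the actual Caffe code):

    #include <pthread.h>
    #include <cstring>

    struct Prefetcher {
      pthread_t thread_;
      float buffer_[4096];  // stands in for prefetch_data_ / prefetch_label_

      // Thread body: fill the staging buffer with the next batch.
      static void* Prefetch(void* self_ptr) {
        Prefetcher* self = static_cast<Prefetcher*>(self_ptr);
        // Placeholder for "read and preprocess images into the buffer":
        std::memset(self->buffer_, 0, sizeof(self->buffer_));
        return NULL;
      }
      void Start() { pthread_create(&thread_, NULL, Prefetch, this); }
      // One call per Forward: wait for the batch in flight, hand it to the
      // consumer, then immediately begin prefetching the next batch.
      void Forward(float* top) {
        pthread_join(thread_, NULL);
        std::memcpy(top, buffer_, sizeof(buffer_));
        Start();
      }
    };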

src/caffe/layer_factory.cpp

Lines changed: 2 additions & 0 deletions
@@ -27,6 +27,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new ConvolutionLayer<Dtype>(param);
   } else if (type == "data") {
     return new DataLayer<Dtype>(param);
+  } else if (type == "images") {
+    return new ImagesLayer<Dtype>(param);
   } else if (type == "dropout") {
     return new DropoutLayer<Dtype>(param);
   } else if (type == "euclidean_loss") {
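
With this registration, any layer whose type string is "images" is routed to the new class. Roughly, assuming GetLayer is visible as declared in the hunk above (the source path is invented):

    LayerParameter param;
    param.set_type("images");
    param.set_source("/data/train/file_list.txt");  // hypothetical index file
    Layer<float>* layer = GetLayer<float>(param);   // returns an ImagesLayer<float>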

src/caffe/layers/images_layer.cpp

Lines changed: 274 additions & 0 deletions
@@ -0,0 +1,274 @@
+// Copyright 2013 Yangqing Jia
+
+#include <stdint.h>
+#include <leveldb/db.h>
+#include <pthread.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+using std::string;
+using std::pair;
+
+namespace caffe {
+
+template <typename Dtype>
+void* ImagesLayerPrefetch(void* layer_pointer) {
+  CHECK(layer_pointer);
+  ImagesLayer<Dtype>* layer =
+      reinterpret_cast<ImagesLayer<Dtype>*>(layer_pointer);
+  CHECK(layer);
+  Datum datum;
+  CHECK(layer->prefetch_data_);
+  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
+  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
+  const Dtype scale = layer->layer_param_.scale();
+  const int batchsize = layer->layer_param_.batchsize();
+  const int cropsize = layer->layer_param_.cropsize();
+  const bool mirror = layer->layer_param_.mirror();
+  const int new_height = layer->layer_param_.new_height();
+  const int new_width = layer->layer_param_.new_width();
+
+  if (mirror && cropsize == 0) {
+    LOG(FATAL) << "Current implementation requires mirror and cropsize to be "
+        << "set at the same time.";
+  }
+  // datum scales
+  const int channels = layer->datum_channels_;
+  const int height = layer->datum_height_;
+  const int width = layer->datum_width_;
+  const int size = layer->datum_size_;
+  const int lines_size = layer->lines_.size();
+  const Dtype* mean = layer->data_mean_.cpu_data();
+  for (int itemid = 0; itemid < batchsize; ++itemid) {
+    // get a blob
+    CHECK_GT(lines_size, layer->lines_id_);
+    if (!ReadImageToDatum(layer->lines_[layer->lines_id_].first,
+            layer->lines_[layer->lines_id_].second,
+            new_height, new_width, &datum)) {
+      continue;
+    }
+    const string& data = datum.data();
+    if (cropsize) {
+      CHECK(data.size()) << "Image cropping only supports uint8 data";
+      int h_off, w_off;
+      // We only do random crop when we do training.
+      if (Caffe::phase() == Caffe::TRAIN) {
+        h_off = rand() % (height - cropsize);
+        w_off = rand() % (width - cropsize);
+      } else {
+        h_off = (height - cropsize) / 2;
+        w_off = (width - cropsize) / 2;
+      }
+      if (mirror && rand() % 2) {
+        // Copy mirrored version: reverse the column index within the crop.
+        for (int c = 0; c < channels; ++c) {
+          for (int h = 0; h < cropsize; ++h) {
+            for (int w = 0; w < cropsize; ++w) {
+              top_data[((itemid * channels + c) * cropsize + h) * cropsize
+                       + cropsize - 1 - w] =
+                  (static_cast<Dtype>(
+                      (uint8_t)data[(c * height + h + h_off) * width
+                                    + w + w_off])
+                   - mean[(c * height + h + h_off) * width + w + w_off])
+                  * scale;
+            }
+          }
+        }
+      } else {
+        // Normal copy
+        for (int c = 0; c < channels; ++c) {
+          for (int h = 0; h < cropsize; ++h) {
+            for (int w = 0; w < cropsize; ++w) {
+              top_data[((itemid * channels + c) * cropsize + h) * cropsize + w]
+                  = (static_cast<Dtype>(
+                      (uint8_t)data[(c * height + h + h_off) * width
+                                    + w + w_off])
+                     - mean[(c * height + h + h_off) * width + w + w_off])
+                  * scale;
+            }
+          }
+        }
+      }
+    } else {
+      // Just copy the whole data
+      if (data.size()) {
+        for (int j = 0; j < size; ++j) {
+          top_data[itemid * size + j] =
+              (static_cast<Dtype>((uint8_t)data[j]) - mean[j]) * scale;
+        }
+      } else {
+        for (int j = 0; j < size; ++j) {
+          top_data[itemid * size + j] =
+              (datum.float_data(j) - mean[j]) * scale;
+        }
+      }
+    }
+
+    top_label[itemid] = datum.label();
+    // go to the next iter
+    layer->lines_id_++;
+    if (layer->lines_id_ >= lines_size) {
+      // We have reached the end. Restart from the first.
+      DLOG(INFO) << "Restarting data prefetching from start.";
+      layer->lines_id_ = 0;
+      if (layer->layer_param_.shuffle_images()) {
+        std::random_shuffle(layer->lines_.begin(), layer->lines_.end());
+      }
+    }
+  }
+
+  return reinterpret_cast<void*>(NULL);
+}
+
+template <typename Dtype>
+ImagesLayer<Dtype>::~ImagesLayer<Dtype>() {
+  // Finally, join the thread
+  CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+}
+
+template <typename Dtype>
+void ImagesLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 0) << "Input Layer takes no input blobs.";
+  CHECK_EQ(top->size(), 2) << "Input Layer takes two blobs as output.";
+  const int new_height = this->layer_param_.new_height();
+  const int new_width = this->layer_param_.new_width();
+  CHECK((new_height == 0 && new_width == 0) ||
+      (new_height > 0 && new_width > 0))
+      << "Current implementation requires new_height and new_width "
+      << "to be set at the same time.";
+  // Read the file with filenames and labels
+  LOG(INFO) << "Opening file " << this->layer_param_.source();
+  std::ifstream infile(this->layer_param_.source().c_str());
+  string filename;
+  int label;
+  while (infile >> filename >> label) {
+    lines_.push_back(std::make_pair(filename, label));
+  }
+
+  if (this->layer_param_.shuffle_images()) {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling data";
+    std::random_shuffle(lines_.begin(), lines_.end());
+  }
+  LOG(INFO) << "A total of " << lines_.size() << " images.";
+
+  lines_id_ = 0;
+  // Check if we would need to randomly skip a few data points
+  if (this->layer_param_.rand_skip()) {
+    unsigned int skip = rand() % this->layer_param_.rand_skip();
+    LOG(INFO) << "Skipping first " << skip << " data points.";
+    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
+    lines_id_ = skip;
+  }
+  // Read a data point, and use it to initialize the top blob.
+  Datum datum;
+  CHECK(ReadImageToDatum(lines_[lines_id_].first, lines_[lines_id_].second,
+                         new_height, new_width, &datum));
+  // image
+  int cropsize = this->layer_param_.cropsize();
+  if (cropsize > 0) {
+    (*top)[0]->Reshape(
+        this->layer_param_.batchsize(), datum.channels(), cropsize, cropsize);
+    prefetch_data_.reset(new Blob<Dtype>(
+        this->layer_param_.batchsize(), datum.channels(), cropsize,
+        cropsize));
+  } else {
+    (*top)[0]->Reshape(
+        this->layer_param_.batchsize(), datum.channels(), datum.height(),
+        datum.width());
+    prefetch_data_.reset(new Blob<Dtype>(
+        this->layer_param_.batchsize(), datum.channels(), datum.height(),
+        datum.width()));
+  }
+  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
+      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
+      << (*top)[0]->width();
+  // label
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1);
+  prefetch_label_.reset(
+      new Blob<Dtype>(this->layer_param_.batchsize(), 1, 1, 1));
+  // datum size
+  datum_channels_ = datum.channels();
+  datum_height_ = datum.height();
+  datum_width_ = datum.width();
+  datum_size_ = datum.channels() * datum.height() * datum.width();
+  CHECK_GT(datum_height_, cropsize);
+  CHECK_GT(datum_width_, cropsize);
+  // check if we want to have mean
+  if (this->layer_param_.has_meanfile()) {
+    BlobProto blob_proto;
+    LOG(INFO) << "Loading mean file from " << this->layer_param_.meanfile();
+    ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(),
+                            &blob_proto);
+    data_mean_.FromProto(blob_proto);
+    CHECK_EQ(data_mean_.num(), 1);
+    CHECK_EQ(data_mean_.channels(), datum_channels_);
+    CHECK_EQ(data_mean_.height(), datum_height_);
+    CHECK_EQ(data_mean_.width(), datum_width_);
+  } else {
+    // Simply initialize an all-empty mean.
+    data_mean_.Reshape(1, datum_channels_, datum_height_, datum_width_);
+  }
+  // Now, start the prefetch thread. Before calling prefetch, we make two
+  // cpu_data calls so that the prefetch thread does not accidentally make
+  // simultaneous cudaMalloc calls when the main thread is running. On some
+  // GPUs this seems to cause failures if we do not do so.
+  prefetch_data_->mutable_cpu_data();
+  prefetch_label_->mutable_cpu_data();
+  data_mean_.cpu_data();
+  DLOG(INFO) << "Initializing prefetch";
+  CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+  DLOG(INFO) << "Prefetch initialized.";
+}
+
+template <typename Dtype>
+void ImagesLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  // First, join the thread
+  CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+  // Copy the data
+  memcpy((*top)[0]->mutable_cpu_data(), prefetch_data_->cpu_data(),
+      sizeof(Dtype) * prefetch_data_->count());
+  memcpy((*top)[1]->mutable_cpu_data(), prefetch_label_->cpu_data(),
+      sizeof(Dtype) * prefetch_label_->count());
+  // Start a new prefetch thread
+  CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+}
+
+template <typename Dtype>
+void ImagesLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  // First, join the thread
+  CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+  // Copy the data
+  CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
+      prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
+      cudaMemcpyHostToDevice));
+  CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
+      prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
+      cudaMemcpyHostToDevice));
+  // Start a new prefetch thread
+  CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+}
+
+// The backward operations are dummy - they do not carry any computation.
+template <typename Dtype>
+Dtype ImagesLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+template <typename Dtype>
+Dtype ImagesLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(ImagesLayer);
+
+}  // namespace caffe
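
As a sanity check on the crop-and-mirror indexing above: the destination offset flattens (itemid, c, h, w) in row-major order, with the column reversed for mirroring, and every copied value gets the same affine transform (pixel - mean) * scale. A compile-time spot check of one hand-picked case (values chosen purely for illustration):

    // channels = 3, cropsize = 2, itemid = 1, c = 2, h = 1, w = 0:
    // dest = ((itemid * channels + c) * cropsize + h) * cropsize
    //        + (cropsize - 1 - w)
    static_assert(((1 * 3 + 2) * 2 + 1) * 2 + (2 - 1 - 0) == 23,
                  "last element of item 1's 3x2x2 block (indices 12..23)");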

src/caffe/proto/caffe.proto

Lines changed: 11 additions & 0 deletions
@@ -91,6 +91,17 @@ message LayerParameter {
   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
   // be larger than the number of keys in the leveldb.
   optional uint32 rand_skip = 53 [ default = 0 ];
+
+  // For the Reshape layer, one needs to specify the new dimensions.
+  optional int32 new_num = 60 [default = 0];
+  optional int32 new_channels = 61 [default = 0];
+  optional int32 new_height = 62 [default = 0];
+  optional int32 new_width = 63 [default = 0];
+
+  // Used by ImagesLayer to shuffle the list of files at every epoch; the
+  // layer will also resize images if new_height or new_width are not zero.
+  optional bool shuffle_images = 64 [default = false];
+
 }
 
 message LayerConnection {
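
Putting it together, a hypothetical network snippet using the new layer and fields, in the LayerConnection-style prototxt format this proto defines (paths and sizes are made up):

    layers {
      layer {
        name: "data"
        type: "images"
        source: "/data/train/file_list.txt"  # one "<image path> <label>" per line
        batchsize: 32
        new_height: 256       # resize at load time; 0 (the default) keeps original size
        new_width: 256
        shuffle_images: true  # reshuffle the file list at the end of every epoch
        cropsize: 227
        mirror: true
      }
      top: "data"
      top: "label"
    }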
