Skip to content

Commit dce0d58

Browse files
committed
Fix multiple definition of get_column_index.
1 parent 9bd20c9 commit dce0d58

7 files changed

Lines changed: 42 additions & 30 deletions

File tree

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ To run the following examples you will also need to [register for GraphLab Creat
1717

1818
After that, try running the basic example:
1919
```
20-
python examples/basic.py
20+
ipython examples/basic.py
2121
```
2222

2323
If you want to try a less synthetic example, download the [1TB Criteo dataset](http://labs.criteo.com/downloads/download-terabyte-click-logs/).
@@ -28,12 +28,12 @@ gzip -cd day_0.gz| head -n 1000000 > criteo-sample.tsv
2828

2929
Next we have a sample script for performing some of the same types of feature engineering that the contest winners have been using:
3030
```
31-
python examples/criteo_process.py
31+
ipython examples/criteo_process.py
3232
```
3333

3434
Train an FFM model on this data.
3535
```
36-
python examples/criteo_sample.py
36+
ipython examples/criteo_sample.py
3737
```
3838

3939
You should see something like the following (which appears to be overfitting in this example):

examples/basic.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import graphlab as gl
12
import ffm
23
from convert import read_libffm_file
34

@@ -6,7 +7,9 @@
67
validfile = 'lib/bigdata.te.txt'
78
train = read_libffm_file(trainfile)
89
valid = read_libffm_file(validfile)
9-
valid['features.0'] = None
10+
11+
train['y'] = train['y'].astype(int)
12+
del train['features.0']
1013
valid = valid[train.column_names()]
1114
train.save('examples/small.tr.sframe')
1215
valid.save('examples/small.te.sframe')
@@ -16,5 +19,5 @@
1619
# Train a model
1720
m = ffm.FFM()
1821
m.fit(train, valid, target='y', features=features, nr_iters=15)
19-
yhat = m.predict(valid)
20-
print yhat
22+
# yhat = m.predict(valid)
23+
# print yhat

examples/small.te.sframe/dir_archive.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ contents=sframe
66
[prefixes]
77
0000=dir_archive.ini
88
0001=objects.bin
9-
0002=m_e9c35c0d5717c810
9+
0002=m_2e9354c8c87ae103

examples/small.tr.sframe/dir_archive.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ contents=sframe
66
[prefixes]
77
0000=dir_archive.ini
88
0001=objects.bin
9-
0002=m_3a9d01aa18c1db1f
9+
0002=m_9ab3a427b2c5b326

lib/ffm.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,23 @@
2222

2323
namespace ffm {
2424

25+
size_t get_column_index(graphlab::gl_sframe sf, std::string colname) {
26+
const auto colnames = sf.column_names();
27+
for (size_t i = 0; i < colnames.size(); ++i) {
28+
if (colnames[i] == colname) {
29+
return i;
30+
}
31+
}
32+
return -1;
33+
}
34+
35+
2536
namespace {
2637

2738
using namespace std;
2839
using namespace graphlab;
2940

41+
3042
ffm_int const kALIGNByte = 16;
3143
ffm_int const kALIGN = kALIGNByte/sizeof(ffm_float);
3244

@@ -229,11 +241,18 @@ shared_ptr<ffm_model> train(
229241
}
230242

231243
size_t target_col_idx = get_column_index(tr->sf, tr->target_column);
244+
// logprogress_stream << tr->target_column << " " << get_column_index(tr->sf, tr->target_column) << std::endl;
245+
// logprogress_stream << flex_type_enum_to_name(tr->sf.select_column(tr->target_column).dtype()) << std::endl;
246+
232247
std::vector<size_t> feature_col_idxs;
233248
for (auto col : tr->feature_columns) {
249+
// logprogress_stream << col << " " << get_column_index(tr->sf, col) << std::endl;
250+
// logprogress_stream << flex_type_enum_to_name(tr->sf.select_column(col).dtype()) << std::endl;
251+
234252
feature_col_idxs.push_back(get_column_index(tr->sf, col));
235253
}
236254

255+
237256
for(ffm_int iter = 0; iter < param.nr_iters; iter++)
238257
{
239258
ffm_double tr_loss = 0;
@@ -246,8 +265,18 @@ shared_ptr<ffm_model> train(
246265
for (; it != rsf.end(); ++it, ++i) {
247266

248267
row_nodes.clear();
249-
const std::vector<flexible_type>& row = *it;
268+
std::vector<flexible_type> row = *it;
250269
const auto& yval = row[target_col_idx];
270+
271+
if (row[target_col_idx].get_type() != flex_type_enum::INTEGER) {
272+
log_and_throw("Response must be integer type.");
273+
}
274+
275+
if (row[target_col_idx].get_type() != flex_type_enum::INTEGER) {
276+
logprogress_stream << "Column " << target_col_idx << std::endl;
277+
logprogress_stream << flex_type_enum_to_name(row[target_col_idx].get_type()) << std::endl;
278+
log_and_throw("Response must be integer type.");
279+
}
251280
ffm_float y = (yval.get<flex_int>() > 0) ? 1.0f : -1.0f;
252281

253282
for (size_t col : feature_col_idxs) {

lib/ffm.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,12 @@ namespace ffm
1212
{
1313
#endif
1414

15-
16-
1715
typedef float ffm_float;
1816
typedef double ffm_double;
1917
typedef int ffm_int;
2018
typedef long long ffm_long;
2119

22-
size_t get_column_index(graphlab::gl_sframe sf, std::string colname) {
23-
const auto colnames = sf.column_names();
24-
for (size_t i = 0; i < colnames.size(); ++i) {
25-
if (colnames[i] == colname) {
26-
return i;
27-
}
28-
}
29-
return -1;
30-
}
20+
size_t get_column_index(graphlab::gl_sframe sf, std::string colname);
3121

3222
typedef graphlab::gl_sarray blah;
3323

src/libffm.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,6 @@ using namespace graphlab;
1919
using namespace std;
2020
using namespace ffm;
2121

22-
size_t get_column_index(gl_sframe sf, string colname) {
23-
const auto colnames = sf.column_names();
24-
for (size_t i = 0; i < colnames.size(); ++i) {
25-
if (colnames[i] == colname) {
26-
return i;
27-
}
28-
}
29-
return -1;
30-
}
31-
3222
ffm_problem read_sframe(gl_sframe data, std::string target,
3323
std::vector<std::string> features,
3424
size_t max_field_idx,

0 commit comments

Comments
 (0)