|
142 | 142 | "metadata": {}, |
143 | 143 | "outputs": [], |
144 | 144 | "source": [ |
145 | | - "data[\"is_train\"] = (data[\"is_train\"].astype(np.float32) - data[\"is_train\"].astype(np.float32)).astype(np.bool)\n", |
146 | | - "plt.hist(data.is_train)" |
| 145 | + "plt.hist(data[\"is_train\"].astype(int))" |
147 | 146 | ] |
148 | 147 | }, |
149 | 148 | { |
|
216 | 215 | }, |
217 | 216 | "outputs": [], |
218 | 217 | "source": [ |
219 | | - "with open(\"/content/python-ml-course/resources/iris_dtree.dot\", \"w\") as dotfile:\n", |
| 218 | + "with open(\"/content/python-ml-course/notebooks/resources/iris_dtree.dot\", \"w\") as dotfile:\n", |
220 | 219 | " export_graphviz(tree, out_file=dotfile, feature_names=predictors)\n", |
221 | 220 | " dotfile.close()" |
222 | 221 | ] |
|
239 | 238 | }, |
240 | 239 | "outputs": [], |
241 | 240 | "source": [ |
242 | | - "file = open(\"/content/python-ml-course/resources/iris_dtree.dot\", \"r\")\n", |
| 241 | + "file = open(\"/content/python-ml-course/notebooks/resources/iris_dtree.dot\", \"r\")\n", |
243 | 242 | "text = file.read()\n", |
244 | 243 | "text" |
245 | 244 | ] |
|
286 | 285 | "metadata": {}, |
287 | 286 | "outputs": [], |
288 | 287 | "source": [ |
289 | | - "from sklearn.cross_validation import KFold" |
| 288 | + "from sklearn.model_selection import KFold" |
290 | 289 | ] |
291 | 290 | }, |
292 | 291 | { |
|
295 | 294 | "metadata": {}, |
296 | 295 | "outputs": [], |
297 | 296 | "source": [ |
298 | | - "cv = KFold(n = X.shape[0], n_folds=10, shuffle=True, random_state=1)" |
| 297 | + "cv = KFold(n_splits=10, shuffle=True, random_state=1)\n", |
| 298 | + "cv.get_n_splits(X)" |
299 | 299 | ] |
300 | 300 | }, |
301 | 301 | { |
302 | 302 | "cell_type": "code", |
303 | | - "execution_count": null, |
| 303 | + "execution_count": 1, |
304 | 304 | "metadata": {}, |
305 | 305 | "outputs": [], |
306 | 306 | "source": [ |
307 | | - "from sklearn.cross_validation import cross_val_score" |
| 307 | + "from sklearn.model_selection import cross_val_score\n", |
| 308 | + "from sklearn.metrics import accuracy_score, make_scorer" |
308 | 309 | ] |
309 | 310 | }, |
310 | 311 | { |
|
313 | 314 | "metadata": {}, |
314 | 315 | "outputs": [], |
315 | 316 | "source": [ |
316 | | - "scores = cross_val_score(tree, X, Y, scoring=\"accuracy\", cv = cv, n_jobs=1)\n", |
| 317 | + "scores = cross_val_score(tree, X, Y, scoring=make_scorer(accuracy_score), cv = cv, n_jobs=1)\n", |
317 | 318 | "scores" |
318 | 319 | ] |
319 | 320 | }, |
|
336 | 337 | "for i in range(1,11):\n", |
337 | 338 | " tree = DecisionTreeClassifier(criterion=\"entropy\", max_depth=i, min_samples_split=20, random_state=99)\n", |
338 | 339 | " tree.fit(X,Y)\n", |
339 | | - " cv = KFold(n = X.shape[0], n_folds=10, shuffle=True, random_state=1)\n", |
340 | | - " scores = cross_val_score(tree, X, Y, scoring=\"accuracy\", cv = cv, n_jobs=1)\n", |
| 340 | + " cv = KFold(n_splits=10, shuffle=True, random_state=1)\n", |
| 341 | + " cv.get_n_splits(X)\n", |
| 342 | + " scores = cross_val_score(tree, X, Y, scoring=\"accuracy\", cv = cv, n_jobs=-1)\n", |
341 | 343 | " score = np.mean(scores)\n", |
342 | 344 | " print(\"Score para i = \",i,\" es de \", score)\n", |
343 | 345 | " print(\" \",tree.feature_importances_)" |
|
374 | 376 | "metadata": {}, |
375 | 377 | "outputs": [], |
376 | 378 | "source": [ |
377 | | - "forest = RandomForestClassifier(n_jobs=2, oob_score=True, n_estimators=100)\n", |
| 379 | + "forest = RandomForestClassifier(n_jobs=-1, oob_score=True, n_estimators=100)\n", |
378 | 380 | "forest.fit(X,Y)" |
379 | 381 | ] |
380 | 382 | }, |
|
0 commit comments