{ "cells": [ { "cell_type": "heading", "metadata": {}, "level": 1, "source": [ "Loading an example dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0. 0. 5. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 10. 0. 0.]\n [ 0. 0. 0. ..., 16. 9. 0.]\n ..., \n [ 0. 0. 1. ..., 6. 0. 0.]\n [ 0. 0. 2. ..., 12. 0. 0.]\n [ 0. 0. 10. ..., 12. 1. 0.]]\n" ] } ], "source": [ "from sklearn import datasets\n", "iris = datasets.load_iris()\n", "digits = datasets.load_digits()\n", "\n", "print(digits.data)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 2, ..., 8, 9, 8])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.target" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0., 0., 5., 13., 9., 1., 0., 0.],\n [ 0., 0., 13., 15., 10., 15., 5., 0.],\n [ 0., 3., 15., 2., 0., 11., 8., 0.],\n [ 0., 4., 12., 0., 0., 8., 8., 0.],\n [ 0., 5., 8., 0., 0., 9., 8., 0.],\n [ 0., 4., 11., 0., 1., 12., 7., 0.],\n [ 0., 2., 14., 5., 10., 12., 0., 0.],\n [ 0., 0., 6., 13., 10., 0., 0., 0.]])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.images[0]" ] }, { "cell_type": "heading", "metadata": {}, "level": 1, "source": [ "Learning and predicting" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import svm\n", "clf = svm.SVC(gamma = 0.001,C=100.)\n", "clf.fit(digits.data[:-1],digits.target[:-1])" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([8])" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.predict(digits.data[-1:])" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)\n0\n" ] } ], "source": [ "from sklearn import svm\n", "from sklearn import datasets\n", "clf = svm.SVC()\n", "iris = datasets.load_iris()\n", "X,y = iris.data,iris.target\n", "print(clf.fit(X,y))\n", "\n", "import pickle\n", "s = pickle.dumps(clf)\n", "clf2 = pickle.loads(s)\n", "clf2.predict(X[0:1])\n", "print(y[0])" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['data/filename.pkl']" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.externals import joblib\n", "joblib.dump(clf,'data/filename.pkl')" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)\n" ] } ], "source": [ "clf = joblib.load('data/filename.pkl')\n", "print(clf)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "float32\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "float64\n" ] } ], "source": [ "import numpy as np\n", "from sklearn import random_projection\n", "rng = np.random.RandomState(0)\n", "X = rng.rand(10,2000)\n", "X = np.array(X,dtype='float32')\n", "X.dtype\n", "print(X.dtype)\n", "\n", "transformer = random_projection.GaussianRandomProjection()\n", "X_new = transformer.fit_transform(X)\n", "X_new.dtype\n", "print(X_new.dtype)" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import datasets\n", "from sklearn.svm import SVC\n", "iris = datasets.load_iris()\n", "clf = SVC()\n", "clf.fit(iris.data,iris.target)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 0, 0]" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(clf.predict(iris.data[:3]))" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.fit(iris.data,iris.target_names[iris.target])" ] }, { "cell_type": "code", "execution_count": 107, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['setosa', 'setosa', 'setosa']" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(clf.predict(iris.data[:3]))" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='linear',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "from sklearn.svm import SVC\n", "\n", "rng = np.random.RandomState(0)\n", "X = rng.rand(100, 10)\n", "y = rng.binomial(1, 0.5, 100)\n", "X_test = rng.rand(5, 10)\n", "clf = SVC()\n", "clf.set_params(kernel='linear').fit(X,y)" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 1, 1, 0])" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n max_iter=-1, probability=False, random_state=None, shrinking=True,\n tol=0.001, verbose=False)" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.set_params(kernel='rbf').fit(X,y)" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 0])" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.predict(X_test)" ] }, { "cell_type": "heading", "metadata": {}, "level": 1, "source": [ "Muliclass vs. multilabel fitting" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 1, 1, 2])" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.svm import SVC\n", "from sklearn.multiclass import OneVsRestClassifier\n", "from sklearn.preprocessing import LabelBinarizer\n", "X = [[1,2],[2,4],[4,5],[3,2],[3,1]]\n", "y = [0,0,1,1,2]\n", "classif = OneVsRestClassifier(estimator=SVC(random_state=0))\n", "classif.fit(X,y).predict(X)" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 0],\n [1, 0, 0],\n [0, 1, 0],\n [0, 0, 0],\n [0, 0, 0]])" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = LabelBinarizer().fit_transform(y)\n", "classif.fit(X,y).predict(X)" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 1, 0, 0, 0],\n [1, 0, 1, 0, 0],\n [0, 1, 0, 1, 0],\n [1, 0, 1, 0, 0],\n [1, 0, 1, 0, 0]])" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import MultiLabelBinarizer\n", "y =[ [0,1],[0,2],[1,3],[0,2,3],[2,4]]\n", "y = MultiLabelBinarizer().fit_transform(y)\n", "classif.fit(X,y).predict(X)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2.0 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }