|
1 | 1 | { |
2 | 2 | "metadata": { |
3 | 3 | "name": "", |
4 | | - "signature": "sha256:faa7cde23cd1ec6598ad386b44eabe2d06ad86d6bdd84c7d999dd174c7df48a6" |
| 4 | + "signature": "sha256:c5925e90d7a8c3e6fb36f4e3ca876248f4ba69529367d87b73ef3da2b5415766" |
5 | 5 | }, |
6 | 6 | "nbformat": 3, |
7 | 7 | "nbformat_minor": 0, |
|
64 | 64 | " - [Creating lists using conditional statements](#create_cond_list)\n", |
65 | 65 | "- [Dictionary operations](#dict_ops) \n", |
66 | 66 | " - [Adding elements to a dictionary](#adding_dict_elements)\n", |
67 | | - "- [Comprehensions vs. for-loops](#comprehensions)" |
| 67 | + "- [Comprehensions vs. for-loops](#comprehensions)\n", |
| 68 | + "- [Copying files by searching directory trees](#find_copy)" |
68 | 69 | ] |
69 | 70 | }, |
70 | 71 | { |
|
1023 | 1024 | "# Comprehensions vs. for-loops" |
1024 | 1025 | ] |
1025 | 1026 | }, |
| 1027 | + { |
| 1028 | + "cell_type": "markdown", |
| 1029 | + "metadata": {}, |
| 1030 | + "source": [ |
| 1031 | + "Comprehensions are not only shorter and prettier than ye goode olde for-loop, \n", |
| 1032 | + "but they are also up to ~1.2x faster." |
| 1033 | + ] |
| 1034 | + }, |
1026 | 1035 | { |
1027 | 1036 | "cell_type": "code", |
1028 | 1037 | "collapsed": false, |
|
1126 | 1135 | "language": "python", |
1127 | 1136 | "metadata": {}, |
1128 | 1137 | "outputs": [], |
1129 | | - "prompt_number": 11 |
| 1138 | + "prompt_number": 23 |
1130 | 1139 | }, |
1131 | 1140 | { |
1132 | 1141 | "cell_type": "code", |
|
1138 | 1147 | "language": "python", |
1139 | 1148 | "metadata": {}, |
1140 | 1149 | "outputs": [], |
1141 | | - "prompt_number": 12 |
| 1150 | + "prompt_number": 24 |
1142 | 1151 | }, |
1143 | 1152 | { |
1144 | 1153 | "cell_type": "code", |
|
1154 | 1163 | "output_type": "stream", |
1155 | 1164 | "stream": "stdout", |
1156 | 1165 | "text": [ |
1157 | | - "10000 loops, best of 3: 130 \u00b5s per loop\n", |
1158 | | - "10000 loops, best of 3: 114 \u00b5s per loop" |
| 1166 | + "10000 loops, best of 3: 129 \u00b5s per loop\n", |
| 1167 | + "10000 loops, best of 3: 111 \u00b5s per loop" |
1159 | 1168 | ] |
1160 | 1169 | }, |
1161 | 1170 | { |
|
1166 | 1175 | ] |
1167 | 1176 | } |
1168 | 1177 | ], |
1169 | | - "prompt_number": 14 |
| 1178 | + "prompt_number": 25 |
1170 | 1179 | }, |
1171 | 1180 | { |
1172 | 1181 | "cell_type": "markdown", |
|
1189 | 1198 | "language": "python", |
1190 | 1199 | "metadata": {}, |
1191 | 1200 | "outputs": [], |
1192 | | - "prompt_number": 15 |
| 1201 | + "prompt_number": 26 |
1193 | 1202 | }, |
1194 | 1203 | { |
1195 | 1204 | "cell_type": "code", |
|
1201 | 1210 | "language": "python", |
1202 | 1211 | "metadata": {}, |
1203 | 1212 | "outputs": [], |
1204 | | - "prompt_number": 17 |
| 1213 | + "prompt_number": 27 |
1205 | 1214 | }, |
1206 | 1215 | { |
1207 | 1216 | "cell_type": "code", |
|
1217 | 1226 | "output_type": "stream", |
1218 | 1227 | "stream": "stdout", |
1219 | 1228 | "text": [ |
1220 | | - "10000 loops, best of 3: 120 \u00b5s per loop\n", |
1221 | | - "10000 loops, best of 3: 118 \u00b5s per loop" |
| 1229 | + "10000 loops, best of 3: 121 \u00b5s per loop\n", |
| 1230 | + "10000 loops, best of 3: 127 \u00b5s per loop" |
1222 | 1231 | ] |
1223 | 1232 | }, |
1224 | 1233 | { |
|
1229 | 1238 | ] |
1230 | 1239 | } |
1231 | 1240 | ], |
1232 | | - "prompt_number": 18 |
| 1241 | + "prompt_number": 28 |
| 1242 | + }, |
| 1243 | + { |
| 1244 | + "cell_type": "markdown", |
| 1245 | + "metadata": {}, |
| 1246 | + "source": [ |
| 1247 | + "<a name=\"find_copy\"></a>\n", |
| 1248 | + "<br>\n", |
| 1249 | + "<br>" |
| 1250 | + ] |
| 1251 | + }, |
| 1252 | + { |
| 1253 | + "cell_type": "markdown", |
| 1254 | + "metadata": {}, |
| 1255 | + "source": [ |
| 1256 | + "# Copying files by searching directory trees" |
| 1257 | + ] |
| 1258 | + }, |
| 1259 | + { |
| 1260 | + "cell_type": "markdown", |
| 1261 | + "metadata": {}, |
| 1262 | + "source": [ |
| 1263 | + "Executing `Unix`/`Linux` shell commands:" |
| 1264 | + ] |
1233 | 1265 | }, |
1234 | 1266 | { |
1235 | 1267 | "cell_type": "code", |
1236 | 1268 | "collapsed": false, |
1237 | | - "input": [], |
| 1269 | + "input": [ |
| 1270 | + "import subprocess\n", |
| 1271 | + "\n", |
| 1272 | + "def subprocess_findcopy(path, search_str, dest): \n", |
| 1273 | + " query = 'find %s -name \"%s\" -exec cp {}\" %s \\;' %(path, search_str, dest)\n", |
| 1274 | + " subprocess.call(query, shell=True)\n", |
| 1275 | + " return " |
| 1276 | + ], |
1238 | 1277 | "language": "python", |
1239 | 1278 | "metadata": {}, |
1240 | | - "outputs": [] |
| 1279 | + "outputs": [], |
| 1280 | + "prompt_number": 2 |
| 1281 | + }, |
| 1282 | + { |
| 1283 | + "cell_type": "markdown", |
| 1284 | + "metadata": {}, |
| 1285 | + "source": [ |
| 1286 | + "Using Python's `os.walk()` to search the directory tree recursively and matching patterns via `fnmatch.filter()`" |
| 1287 | + ] |
| 1288 | + }, |
| 1289 | + { |
| 1290 | + "cell_type": "code", |
| 1291 | + "collapsed": false, |
| 1292 | + "input": [ |
| 1293 | + "import shutil\n", |
| 1294 | + "import os\n", |
| 1295 | + "import fnmatch\n", |
| 1296 | + "\n", |
| 1297 | + "def walk_findcopy(path, search_str, dest):\n", |
| 1298 | + " for path, subdirs, files in os.walk(path):\n", |
| 1299 | + " for name in fnmatch.filter(files, search_str):\n", |
| 1300 | + " shutil.copy(os.path.join(path,name), dest)" |
| 1301 | + ], |
| 1302 | + "language": "python", |
| 1303 | + "metadata": {}, |
| 1304 | + "outputs": [], |
| 1305 | + "prompt_number": 3 |
| 1306 | + }, |
| 1307 | + { |
| 1308 | + "cell_type": "code", |
| 1309 | + "collapsed": false, |
| 1310 | + "input": [ |
| 1311 | + "import timeit  # NOTE(review): appears unused in this cell; the %timeit lines below are IPython magics\n", |
| 1312 | + "\n", |
| 1313 | + "print('small tree')\n", |
| 1314 | + "inpath = '/Users/sebastian/Desktop/testdir_in'  # hard-coded local directory; adjust for your machine\n", |
| 1315 | + "outpath = '/Users/sebastian/Desktop/testdir_out'  # copy destination passed as `dest`\n", |
| 1316 | + "searchstr = '*.png'  # glob pattern handed to both implementations\n", |
| 1317 | + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", |
| 1318 | + "%timeit walk_findcopy(inpath, searchstr, outpath)\n", |
| 1319 | + "\n", |
| 1320 | + "print('\\nlarger tree')\n", |
| 1321 | + "inpath = '/Users/sebastian/Dropbox'  # hard-coded local directory used as the larger search root\n", |
| 1322 | + "outpath = '/Users/sebastian/Desktop/testdir_out'  # same destination as the small-tree run\n", |
| 1323 | + "searchstr = '*.csv'  # different pattern than the small-tree run\n", |
| 1324 | + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", |
| 1325 | + "%timeit walk_findcopy(inpath, searchstr, outpath)" |
| 1326 | + ], |
| 1327 | + "language": "python", |
| 1328 | + "metadata": {}, |
| 1329 | + "outputs": [ |
| 1330 | + { |
| 1331 | + "output_type": "stream", |
| 1332 | + "stream": "stdout", |
| 1333 | + "text": [ |
| 1334 | + "small tree\n", |
| 1335 | + "100 loops, best of 3: 8.48 ms per loop" |
| 1336 | + ] |
| 1337 | + }, |
| 1338 | + { |
| 1339 | + "output_type": "stream", |
| 1340 | + "stream": "stdout", |
| 1341 | + "text": [ |
| 1342 | + "\n", |
| 1343 | + "100 loops, best of 3: 22.3 ms per loop" |
| 1344 | + ] |
| 1345 | + }, |
| 1346 | + { |
| 1347 | + "output_type": "stream", |
| 1348 | + "stream": "stdout", |
| 1349 | + "text": [ |
| 1350 | + "\n", |
| 1351 | + "\n", |
| 1352 | + "larger tree\n", |
| 1353 | + "10 loops, best of 3: 7.13 ms per loop" |
| 1354 | + ] |
| 1355 | + }, |
| 1356 | + { |
| 1357 | + "output_type": "stream", |
| 1358 | + "stream": "stdout", |
| 1359 | + "text": [ |
| 1360 | + "\n", |
| 1361 | + "1 loops, best of 3: 413 ms per loop" |
| 1362 | + ] |
| 1363 | + }, |
| 1364 | + { |
| 1365 | + "output_type": "stream", |
| 1366 | + "stream": "stdout", |
| 1367 | + "text": [ |
| 1368 | + "\n" |
| 1369 | + ] |
| 1370 | + } |
| 1371 | + ], |
| 1372 | + "prompt_number": 4 |
| 1373 | + }, |
| 1374 | + { |
| 1375 | + "cell_type": "markdown", |
| 1376 | + "metadata": {}, |
| 1377 | + "source": [ |
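| 1378 | + "I have to say that I am really positively surprised. The shell's `find` scales even better than expected! (Caveat: its timing barely changes between the small and the larger tree, so it is worth verifying that the files were actually copied before trusting this comparison.)" |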
| 1379 | + ] |
1241 | 1380 | } |
1242 | 1381 | ], |
1243 | 1382 | "metadata": {} |
|
0 commit comments