forked from pydata/xarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtutorial.py
More file actions
107 lines (85 loc) · 2.97 KB
/
tutorial.py
File metadata and controls
107 lines (85 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
'''
Useful for:
* users learning xarray
* building tutorials in the documentation.
'''
import hashlib
import os as _os
import warnings
from urllib.request import urlretrieve
from .backends.api import open_dataset as _open_dataset
_default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data'))
def file_md5_checksum(fname):
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
hash_md5.update(f.read())
return hash_md5.hexdigest()
# idea borrowed from Seaborn
def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
github_url='https://github.com/pydata/xarray-data',
branch='master', **kws):
"""
Load a dataset from the online repository (requires internet).
If a local copy is found then always use that to avoid network traffic.
Parameters
----------
name : str
Name of the netcdf file containing the dataset
ie. 'air_temperature'
cache_dir : string, optional
The directory in which to search for and write cached data.
cache : boolean, optional
If True, then cache data locally for use on subsequent calls
github_url : string
Github repository where the data is stored
branch : string
The git branch to download from
kws : dict, optional
Passed to xarray.open_dataset
See Also
--------
xarray.open_dataset
"""
longdir = _os.path.expanduser(cache_dir)
fullname = name + '.nc'
localfile = _os.sep.join((longdir, fullname))
md5name = name + '.md5'
md5file = _os.sep.join((longdir, md5name))
if not _os.path.exists(localfile):
# This will always leave this directory on disk.
# May want to add an option to remove it.
if not _os.path.isdir(longdir):
_os.mkdir(longdir)
url = '/'.join((github_url, 'raw', branch, fullname))
urlretrieve(url, localfile)
url = '/'.join((github_url, 'raw', branch, md5name))
urlretrieve(url, md5file)
localmd5 = file_md5_checksum(localfile)
with open(md5file, 'r') as f:
remotemd5 = f.read()
if localmd5 != remotemd5:
_os.remove(localfile)
msg = """
MD5 checksum does not match, try downloading dataset again.
"""
raise IOError(msg)
ds = _open_dataset(localfile, **kws)
if not cache:
ds = ds.load()
_os.remove(localfile)
return ds
def load_dataset(*args, **kwargs):
"""
`load_dataset` will be removed a future version of xarray. The current
behavior of this function can be achived by using
`tutorial.open_dataset(...).load()`.
See Also
--------
open_dataset
"""
warnings.warn(
"load_dataset` will be removed in a future version of xarray. The "
"current behavior of this function can be achived by using "
"`tutorial.open_dataset(...).load()`.",
DeprecationWarning, stacklevel=2)
return open_dataset(*args, **kwargs).load()