forked from visualpython/visualpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpandasCommand.py
More file actions
106 lines (98 loc) · 3.36 KB
/
pandasCommand.py
File metadata and controls
106 lines (98 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
Pandas Objects Command
"""
import pandas as _vp_pd
import IPython
# LAB: prevent import error
_ipython_version = IPython.version_info
if _ipython_version[0] < 7 or ( _ipython_version[0] == 7 and _ipython_version[1] <= 13 ):
from IPython.core.display import display
else:
# from IPython.core.display is deprecated since IPython 7.14
from IPython.display import display
def _vp_get_rows_list(df):
    """
    Describe every entry of the dataframe index.

    Returns a list with one dict per index entry, in index order:
      - 'label'    : the index entry (its str() form for a Timestamp)
      - 'value'    : the entry wrapped in single quotes for str and
                     Timestamp entries, otherwise the raw entry
      - 'location' : position of the entry in the index
      - 'index_dtype' : str(df.index.dtype); only present for str and
                     Timestamp entries
    """
    dtype_name = str(df.index.dtype)
    rows = []
    for position, key in enumerate(df.index):
        # Types are matched by name, so pandas' Timestamp class does not
        # have to be referenced here.
        kind = type(key).__name__
        entry = {'label': key, 'value': key, 'location': position}
        if kind in ('str', 'Timestamp'):
            if kind == 'Timestamp':
                entry['label'] = str(key)
            entry['value'] = "'{}'".format(key)
            entry['index_dtype'] = dtype_name
        rows.append(entry)
    return rows
def _vp_get_columns_list(df):
    """
    Get Columns List with Detail Information

    Returns a list with one dict per column of ``df``, in column order:
      - 'label'    : the column label
      - 'value'    : the label wrapped in single quotes when it is a str,
                     its str() form when it is a Timestamp, otherwise the
                     raw label
      - 'dtype'    : str() of the column dtype
      - 'array'    : str() of the column's ``.array``
      - 'location' : position of the column
      - 'category' : for 'object' dtype columns with at most 20 distinct
                     non-null values, a list of {'value', 'label'} dicts
                     (str values are single-quoted); otherwise []

    Columns are read by position, so frames with duplicated column labels
    are handled (label-based ``df[c]`` returns a DataFrame in that case).
    """
    colList = []
    for i, c in enumerate(df.columns):
        # Positional access always yields a single Series, even when the
        # label c occurs more than once among the columns.
        series = df.iloc[:, i]
        dtypeName = str(series.dtype)
        cInfo = {
            'label': c,
            'value': c,
            'dtype': dtypeName,
            'array': str(series.array),
            'location': i,
        }

        # value
        labelType = type(c).__name__
        if labelType == 'str':
            cInfo['value'] = "'{}'".format(c)
        elif labelType == 'Timestamp':
            cInfo['value'] = str(c)

        # category - kept small because of the iopub data rate limit
        category = []
        if dtypeName == 'object':
            uniqValues = series.dropna().unique()
            if len(uniqValues) <= 20:
                category = [
                    {"value": "'{}'".format(u) if type(u) is str else u, "label": u}
                    for u in uniqValues
                ]
        cInfo['category'] = category

        colList.append(cInfo)
    return colList
def _vp_get_multi_columns_list(dfs=None):
    """
    Get Columns List with Detail Information of multiple dataframe

    Only columns whose label is present in every dataframe of ``dfs`` are
    reported. The result follows the column order of the first dataframe,
    so it is stable between calls and between interpreter runs.

    Parameters:
      dfs : sequence of dataframes; None or an empty sequence gives []

    Returns a list of dicts, one per common column:
      - 'label'    : the column label
      - 'value'    : the label wrapped in single quotes when it is a str,
                     its str() form when it is a Timestamp, otherwise the
                     raw label
      - 'dtype'    : str() of the column dtype in the first dataframe
      - 'location' : position of the entry within the returned list
    """
    if dfs is None or len(dfs) <= 0:
        return []

    first = dfs[0]
    common_set = set(first.columns)
    for df in dfs[1:]:
        common_set &= set(df.columns)

    colList = []
    seen = set()
    # Walk the first frame's columns instead of the set so the output order
    # is deterministic; 'seen' keeps one entry per label, as the set did.
    for pos, c in enumerate(first.columns):
        if c not in common_set or c in seen:
            continue
        seen.add(c)
        cInfo = {
            'label': c,
            'value': c,
            # positional access stays a Series even for a repeated label
            'dtype': str(first.iloc[:, pos].dtype),
            'location': len(colList),
        }
        # value
        labelType = type(c).__name__
        if labelType == 'str':
            cInfo['value'] = "'{}'".format(c)
        elif labelType == 'Timestamp':
            cInfo['value'] = str(c)
        colList.append(cInfo)
    return colList
def _vp_get_column_category(df, col):
    """
    Get a column's unique values (categorical data only, limit 20)

    Returns a list of {'value', 'label'} dicts, one per distinct non-null
    value of df[col]. str values are passed through str.format for 'value';
    other values are used as they are. A column with more than 20 distinct
    values gives an empty list.
    """
    distinct = df[col].dropna().unique()
    if len(distinct) > 20:
        return []
    options = []
    for item in distinct:
        rendered = "{}".format(item) if type(item) is str else item
        options.append({"value": rendered, "label": item})
    return options
def _vp_get_dataframe_as_list(df):
    """
    Convert the dataframe's values into a nested list (one inner list per row)
    """
    matrix = df.values
    return matrix.tolist()
def _vp_display_dataframe_info(df):
    """
    Display a per-column summary of the dataframe

    The displayed table has, side by side, the non-null count and dtype of
    each column ('Non-Null Count', 'Dtype') and the transposed output of
    df.describe(). Nothing is returned.
    """
    counts = df.notnull().sum()
    dtypes = df.dtypes
    stats = df.describe().T
    summary = _vp_pd.concat(
        [counts, dtypes],
        axis=1,
        keys=['Non-Null Count', 'Dtype'],
    )
    combined = _vp_pd.concat([summary, stats], axis=1)
    display(combined)