-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathkillLongRunningImpalaQueries.py
More file actions
156 lines (116 loc) · 4.79 KB
/
killLongRunningImpalaQueries.py
File metadata and controls
156 lines (116 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/python
## *******************************************************************************************
## killLongRunningImpalaQueries.py
##
## Kills Long Running Impala Queries
##
## Usage: ./killLongRunningImpalaQueries.py user queryRunningSeconds [KILL]
##
## Set queryRunningSeconds to the threshold considered "too long"
## for an Impala query to run, so that queries that have been running
## longer than that will be identified as queries to be killed
##
## The third argument "KILL" is optional
## Without this argument, no queries will actually be killed, instead a list
## of queries that are identified as running too long will just be printed to the console
## If the argument "KILL" is provided, a cancel command will be issued for each selected query
##
## CM versions <= 5.7 require Full Administrator role to cancel Impala queries
##
## Set the CM URL, Cluster Name, login and password in the settings below
##
## This script assumes there is only a single Impala service per cluster
##
## *******************************************************************************************
## ** imports *******************************
import os
import sys
from datetime import datetime, timedelta

from cm_api.api_client import ApiResource
## ** Settings ******************************
## Each value below is the built-in default. It can be overridden without
## editing this file by exporting the environment variable named in the
## corresponding os.environ.get() call (for example CM_PASSWORD), which keeps
## real credentials out of the script.
## Cloudera Manager Host
cm_host = os.environ.get("CM_HOST", "amit-citi-2-1.gce.cloudera.com")
## Cloudera Manager port (kept as a string: it is concatenated into a message)
cm_port = os.environ.get("CM_PORT", "7180")
## Cloudera Manager login with Full Administrator role
cm_login = os.environ.get("CM_LOGIN", "admin")
## Cloudera Manager password
cm_password = os.environ.get("CM_PASSWORD", "admin")
## Cluster Name (another example value: "Cloudera QuickStart")
cluster_name = os.environ.get("CM_CLUSTER_NAME", "Cluster 1")
## *****************************************
## strftime pattern used when printing a query's start time
fmt = '%Y-%m-%d %H:%M:%S %Z'
def printUsageMessage():
    """Print the command-line synopsis and two example invocations to stdout."""
    # Single-argument print(...) produces the same output under Python 2
    # (parenthesised expression) and Python 3 (function call).
    print("Usage: killLongRunningImpalaQueries.py <user> <queryRunningSeconds> [KILL]")
    print("Example that lists queries that have run more than 10 minutes for user jay:")
    print("./killLongRunningImpalaQueries.py jay 600")
    print("Example that kills queries that have run more than 10 minutes for user jay:")
    print("./killLongRunningImpalaQueries.py jay 600 KILL")
## ** Validate command line args *************
## Expected: <user> <queryRunningSeconds> [KILL], i.e. 3 or 4 entries in sys.argv.
## sys.exit is used rather than quit(): quit() is an interactive helper added
## by the site module and is not meant for use in programs.
if not 3 <= len(sys.argv) <= 4:
    printUsageMessage()
    sys.exit(1)
inUser = sys.argv[1]
## Kept as a string: it is spliced verbatim into the query filter further down
queryRunningSeconds = sys.argv[2]
if not queryRunningSeconds.isdigit():
    print("Error: the second argument must be a digit (number of seconds)")
    printUsageMessage()
    sys.exit(1)
## Without the optional third argument the script only lists matching queries
kill = False
if len(sys.argv) == 4:
    if sys.argv[3] != 'KILL':
        print("the only valid third argument is \"KILL\"")
        printUsageMessage()
        sys.exit(1)
    kill = True
## Connect to CM
print("\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port)
## To connect over TLS, additionally pass use_tls=True to ApiResource
api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login,
                  password=cm_password, version=11)
## Get the Cluster
cluster = api.get_cluster(cluster_name)
## Get the IMPALA service: the first service whose type is "IMPALA" is used,
## since this script assumes a single Impala service per cluster
impala_service = None
for service in cluster.get_all_services():
    if service.type == "IMPALA":
        impala_service = service
        print("Located Impala Service: " + service.name)
        break
if impala_service is None:
    print("Error: Could not locate Impala Service")
    ## sys.exit rather than quit(): quit() is an interactive helper from site
    sys.exit(1)
## A window of one day assumes queries have not been running more than 24 hours
now = datetime.utcnow()
start = now - timedelta(days=1)
print("Looking for Impala queries running more than " + str(queryRunningSeconds) + " seconds")
if kill:
    print("Queries will be killed")
## Build the filter: duration threshold AND owning user.
## NOTE(review): inUser is embedded unescaped between double quotes, so a user
## name that itself contains a double quote would yield a malformed filter.
filterStr1 = 'queryDuration > ' + queryRunningSeconds + 's'
print("filter string1 " + filterStr1)
filterStr = filterStr1 + ' AND user = "' + inUser + '"'
print("filter string " + filterStr)
## At most 1000 matching queries are requested from the service
impala_query_response = impala_service.get_impala_queries(
    start_time=start, end_time=now, filter_str=filterStr, limit=1000)
queries = impala_query_response.queries
longRunningQueryCount = 0
for query in queries:
    ## Skip queries whose state is reported as FINISHED or EXCEPTION
    if query.queryState in ('FINISHED', 'EXCEPTION'):
        continue
    longRunningQueryCount += 1
    ## Print the section header once, just before the first reported query
    if longRunningQueryCount == 1:
        print('-- long running queries -------------')
    print("queryState : " + query.queryState)
    print("queryId: " + query.queryId)
    print("user: " + query.user)
    print("startTime: " + query.startTime.strftime(fmt))
    ## Elapsed whole seconds between the query's start time and "now"
    query_duration = now - query.startTime
    print("query running time (seconds): " + str(query_duration.seconds + query_duration.days * 86400))
    print("SQL: " + query.statement)
    if kill:
        print("Attempting to kill query...")
        impala_service.cancel_impala_query(query.queryId)
    print('-------------------------------------')
if longRunningQueryCount == 0:
    print("No queries found")
print("done")