Skip to content

Commit 4cf42e9

Browse files
sdiao authored and jamesls committed
EMR: Support installing hive-site.xml in create-cluster and install-applications commands.
1 parent bf91ec8 commit 4cf42e9

5 files changed

Lines changed: 118 additions & 28 deletions

File tree

awscli/customizations/emr/applicationutils.py

Lines changed: 55 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,19 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None):
3333
if hive_version is None:
3434
hive_version = constants.LATEST
3535
step_list.append(
36-
emrutils.build_hive_install_step(
36+
_build_install_hive_step(
3737
region=parsed_globals.region,
3838
version=hive_version))
39+
args = app_config.get('Args')
40+
if args is not None:
41+
hive_site_path = _find_matching_arg(
42+
key=constants.HIVE_SITE_KEY, args_list=args)
43+
if hive_site_path is not None:
44+
step_list.append(
45+
_build_install_hive_site_step(
46+
region=parsed_globals.region,
47+
version=hive_version,
48+
hive_site_path=hive_site_path))
3949
elif app_name == constants.PIG:
4050
pig_version = app_config.get('Version')
4151
if pig_version is None:
@@ -122,4 +132,47 @@ def build_impala_install_bootstrap_action(region, version, args=None):
122132
path=emrutils.build_s3_link(
123133
relative_path=constants.IMPALA_INSTALL_PATH,
124134
region=region),
125-
args=args_list)
135+
args=args_list)
136+
137+
138+
def _build_install_hive_step(region, version,
139+
action_on_failure=constants.TERMINATE_CLUSTER):
140+
step_args = [
141+
emrutils.build_s3_link(constants.HIVE_SCRIPT_PATH, region),
142+
constants.INSTALL_HIVE_ARG,
143+
constants.BASE_PATH_ARG,
144+
emrutils.build_s3_link(constants.HIVE_BASE_PATH),
145+
constants.HIVE_VERSIONS,
146+
version]
147+
step = emrutils.build_step(
148+
name=constants.INSTALL_HIVE_NAME,
149+
action_on_failure=action_on_failure,
150+
jar=emrutils.build_s3_link(constants.SCRIPT_RUNNER_PATH, region),
151+
args=step_args)
152+
return step
153+
154+
155+
def _build_install_hive_site_step(region, version, hive_site_path,
156+
action_on_failure=constants.CANCEL_AND_WAIT):
157+
step_args = [
158+
emrutils.build_s3_link(constants.HIVE_SCRIPT_PATH, region),
159+
constants.BASE_PATH_ARG,
160+
emrutils.build_s3_link(constants.HIVE_BASE_PATH),
161+
constants.INSTALL_HIVE_SITE_ARG,
162+
hive_site_path,
163+
constants.HIVE_VERSIONS,
164+
version]
165+
step = emrutils.build_step(
166+
name=constants.INSTALL_HIVE_SITE_NAME,
167+
action_on_failure=action_on_failure,
168+
jar=emrutils.build_s3_link(constants.SCRIPT_RUNNER_PATH, region),
169+
args=step_args)
170+
return step
171+
172+
173+
def _find_matching_arg(key, args_list):
174+
for arg in args_list:
175+
if key in arg:
176+
return arg
177+
178+
return None

awscli/customizations/emr/constants.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,9 @@
102102
INSTALL_PIG_NAME = 'Install Pig'
103103
INSTALL_HIVE_ARG = '--install-hive'
104104
INSTALL_HIVE_NAME = 'Install Hive'
105+
HIVE_SITE_KEY = '--hive-site'
106+
INSTALL_HIVE_SITE_ARG = '--install-hive-site'
107+
INSTALL_HIVE_SITE_NAME = 'Install Hive Site Configuration'
105108
BASE_PATH_ARG = '--base-path'
106109
INSTALL_GANGLIA_NAME = 'Install Ganglia'
107110
INSTALL_HBASE_NAME = 'Install HBase'

awscli/customizations/emr/emrutils.py

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -164,23 +164,6 @@ def build_pig_install_step(region, version,
164164
return step
165165

166166

167-
def build_hive_install_step(region, version,
168-
action_on_failure=constants.TERMINATE_CLUSTER):
169-
step_args = [
170-
build_s3_link(constants.HIVE_SCRIPT_PATH, region),
171-
constants.INSTALL_HIVE_ARG,
172-
constants.BASE_PATH_ARG,
173-
build_s3_link(constants.HIVE_BASE_PATH),
174-
constants.HIVE_VERSIONS,
175-
version]
176-
step = build_step(
177-
name=constants.INSTALL_HIVE_NAME,
178-
action_on_failure=action_on_failure,
179-
jar=build_s3_link(constants.SCRIPT_RUNNER_PATH, region),
180-
args=step_args)
181-
return step
182-
183-
184167
def call(session, operation_object, parameters, region_name=None,
185168
endpoint_url=None, verify=None):
186169
# We could get an error from get_endpoint() about not having

tests/unit/customizations/emr/test_create_cluster.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,22 @@
101101
'ActionOnFailure': 'TERMINATE_CLUSTER'
102102
}
103103

104+
INSTALL_HIVE_SITE_STEP = {
105+
'HadoopJarStep': {
106+
'Args': ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script',
107+
'--base-path',
108+
's3://us-east-1.elasticmapreduce/libs/hive',
109+
'--install-hive-site',
110+
'--hive-site=s3://test/hive-conf/hive-site.xml',
111+
'--hive-versions', 'latest'],
112+
'Jar':
113+
('s3://us-east-1.elasticmapreduce/libs/'
114+
'script-runner/script-runner.jar')
115+
},
116+
'Name': 'Install Hive Site Configuration',
117+
'ActionOnFailure': 'CANCEL_AND_WAIT'
118+
}
119+
104120
INSTALL_PIG_STEP = {
105121
'HadoopJarStep': {
106122
'Args': ['s3://elasticmapreduce/libs/pig/pig-script',
@@ -694,6 +710,16 @@ def test_install_hive_with_version(self):
694710
result['Steps'] = [steps]
695711
self.assert_params_for_cmd(cmd, result)
696712

713+
def test_install_hive_site(self):
714+
cmdline = (DEFAULT_CMD + '--applications Name=Hive,'
715+
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml]')
716+
result = copy.deepcopy(DEFAULT_RESULT)
717+
result['Steps'] = [INSTALL_HIVE_STEP, INSTALL_HIVE_SITE_STEP]
718+
self.assert_params_for_cmd(cmdline, result)
719+
cmdline = (DEFAULT_CMD + '--applications Name=Hive,'
720+
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml,k1]')
721+
self.assert_params_for_cmd(cmdline, result)
722+
697723
def test_install_pig_with_defaults(self):
698724
cmd = DEFAULT_CMD + '--applications Name=Pig'
699725
result = copy.deepcopy(DEFAULT_RESULT)

tests/unit/customizations/emr/test_install_applications.py

Lines changed: 34 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,22 @@
2828
'ActionOnFailure': 'TERMINATE_CLUSTER'
2929
}
3030

31+
INSTALL_HIVE_SITE_STEP = {
32+
'HadoopJarStep': {
33+
'Args': ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script',
34+
'--base-path',
35+
's3://us-east-1.elasticmapreduce/libs/hive',
36+
'--install-hive-site',
37+
'--hive-site=s3://test/hive-conf/hive-site.xml',
38+
'--hive-versions', 'latest'],
39+
'Jar':
40+
('s3://us-east-1.elasticmapreduce/libs/'
41+
'script-runner/script-runner.jar')
42+
},
43+
'Name': 'Install Hive Site Configuration',
44+
'ActionOnFailure': 'CANCEL_AND_WAIT'
45+
}
46+
3147
INSTALL_PIG_STEP = {
3248
'HadoopJarStep': {
3349
'Args': ['s3://elasticmapreduce/libs/pig/pig-script',
@@ -42,19 +58,31 @@
4258

4359

4460
class TestInstallApplications(BaseAWSCommandParamsTest):
45-
prefix = 'emr install-applications --cluster-id j-ABC123456'
61+
prefix = ('emr install-applications --cluster-id '
62+
'j-ABC123456 --applications ')
4663

4764
def test_intall_hive_with_version(self):
48-
cmdline = self.prefix + ' --applications Name=Hive,Version=0.8.1.8'
65+
cmdline = self.prefix + 'Name=Hive,Version=0.8.1.8'
4966

5067
step = copy.deepcopy(INSTALL_HIVE_STEP)
5168
step['HadoopJarStep']['Args'][5] = '0.8.1.8'
5269

5370
result = {'JobFlowId': 'j-ABC123456', 'Steps': [step]}
5471
self.assert_params_for_cmd(cmdline, result)
5572

73+
def test_install_hive_site(self):
74+
cmdline = (self.prefix + 'Name=Hive,'
75+
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml]')
76+
result = {'JobFlowId': 'j-ABC123456',
77+
'Steps': [INSTALL_HIVE_STEP, INSTALL_HIVE_SITE_STEP]
78+
}
79+
self.assert_params_for_cmd(cmdline, result)
80+
cmdline = (self.prefix + 'Name=Hive,'
81+
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml,k1]')
82+
self.assert_params_for_cmd(cmdline, result)
83+
5684
def test_intall_pig_with_version(self):
57-
cmdline = self.prefix + ' --applications Name=Pig,Version=0.9.2.1'
85+
cmdline = self.prefix + 'Name=Pig,Version=0.9.2.1'
5886

5987
step = copy.deepcopy(INSTALL_PIG_STEP)
6088
step['HadoopJarStep']['Args'][5] = '0.9.2.1'
@@ -63,15 +91,13 @@ def test_intall_pig_with_version(self):
6391
self.assert_params_for_cmd(cmdline, result)
6492

6593
def test_intall_hive_and_pig_without_version(self):
66-
cmdline = self.prefix + ' --cluster-id j-ABC123456 --applications Name=Hive' +\
67-
' Name=Pig'
94+
cmdline = self.prefix + 'Name=Hive Name=Pig'
6895
result = {'JobFlowId': 'j-ABC123456', 'Steps': [INSTALL_HIVE_STEP,
6996
INSTALL_PIG_STEP]}
7097
self.assert_params_for_cmd(cmdline, result)
7198

7299
def test_install_impala_error(self):
73-
cmdline = self.prefix + \
74-
' --cluster-id j-ABC123456 --applications Name=Impala'
100+
cmdline = self.prefix + ' Name=Impala'
75101

76102
expected_error_msg = "\naws: error: Impala cannot be installed on" +\
77103
" a running cluster. 'Name' should be one of the following:" +\
@@ -80,8 +106,7 @@ def test_install_impala_error(self):
80106
self.assertEqual(result[1], expected_error_msg)
81107

82108
def test_install_unknown_app_error(self):
83-
cmdline = self.prefix + \
84-
' --cluster-id j-ABC123456 --applications Name=unknown'
109+
cmdline = self.prefix + 'Name=unknown'
85110

86111
expected_error_msg = "\naws: error: Unknown application: unknown." +\
87112
" 'Name' should be one of the following: HIVE, PIG, HBASE," +\

0 commit comments

Comments
 (0)