-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathhosts_to_domains
More file actions
executable file
·114 lines (99 loc) · 3.67 KB
/
hosts_to_domains
File metadata and controls
executable file
·114 lines (99 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
###########
# IMPORTS #
###########
import os
import sys
import argparse
def iter_input_lines(handle):
    """Yield the meaningful entries from an iterable of text lines.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or whose first non-blank character is ``#``, are
    skipped; everything else is yielded in input order.
    """
    for raw_line in handle:
        entry = raw_line.strip()
        # Guard clause: nothing to emit for blanks and comment lines.
        if not entry or entry.startswith('#'):
            continue
        yield entry
def extract_domains(hosts, suffixes):
    """Collect the parent domains of *hosts*, skipping bare suffixes.

    For a host with exactly two labels the host itself is treated as a
    domain. For longer hosts the left-most label is dropped and every
    remaining parent with at least two labels is considered.

    Args:
        hosts: iterable of hostname strings.
        suffixes: iterable of suffix strings (for example ``com``,
            ``co.uk``) that must never be reported as domains.

    Returns:
        A list of domains in the order they were first discovered, without
        duplicates.
    """
    # Suffixes are compared lowercased so that exclusion agrees with the
    # case-insensitive suffix match used by the depth filter.
    excluded = {suffix.lower() for suffix in suffixes}
    seen = set()
    domains = []
    for host in hosts:
        # A fully-qualified name may end with the root dot; left in place it
        # would create an empty final label and yield entries such as "com.".
        labels = host.rstrip('.').split('.')
        # Two-label hosts are already domains; otherwise skip the host label.
        first = 0 if len(labels) == 2 else 1
        for index in range(first, len(labels) - 1):
            domain = '.'.join(labels[index:])
            # Set lookups replace rebuilding "domains + suffixes" per check.
            if domain in seen or domain.lower() in excluded:
                continue
            seen.add(domain)
            domains.append(domain)
    return domains


def filter_by_depth(domains, suffixes, depth):
    """Keep the domains that sit at most *depth* labels left of their suffix.

    Args:
        domains: iterable of domain strings.
        suffixes: iterable of suffix strings used for matching.
        depth: maximum number of labels allowed to the left of the matched
            suffix; values below 1 disable filtering.

    Returns:
        A list of the retained domains in input order. When filtering is
        active, a domain that matches none of the suffixes is dropped.
    """
    if depth < 1:
        return list(domains)
    # Most-dotted suffixes first so "co.uk" is tried before "uk". The order
    # does not depend on the domain, so it is computed once up front.
    ranked = sorted(
        (suffix.lower() for suffix in suffixes),
        key=lambda suffix: suffix.count('.'),
        reverse=True,
    )
    kept = []
    for domain in domains:
        lowered = domain.lower()
        for suffix in ranked:
            if not lowered.endswith('.' + suffix):
                continue
            # Labels left of the suffix == dots in the remainder + 1.
            remainder = lowered[:-(len(suffix) + 1)]
            if remainder.count('.') < depth:
                kept.append(domain)
            # Only the first (most specific) matching suffix decides.
            break
    return kept


########
# MAIN #
########
if __name__ == '__main__':
    default_suffixes = "%s/wordlists/dns/tlds.txt" % os.path.dirname(os.path.realpath(__file__))
    desc = (
        'Extract parent domains from supplied FQDNs and print one domain per '
        'line.'
    )
    epilog = '''
Input:
- Read hostnames from FILE or STDIN, one FQDN per line.
- Blank lines and lines starting with # are ignored.
Behavior:
- For each input hostname, emit all parent domains except suffixes listed
in the suffix file.
- Output order follows the first time a domain is discovered.
- --depth limits how many labels may appear to the left of the matched
suffix.
Examples:
hosts_to_domains hosts.txt
cat hosts.txt | hosts_to_domains
hosts_to_domains -d 1 hosts.txt
hosts_to_domains -s custom_suffixes.txt hosts.txt
Depth examples:
- With suffix com, --depth 1 keeps example.com but skips a.example.com.
- With suffix co.uk, --depth 1 keeps example.co.uk but skips a.example.co.uk.
'''
    parser = argparse.ArgumentParser(
        description=desc,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('file',
                        nargs='?',
                        type=argparse.FileType('r'),
                        action='store',
                        help='file containing FQDNs or hostnames split by newlines, otherwise read from STDIN',
                        metavar='FILE',
                        default=sys.stdin)
    # const makes a bare "-s" fall back to the default suffix file instead of
    # yielding None, which would crash when the suffixes are read below.
    parser.add_argument('-s', '--suffixes',
                        nargs='?',
                        type=argparse.FileType('r'),
                        action='store',
                        help='file containing suffixes/TLDs, one per line, used to avoid printing bare suffixes and to evaluate --depth (default: %s)' % default_suffixes,
                        metavar='FILE',
                        const=default_suffixes,
                        default=default_suffixes)
    parser.add_argument('-d', '--depth',
                        type=int,
                        action='store',
                        help='maximum labels allowed to the left of the matched suffix; 0 disables depth filtering',
                        metavar='INT',
                        default=0)
    args = parser.parse_args()

    # Reading may block on STDIN; exit quietly if the user interrupts it.
    try:
        hosts = list(iter_input_lines(args.file))
        suffixes = list(iter_input_lines(args.suffixes))
    except KeyboardInterrupt:
        sys.exit()

    for domain in filter_by_depth(extract_domains(hosts, suffixes), suffixes, args.depth):
        print(domain)