-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataExtractor.py
More file actions
85 lines (60 loc) · 2.01 KB
/
DataExtractor.py
File metadata and controls
85 lines (60 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#! python3
# Extracts email addresses, phone numbers and website URLs from the clipboard text
import pyperclip
import re
# phoneRegex
# Matches phone numbers made of an optional 3-digit area code (bare or in
# parentheses), three digits, four digits and an optional extension.
# With findall() every result is a tuple of the nine groups:
#   [0] whole match, [1] area code, [2] separator, [3] first three digits,
#   [4] separator, [5] last four digits, [6] whole extension,
#   [7] extension keyword, [8] extension digits.
phoneRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    (\d{3})                         # first 3 digits
    (\s|-|\.)?                      # separator
    (\d{4})                         # last four digits
    (\s*(ext\.?|x)\s*(\d{2,5}))?    # extension keyword (ext, ext. or x) and its digits
    )''', re.VERBOSE)
# emailRegex
# Matches email addresses.  With findall() every result is a tuple:
#   [0] the whole address, [1] the final ".tld" part.
emailRegex = re.compile(r'''(
    [a-zA-Z0-9._%+-]+       # user name
    @                       # @ symbol
    [a-zA-Z0-9.-]+          # domain name
    (\.[a-zA-Z]{2,})        # dot-something; no upper bound so long TLDs are not cut off
    )''', re.VERBOSE)
# anotherPhoneRegex
# Matches 10-digit numbers grouped 3-3-4 with an optional "+NN" country code.
# With findall() every result is a tuple of the eight groups:
#   [0] whole match, [1] country code, [2] separator after the country code,
#   [3] first three digits, [4] separator, [5] next three digits,
#   [6] separator, [7] last four digits.
# The first separator is only consumed together with a country code, so a
# match never starts with stray whitespace; the dots are escaped so that only
# whitespace, '-' or a literal '.' may separate the digit groups.
anphoneRegex = re.compile(r'''(
    (?:(\+\d{2})(\s|-|\.)?)?    # country code and the separator following it
    (\d{3})                     # 3 digits
    (\s|-|\.)?                  # separator
    (\d{3})                     # 3 digits
    (\s|-|\.)?                  # separator
    (\d{4})                     # 4 digits
    )''', re.VERBOSE)
# WebsiteRegex
# Matches website addresses with an optional http:// or https:// scheme and
# an optional "www." prefix.  With findall() every result is a tuple:
#   [0] whole match, [1] scheme, [2] "://", [3] "www.",
#   [4] first dotted suffix, [5] optional second dotted suffix.
# The scheme is spelled out (not a character class) and "://" is only accepted
# directly after it, so fragments such as "ttp://" are not taken as a scheme.
webRegex = re.compile(r'''(
    (?:(https?)(://))?      # http:// or https://
    (www\.)?                # www.
    [a-zA-Z0-9]+            # website name
    (\.[a-zA-Z]{2,})        # .com or .co or .org (long suffixes are kept whole)
    (\.[a-zA-Z]{2,})?       # second suffix such as the .in of .co.in
    )''', re.VERBOSE)
# Read the clipboard and collect every match, pattern by pattern
text = str(pyperclip.paste())
matches = []
# NOTE: the phoneRegex pass is disabled. It joined area code, first three and
# last four digits with '-' and appended ' x' plus the extension digits when
# an extension was present.
for pattern in (emailRegex, anphoneRegex, webRegex):
    # Element 0 of each findall() tuple is the whole matched text
    matches.extend(found[0] for found in pattern.findall(text))
# Put the results back on the clipboard and echo them
if matches:
    joined = '\n'.join(matches)
    pyperclip.copy(joined)
    print('Copied to clipboard')
    print(joined)
else:
    print('No numbers or email addresses were found')