-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiff-logs
More file actions
executable file
·111 lines (105 loc) · 5.1 KB
/
diff-logs
File metadata and controls
executable file
·111 lines (105 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use File::Temp ();
# Normalization rules, applied to every input line in the order listed here
# (each rule sees the output of the rules before it).
#
# Every entry is a two-element array ref: [ PATTERN, REPLACEMENT ].
#   * PATTERN is a compiled regex; all of its matches in a line are replaced.
#   * REPLACEMENT is either
#       - a plain string that is substituted as-is, or
#       - Perl source (written with q[...]) that contains '$+' and is
#         evaluated per match, so it can reuse named captures via $+{name}.
#     The presence of the substring '$+' is what selects the second form.
# Plain-string replacements must themselves be fully matched by their own
# PATTERN; this is asserted by the self-test that follows the table.
my @PATTERNS = (
# Date/time
[ qr/\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?/, '2111-11-11 11:11:11' ],
[ qr/\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}/, 'Thu, 11 Nov 2111 11:11:11 GMT' ],
[ qr/\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+/, '11-11-2111 11:11:11.111111' ],
[ qr/[A-Z][a-z]{2} [ \d]\d \d{2}(:\d{2}){1,2}/, 'Nov 11 11:11' ], # `ls -l` / journalctl format
# Other timestamp
[ qr/\b\d+(?:\.\d+)?s(?:ec)?\b/, '1.1s' ],
[ qr/\b(in|since) \d+\.\d+/, 'in 1.1' ],
[ qr/^\[ *\d+\.\d+\] /, '[ 111.1] ' ], # dmesg output
# File/download sizes (the number collapses to "1 ", the matched unit suffix is kept)
[ qr/\d+(?:\.\d+)?(?:\/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b/i, q["1 " . $+{suffix}] ],
# TCP / HTTP
[ qr/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, '11.1.1.1' ], # IPv4
[ qr/:\d{5,5}\b/, ':11111' ], # Remote port
[ qr/\bport \d{5,5}\b/, 'port 11111' ], # Remote port
[ qr{\bW/(?<quote>\\?")[^"]*\k<quote>}, 'W/"ETag"' ], # ETag header
# Common files
[ qr{/tmp/[^\s/:"']{6,}(?:/[^\s/:"']+)*/?}, '/tmp/d1ff1065' ],
# journald "process[pid]:"
# (the second form matches a parenthesized process name; its replacement
# emits the name without the parentheses)
[ qr/(?<= )(?<process>[-\w]+)\[\d{3,}\]:/, q[$+{process} . "[11111]:"] ],
[ qr/(?<= )\((?<process>[-\w ]+)\)\[\d{3,}\]:/, q[$+{process} . "[11111]:"] ],
# Common tools
[ qr/(?<step_no>(?:\s|\A)#\d+) \d+\.\d+/, q[$+{step_no} . " 1"] ], # Docker build steps
# strace process PIDs
[ qr/(?<prefix>(?:\b|_?)pid[ =])\d{4,}\b/, q[$+{prefix} . "11111"] ],
[ qr/(?<prefix>strace: Process )\d+/, q[$+{prefix} . "111111"] ],
# Generic identifiers and numbers; these run after the more specific rules above
[ qr/(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}/, 'd1ff1065-d1ff-1065-1007-d1ff1065' ], # UUID
[ qr/[a-zA-Z0-9]{18,}/, 'AAAAAAAAAAAAAAAAAA' ], # Long payload
[ qr/\d{3,}\.\d{1,16}/, '111.1' ], # Floating numbers
[ qr/[a-fA-F0-9]{7,}/, 'd1ff1065' ], # Hash digest
# Function pointer args, followed by bufsize, such as in strace
[ qr/\b0x[a-f\d]+(, 0x[a-f\d]{4,})*(, \d{2,})?/, '0xd1ff1065' ],
# Progress bar, e.g. in pip, tqdm (only the leading indentation is kept)
[ qr{(?<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\x{2500}-\x{259F}=.\-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*}, q[$+{indent} . "......."] ],
);
# Start-up sanity check: a plain-string replacement must be matched in full by
# its own pattern, so that normalizing already-normalized text changes nothing.
# Rules whose replacement is evaluated code (it contains '$+') are not checked.
foreach my $rule (@PATTERNS) {
    my ($regex, $substitute) = @{$rule};

    # Evaluated replacements cannot be verified by a simple string match.
    next if index($substitute, '$+') >= 0;

    # The whole replacement text has to match, not merely a part of it.
    next if $substitute =~ m/\A(?:$regex)\z/s;

    die "Assertion failed: Pattern-replacement pair '$regex' => '$substitute' not idempotent!";
}
# Filter one log stream into its diff-friendly form.
#
# Arguments:
#   $in_fh  - handle to read from, one line at a time (expected to yield raw bytes)
#   $out_fh - handle that receives each normalized line
#
# Each line is first made valid UTF-8 (every byte that is not part of a
# well-formed sequence becomes a literal \xHH escape), then decoded to
# characters, and finally passed through every @PATTERNS rule in table order.
# Dies if a line still fails to decode after sanitization.
sub normalize {
    my ($in_fh, $out_fh) = @_;

    # Group 1: one well-formed UTF-8 sequence.
    # Group 2: any other single byte, i.e. one that is invalid at this position.
    my $utf8_or_stray = qr{
        (
            [\x00-\x7F]                         # 1 byte
          | [\xC2-\xDF][\x80-\xBF]              # 2 bytes
          | \xE0[\xA0-\xBF][\x80-\xBF]          # 3 bytes, lead E0
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # 3 bytes, other leads
          | \xED[\x80-\x9F][\x80-\xBF]          # 3 bytes, lead ED
          | \xF0[\x90-\xBF][\x80-\xBF]{2}       # 4 bytes, lead F0
          | [\xF1-\xF3][\x80-\xBF]{3}           # 4 bytes, leads F1-F3
          | \xF4[\x80-\x8F][\x80-\xBF]{2}       # 4 bytes, lead F4
        )
      |
        (.)
    }x;

    while (defined(my $text = readline $in_fh)) {
        # Keep valid sequences untouched; spell out stray bytes as \xHH.
        $text =~ s{$utf8_or_stray}{ defined $2 ? sprintf('\\x%02X', ord $2) : $1 }ge;

        die "Assertion failed: invalid UTF-8 even after sanitization"
            unless utf8::decode($text);

        foreach my $entry (@PATTERNS) {
            my ($regex, $substitute) = @{$entry};

            if (index($substitute, '$+') == -1) {
                # Plain text replacement.
                $text =~ s/$regex/$substitute/g;
            }
            else {
                # The replacement is Perl source using named captures:
                # /ee first yields that source string, then evaluates it.
                $text =~ s/$regex/$substitute/gee;
            }
        }

        print {$out_fh} $text;
    }
}
# Main
#
# Usage modes, selected by the number of command-line arguments:
#   0 args - act as a filter: normalize STDIN and write the result to STDOUT.
#   2 args - normalize both files into temporary files, run $DIFFTOOL
#            (default: diff) on the two temporary files, and exit with the
#            tool's exit status.
#   other  - print usage to STDERR and exit 1.
my $argc = @ARGV;
if ($argc == 0) {
    binmode(STDIN, ":raw"); binmode(STDOUT, ":utf8"); ## no critic
    normalize(\*STDIN, \*STDOUT);
} elsif ($argc == 2) {
    my ($file1, $file2) = @ARGV;
    my $temp1 = File::Temp->new(UNLINK => 1);
    my $temp2 = File::Temp->new(UNLINK => 1);
    binmode($temp1, ':utf8'); binmode($temp2, ':utf8'); ## no critic
    open my $fh1_in, '<:raw', $file1 or die "Error: Cannot read '$file1': $!"; ## no critic
    open my $fh2_in, '<:raw', $file2 or die "Error: Cannot read '$file2': $!"; ## no critic
    normalize($fh1_in, $temp1);
    normalize($fh2_in, $temp2);
    close $fh1_in; close $fh2_in;

    # The diff tool reads the temporary files by name, so everything buffered
    # must be on disk before it starts.
    for my $temp ($temp1, $temp2) {
        $temp->flush or die "Error: Cannot write temporary file '" . $temp->filename . "': $!";
    }

    my $difftool = $ENV{DIFFTOOL} || 'diff';
    my @command = (
        $difftool,
        ($difftool eq 'diff' ? '--color=auto' : ()),
        $temp1->filename,
        $temp2->filename,
    );

    # Run the tool with system() rather than exec(): exec() replaces this
    # process without running object destructors or END blocks, so the
    # UNLINK => 1 temporary files would never be removed. The list form
    # bypasses the shell, exactly as the list form of exec() does.
    my $status = system(@command);
    if ($status == -1) {
        die "Error: Cannot run '$difftool': $!";
    }

    # Hand the tool's result to our caller: its own exit code, or the
    # conventional 128+N when it was terminated by signal N.
    my $signal = $? & 127;
    exit($signal ? 128 + $signal : $? >> 8);
} else {
    print STDERR "Usage: $0 < FILE # Print log file diff-friendly\n";
    print STDERR " $0 FILE1 FILE2 # Invoke \$DIFFTOOL (e.g. diff)\n";
    exit 1;
}