forked from munibanust/febrl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathname_corr.lst
More file actions
executable file
·69 lines (65 loc) · 2.78 KB
/
name_corr.lst
File metadata and controls
executable file
·69 lines (65 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# =============================================================================
# name_corr.lst - Correction list for name component
#
# Source: - NSW Health Department pattern and classification files
#
# Last update: 12/09/2002, Peter Christen
#
# Entries in the file are of the form:
#
# replacement := values
#
# where 'values' can be one word or character, or a comma-separated list of
# words or characters (the list may continue over several lines). Each value
# will be replaced by the 'replacement' string on the left side.
# Both the replacement string and each of the value strings must be enclosed
# in either single or double quotes.
#
# =============================================================================
# Freely extensible biomedical record linkage (Febrl) Version 0.2.2
# See http://datamining.anu.edu.au/projects/linkage.html
#
# AUSTRALIAN NATIONAL UNIVERSITY OPEN SOURCE LICENSE (ANUOS LICENSE)
# VERSION 1.1
#
# The contents of this file are subject to the ANUOS License Version 1.1 (the
# "License"); you may not use this file except in compliance with the License.
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
# The Original Software is "name_corr.lst".
# The Initial Developers of the Original Software are Dr Peter Christen
# (Department of Computer Science, Australian National University) and Dr Tim
# Churches (Centre for Epidemiology and Research, New South Wales Department
# of Health). Copyright (C) 2002, 2003 the Australian National University and
# others. All Rights Reserved.
# Contributors:
#
# =============================================================================
# Remove characters and words from input
#
# The replacement string of this entry is a single space, so every value
# listed here is replaced by one space. The value list of this single entry
# continues over the following lines. Word values are quoted together with
# their surrounding spaces (e.g. ' unknown ').
# NOTE(review): the surrounding spaces presumably restrict matches to whole
# words - confirm against the code that loads this list.
' ' := '.', '?', '~', '_', ':', ';', '^', '=', ' n a ', '*',
' n/a ', ' n.a.', '\', ' also ', ' name ', ' only ',
' abbrev ', ' initials ', ' unk ', ' unkn ',
' missing ', ' unknown '
# Correct words and symbols
#
# Each entry maps one or more variant spellings, abbreviations or symbols
# (right side) to a single replacement string (left side).
' and ' := '+', '&'
' baby ' := ' babe '
' baby of ' := ' babyof ', ' babeof ', ' b/o ', ' b.o.'
' daughter of ' := ' daughterof ', ' d/o ', ' d.o.'
' son of ' := ' sonof ', ' s/o ', ' s.o.'
' known as ' := ' knownas ', ' a.k.a. ', ' aka '
# A hyphen or a slash is replaced by a hyphen with a space on each side
' - ' := '-', '/'
# Brackets, both quote characters and the vertical bar are replaced by a
# vertical bar with a space on each side. A quote character used as a value
# is enclosed in the other kind of quote ('"' and "'").
' | ' := '<', '>', '(', ')', '[', ']', '{', '}', '"', "'", '|'
# Remove the apostrophe that follows a leading o, a, l or i (as in o'brian)
#
# The first four entries cover a letter preceded by a space, the last four a
# letter preceded by a hyphen. The values are enclosed in double quotes
# because they contain a single quote character.
' o' := " o'"
' a' := " a'"
' l' := " l'"
' i' := " i'"
'-o' := "-o'"
'-a' := "-a'"
'-l' := "-l'"
'-i' := "-i'"
# Replace ordinal words by roman numerals
#
# NOTE(review): the entry for ' first ' below is commented out and therefore
# not applied; presumably ' i ' would be ambiguous with a single-letter
# initial - confirm before enabling it.
# ' i ' := ' first '
' ii ' := ' second '
' iii ' := ' third '