-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpapacito.php
More file actions
179 lines (156 loc) · 5.13 KB
/
papacito.php
File metadata and controls
179 lines (156 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
<?
/**
 * Parses a Spanish surname string into paternal, maternal and married parts,
 * following the conventions described on Wikipedia
 * (http://en.wikipedia.org/wiki/Spanish_naming_customs).
 *
 * The raw name is parsed once, inside the constructor. Afterwards the parts
 * are read through the get_*() accessors and is_widow().
 *
 * @version 0.8
 */
class papacito {
    // Internal state. The four name fields are filled in by _parse().
    private $raw_name = '';
    private $maternal = '';
    private $paternal = '';
    private $married = '';
    private $is_widow = false;
    private $excess = '';

    // Characters stripped from the last name before it is tokenized
    // (so that e.g. "Vda." is seen as "Vda").
    private $replace_chars = array('.', ',');

    // Characters that delimit the tokens in the last name.
    const TOKEN_CHARS = " \n\t";

    /**
     * Constructor. Validates and trims the full last name, stores it, and runs
     * the internal parser so the name fields are populated after construction.
     *
     * @param string $raw_last_name The full last name that will be parsed.
     * @throws InvalidArgumentException (a subclass of Exception) if the
     *         argument is not a string, or is empty once trimmed.
     */
    public function __construct($raw_last_name) {
        // Check that the right data type was passed.
        if ( !is_string($raw_last_name) ) {
            throw new InvalidArgumentException(__METHOD__ . ' called with non-string argument.');
        }

        // Surrounding whitespace is never part of the name.
        $raw_last_name = trim($raw_last_name);
        if ( '' === $raw_last_name ) {
            throw new InvalidArgumentException(__METHOD__ . ' called with zero length value for the last name');
        }

        $this->raw_name = $raw_last_name;
        $this->_parse();
    }

    /**
     * Internal function for parsing the last name.
     *
     * Walks the whitespace-delimited tokens of the raw name and assigns them,
     * in order, to the paternal, maternal, married and excess fields. Certain
     * tokens (matched case-insensitively) are treated specially:
     *  - 'y' / 'i'            : conjunction; the next name goes to the maternal field.
     *  - 'de'                 : particle kept with the current field, except when it
     *                           would be the first word of the married name.
     *  - 'la','los','las','del': particles kept with the current field.
     *  - 'v','vda','viuda'    : widow marker; sets the widow flag, married name follows.
     *  - 'sra'                : the married name follows.
     * Any other token is a name: it is stored in the current field and the
     * parser advances to the next field. Tokens left over after the married
     * name all accumulate in the excess field.
     */
    private function _parse() {
        // Fields that tokens can be assigned to, in the order they are filled.
        $fields = array('paternal', 'maternal', 'married', 'excess');
        $max_index = count($fields) - 1;
        $maternal_index = array_search('maternal', $fields, true);
        $married_index = array_search('married', $fields, true);

        // Index into $fields of the field currently being filled.
        $pointer = 0;

        // Remove unwanted punctuation, then tokenize.
        $search_string = str_replace($this->replace_chars, '', $this->raw_name);
        $token = strtok($search_string, self::TOKEN_CHARS);

        while ( false !== $token ) {
            // Once past the last field, everything else lands in 'excess'.
            if ( $pointer > $max_index ) {
                $pointer = $max_index;
            }
            $property = $fields[$pointer];

            switch ( strtoupper($token) ) {
                case 'Y':
                case 'I':
                    // The 'y' conjunction separates the paternal and maternal names.
                    // In Catalan-Valencian names, the 'i' conjunction serves the same purpose.
                    $pointer = $maternal_index;
                    break;

                case 'DE':
                    // 'de' may introduce a regional placename or the married name.
                    // As the leading particle of the married name it is dropped;
                    // everywhere else it is kept as part of the current field.
                    if ( $married_index !== $pointer || '' !== $this->$property ) {
                        $this->_append($property, $token);
                    }
                    break;

                case 'LA':
                case 'LOS':
                case 'LAS':
                case 'DEL':
                    // Particles belong to the name that follows them, so keep
                    // them in the current field without advancing the pointer.
                    $this->_append($property, $token);
                    break;

                case 'V':
                case 'VDA':
                case 'VIUDA':
                    // Abbreviations / word for widow.
                    $this->is_widow = true;
                    // The married name comes next: intentional fall through
                    // to the SRA handling.
                case 'SRA':
                    // Abbreviation for Senora: the married name is next.
                    $pointer = $married_index;
                    break;

                default:
                    // An actual name: store it and move on to the next field.
                    $this->_append($property, $token);
                    $pointer++;
                    break;
            }

            // Get the next token
            $token = strtok(self::TOKEN_CHARS);
        }
    }

    /**
     * Appends a token to one of the name fields, inserting a single space
     * between it and whatever the field already holds. This keeps multi-word
     * values readable ("de la Cruz", "Cuatro Cinco") and never leaves a
     * trailing space behind.
     *
     * @param string $property Name of the private field to append to.
     * @param string $token    The token to append.
     */
    private function _append($property, $token) {
        if ( '' !== $this->$property ) {
            $this->$property .= ' ';
        }
        $this->$property .= $token;
    }

    // Public functions for getting values from the private variables.

    /**
     * Function to return the raw last name passed to the constructor
     * (after trimming surrounding whitespace).
     *
     * @return string raw_last_name
     */
    public function get_raw_name() {
        return $this->raw_name;
    }

    /**
     * Function to return the maternal name.
     *
     * @return string maternal_name (empty string if none was found)
     */
    public function get_maternal() {
        return $this->maternal;
    }

    /**
     * Function to return the paternal name.
     *
     * @return string paternal_name (empty string if none was found)
     */
    public function get_paternal() {
        return $this->paternal;
    }

    /**
     * Function to return the married name.
     *
     * @return string married_name (empty string if none was found)
     */
    public function get_married() {
        return $this->married;
    }

    /**
     * Function to return a flag if the name indicates this is a widow.
     *
     * @return bool is_widow
     */
    public function is_widow() {
        return $this->is_widow;
    }

    /**
     * Function to return the excess name tokens remaining at the end of parsing,
     * separated by single spaces.
     *
     * @return string excess_name (empty string if there were none)
     */
    public function get_excess() {
        return $this->excess;
    }
}
?>