Source code for ged4py.detail.name

"""Internal module for parsing names in gedcom format.
"""


def split_name(name):
    """Split a full GEDCOM name string into given-name and surname pieces.

    Parameters
    ----------
    name : `str`
        Full name string.

    Returns
    -------
    name : `tuple`
        3-tuple `(given1, surname, given2)`. Every element is stripped of
        surrounding whitespace; pieces that are missing from the full string
        are returned as empty strings.

    Notes
    -----
    Full name can have one of these formats::

        <NAME_TEXT> |
        /<NAME_TEXT>/ |
        <NAME_TEXT> /<NAME_TEXT>/ |
        /<NAME_TEXT>/ <NAME_TEXT> |
        <NAME_TEXT> /<NAME_TEXT>/ <NAME_TEXT>

    <NAME_TEXT> can include almost anything excluding commas, numbers and
    special characters (though some test files use numbers for the names).
    Text between slashes is the surname, text outside slashes is the given
    name. The string is cut at the first two slashes only, so any further
    slash stays inside the trailing piece, e.g.::

        "First /Last/" -> ("First", "Last", "")
        "/Last/ First" -> ("", "Last", "First")
        "First /Last/ Jr." -> ("First", "Last", "Jr.")
        "First Jr." -> ("First Jr.", "", "")
    """
    # At most two cuts: text before the first slash, text between the first
    # and second slash, and everything after the second slash.
    pieces = name.split("/", 2)
    # Pad so that names with fewer than two slashes still yield three items.
    pieces.extend([""] * (3 - len(pieces)))
    return tuple(piece.strip() for piece in pieces)
def parse_name_altree(record):
    """Parse NAME structure assuming ALTREE dialect.

    Parameters
    ----------
    record : `ged4py.model.Record`
        NAME record.

    Returns
    -------
    parsed_name : `tuple`
        Tuple with 3 or 4 elements. The first three elements are the same
        as returned from `split_name`; the fourth element (present only when
        the record has a non-empty SURN sub-record) is the maiden name.

    Notes
    -----
    In ALTREE dialect maiden name (if present) is saved as SURN sub-record
    and is also appended to family name in parens. Given name is saved in
    GIVN sub-record. Few examples:

    No maiden name::

        1 NAME John /Smith/
        2 GIVN John

    With maiden name::

        1 NAME Jane /Smith (Ivanova)/
        2 GIVN Jane
        2 SURN Ivanova

    No maiden name::

        1 NAME Mers /Daimler (-Benz)/
        2 GIVN Mers

    Because family name can itself contain parens, the maiden name cannot
    be guessed from the family name alone; the SURN sub-record has to be
    checked as well.

    ALTREE also replaces empty names with a question mark; a surname that
    is exactly ``?`` is turned back into an empty string.
    """
    given1, family, given2 = split_name(record.value)
    if family == '?':
        family = ''

    maiden = record.sub_tag_value("SURN")
    if not maiden:
        # No maiden name recorded: plain 3-tuple.
        return given1, family, given2

    # Strip the trailing "(maiden)" from the family name when it is there.
    suffix = '(' + maiden + ')'
    if family.endswith(suffix):
        family = family[:len(family) - len(suffix)].rstrip()
        # What is left may be the "?" placeholder for an empty surname.
        if family == '?':
            family = ''
    return given1, family, given2, maiden
def parse_name_myher(record):
    """Parse NAME structure assuming MYHERITAGE dialect.

    Parameters
    ----------
    record : `ged4py.model.Record`
        NAME record.

    Returns
    -------
    parsed_name : `tuple`
        Tuple with 3 or 4 elements. Without a _MARNM sub-record this is
        exactly what `split_name` returns. With one, the married name takes
        the surname position and the surname from the NAME value is appended
        as the fourth element (maiden name).

    Notes
    -----
    In MYHERITAGE dialect married name (if present) is saved as _MARNM
    sub-record. Maiden name is stored in SURN record. Few examples:

    No maiden name::

        1 NAME John /Smith/
        2 GIVN John
        2 SURN Smith

    With maiden name::

        1 NAME Jane /Ivanova/
        2 GIVN Jane
        2 SURN Ivanova
        2 _MARNM Smith

    No maiden name::

        1 NAME Mers /Daimler (-Benz)/
        2 GIVN Mers
        2 SURN Daimler (-Benz)
    """
    pieces = split_name(record.value)
    married = record.sub_tag_value("_MARNM")
    if not married:
        return pieces

    # The surname found in the NAME value is the maiden name; the married
    # name replaces it in the surname slot.
    given1, maiden, given2 = pieces
    return given1, married, given2, maiden
def parse_name_ancestris(record):
    """Parse NAME structure assuming ANCESTRIS dialect.

    As far as can be told, Ancestris has no standard convention for
    representing maiden or married names, so the best that can be done is
    to use the NAME record value and ignore every other field.

    Parameters
    ----------
    record : `ged4py.model.Record`
        NAME record.

    Returns
    -------
    parsed_name : `tuple`
        The result of `split_name` applied to the record value, i.e.
        `(given1, surname, given2)`; this function never adds a maiden name.
    """
    return split_name(record.value)