"""Module for parsing and representing calendar dates in gedcom format.
"""
# Public names exported by ``from <module> import *``: the calendar type
# enum, the abstract date interface, its four concrete implementations and
# the visitor interface.
__all__ = [
"CalendarType", "CalendarDate", "FrenchDate", "GregorianDate",
"HebrewDate", "JulianDate", "CalendarDateVisitor",
]
import abc
import enum
import re
import convertdate.french_republican
import convertdate.gregorian
import convertdate.hebrew
import convertdate.julian
# Month names in calendar order, spelled the way they appear in GEDCOM dates.
# A month number is the 1-based position of the name in the matching list.

# Gregorian month names (the Julian calendar class returns this list too).
MONTHS_GREG = "JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC".split()

# Hebrew calendar month names (13 entries).
MONTHS_HEBR = "TSH CSH KSL TVT SHV ADR ADS NSN IYR SVN TMZ AAV ELL".split()

# French Republican calendar month names; the 13th entry, COMP, is the
# short period of complementary days that closes the year.
MONTHS_FREN = (
    "VEND BRUM FRIM NIVO PLUV VENT GERM FLOR PRAI MESS THER FRUC COMP".split()
)
# Grammar of a GEDCOM calendar date:
#   DATE := [<DATE_CALENDAR_ESCAPE> | <NULL>] <DATE_CALENDAR>
#   <DATE_CALENDAR> := [<YEAR> | <MONTH> <YEAR> | <DAY> <MONTH> <YEAR>]
# <YEAR> can be specified as "1000B.C." or "1699/00".
# <MONTH> is a month name made of 3 or 4 letters.
# The pattern deliberately avoids named groups because it may be embedded
# several times in other expressions.
# Groups: 1: calendar; 2: day; 3: month; 4: year; 5: dual year; 6: B.C. suffix
# Note: this definition is also used in date.py
DATE = r"""
(?:@\#D([\w ]+)@\s+)? # @#DCALENDAR@, optional (group=1)
(?:
(?:(\d+)\s+)? # day (int), optional (group=2)
([A-Z]{3,4})\s+ # month, name 3-4 chars (group=3)
)?
(?:
(\d+)(?:/(\d+))? # year, required, number with optional /NUMBER
# (group=4,5)
(\s*?B\.C\.)? # optional B.C. suffix (group=6)
)
"""

# Anchored, verbose, case-insensitive matcher for one complete date string.
DATE_RE = re.compile("^{}$".format(DATE), re.VERBOSE | re.IGNORECASE)
@enum.unique
class CalendarType(enum.Enum):
    """Namespace for constants defining names of calendars.

    Note that it does not define constants for ``ROMAN`` calendar which is
    declared in GEDCOM standard as a placeholder for future definition, or
    ``UNKNOWN`` calendar which is not supported by this library.

    The constants defined in this namespace are used for the values of the
    `CalendarDate.calendar` attribute. Each separate class implementing
    `CalendarDate` interface uses distinct value for that attribute,
    and this value can be used to deduce actual type of the
    `CalendarDate` instance.

    The value of each member is the calendar name exactly as it is spelled
    inside the GEDCOM ``@#D...@`` calendar escape.
    """

    GREGORIAN = "GREGORIAN"
    """This is the value assigned to `GregorianDate.calendar` attribute.
    """

    JULIAN = "JULIAN"
    """This is the value assigned to `JulianDate.calendar` attribute.
    """

    HEBREW = "HEBREW"
    """This is the value assigned to `HebrewDate.calendar` attribute.
    """

    FRENCH_R = "FRENCH R"
    """This is the value assigned to `FrenchDate.calendar` attribute.
    """
class CalendarDate(metaclass=abc.ABCMeta):
    """Interface for calendar date representation.

    Parameters
    ----------
    year : `int`
        Calendar year number. If ``bc`` parameter is ``True`` then this year
        is before "epoch" of that calendar.
    month : `str`
        Name of the month. Optional, but if day is given then month cannot be
        None.
    day : `int`
        Day in a month, optional.
    bc : `bool`
        ``True`` if year has "B.C."
    original : `str`
        Original string representation of this date as it was specified in
        GEDCOM file, could be ``None``.

    Notes
    -----
    This class defines attributes and methods that are common for all
    calendars defined in GEDCOM (though the meaning and representation can be
    different in different calendars). In GEDCOM date consists of year, month,
    and day; day and month are optional (either day or day+month), year must
    be present. Day is a number, month is month name in a given calendar.
    Year is a number optionally followed by ``B.C.`` or ``/NUMBER`` (latter
    is defined for Gregorian calendar only).

    Implementation for different calendars are provided by subclasses which
    can implement additional attributes or methods. All subclasses need to
    implement `key()` method to support ordering of the dates from
    different calendars. There are presently four implementations defined
    in this module:

    - `GregorianDate` for "GREGORIAN" calendar
    - `JulianDate` for "JULIAN" calendar
    - `HebrewDate` for "HEBREW" calendar
    - `FrenchDate` for "FRENCH R" calendar

    To implement type-specific code on client side one can use one of these
    approaches:

    - dispatch based on the value of `calendar` attribute, it has
      one of the values defined in `CalendarType` enum,
      the value maps uniquely to an implementation class;
    - dispatch based on the type of the instance using ``isinstance``
      method to check the type (e.g. ``isinstance(date, GregorianDate)``);
    - double dispatch (visitor pattern) by implementing
      `CalendarDateVisitor` interface.

    Instances are ordered, compared and hashed by the value of `key()`.
    Comparing with an object that is not a `CalendarDate` is left to Python's
    default handling (``==`` is ``False``, ordering raises `TypeError`).
    """

    def __init__(self, year, month=None, day=None, bc=False, original=None):
        self.year = year
        """Calendar year number (`int`)"""
        self.month = None if month is None else month.upper()
        """Month name or ``None`` (`str`)"""
        self.day = day
        """Day number or ``None`` (`int`)"""
        self.bc = bc
        """Flag which is ``True`` if year has a "B.C" suffix (`bool`)."""
        self.original = original
        """Original string representation of this date as it was specified in
        GEDCOM file, could be ``None`` (`str`).
        """
        self.month_num = None
        """Integer month number (1-based) or ``None`` if month name is not
        given or unknown (`int`).
        """

        # Determine month number; a missing or unrecognized month name is not
        # an error, it simply leaves ``month_num`` set to None.
        try:
            self.month_num = self.months().index(self.month) + 1
        except ValueError:
            pass

    @classmethod
    @abc.abstractmethod
    def months(cls):
        """Ordered list of month names (in GEDCOM format) defined in calendar.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def key(self):
        """Return ordering key for this instance.

        Returned key is a tuple with two numbers (jd, flag). ``jd`` is the
        Julian Day number as floating point, ``flag`` is an integer flag.
        If month or day is not known then last month or last day should be
        returned in its place (in corresponding calendar, and converted to
        JD) and ``flag`` should be set to 1. If date and month are known then
        flag should be set to 0.
        """
        raise NotImplementedError()

    @property
    def year_str(self):
        """Calendar year in string representation, this can include dual year
        and/or B.C. suffix (`str`)
        """
        year = str(self.year)
        if self.bc:
            year += " B.C."
        return year

    @property
    @abc.abstractmethod
    def calendar(self):
        """Calendar used for this date, one of the `CalendarType` enums
        (`CalendarType`)
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def accept(self, visitor):
        """Implementation of visitor pattern.

        Each concrete sub-class will implement this method by dispatching the
        call to corresponding visitor method.

        Parameters
        ----------
        visitor : `CalendarDateVisitor`
            Visitor instance.

        Returns
        -------
        value : `object`
            Value returned from a visitor method.
        """
        raise NotImplementedError()

    @staticmethod
    def _dual_year(year_str, dual_year_str):
        """Guess dual year, returns actual year number.

        In GEDCOM dual year uses last two digits of the year number
        (though some implementations use four digits). This method
        tries to guess actual year number from the digits that were
        given, e.g. "1650/51" -> 1651; "1699/00" -> 1700.

        Parameters
        ----------
        year_str : `str`
            Digits of the year that precedes the slash.
        dual_year_str : `str` or ``None``
            Digits that follow the slash, ``None`` if there is no dual year.

        Returns
        -------
        dual_year : `int` or ``None``
            Full dual year number, ``None`` if ``dual_year_str`` is ``None``.
        """
        if dual_year_str is None:
            return None
        if len(dual_year_str) >= len(year_str):
            # Dual year was given in full, nothing to guess.
            return int(dual_year_str)

        # Borrow the missing leading digits from the primary year.
        n_digits = len(dual_year_str)
        prefix = year_str[:len(year_str) - n_digits]
        dual_year = int(prefix + dual_year_str)
        year = int(year_str)
        # Dual year cannot precede the primary year. If it does then the
        # abbreviated digits rolled over, so advance by the weight of the
        # first borrowed digit (100 for the standard two-digit form).
        step = 10 ** n_digits
        while dual_year < year:
            dual_year += step
        return dual_year

    @classmethod
    def parse(cls, datestr):
        """Parse ``<DATE>`` string and make `CalendarDate` from it.

        Parameters
        ----------
        datestr : `str`
            String with GEDCOM date.

        Returns
        -------
        date : `CalendarDate`
            Date instance.

        Raises
        ------
        ValueError
            Raised if parsing fails.
        """
        m = DATE_RE.match(datestr)
        if m is None:
            raise ValueError("Failed to parse date: " + datestr)

        # The pattern is matched case-insensitively (month names are
        # upper-cased by the constructor), so normalize the calendar name
        # the same way before looking it up.
        calendar_name = (m.group(1) or "GREGORIAN").upper()
        try:
            calendar = CalendarType(calendar_name)
        except ValueError:
            raise ValueError("Unknown calendar: " + datestr) from None

        day = None if m.group(2) is None else int(m.group(2))
        month = m.group(3)
        year = int(m.group(4))
        dual_year = cls._dual_year(m.group(4), m.group(5))
        bc = m.group(6) is not None

        if dual_year is not None and calendar != CalendarType.GREGORIAN:
            raise ValueError("Cannot use dual year (YYYY/YY) in non-Gregorian calendar: " + datestr)

        if calendar == CalendarType.GREGORIAN:
            return GregorianDate(year, month, day, bc=bc, original=datestr, dual_year=dual_year)
        elif calendar == CalendarType.JULIAN:
            return JulianDate(year, month, day, bc=bc, original=datestr)
        elif calendar == CalendarType.FRENCH_R:
            return FrenchDate(year, month, day, bc=bc, original=datestr)
        elif calendar == CalendarType.HEBREW:
            return HebrewDate(year, month, day, bc=bc, original=datestr)
        else:
            # Defensive: a calendar type without an implementation class.
            raise ValueError("Unknown calendar: " + datestr)

    # Comparison operators are all based on `key()`. For operands that are
    # not `CalendarDate` they return NotImplemented so that Python can try
    # the reflected operation or fall back to its default behavior instead
    # of failing with AttributeError on a missing ``key`` method.

    def __lt__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() < other.key()

    def __le__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() <= other.key()

    def __eq__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() == other.key()

    def __ne__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() != other.key()

    def __gt__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() > other.key()

    def __ge__(self, other):
        if not isinstance(other, CalendarDate):
            return NotImplemented
        return self.key() >= other.key()

    def __hash__(self):
        # Consistent with __eq__: equal keys give equal hashes.
        return hash(self.key())

    def __str__(self):
        """Make printable representation out of this instance.
        """
        val = [self.day, self.month, self.year_str]
        if self.calendar != CalendarType.GREGORIAN:
            # Non-default calendars are prefixed with the calendar escape.
            val = ["@#D{}@".format(self.calendar.value)] + val
        return " ".join([str(item) for item in val if item is not None])

    def __repr__(self):
        return str(self)
class GregorianDate(CalendarDate):
    """Implementation of `CalendarDate` for Gregorian calendar.

    Parameter ``dual_year`` (and corresponding attribute) is used for dual
    year. Other parameters have the same meaning as in `CalendarDate`
    class.

    Parameters
    ----------
    dual_year : `int`, optional
        Dual year number or ``None``. Actual year should be given, not just
        two last digits.

    Notes
    -----
    In GEDCOM Gregorian calendar dates are allowed to specify year in the
    form YEAR1/YEAR2 (a.k.a. dual-dating). Second number is used to specify
    year as if calendar year starts in January, while the first number is
    used for actual calendar year which starts at different date. Note that
    GEDCOM specifies that dual year uses just two last digits in the dual
    year number, though some implementations use 4 digits. This class expects
    actual year number (e.g. as if it was specified as "1699/1700").
    """

    def __init__(self, year, month=None, day=None, bc=False, original=None, dual_year=None):
        super().__init__(year, month, day, bc, original)
        self.dual_year = dual_year
        """If not ``None`` then this number represent year in a calendar with
        year starting on January 1st (`int`).
        """

    @classmethod
    def months(cls):
        """Ordered list of month names (in GEDCOM format) defined in calendar.
        """
        return MONTHS_GREG

    @property
    def calendar(self):
        # docstring inherited from base class
        return CalendarType.GREGORIAN

    def key(self):
        """Return ordering key for this instance.

        Returns
        -------
        key : `tuple`
            Tuple ``(jd, flag)``; ``jd`` is the Julian Day number, ``flag``
            is 1 if day or month is not known and 0 otherwise.
        """
        calendar = convertdate.gregorian

        # In dual dating use second year
        year = self.dual_year if self.dual_year is not None else self.year
        if self.bc:
            year = - year

        day = self.day
        offset = 0.
        if self.month_num is None:
            # Month is unknown: take Jan 1 of the next year and step one day
            # back, which gives the last day of this year.
            year += 1
            month = 1
            day = 1
            offset = 1.
        elif self.day is None:
            # Day is unknown: take first day of the next month and step one
            # day back, which gives the last day of this month.
            month = self.month_num + 1
            if month == 13:
                month -= 12
                year += 1
            day = 1
            offset = 1.
        else:
            month = self.month_num

        # Candidates in order of preference: the date itself, then the day
        # before the next month, then the day before the next year.
        dates = [
            (year, month, day, offset),
            (year, month + 1, 1, 1.),
            (year + 1, 1, 1, 1.),
        ]
        for year, month, day, offset in dates:
            try:
                jd = calendar.to_jd(year, month, day) - offset
                break
            except ValueError:
                # Likely a non-existing date, use another
                pass
        else:
            # nothing works, use arbitrary date in the future
            jd = 2816787.5

        flag = 1 if self.day is None or self.month_num is None else 0
        return jd, flag

    @property
    def year_str(self):
        """Calendar year in string representation, this can include dual year
        and/or B.C. suffix (`str`)
        """
        year = str(self.year)
        if self.dual_year is not None:
            # Only the two last digits of the dual year are printed.
            year += "/" + str(self.dual_year)[-2:]
        if self.bc:
            year += " B.C."
        return year

    def __str__(self):
        """Make printable representation out of this instance.

        No calendar escape is added for Gregorian dates.
        """
        val = [self.day, self.month, self.year_str]
        return " ".join([str(item) for item in val if item is not None])

    def accept(self, visitor):
        # docstring inherited from base class
        return visitor.visitGregorian(self)
class JulianDate(CalendarDate):
    """Implementation of `CalendarDate` for Julian calendar.

    All parameters have the same meaning as in `CalendarDate` class.
    """

    def __init__(self, year, month=None, day=None, bc=False, original=None):
        super().__init__(year, month, day, bc, original)

    @classmethod
    def months(cls):
        """Ordered list of month names (in GEDCOM format) defined in calendar.

        Julian calendar uses the same month names as Gregorian calendar.
        """
        return MONTHS_GREG

    def key(self):
        """Return ordering key for this instance.

        Returns
        -------
        key : `tuple`
            Tuple ``(jd, flag)``; ``jd`` is the Julian Day number, ``flag``
            is 1 if day or month is not known and 0 otherwise.
        """
        calendar = convertdate.julian
        year = - self.year if self.bc else self.year

        day = self.day
        offset = 0.
        if self.month_num is None:
            # Month is unknown: take Jan 1 of the next year and step one day
            # back, which gives the last day of this year.
            year += 1
            month = 1
            day = 1
            offset = 1.
        elif self.day is None:
            # Day is unknown: take first day of the next month and step one
            # day back, which gives the last day of this month.
            month = self.month_num + 1
            if month == 13:
                month -= 12
                year += 1
            day = 1
            offset = 1.
        else:
            month = self.month_num

        # Candidates in order of preference: the date itself, then the day
        # before the next month, then the day before the next year.
        dates = [
            (year, month, day, offset),
            (year, month + 1, 1, 1.),
            (year + 1, 1, 1, 1.),
        ]
        for year, month, day, offset in dates:
            try:
                jd = calendar.to_jd(year, month, day) - offset
                break
            except ValueError:
                # Likely a non-existing date, use another
                pass
        else:
            # nothing works, use arbitrary date in the future
            jd = 2816787.5

        flag = 1 if self.day is None or self.month_num is None else 0
        return jd, flag

    @property
    def calendar(self):
        # docstring inherited from base class
        return CalendarType.JULIAN

    def accept(self, visitor):
        # docstring inherited from base class
        return visitor.visitJulian(self)
class HebrewDate(CalendarDate):
    """Implementation of `CalendarDate` for Hebrew calendar.

    All parameters have the same meaning as in `CalendarDate` class.
    """

    def __init__(self, year, month=None, day=None, bc=False, original=None):
        super().__init__(year, month, day, bc, original)

    @classmethod
    def months(cls):
        """Ordered list of month names (in GEDCOM format) defined in calendar.
        """
        return MONTHS_HEBR

    def key(self):
        """Return ordering key for this instance.

        Returns
        -------
        key : `tuple`
            Tuple ``(jd, flag)``; ``jd`` is the Julian Day number, ``flag``
            is 1 if day or month is not known and 0 otherwise.
        """
        calendar = convertdate.hebrew
        year = - self.year if self.bc else self.year

        # Unknown month or day is replaced with the value that the calendar
        # module reports for this year/month (used here as the last month
        # of the year and the last day of the month).
        month = self.month_num or calendar.year_months(year)
        day = self.day if self.day is not None else calendar.month_days(year, month)

        # Candidates in order of preference: the date itself, then the day
        # before the next month, then the day before the next year.
        dates = [
            (year, month, day, 0.),
            (year, month + 1, 1, 1.),
            (year + 1, 1, 1, 1.),
        ]
        for year, month, day, offset in dates:
            try:
                jd = calendar.to_jd(year, month, day) - offset
                break
            except ValueError:
                # Likely a non-existing date, use another
                pass
        else:
            # nothing works, use arbitrary date in the future
            jd = 2816787.5

        flag = 1 if self.day is None or self.month_num is None else 0
        return jd, flag

    @property
    def calendar(self):
        # docstring inherited from base class
        return CalendarType.HEBREW

    def accept(self, visitor):
        # docstring inherited from base class
        return visitor.visitHebrew(self)
class FrenchDate(CalendarDate):
    """Implementation of `CalendarDate` for French Republican calendar.

    All parameters have the same meaning as in `CalendarDate` class.
    """

    def __init__(self, year, month=None, day=None, bc=False, original=None):
        super().__init__(year, month, day, bc, original)

    @classmethod
    def months(cls):
        """Ordered list of month names (in GEDCOM format) defined in calendar.
        """
        return MONTHS_FREN

    def key(self):
        """Return ordering key for this instance.

        Returns
        -------
        key : `tuple`
            Tuple ``(jd, flag)``; ``jd`` is the Julian Day number, ``flag``
            is 1 if day or month is not known and 0 otherwise.
        """
        calendar = convertdate.french_republican
        year = - self.year if self.bc else self.year

        # Unknown month is replaced with the 13th (last) entry of the
        # month list.
        month = self.month_num or 13
        day = self.day
        if day is None:
            # Unknown day is replaced with the last day of the month.
            if month == 13:
                # very short "month"
                day = 5
            else:
                day = 30

        # Candidates in order of preference: the date itself, then the day
        # before the next month, then the day before the next year.
        dates = [
            (year, month, day, 0.),
            (year, month + 1, 1, 1.),
            (year + 1, 1, 1, 1.),
        ]
        for year, month, day, offset in dates:
            try:
                jd = calendar.to_jd(year, month, day) - offset
                break
            except ValueError:
                # Likely a non-existing date, use another
                pass
        else:
            # nothing works, use arbitrary date in the future
            jd = 2816787.5

        flag = 1 if self.day is None or self.month_num is None else 0
        return jd, flag

    @property
    def calendar(self):
        # docstring inherited from base class
        return CalendarType.FRENCH_R

    def accept(self, visitor):
        # docstring inherited from base class
        return visitor.visitFrench(self)
class CalendarDateVisitor(metaclass=abc.ABCMeta):
    """Interface for implementation of Visitor pattern for
    `CalendarDate` classes.

    One can easily extend behavior of the `CalendarDate` class
    hierarchy without modifying classes themselves. Clients need to implement
    new behavior by sub-classing `CalendarDateVisitor` and calling
    `CalendarDate.accept()` method, e.g.::

        class FormatterVisitor(CalendarDateVisitor):

            def visitGregorian(self, date):
                return "Gregorian date:" + str(date)

            # and so on for each date type

        visitor = FormatterVisitor()

        date = CalendarDate.parse(date_string)
        formatted = date.accept(visitor)
    """

    @abc.abstractmethod
    def visitGregorian(self, date):
        """Visit an instance of `GregorianDate` type.

        Parameters
        ----------
        date : `GregorianDate`
            Date instance.

        Returns
        -------
        value : `object`
            Implementation of this method can return anything, value will be
            returned from `CalendarDate.accept()` method.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def visitJulian(self, date):
        """Visit an instance of `JulianDate` type.

        Parameters
        ----------
        date : `JulianDate`
            Date instance.

        Returns
        -------
        value : `object`
            Implementation of this method can return anything, value will be
            returned from `CalendarDate.accept()` method.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def visitHebrew(self, date):
        """Visit an instance of `HebrewDate` type.

        Parameters
        ----------
        date : `HebrewDate`
            Date instance.

        Returns
        -------
        value : `object`
            Implementation of this method can return anything, value will be
            returned from `CalendarDate.accept()` method.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def visitFrench(self, date):
        """Visit an instance of `FrenchDate` type.

        Parameters
        ----------
        date : `FrenchDate`
            Date instance.

        Returns
        -------
        value : `object`
            Implementation of this method can return anything, value will be
            returned from `CalendarDate.accept()` method.
        """
        raise NotImplementedError()