Skip to content

Commit 5c3f7aa

Browse files
chetmancini and Chet Mancini authored
Fix dateutils edge-case handling regressions (#32)
* Fix dateutils edge-case regressions
* Format dateutils for CI
* Fix parse_date typecheck nullability

---------

Co-authored-by: Chet Mancini <[email protected]>
1 parent d4ba169 commit 5c3f7aa

2 files changed

Lines changed: 236 additions & 47 deletions

File tree

dateutils/dateutils.py

Lines changed: 137 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from datetime import date, datetime, timedelta, timezone
4848
from email.utils import format_datetime as _format_http_datetime
4949
from functools import lru_cache
50+
from typing import cast
5051
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError, available_timezones
5152

5253
##################
@@ -87,6 +88,36 @@
8788
re.VERBOSE,
8889
)
8990

91+
# Locale-independent English month parsing for parse_date().
# Keys are lowercase English month names and their common abbreviations
# (including the four-letter "sept"); values are month numbers 1-12.
# The table is spelled out literally so lookups never depend on the active
# locale, unlike strptime's %B / %b directives.
_ENGLISH_MONTH_BY_NAME = {
    "january": 1,
    "jan": 1,
    "february": 2,
    "feb": 2,
    "march": 3,
    "mar": 3,
    "april": 4,
    "apr": 4,
    "may": 5,
    "june": 6,
    "jun": 6,
    "july": 7,
    "jul": 7,
    "august": 8,
    "aug": 8,
    "september": 9,
    "sep": 9,
    "sept": 9,
    "october": 10,
    "oct": 10,
    "november": 11,
    "nov": 11,
    "december": 12,
    "dec": 12,
}
# "<month> <day>, <year>", e.g. "January 31, 2023" or "Jan. 31, 2023".
# The month group also admits "." so abbreviations with a trailing period match.
_MONTH_DAY_YEAR_PATTERN = re.compile(r"^(?P<month>[A-Za-z.]+)\s+(?P<day>\d{1,2}),\s*(?P<year>\d{4})$")
# "<day> <month> <year>", e.g. "31 January 2023" or "31 Jan. 2023".
_DAY_MONTH_YEAR_PATTERN = re.compile(r"^(?P<day>\d{1,2})\s+(?P<month>[A-Za-z.]+)\s+(?P<year>\d{4})$")
120+
90121

91122
##################
92123
# UTC
@@ -666,6 +697,22 @@ def is_weekend(dt: date) -> bool:
666697
return dt.weekday() >= calendar.SATURDAY # 5 = Saturday, 6 = Sunday
667698

668699

700+
def _normalize_holiday_dates(holidays: Iterable[date | datetime] | None) -> set[date]:
701+
"""Normalize holiday values to plain date objects."""
702+
if holidays is None:
703+
return set()
704+
705+
normalized: set[date] = set()
706+
for holiday in holidays:
707+
if isinstance(holiday, datetime):
708+
normalized.add(holiday.date())
709+
elif isinstance(holiday, date):
710+
normalized.add(holiday)
711+
else:
712+
raise TypeError(f"holidays must contain date or datetime values, got {type(holiday).__name__}")
713+
return normalized
714+
715+
669716
def get_us_federal_holidays(year: int, holiday_types: tuple[str, ...] | None = None) -> list[date]:
670717
"""
671718
Get a list of US federal holidays for a given year.
@@ -702,6 +749,9 @@ def get_us_federal_holidays(year: int, holiday_types: tuple[str, ...] | None = N
702749
List[date]: A list of date objects for the holidays in that year,
703750
sorted in chronological order.
704751
752+
Raises:
753+
ValueError: If `holiday_types` contains unknown holiday names.
754+
705755
Examples:
706756
>>> from datetime import date
707757
>>> holidays_2024 = get_us_federal_holidays(2024)
@@ -789,27 +839,31 @@ def find_nth_weekday(year: int, month: int, weekday: int, n: int, from_end: bool
789839
if holiday_types is None:
790840
return tuple(sorted(all_holiday_types.values()))
791841

792-
# Otherwise, return only the specified holiday types
793-
result = []
794-
for holiday_type in holiday_types:
795-
if holiday_type in all_holiday_types:
796-
result.append(all_holiday_types[holiday_type])
842+
invalid_holiday_types = sorted(
843+
{holiday_type for holiday_type in holiday_types if holiday_type not in all_holiday_types}
844+
)
845+
if invalid_holiday_types:
846+
invalid_types = ", ".join(invalid_holiday_types)
847+
valid_types = ", ".join(sorted(all_holiday_types))
848+
raise ValueError(f"Invalid holiday type(s): {invalid_types}. Valid values: {valid_types}")
797849

798-
return tuple(sorted(result))
850+
# Deduplicate by date in case duplicate types are requested.
851+
return tuple(sorted({all_holiday_types[holiday_type] for holiday_type in holiday_types}))
799852

800853

801854
def get_us_federal_holidays_list(year: int, holiday_types: list[str] | None = None) -> list[date]:
    """
    Convenience wrapper for get_us_federal_holidays that accepts a list instead of tuple.

    This function converts the list to a tuple and calls the cached version.
    Unknown holiday types raise ValueError.

    Args:
        year: Year to pass through to get_us_federal_holidays.
        holiday_types: Optional list of holiday type names. None is forwarded
            unchanged; any list (including an empty one) is forwarded as a tuple.

    Returns:
        Whatever get_us_federal_holidays returns for the same arguments.
    """
    # None must stay None rather than become an empty tuple, so the callee
    # sees exactly the value the original two-branch version passed.
    types_key = None if holiday_types is None else tuple(holiday_types)
    return get_us_federal_holidays(year, types_key)
810864

811865

812-
def workdays_between(start_date: date, end_date: date, holidays: Iterable[date] | None = None) -> int:
866+
def workdays_between(start_date: date, end_date: date, holidays: Iterable[date | datetime] | None = None) -> int:
813867
"""
814868
Count workdays (Monday-Friday) between two dates, inclusive.
815869
@@ -823,8 +877,9 @@ def workdays_between(start_date: date, end_date: date, holidays: Iterable[date]
823877
Args:
824878
start_date: The start date (inclusive).
825879
end_date: The end date (inclusive).
826-
holidays: Optional collection of holiday dates to exclude (list, set, tuple,
827-
generator, etc.). Duplicates are automatically handled.
880+
holidays: Optional collection of holiday dates/datetimes to exclude
881+
(list, set, tuple, generator, etc.). Duplicates are automatically
882+
handled.
828883
829884
Warning:
830885
**Generators are consumed on first use.** If you need to call this function
@@ -839,6 +894,7 @@ def workdays_between(start_date: date, end_date: date, holidays: Iterable[date]
839894
840895
Raises:
841896
ValueError: If start_date is after end_date
897+
TypeError: If `holidays` contains values that are not date/datetime
842898
843899
Examples:
844900
>>> from datetime import date
@@ -888,15 +944,15 @@ def workdays_between(start_date: date, end_date: date, holidays: Iterable[date]
888944
# Subtract holidays that fall on weekdays within the range
889945
# Convert to set to handle duplicates and consume generators safely
890946
if holidays is not None:
891-
holidays_set = set(holidays)
947+
holidays_set = _normalize_holiday_dates(holidays)
892948
for holiday in holidays_set:
893949
if start_date <= holiday <= end_date and holiday.weekday() < WEEKDAYS_IN_WEEK:
894950
workdays -= 1
895951

896952
return workdays
897953

898954

899-
def add_business_days(dt: date, num_days: int, holidays: Iterable[date] | None = None) -> date:
955+
def add_business_days(dt: date, num_days: int, holidays: Iterable[date | datetime] | None = None) -> date:
900956
"""
901957
Add business days to a date, skipping weekends and holidays.
902958
@@ -906,7 +962,8 @@ def add_business_days(dt: date, num_days: int, holidays: Iterable[date] | None =
906962
Args:
907963
dt: The starting date.
908964
num_days: Number of business days to add (can be negative).
909-
holidays: Optional collection of holiday dates to skip (list, set, tuple, etc.).
965+
holidays: Optional collection of holiday dates/datetimes to skip (list,
966+
set, tuple, etc.).
910967
Using a set provides O(1) lookup performance for large holiday lists.
911968
**Note:** Generators are consumed on first use.
912969
@@ -915,6 +972,7 @@ def add_business_days(dt: date, num_days: int, holidays: Iterable[date] | None =
915972
916973
Raises:
917974
ValueError: If num_days is extremely large (> 10000 or < -10000)
975+
TypeError: If `holidays` contains values that are not date/datetime
918976
919977
Examples:
920978
>>> from datetime import date
@@ -945,7 +1003,10 @@ def add_business_days(dt: date, num_days: int, holidays: Iterable[date] | None =
9451003
if holidays is None or (isinstance(holidays, (set, frozenset, list, tuple)) and len(holidays) == 0):
9461004
return _add_business_days_no_holidays(dt, num_days)
9471005

948-
holidays_set: set[date] = set(holidays)
1006+
holidays_set = _normalize_holiday_dates(holidays)
1007+
if not holidays_set:
1008+
return _add_business_days_no_holidays(dt, num_days)
1009+
9491010
current = dt
9501011
added = 0
9511012
days_to_add = 1 if num_days >= 0 else -1
@@ -1000,7 +1061,7 @@ def _business_days_to_calendar_days(weekday: int, remaining: int, direction: int
10001061
return days
10011062

10021063

1003-
def next_business_day(dt: date, holidays: Iterable[date] | None = None) -> date:
1064+
def next_business_day(dt: date, holidays: Iterable[date | datetime] | None = None) -> date:
10041065
"""
10051066
Find the next business day from a given date, skipping weekends and holidays.
10061067
@@ -1030,7 +1091,7 @@ def next_business_day(dt: date, holidays: Iterable[date] | None = None) -> date:
10301091
return add_business_days(dt, 1, holidays)
10311092

10321093

1033-
def previous_business_day(dt: date, holidays: Iterable[date] | None = None) -> date:
1094+
def previous_business_day(dt: date, holidays: Iterable[date | datetime] | None = None) -> date:
10341095
"""
10351096
Find the previous business day from a given date, skipping weekends and holidays.
10361097
@@ -1063,7 +1124,10 @@ def previous_business_day(dt: date, holidays: Iterable[date] | None = None) -> d
10631124
def _ts_difference(timestamp: int | datetime | None = None, now_override: int | None = None) -> timedelta:
10641125
"""Helper function to calculate time difference for pretty_date."""
10651126
if now_override is not None:
1066-
now = datetime.fromtimestamp(now_override, tz=timezone.utc)
1127+
try:
1128+
now = datetime.fromtimestamp(now_override, tz=timezone.utc)
1129+
except (ValueError, OSError, OverflowError) as e:
1130+
raise ValueError(f"Invalid now_override timestamp: {now_override}") from e
10671131
else:
10681132
now = datetime.now(timezone.utc)
10691133

@@ -1072,16 +1136,16 @@ def _ts_difference(timestamp: int | datetime | None = None, now_override: int |
10721136
elif isinstance(timestamp, int):
10731137
try:
10741138
ts_dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
1075-
except (ValueError, OSError, OverflowError):
1076-
return timedelta(0) # Return zero diff for invalid timestamps
1139+
except (ValueError, OSError, OverflowError) as e:
1140+
raise ValueError(f"Invalid timestamp: {timestamp}") from e
10771141
return now - ts_dt
10781142
elif isinstance(timestamp, datetime):
10791143
# Handle naive datetimes by assuming UTC
10801144
if timestamp.tzinfo is None:
10811145
timestamp = timestamp.replace(tzinfo=timezone.utc)
10821146
return now - timestamp
1083-
# Type system guarantees we never reach here (timestamp is int | datetime | None)
1084-
return timedelta(0) # pragma: no cover
1147+
# Runtime callers may still violate type hints.
1148+
raise TypeError(f"timestamp must be int, datetime, or None; got {type(timestamp).__name__}") # pragma: no cover
10851149

10861150

10871151
def pretty_date(timestamp: int | datetime | None = None, now_override: int | None = None) -> str: # noqa: C901, PLR0911, PLR0912
@@ -1115,6 +1179,10 @@ def pretty_date(timestamp: int | datetime | None = None, now_override: int | Non
11151179
- "X months ago" / "in X months"
11161180
- "X years ago" / "in X years"
11171181
1182+
Raises:
1183+
ValueError: If `timestamp` or `now_override` is an invalid Unix timestamp.
1184+
TypeError: If `timestamp` is not an int, datetime, or None.
1185+
11181186
Examples:
11191187
>>> from datetime import datetime, timezone
11201188
>>> # Use now_override for deterministic testing
@@ -1310,44 +1378,66 @@ def parse_date(
13101378
"""
13111379
date_str = date_str.strip()
13121380

1313-
if formats is None:
1381+
default_formats = formats is None
1382+
parse_formats: list[str]
1383+
if default_formats:
13141384
if dayfirst:
13151385
# European/international style: day before month
1316-
formats = [
1386+
parse_formats = [
13171387
"%Y-%m-%d", # 2023-01-31 (ISO - unambiguous)
13181388
"%d/%m/%Y", # 31/01/2023
13191389
"%m/%d/%Y", # 01/31/2023 (fallback for US)
13201390
"%d-%m-%Y", # 31-01-2023
13211391
"%m-%d-%Y", # 01-31-2023 (fallback for US)
13221392
"%d.%m.%Y", # 31.01.2023
13231393
"%Y/%m/%d", # 2023/01/31
1324-
"%B %d, %Y", # January 31, 2023
1325-
"%d %B %Y", # 31 January 2023
1326-
"%b %d, %Y", # Jan 31, 2023
1327-
"%d %b %Y", # 31 Jan 2023
13281394
]
13291395
else:
13301396
# US style (default): month before day
1331-
formats = [
1397+
parse_formats = [
13321398
"%Y-%m-%d", # 2023-01-31 (ISO - unambiguous)
13331399
"%m/%d/%Y", # 01/31/2023
13341400
"%d/%m/%Y", # 31/01/2023 (fallback for European)
13351401
"%m-%d-%Y", # 01-31-2023
13361402
"%d-%m-%Y", # 31-01-2023 (fallback for European)
13371403
"%d.%m.%Y", # 31.01.2023
13381404
"%Y/%m/%d", # 2023/01/31
1339-
"%B %d, %Y", # January 31, 2023
1340-
"%d %B %Y", # 31 January 2023
1341-
"%b %d, %Y", # Jan 31, 2023
1342-
"%d %b %Y", # 31 Jan 2023
13431405
]
1406+
else:
1407+
parse_formats = cast(list[str], formats)
13441408

1345-
for fmt in formats:
1409+
for fmt in parse_formats:
13461410
try:
13471411
return datetime.strptime(date_str, fmt).date()
13481412
except ValueError:
13491413
continue
13501414

1415+
# Locale-independent fallback for English month names in the default parser.
1416+
if default_formats:
1417+
parsed_english = _parse_english_textual_date(date_str)
1418+
if parsed_english is not None:
1419+
return parsed_english
1420+
1421+
return None
1422+
1423+
1424+
def _parse_english_textual_date(date_str: str) -> date | None:
    """
    Parse common English month-name date formats without locale dependencies.

    Two layouts are tried, in order: "<month> <day>, <year>"
    (_MONTH_DAY_YEAR_PATTERN) and "<day> <month> <year>"
    (_DAY_MONTH_YEAR_PATTERN). Month names are matched case-insensitively
    against _ENGLISH_MONTH_BY_NAME after trailing periods are removed.

    Args:
        date_str: Text to parse. Leading/trailing whitespace is ignored and
            internal whitespace runs are collapsed to a single space.

    Returns:
        date | None: The parsed date, or None when no layout matches, the
        month name is not recognised, or the day/month/year combination is
        not a valid calendar date.
    """
    normalized = re.sub(r"\s+", " ", date_str.strip())
    for pattern in (_MONTH_DAY_YEAR_PATTERN, _DAY_MONTH_YEAR_PATTERN):
        match = pattern.match(normalized)
        if not match:
            continue

        # Drop trailing periods so abbreviations such as "Jan." resolve to "jan".
        month_name = match.group("month").lower().rstrip(".")
        month = _ENGLISH_MONTH_BY_NAME.get(month_name)
        if month is None:
            return None

        year = int(match.group("year"))
        day = int(match.group("day"))
        try:
            return date(year, month, day)
        except ValueError:
            # Shape matched but the values are impossible (e.g. February 30).
            return None
    return None
13521442

13531443

@@ -1753,14 +1843,15 @@ def format_timezone_offset(tz_name: str) -> str:
17531843
##################
17541844
# Additional utility functions
17551845
##################
1756-
def is_business_day(dt: date, holidays: Iterable[date] | None = None) -> bool:
1846+
def is_business_day(dt: date, holidays: Iterable[date | datetime] | None = None) -> bool:
17571847
"""
17581848
Check if a date is a business day (not weekend or holiday).
17591849
17601850
Args:
17611851
dt: Date to check
1762-
holidays: Optional collection of holiday dates (list, set, tuple, generator, etc.).
1763-
Generators will be consumed. Internally converted to a set for O(1) lookup.
1852+
holidays: Optional collection of holiday dates/datetimes (list, set,
1853+
tuple, generator, etc.). Generators will be consumed. Internally
1854+
converted to a set for O(1) lookup.
17641855
17651856
Returns:
17661857
bool: True if the date is a business day, False otherwise
@@ -1781,7 +1872,7 @@ def is_business_day(dt: date, holidays: Iterable[date] | None = None) -> bool:
17811872
if holidays is None:
17821873
return True
17831874
# Convert to set to handle generators and ensure O(1) lookup
1784-
holidays_set = set(holidays) if not isinstance(holidays, (set, frozenset)) else holidays
1875+
holidays_set = _normalize_holiday_dates(holidays)
17851876
return dt not in holidays_set
17861877

17871878

@@ -1949,11 +2040,17 @@ def time_until_next_occurrence(target_time: datetime, from_time: datetime | None
19492040
elif target_time.tzinfo is not None and from_time.tzinfo is None:
19502041
from_time = from_time.replace(tzinfo=target_time.tzinfo)
19512042

1952-
# Calculate next occurrence
1953-
next_occurrence = target_time.replace(year=from_time.year, month=from_time.month, day=from_time.day)
2043+
from_time_in_target_tz = from_time.astimezone(target_time.tzinfo) if target_time.tzinfo is not None else from_time
2044+
2045+
# Calculate next occurrence based on the target timezone's local date.
2046+
next_occurrence = target_time.replace(
2047+
year=from_time_in_target_tz.year,
2048+
month=from_time_in_target_tz.month,
2049+
day=from_time_in_target_tz.day,
2050+
)
19542051

1955-
if next_occurrence <= from_time:
2052+
if next_occurrence <= from_time_in_target_tz:
19562053
# Target time has already passed today, move to next day
19572054
next_occurrence += timedelta(days=1)
19582055

1959-
return next_occurrence - from_time
2056+
return next_occurrence - from_time_in_target_tz

0 commit comments

Comments
 (0)