Merge pull request #3625 from Kelimion/iso8061

Add dedicated ISO 8601 parser.
This commit is contained in:
Jeroen van Rijn
2024-05-24 23:56:20 +02:00
committed by GitHub
2 changed files with 180 additions and 4 deletions

113
core/time/iso8061.odin Normal file
View File

@@ -0,0 +1,113 @@
package time
// Parsing ISO 8601 date/time strings into time.Time.
import dt "core:time/datetime"
// Parses an ISO 8601 date/time string and returns the moment it describes as a `Time` in UTC.
//
// Any UTC offset found in the string is subtracted from the parsed time, so the result is always UTC.
// Only 4-digit years are accepted.
//
// Inputs:
// - iso_datetime: The string to parse, e.g. `1996-12-19T16:39:57-08:00`
// - is_leap: Optional pointer to a boolean that is set to `true` if the moment was a leap second.
//   A leap second (`:59:60`) is reported as second 59 of the same minute.
//
// Returns:
// - res: The parsed time in UTC, or the zero `Time` if parsing failed
// - consumed: Number of bytes of `iso_datetime` that were parsed, 0 if parsing failed
iso8601_to_time_utc :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, consumed: int) {
	minutes_from_utc: int
	res, minutes_from_utc, consumed = iso8601_to_time_and_offset(iso_datetime, is_leap)

	// The parsed time is local to its offset; take the offset away again to arrive at UTC.
	res._nsec -= i64(minutes_from_utc) * i64(Minute)
	return
}
// Parses an ISO 8601 date/time string and returns it as a `Time` together with its UTC offset in minutes.
// e.g. 1985-04-12T23:20:50.52Z
//
// The offset is NOT applied to the returned `Time`; it is handed back separately.
// Note: Only 4-digit years are accepted.
//
// Inputs:
// - iso_datetime: The string to parse
// - is_leap: Optional pointer to a boolean that is set to `true` if the moment was a leap second.
//   A leap second (`:59:60`) is reported as second 59 of the same minute.
//
// Returns:
// - res: The parsed time, without the offset applied
// - utc_offset: The UTC offset in minutes
// - consumed: Number of bytes of `iso_datetime` that were parsed
//
// All three results are zero if the string could not be parsed or could not be converted to a `Time`.
iso8601_to_time_and_offset :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, utc_offset: int, consumed: int) {
	parts, minutes, was_leap, n := iso8601_to_components(iso_datetime)
	if n == 0 {
		// Nothing was consumed, so the string didn't parse.
		return {}, 0, 0
	}

	// The leap second flag is reported as soon as the components have been parsed.
	if is_leap != nil {
		is_leap^ = was_leap
	}

	converted, valid := datetime_to_time(parts.year, parts.month, parts.day, parts.hour, parts.minute, parts.second, parts.nano)
	if !valid {
		return {}, 0, 0
	}
	return converted, minutes, n
}
// Parses an ISO 8601 date/time string and returns its components as a `datetime.DateTime`,
// along with the UTC offset in minutes.
// e.g. 1985-04-12T23:20:50.52Z
//
// Returns:
// - res: The parsed date and time components, without the offset applied
// - utc_offset: The UTC offset in minutes
// - is_leap: `true` if the string described a leap second (`:59:60`), which is reported as second 59
// - consumed: Number of bytes of `iso_datetime` that were parsed
//
// All results are zero if the string could not be parsed.
iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, is_leap: bool, consumed: int) {
	ok: bool
	res, utc_offset, consumed, is_leap, ok = _iso8601_to_components(iso_datetime)
	if !ok {
		// Don't leak partially parsed values to the caller.
		return {}, 0, false, 0
	}
	return
}
// Parses an ISO 8601 date/time string of the form `YYYY-MM-DD[Tt ]hh:mm:ss[.fraction][Z|z|+hh:mm|-hh:mm]`
// and returns its components as a `datetime.DateTime`.
//
// The fraction and the UTC offset are both optional. Anything following the recognised
// timestamp is left alone and is not counted in `consumed`.
// Checking of the individual components is left to `dt.components_to_datetime`;
// if it reports an error, parsing fails.
//
// Returns:
// - res: The parsed date and time components, without the offset applied
// - utc_offset: The UTC offset in minutes, 0 if the string had `Z`/`z` or no offset at all
// - consumed: Number of bytes of `iso_datetime` that were parsed, 0 on failure
// - is_leap: `true` if the string described a leap second (`:59:60`), which is reported as second 59
// - ok: `false` if the string could not be parsed, in which case the other results must be ignored
@(private)
_iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, consumed: int, is_leap: bool, ok: bool) {
	// The shortest timestamp we accept is 19 characters long, e.g. YYYY-MM-DDThh:mm:ss
	// The fraction and UTC offset that may follow it are optional.
	(len(iso_datetime) >= 19) or_return

	// Scan and eat YYYY-MM-DD[Tt ], then scan and eat hh:mm:ss, leave whatever follows the seconds
	year   := scan_digits(iso_datetime[0:],  "-",   4) or_return
	month  := scan_digits(iso_datetime[5:],  "-",   2) or_return
	day    := scan_digits(iso_datetime[8:],  "Tt ", 2) or_return
	hour   := scan_digits(iso_datetime[11:], ":",   2) or_return
	minute := scan_digits(iso_datetime[14:], ":",   2) or_return
	second := scan_digits(iso_datetime[17:], "",    2) or_return
	nanos  := 0
	count  := 19

	// Scan fractional seconds, if any. The string may end right after the seconds.
	if count < len(iso_datetime) && iso_datetime[count] == '.' {
		count += 1 // consume '.'
		multiplier := 100_000_000
		for digit in iso_datetime[count:] {
			if digit < '0' || digit > '9' {
				break
			}
			// Digits beyond nanosecond precision are truncated, but still consumed.
			// Otherwise they'd be left in front of the UTC offset, which would then go unnoticed.
			if multiplier >= 1 {
				nanos += int(digit - '0') * multiplier
				multiplier /= 10
			}
			count += 1
		}
	}

	// Leap second handling: report it as second 59 and flag it.
	if minute == 59 && second == 60 {
		second = 59
		is_leap = true
	}

	err: dt.Error
	if res, err = dt.components_to_datetime(year, month, day, hour, minute, second, nanos); err != .None {
		return {}, 0, 0, false, false
	}

	// No UTC offset given.
	if count == len(iso_datetime) {
		return res, utc_offset, count, is_leap, true
	}

	// Scan UTC offset. Any other character ends the timestamp and is left unconsumed.
	switch iso_datetime[count] {
	case 'Z', 'z':
		utc_offset = 0
		count += 1
	case '+', '-':
		// Sign, two digits, ':', two digits.
		(len(iso_datetime[count:]) >= 6) or_return
		offset_hour   := scan_digits(iso_datetime[count+1:], ":", 2) or_return
		offset_minute := scan_digits(iso_datetime[count+4:], "",  2) or_return

		utc_offset = 60 * offset_hour + offset_minute
		utc_offset *= -1 if iso_datetime[count] == '-' else 1
		count += 6
	}
	return res, utc_offset, count, is_leap, true
}

View File

@@ -42,6 +42,7 @@ main :: proc() {
test_ordinal_date_roundtrip(&t)
test_component_to_time_roundtrip(&t)
test_parse_rfc3339_string(&t)
test_parse_iso8601_string(&t)
for _, leak in track.allocation_map {
expect(&t, false, fmt.tprintf("%v leaked %m\n", leak.location, leak.size))
@@ -91,12 +92,47 @@ RFC3339_Test :: struct{
// Test vectors for the RFC 3339 parser.
// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
// NOTE(review): entries are positional. The field order presumably mirrors `ISO8601_Test`
// (input, expected time, apply_offset, utc_offset, consumed, is_leap) - confirm against `RFC3339_Test`.
rfc3339_tests :: []RFC3339_Test{
// This represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
{"1985-04-12T23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
{"1985-04-12t23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
{"1985-04-12 23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
// Same, but lowercase z
{"1985-04-12 23:20:50.52z", {482196050520000000}, true, 0, 23, false},
// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
{"1996-12-19 16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
{"1996-12-19 16:39:57-08:00", {851042397000000000}, true, 0, 25, false},
{"1996-12-20 00:39:57Z", {851042397000000000}, false, 0, 20, false},
// This represents the leap second inserted at the end of 1990.
// It'll be represented as 1990-12-31 23:59:59 UTC after parsing, and `is_leap` will be set to `true`.
{"1990-12-31 23:59:60Z", {662687999000000000}, true, 0, 20, true},
// This represents the same leap second in Pacific Standard Time, 8 hours behind UTC.
{"1990-12-31 15:59:60-08:00", {662687999000000000}, true, 0, 25, true},
// This represents the same instant of time as noon, January 1, 1937, Netherlands time.
// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
// HH:MM format, and this timestamp uses the closest representable UTC offset.
{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
}
// A single test vector for the ISO 8601 parser, consumed by `test_parse_iso8601_string`.
// Test tables initialize it positionally, so the field order must not change.
ISO8601_Test :: struct{
// The string handed to the parser.
iso_8601: string,
// The `time.Time` the parser is expected to return.
datetime: time.Time,
// If `true`, the test calls `time.iso8601_to_time_utc`, otherwise `time.iso8601_to_time_and_offset`.
apply_offset: bool,
// Expected UTC offset as returned by `time.iso8601_to_time_and_offset`. Only checked when `apply_offset` is `false`.
utc_offset: int,
// Expected value of the parser's `consumed` result.
consumed: int,
// Whether the parser is expected to flag the moment as a leap second.
is_leap: bool,
}
// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
iso8601_tests :: []ISO8601_Test{
// This represents 20 minutes and .003362 seconds after the 23rd hour of April 12th, 1985 in UTC.
{"1985-04-12T23:20:50.003362", {482196050003362000}, true, 0, 26, false},
{"1985-04-12t23:20:50.003362", {482196050003362000}, true, 0, 26, false},
{"1985-04-12 23:20:50.003362", {482196050003362000}, true, 0, 26, false},
// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
{"1996-12-19T16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
@@ -114,8 +150,8 @@ rfc3339_tests :: []RFC3339_Test{
// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
// HH:MM format, and this timestamp uses the closest representable UTC offset.
{"1937-01-01T12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
{"1937-01-01T12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
}
@test
@@ -145,6 +181,33 @@ test_parse_rfc3339_string :: proc(t: ^testing.T) {
}
}
// Runs every entry of `iso8601_tests` through the ISO 8601 parser.
//
// Entries with `apply_offset` set are parsed using `time.iso8601_to_time_utc`,
// the others using `time.iso8601_to_time_and_offset`, for which the returned UTC offset is checked as well.
// Time, offset and leap second flag are only compared if the number of consumed bytes matched,
// as a mismatch there means parsing went wrong to begin with.
@test
test_parse_iso8601_string :: proc(t: ^testing.T) {
	for test in iso8601_tests {
		is_leap := false
		if test.apply_offset {
			res, consumed := time.iso8601_to_time_utc(test.iso_8601, &is_leap)
			msg := fmt.tprintf("[apply offset] Parsing failed: %v -> %v (nsec: %v). Expected %v consumed, got %v", test.iso_8601, res, res._nsec, test.consumed, consumed)
			expect(t, test.consumed == consumed, msg)

			if test.consumed == consumed {
				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
			}
		} else {
			// Pass `&is_leap` here as well, otherwise the leap second check below could never see a `true`.
			res, offset, consumed := time.iso8601_to_time_and_offset(test.iso_8601, &is_leap)
			msg := fmt.tprintf("Parsing failed: %v -> %v (nsec: %v), offset: %v. Expected %v consumed, got %v", test.iso_8601, res, res._nsec, offset, test.consumed, consumed)
			expect(t, test.consumed == consumed, msg)

			if test.consumed == consumed {
				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
				expect(t, test.utc_offset == offset, fmt.tprintf("UTC offset didn't match. Expected %v, got %v", test.utc_offset, offset))
				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
			}
		}
	}
}
// Number of days in each month of a non-leap year, January first.
MONTH_DAYS := []int{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
// NOTE(review): presumably the range of years the tests below iterate over - confirm against their use.
YEAR_START :: 1900
YEAR_END :: 2024