diff --git a/core/time/iso8601.odin b/core/time/iso8601.odin
new file mode 100644
index 000000000..528e0b00a
--- /dev/null
+++ b/core/time/iso8601.odin
@@ -0,0 +1,119 @@
+package time
+// Parsing ISO 8601 date/time strings into time.Time.
+
+import dt "core:time/datetime"
+
+// Parses an ISO 8601 string and returns Time in UTC, with any UTC offset applied to it.
+// Only 4-digit years are accepted.
+// Optional pointer to boolean `is_leap` will return `true` if the moment was a leap second.
+// Leap seconds are smeared into 23:59:59.
+// `consumed` is the number of bytes parsed from the start of the string; 0 means parsing failed.
+iso8601_to_time_utc :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, consumed: int) {
+	offset: int
+
+	res, offset, consumed = iso8601_to_time_and_offset(iso_datetime, is_leap)
+	// The offset is in minutes and positive when local time is ahead of UTC, so subtract it.
+	res._nsec += (i64(-offset) * i64(Minute))
+	return res, consumed
+}
+
+// Parses an ISO 8601 string and returns Time and a UTC offset in minutes.
+// e.g. 1985-04-12T23:20:50.52Z
+// Note: Only 4-digit years are accepted.
+// Optional pointer to boolean `is_leap` will return `true` if the moment was a leap second.
+// Leap seconds are smeared into 23:59:59.
+// The returned Time is not adjusted by the offset. `consumed` is 0 if parsing failed.
+iso8601_to_time_and_offset :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, utc_offset: int, consumed: int) {
+	moment, offset, leap_second, count := iso8601_to_components(iso_datetime)
+	if count == 0 {
+		return
+	}
+
+	if is_leap != nil {
+		is_leap^ = leap_second
+	}
+
+	if _res, ok := datetime_to_time(moment.year, moment.month, moment.day, moment.hour, moment.minute, moment.second, moment.nano); !ok {
+		return {}, 0, 0
+	} else {
+		return _res, offset, count
+	}
+}
+
+// Parses an ISO 8601 string and returns its datetime.DateTime components and a UTC offset in minutes.
+// e.g. 1985-04-12T23:20:50.52Z
+// The components are assembled by `datetime.components_to_datetime`; if it reports an error, parsing fails.
+// On failure every result is zero, so `consumed == 0` signals that nothing was parsed.
+iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, is_leap: bool, consumed: int) {
+	moment, offset, count, leap_second, ok := _iso8601_to_components(iso_datetime)
+	if !ok {
+		return
+	}
+	return moment, offset, leap_second, count
+}
+
+// Parses an ISO 8601 string and returns datetime.DateTime, the UTC offset in minutes and the bytes consumed.
+// The UTC designator is optional: a string without `Z` or `+hh:mm` / `-hh:mm` yields `utc_offset = 0`.
+// Fails (`ok = false`) on malformed input or if `datetime.components_to_datetime` reports an error.
+@(private)
+_iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, consumed: int, is_leap: bool, ok: bool) {
+	// A compliant date is at minimum 19 characters long, e.g. YYYY-MM-DDThh:mm:ss
+	(len(iso_datetime) >= 19) or_return
+
+	// Scan and eat YYYY-MM-DD[Tt], then scan and eat HH:MM:SS, leave separator
+	year := scan_digits(iso_datetime[0:], "-", 4) or_return
+	month := scan_digits(iso_datetime[5:], "-", 2) or_return
+	day := scan_digits(iso_datetime[8:], "Tt ", 2) or_return
+	hour := scan_digits(iso_datetime[11:], ":", 2) or_return
+	minute := scan_digits(iso_datetime[14:], ":", 2) or_return
+	second := scan_digits(iso_datetime[17:], "", 2) or_return
+	nanos := 0
+	count := 19
+
+	// Scan fractional seconds
+	if count < len(iso_datetime) && iso_datetime[count] == '.' {
+		count += 1 // consume '.'
+		multiplier := 100_000_000
+		for digit in iso_datetime[count:] {
+			if int(digit) < '0' || int(digit) > '9' {
+				break
+			}
+			// Digits past nanosecond precision are consumed but add nothing (multiplier is 0 by then).
+			nanos += int(digit - '0') * multiplier
+			multiplier /= 10
+			count += 1
+		}
+	}
+
+	// Leap second handling
+	if minute == 59 && second == 60 {
+		second = 59
+		is_leap = true
+	}
+
+	err: dt.Error
+	if res, err = dt.components_to_datetime(year, month, day, hour, minute, second, nanos); err != .None {
+		return {}, 0, 0, false, false
+	}
+
+	// No UTC designator: the string ends after the seconds (or their fraction).
+	if len(iso_datetime[count:]) == 0 {
+		return res, utc_offset, count, is_leap, true
+	}
+
+	// Scan UTC offset
+	switch iso_datetime[count] {
+	case 'Z', 'z':
+		utc_offset = 0
+		count += 1
+	case '+', '-':
+		(len(iso_datetime[count:]) >= 6) or_return
+		offset_hour := scan_digits(iso_datetime[count+1:], ":", 2) or_return
+		offset_minute := scan_digits(iso_datetime[count+4:], "", 2) or_return
+
+		utc_offset = 60 * offset_hour + offset_minute
+		utc_offset *= -1 if iso_datetime[count] == '-' else 1
+		count += 6
+	}
+	return res, utc_offset, count, is_leap, true
+}
\ No newline at end of file
diff --git a/tests/core/time/test_core_time.odin b/tests/core/time/test_core_time.odin
index 1f936e4a7..c6c6869a7 100644
--- a/tests/core/time/test_core_time.odin
+++ b/tests/core/time/test_core_time.odin
@@ -42,6 +42,7 @@ main :: proc() {
 	test_ordinal_date_roundtrip(&t)
 	test_component_to_time_roundtrip(&t)
 	test_parse_rfc3339_string(&t)
+	test_parse_iso8601_string(&t)
 
 	for _, leak in track.allocation_map {
 		expect(&t, false, fmt.tprintf("%v leaked %m\n", leak.location, leak.size))
@@ -91,12 +92,53 @@ RFC3339_Test :: struct{
 // These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
 rfc3339_tests :: []RFC3339_Test{
 	// This represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
-	{"1985-04-12T23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
-	{"1985-04-12t23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
 	{"1985-04-12 23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
 	// Same, but lowercase z
 	{"1985-04-12 23:20:50.52z", {482196050520000000}, true, 0, 23, false},
 
+	// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
+	// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
+	{"1996-12-19 16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
+	{"1996-12-19 16:39:57-08:00", {851042397000000000}, true, 0, 25, false},
+	{"1996-12-20 00:39:57Z", {851042397000000000}, false, 0, 20, false},
+
+	// This represents the leap second inserted at the end of 1990.
+	// It'll be represented as 1990-12-31 23:59:59 UTC after parsing, and `is_leap` will be set to `true`.
+	{"1990-12-31 23:59:60Z", {662687999000000000}, true, 0, 20, true},
+
+	// This represents the same leap second in Pacific Standard Time, 8 hours behind UTC.
+	{"1990-12-31 15:59:60-08:00", {662687999000000000}, true, 0, 25, true},
+
+	// This represents the same instant of time as noon, January 1, 1937, Netherlands time.
+	// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
+	// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
+	// HH:MM format, and this timestamp uses the closest representable UTC offset.
+	{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
+	{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
+}
+
+ISO8601_Test :: struct{
+	iso_8601: string,
+	datetime: time.Time,
+	apply_offset: bool,
+	utc_offset: int,
+	consumed: int,
+	is_leap: bool,
+}
+
+// These are adapted from RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
+iso8601_tests :: []ISO8601_Test{
+	// This represents 20 minutes and 50.003362 seconds after the 23rd hour of April 12th, 1985, without a UTC designator.
+	{"1985-04-12T23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+	{"1985-04-12t23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+	{"1985-04-12 23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+
+	// The UTC designator and the fractional seconds are both optional.
+	{"1985-04-12T23:20:50", {482196050000000000}, true, 0, 19, false},
+
+	// Fractional digits beyond nanosecond precision are consumed, but truncated.
+	{"1985-04-12T23:20:50.0033620001Z", {482196050003362000}, true, 0, 31, false},
+
 	// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
 	// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
 	{"1996-12-19T16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
@@ -114,8 +156,8 @@ rfc3339_tests :: []RFC3339_Test{
 	// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
 	// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
 	// HH:MM format, and this timestamp uses the closest representable UTC offset.
-	{"1937-01-01T12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
-	{"1937-01-01T12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
+	{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
+	{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
 }
 
 @test
@@ -145,6 +187,34 @@ test_parse_rfc3339_string :: proc(t: ^testing.T) {
 	}
 }
 
+// Parses each `iso8601_tests` entry and checks bytes consumed, time, UTC offset (if not applied) and leap flag.
+@test
+test_parse_iso8601_string :: proc(t: ^testing.T) {
+	for test in iso8601_tests {
+		is_leap := false
+		if test.apply_offset {
+			res, consumed := time.iso8601_to_time_utc(test.iso_8601, &is_leap)
+			msg := fmt.tprintf("[apply offset] Parsing failed: %v -> %v (nsec: %v). Expected %v consumed, got %v", test.iso_8601, res, res._nsec, test.consumed, consumed)
+			expect(t, test.consumed == consumed, msg)
+
+			if test.consumed == consumed {
+				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
+				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
+			}
+		} else {
+			res, offset, consumed := time.iso8601_to_time_and_offset(test.iso_8601, &is_leap)
+			msg := fmt.tprintf("Parsing failed: %v -> %v (nsec: %v), offset: %v. Expected %v consumed, got %v", test.iso_8601, res, res._nsec, offset, test.consumed, consumed)
+			expect(t, test.consumed == consumed, msg)
+
+			if test.consumed == consumed {
+				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
+				expect(t, test.utc_offset == offset, fmt.tprintf("UTC offset didn't match. Expected %v, got %v", test.utc_offset, offset))
+				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
+			}
+		}
+	}
+}
+
 MONTH_DAYS := []int{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
 YEAR_START :: 1900
 YEAR_END :: 2024