mirror of
https://github.com/odin-lang/Odin.git
synced 2026-01-04 04:02:33 +00:00
Merge pull request #3625 from Kelimion/iso8061
Add dedicated ISO 8601 parser.
This commit is contained in:
113
core/time/iso8061.odin
Normal file
113
core/time/iso8061.odin
Normal file
@@ -0,0 +1,113 @@
|
||||
package time
|
||||
// Parsing ISO 8601 date/time strings into time.Time.
|
||||
|
||||
import dt "core:time/datetime"
|
||||
|
||||
// Parses an ISO 8601 date/time string and returns the moment as a `Time` normalized to UTC.
//
// The string is parsed by `iso8601_to_time_and_offset`; the UTC offset it reports (in minutes)
// is then subtracted from the parsed time.
// Only 4-digit years are accepted.
//
// Inputs:
// - iso_datetime: The string to parse, e.g. `1996-12-19T16:39:57-08:00`
// - is_leap: Optional pointer, forwarded to `iso8601_to_time_and_offset`, through which it's
//   reported whether the moment was a leap second
//
// Returns:
// - res: The parsed time with the UTC offset applied
// - consumed: The `consumed` value reported by `iso8601_to_time_and_offset`, `0` if parsing failed
//
// Leap seconds are smeared into 23:59:59.
iso8601_to_time_utc :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, consumed: int) {
	offset_minutes: int
	res, offset_minutes, consumed = iso8601_to_time_and_offset(iso_datetime, is_leap)

	// A positive offset means local time is ahead of UTC, so move the result back by that amount.
	res._nsec -= i64(offset_minutes) * i64(Minute)
	return
}
|
||||
|
||||
// Parses an ISO 8601 date/time string and returns it as a `Time`, along with its UTC offset in minutes.
// e.g. 1985-04-12T23:20:50.52Z
//
// The offset is returned separately and is NOT applied to the returned `Time`.
// Note: Only 4-digit years are accepted.
//
// Inputs:
// - iso_datetime: The string to parse
// - is_leap: Optional pointer; when non-nil and the string could be split into components,
//   it's set to whether the moment was a leap second
//
// Returns:
// - res: The parsed time, without the UTC offset applied
// - utc_offset: The UTC offset in minutes
// - consumed: The `consumed` value reported by `iso8601_to_components`, `0` if parsing failed
//
// Leap seconds are smeared into 23:59:59.
iso8601_to_time_and_offset :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, utc_offset: int, consumed: int) {
	parts, minutes, was_leap, n := iso8601_to_components(iso_datetime)
	if n == 0 {
		// Nothing could be parsed; return the zero values of the named results.
		return
	}

	if is_leap != nil {
		is_leap^ = was_leap
	}

	converted, ok := datetime_to_time(parts.year, parts.month, parts.day, parts.hour, parts.minute, parts.second, parts.nano)
	if !ok {
		return {}, 0, 0
	}
	return converted, minutes, n
}
|
||||
|
||||
// Parses an ISO 8601 string and returns its components as a datetime.DateTime, plus a UTC offset in minutes.
// e.g. 1985-04-12T23:20:50.52Z
//
// This is the public wrapper around the private `_iso8601_to_components`. If that procedure
// reports failure, all results are returned as their zero values, so `consumed == 0` signals
// that parsing failed.
//
// NOTE(review): the private parser fails when `dt.components_to_datetime` returns an error,
// so out-of-range components are only returned if that procedure lets them through.
//
// Returns:
// - res: The parsed date and time, without the UTC offset applied
// - utc_offset: The UTC offset in minutes
// - is_leap: Whether the parser flagged the moment as a leap second
// - consumed: The `consumed` value reported by the private parser
iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, is_leap: bool, consumed: int) {
	ok: bool

	// Mind the order: the private parser returns `consumed` before `is_leap`.
	res, utc_offset, consumed, is_leap, ok = _iso8601_to_components(iso_datetime)
	if !ok {
		return {}, 0, false, 0
	}
	return
}
|
||||
|
||||
// Parses an ISO 8601 string of the form `YYYY-MM-DD[Tt ]hh:mm:ss[.fraction][Z|z|+hh:mm|-hh:mm]`
// and returns its components as a datetime.DateTime.
//
// The scanned components are handed to `dt.components_to_datetime`; if it returns an error, parsing fails.
// A seconds field of 60 in minute 59 is treated as a leap second: it's folded into second 59
// and reported through `is_leap`.
//
// Fractional seconds are kept up to nanosecond precision (9 digits). Further digits are
// consumed but truncated.
//
// Returns:
// - res: The parsed date and time, without the UTC offset applied
// - utc_offset: The UTC offset in minutes, negative for `-hh:mm`; `0` for `Z`, `z`, or when no offset is given
// - consumed: The number of bytes of `iso_datetime` that were parsed
// - is_leap: `true` if the moment was a leap second
// - ok: `false` if parsing failed, in which case the other results should not be relied upon
@(private)
_iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, consumed: int, is_leap: bool, ok: bool) {
	// The UTC offset is optional, so the shortest accepted string is 19 characters long, e.g. YYYY-MM-DDThh:mm:ss
	(len(iso_datetime) >= 19) or_return

	// Scan and eat YYYY-MM-DD[Tt ], then scan and eat hh:mm:ss.
	// The seconds are scanned without a separator, as whatever follows them is optional.
	year   := scan_digits(iso_datetime[0:],  "-",   4) or_return
	month  := scan_digits(iso_datetime[5:],  "-",   2) or_return
	day    := scan_digits(iso_datetime[8:],  "Tt ", 2) or_return
	hour   := scan_digits(iso_datetime[11:], ":",   2) or_return
	minute := scan_digits(iso_datetime[14:], ":",   2) or_return
	second := scan_digits(iso_datetime[17:], "",    2) or_return
	nanos  := 0
	count  := 19

	// Scan fractional seconds
	if count < len(iso_datetime) && iso_datetime[count] == '.' {
		count += 1 // consume '.'
		multiplier := 100_000_000
		for count < len(iso_datetime) {
			digit := iso_datetime[count]
			if digit < '0' || digit > '9' {
				break
			}
			// After 9 digits `multiplier` has reached 0, so any further digits add nothing.
			// They're still consumed, otherwise a UTC offset following them would be missed.
			nanos += int(digit - '0') * multiplier
			multiplier /= 10
			count += 1
		}
	}

	// Leap second handling
	if minute == 59 && second == 60 {
		second = 59
		is_leap = true
	}

	err: dt.Error
	if res, err = dt.components_to_datetime(year, month, day, hour, minute, second, nanos); err != .None {
		return {}, 0, 0, false, false
	}

	// Nothing left to scan, so no UTC offset was given.
	if count == len(iso_datetime) {
		return res, utc_offset, count, is_leap, true
	}

	// Scan UTC offset. Anything other than `Z`, `z`, `+` or `-` is left unconsumed.
	switch iso_datetime[count] {
	case 'Z', 'z':
		utc_offset = 0
		count += 1
	case '+', '-':
		// A numeric offset is exactly 6 characters long: sign, hh, ':', mm
		(len(iso_datetime[count:]) >= 6) or_return
		offset_hour   := scan_digits(iso_datetime[count+1:], ":", 2) or_return
		offset_minute := scan_digits(iso_datetime[count+4:], "",  2) or_return

		utc_offset = 60 * offset_hour + offset_minute
		utc_offset *= -1 if iso_datetime[count] == '-' else 1
		count += 6
	}
	return res, utc_offset, count, is_leap, true
}
|
||||
@@ -42,6 +42,7 @@ main :: proc() {
|
||||
test_ordinal_date_roundtrip(&t)
|
||||
test_component_to_time_roundtrip(&t)
|
||||
test_parse_rfc3339_string(&t)
|
||||
test_parse_iso8601_string(&t)
|
||||
|
||||
for _, leak in track.allocation_map {
|
||||
expect(&t, false, fmt.tprintf("%v leaked %m\n", leak.location, leak.size))
|
||||
@@ -91,12 +92,47 @@ RFC3339_Test :: struct{
|
||||
// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
|
||||
rfc3339_tests :: []RFC3339_Test{
|
||||
// This represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
|
||||
{"1985-04-12T23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
|
||||
{"1985-04-12t23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
|
||||
{"1985-04-12 23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
|
||||
// Same, but lowercase z
|
||||
{"1985-04-12 23:20:50.52z", {482196050520000000}, true, 0, 23, false},
|
||||
|
||||
// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
|
||||
// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
|
||||
{"1996-12-19 16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
|
||||
{"1996-12-19 16:39:57-08:00", {851042397000000000}, true, 0, 25, false},
|
||||
{"1996-12-20 00:39:57Z", {851042397000000000}, false, 0, 20, false},
|
||||
|
||||
// This represents the leap second inserted at the end of 1990.
|
||||
// It'll be represented as 1990-12-31 23:59:59 UTC after parsing, and `is_leap` will be set to `true`.
|
||||
{"1990-12-31 23:59:60Z", {662687999000000000}, true, 0, 20, true},
|
||||
|
||||
// This represents the same leap second in Pacific Standard Time, 8 hours behind UTC.
|
||||
{"1990-12-31 15:59:60-08:00", {662687999000000000}, true, 0, 25, true},
|
||||
|
||||
// This represents the same instant of time as noon, January 1, 1937, Netherlands time.
|
||||
// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
|
||||
// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
|
||||
// HH:MM format, and this timestamp uses the closest representable UTC offset.
|
||||
{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
|
||||
{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
|
||||
}
|
||||
|
||||
// A single test case for `test_parse_iso8601_string`.
ISO8601_Test :: struct {
	iso_8601:     string,    // The string handed to the parser
	datetime:     time.Time, // The expected parse result
	apply_offset: bool,      // `true`: parse using `time.iso8601_to_time_utc`, `false`: using `time.iso8601_to_time_and_offset`
	utc_offset:   int,       // The expected UTC offset, only checked if `apply_offset` is `false`
	consumed:     int,       // The expected `consumed` value returned by the parser
	is_leap:      bool,      // The expected leap second flag
}
|
||||
|
||||
// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
|
||||
iso8601_tests :: []ISO8601_Test{
|
||||
// This represents 20 minutes and 50.003362 seconds after the 23rd hour of April 12th, 1985 in UTC.
|
||||
{"1985-04-12T23:20:50.003362", {482196050003362000}, true, 0, 26, false},
|
||||
{"1985-04-12t23:20:50.003362", {482196050003362000}, true, 0, 26, false},
|
||||
{"1985-04-12 23:20:50.003362", {482196050003362000}, true, 0, 26, false},
|
||||
|
||||
// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
|
||||
// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
|
||||
{"1996-12-19T16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
|
||||
@@ -114,8 +150,8 @@ rfc3339_tests :: []RFC3339_Test{
|
||||
// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
|
||||
// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
|
||||
// HH:MM format, and this timestamp uses the closest representable UTC offset.
|
||||
{"1937-01-01T12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
|
||||
{"1937-01-01T12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
|
||||
{"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
|
||||
{"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
|
||||
}
|
||||
|
||||
@test
|
||||
@@ -145,6 +181,33 @@ test_parse_rfc3339_string :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs every entry of `iso8601_tests` through the ISO 8601 parser.
//
// Depending on `apply_offset`, an entry is parsed using either `time.iso8601_to_time_utc` or
// `time.iso8601_to_time_and_offset`. The number of consumed characters is checked first;
// the remaining expectations are only checked if it matches, as the other results are
// meaningless for a failed parse.
@test
test_parse_iso8601_string :: proc(t: ^testing.T) {
	for test in iso8601_tests {
		is_leap := false
		if test.apply_offset {
			res, consumed := time.iso8601_to_time_utc(test.iso_8601, &is_leap)
			msg := fmt.tprintf("[apply offset] Parsing failed: %v -> %v (nsec: %v). Expected %v consumed, got %v", test.iso_8601, res, res._nsec, test.consumed, consumed)
			expect(t, test.consumed == consumed, msg)

			if test.consumed == consumed {
				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
			}
		} else {
			// Pass `&is_leap` here as well, otherwise the leap second check below only ever sees `false`.
			res, offset, consumed := time.iso8601_to_time_and_offset(test.iso_8601, &is_leap)
			msg := fmt.tprintf("Parsing failed: %v -> %v (nsec: %v), offset: %v. Expected %v consumed, got %v", test.iso_8601, res, res._nsec, offset, test.consumed, consumed)
			expect(t, test.consumed == consumed, msg)

			if test.consumed == consumed {
				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
				expect(t, test.utc_offset == offset, fmt.tprintf("UTC offset didn't match. Expected %v, got %v", test.utc_offset, offset))
				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
			}
		}
	}
}
|
||||
|
||||
MONTH_DAYS := []int{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
|
||||
YEAR_START :: 1900
|
||||
YEAR_END :: 2024
|
||||
|
||||
Reference in New Issue
Block a user