Skip to content

Commit 7c76fc5

Browse files
authored
Bug: Include military time for semantic infer (#250)
* Add time flexible case
* Remove print statements
* Change naming convention
* Adjust spacing
* Fix formatting
* Update naming convention to military time
* Remove unnecessary test case
* Test file properly formatted
1 parent f5583be commit 7c76fc5

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

preprocessing/series_semantic.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,23 @@ def check_type(self, series: pl.Series):
3434
return self.column_type(series.dtype)
3535

3636

37+
def parse_time_military(s: pl.Series) -> pl.Series:
    """Parse time-of-day strings into ``pl.Time`` by trying several formats.

    Formats are attempted in a fixed order: ``%H:%M:%S``, ``%H:%M``,
    ``%I:%M:%S %p`` and ``%I:%M %p``. The result of the first format that
    parses at least one value is returned unchanged, so values that do not
    match that winning format are null in the output.

    Args:
        s: Series of time strings to parse.

    Returns:
        A ``pl.Time`` series with the same length and name as ``s``. When no
        format parses any value, every element is null.
    """
    formats_to_try = ("%H:%M:%S", "%H:%M", "%I:%M:%S %p", "%I:%M %p")

    for fmt in formats_to_try:
        try:
            # strict=False is expected to yield nulls for values that do not
            # match ``fmt`` rather than raising (see polars strptime docs).
            result = s.str.strptime(pl.Time, format=fmt, strict=False)
        except Exception:
            # Best effort: skip a format that cannot be applied at all.
            # Unlike a bare ``except``, this still lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
        if result.is_not_null().any():  # at least one value parsed
            return result

    # No format matched anything: return an all-null Time series that keeps
    # the input's name, consistent with the successful path.
    return pl.Series(s.name, [None] * s.len(), dtype=pl.Time)
52+
53+
3754
def parse_datetime_with_tz(s: pl.Series) -> pl.Series:
3855
"""Parse datetime strings with timezone info (both abbreviations and offsets)"""
3956
import warnings
@@ -97,6 +114,14 @@ def parse_datetime_with_tz(s: pl.Series) -> pl.Series:
97114
data_type="datetime",
98115
)
99116

117+
# Semantic for string columns holding clock times. Conversion is delegated to
# parse_time_military; validate_result flags which converted values are
# non-null.
time_military = SeriesSemantic(
    semantic_name="time_military",
    data_type="datetime",
    column_type=pl.String,
    try_convert=parse_time_military,
    validate_result=lambda converted: converted.is_not_null(),
)
124+
100125
datetime_string = SeriesSemantic(
101126
semantic_name="datetime",
102127
column_type=pl.String,
@@ -191,6 +216,7 @@ def parse_datetime_with_tz(s: pl.Series) -> pl.Series:
191216
datetime_string,
192217
date_string,
193218
time_string,
219+
time_military,
194220
timestamp_seconds,
195221
timestamp_milliseconds,
196222
url,

preprocessing/test_series_semantic.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
native_date,
1010
native_datetime,
1111
parse_datetime_with_tz,
12+
parse_time_military,
1213
text_catch_all,
1314
time_string,
1415
)
@@ -122,6 +123,21 @@ def test_parse_datetime_with_tz():
122123
assert result.is_not_null().all()
123124

124125

126+
def test_parse_time_military():
    """Check that 24-hour time strings parse to fully populated ``pl.Time``."""
    samples = (
        # 24-hour format without seconds (HH:MM)
        ["23:39", "12:45", "00:30"],
        # 24-hour format with seconds (HH:MM:SS)
        ["14:30:15", "09:15:30", "23:59:59"],
    )

    for values in samples:
        parsed = parse_time_military(pl.Series(values))
        assert parsed.dtype == pl.Time
        assert parsed.is_not_null().all()
139+
140+
125141
def test_parse_datetime_with_tz_no_timezone():
126142
"""Test datetime parsing without timezone suffix"""
127143
series = pl.Series(["2025-02-28 00:36:15", "2025-02-28 00:36:13"])
@@ -186,6 +202,16 @@ def test_parse_datetime_mixed_timezones_warning():
186202
assert result.is_not_null().all()
187203

188204

205+
def test_time_military_semantic_inference():
    """Check that a column of HH:MM strings is inferred as ``time_military``."""
    # 24-hour values without seconds
    hhmm_values = pl.Series(["23:47", "14:30", "09:15", "00:00", "12:45"])

    inferred = infer_series_semantic(hhmm_values)

    assert inferred is not None
    assert inferred.semantic_name == "time_military"
    assert inferred.data_type == "datetime"
213+
214+
189215
# Edge cases
190216
def test_all_none_series():
191217
"""Test series with all null values"""

0 commit comments

Comments
 (0)