Skip to content

Commit 84f6d88

Browse files
committed
MODD completed, unittest completed
1 parent c27a23b commit 84f6d88

2 files changed

Lines changed: 32 additions & 199 deletions

File tree

iglu_python/modd.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,16 @@ def modd_single(data: pd.DataFrame) -> float:
6060
"""Calculate MODD for a single subject"""
6161
# Convert data to day-by-day format
6262
data_ip = CGMS2DayByDay(data, tz=tz)
63-
gl_by_id_ip = data_ip[1] # Get interpolated glucose values
63+
gl_by_id_ip = data_ip[0].flatten() # Get interpolated glucose values
64+
dt0 = data_ip[2] # Get time frequency
6465

6566
# Calculate absolute differences with specified lag
67+
# lag is in days, so convert it to minutes and divide by dt0 to get the shift in samples
68+
shift = int(lag * 24 * 60 / dt0) # Convert lag to minutes and divide by dt0
6669
# Shift array by lag and calculate differences
67-
gl_shifted = np.roll(gl_by_id_ip, -lag, axis=0) # Shift down by lag
68-
abs_diffs = np.abs(gl_by_id_ip - gl_shifted)
70+
abs_diffs = np.abs(gl_by_id_ip[shift:] - gl_by_id_ip[:-shift])
71+
# Remove NaNs
72+
abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs
6973

7074
# Calculate mean of absolute differences, ignoring NaN values
7175
modd_val = np.nanmean(abs_diffs)
@@ -74,13 +78,12 @@ def modd_single(data: pd.DataFrame) -> float:
7478

7579
# Handle Series input
7680
if isinstance(data, pd.Series):
77-
# Convert Series to DataFrame format
81+
if not isinstance(data.index, pd.DatetimeIndex):
82+
raise ValueError("Series must have a DatetimeIndex")
7883
data_df = pd.DataFrame(
7984
{
80-
"id": ["subject1"] * len(data),
81-
"time": pd.date_range(
82-
start="2020-01-01", periods=len(data), freq="5min"
83-
),
85+
"id": ["subject1"] * len(data.values),
86+
"time": data.index,
8487
"gl": data.values,
8588
}
8689
)

tests/test_modd.py

Lines changed: 21 additions & 191 deletions
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,20 @@ def test_modd_iglu_r_compatible(scenario):
5050

5151
# Compare DataFrames with precision to 0.001 for numeric columns
5252
pd.testing.assert_frame_equal(
53-
result_df.round(3),
54-
expected_df.round(3),
53+
result_df,
54+
expected_df,
5555
check_dtype=False, # Don't check dtypes since we might have different numeric types
5656
check_index_type=True,
5757
check_column_type=True,
5858
check_frame_type=True,
5959
check_names=True,
60-
check_exact=True,
6160
check_datetimelike_compat=True,
6261
check_categorical=True,
6362
check_like=True,
6463
check_freq=True,
6564
check_flags=True,
65+
check_exact=False,
66+
rtol=1e-3,
6667
)
6768

6869

@@ -96,32 +97,17 @@ def test_modd_default_output():
9697

9798
assert isinstance(result, pd.DataFrame)
9899
assert all(col in result.columns for col in ["id", "MODD"])
99-
assert all(result["MODD"] >= 0)
100+
assert all(pd.isna(result["MODD"])) # Should be NaN for insufficient data
100101

101102

102103
def test_modd_custom_lag():
103104
"""Test modd calculation with custom lag value"""
105+
samples_per_day = int(24*60/5) # sample each 5 min
104106
data = pd.DataFrame(
105107
{
106-
"id": [
107-
"subject1",
108-
"subject1",
109-
"subject1",
110-
"subject1",
111-
"subject2",
112-
"subject2",
113-
],
114-
"time": pd.to_datetime(
115-
[
116-
"2020-01-01 00:00:00", # 0 min
117-
"2020-01-01 00:05:00", # 5 min
118-
"2020-01-01 00:10:00", # 10 min
119-
"2020-01-01 00:15:00", # 15 min
120-
"2020-01-01 00:00:00", # subject2
121-
"2020-01-01 00:05:00", # subject2
122-
]
123-
),
124-
"gl": [150, 200, 180, 160, 140, 190],
108+
"id": ["subject1"] * 3 * samples_per_day,
109+
"time": pd.date_range(start="2020-01-01 00:00:00", periods=3*samples_per_day, freq="5min"),
110+
"gl": [150]*samples_per_day + [200]*samples_per_day + [180]*samples_per_day,
125111
}
126112
)
127113

@@ -133,180 +119,24 @@ def test_modd_custom_lag():
133119

134120
def test_modd_series_input():
135121
"""Test modd calculation with Series input"""
136-
series_data = pd.Series([150, 200, 180, 160, 140, 190])
122+
samples_per_day = int(24*60/5) # sample each 5 min
123+
series_data = pd.Series(
124+
[150]*samples_per_day + [200]*samples_per_day + [250]*samples_per_day,
125+
index=pd.date_range(start="2020-01-01 00:00:00", periods=3*samples_per_day, freq="5min")
126+
)
137127
result = iglu.modd(series_data)
138128
assert isinstance(result, pd.DataFrame)
139129
assert "MODD" in result.columns
140130
assert len(result) == 1
131+
assert result["MODD"].iloc[0] == 50.0
132+
133+
# Exception for series without DatetimeIndex
134+
with pytest.raises(ValueError):
135+
iglu.modd(series_data.reset_index(drop=True))
141136

142137

143138
def test_modd_empty_input():
144139
"""Test modd calculation with empty DataFrame"""
145140
empty_data = pd.DataFrame(columns=["id", "time", "gl"])
146-
result = iglu.modd(empty_data)
147-
assert isinstance(result, pd.DataFrame)
148-
assert len(result) == 0
149-
150-
151-
def test_modd_single_subject():
152-
"""Test modd calculation with single subject data"""
153-
single_subject = pd.DataFrame(
154-
{
155-
"id": ["subject1"] * 4,
156-
"time": pd.to_datetime(
157-
[
158-
"2020-01-01 00:00:00",
159-
"2020-01-01 00:05:00",
160-
"2020-01-01 00:10:00",
161-
"2020-01-01 00:15:00",
162-
]
163-
),
164-
"gl": [150, 150, 150, 150], # Constant glucose
165-
}
166-
)
167-
result = iglu.modd(single_subject)
168-
assert isinstance(result, pd.DataFrame)
169-
assert all(col in result.columns for col in ["id", "MODD"])
170-
assert len(result) == 1
171-
"""Test the output format of modd function"""
172-
173-
# Create test data with known values
174-
data = pd.DataFrame(
175-
{
176-
"id": [
177-
"subject1",
178-
"subject1",
179-
"subject1",
180-
"subject1",
181-
"subject2",
182-
"subject2",
183-
],
184-
"time": pd.to_datetime(
185-
[
186-
"2020-01-01 00:00:00", # 0 min
187-
"2020-01-01 00:05:00", # 5 min
188-
"2020-01-01 00:10:00", # 10 min
189-
"2020-01-01 00:15:00", # 15 min
190-
"2020-01-01 00:00:00", # subject2
191-
"2020-01-01 00:05:00", # subject2
192-
]
193-
),
194-
"gl": [150, 200, 180, 160, 140, 190],
195-
}
196-
)
197-
198-
# Test with default parameters
199-
result = iglu.modd(data)
200-
201-
# Check DataFrame structure
202-
assert isinstance(result, pd.DataFrame)
203-
assert all(col in result.columns for col in ["id", "MODD"])
204-
205-
# Check values are non-negative
206-
assert all(result["MODD"] >= 0)
207-
208-
# Test with different lag values
209-
result_lag2 = iglu.modd(data, lag=2)
210-
assert isinstance(result_lag2, pd.DataFrame)
211-
assert all(col in result_lag2.columns for col in ["id", "MODD"])
212-
assert all(result_lag2["MODD"] >= 0)
213-
214-
# Test with Series input
215-
series_data = pd.Series([150, 200, 180, 160, 140, 190])
216-
result_series = iglu.modd(series_data)
217-
assert isinstance(result_series, pd.DataFrame)
218-
assert "MODD" in result_series.columns
219-
assert len(result_series) == 1
220-
221-
# Test with empty data
222-
empty_data = pd.DataFrame(columns=["id", "time", "gl"])
223-
result_empty = iglu.modd(empty_data)
224-
assert isinstance(result_empty, pd.DataFrame)
225-
assert len(result_empty) == 0
226-
227-
# Test with single subject and constant glucose
228-
single_subject = pd.DataFrame(
229-
{
230-
"id": ["subject1"] * 4,
231-
"time": pd.to_datetime(
232-
[
233-
"2020-01-01 00:00:00",
234-
"2020-01-01 00:05:00",
235-
"2020-01-01 00:10:00",
236-
"2020-01-01 00:15:00",
237-
]
238-
),
239-
"gl": [150, 150, 150, 150], # Constant glucose
240-
}
241-
)
242-
result_single = iglu.modd(single_subject)
243-
assert len(result_single) == 1
244-
assert result_single["MODD"].iloc[0] == 0 # Should be 0 for constant glucose
245-
246-
# Test with missing values
247-
data_with_na = pd.DataFrame(
248-
{
249-
"id": ["subject1"] * 4,
250-
"time": pd.to_datetime(
251-
[
252-
"2020-01-01 00:00:00",
253-
"2020-01-01 00:05:00",
254-
"2020-01-01 00:10:00",
255-
"2020-01-01 00:15:00",
256-
]
257-
),
258-
"gl": [150, np.nan, 180, 160],
259-
}
260-
)
261-
result_na = iglu.modd(data_with_na)
262-
assert isinstance(result_na, pd.DataFrame)
263-
assert len(result_na) == 1
264-
265-
# Test with timezone parameter
266-
result_tz = iglu.modd(data, tz="UTC")
267-
assert len(result_tz) == 1
268-
assert isinstance(result_tz["MODD"].iloc[0], float)
269-
270-
# Test with multiple days of data
271-
multi_day_data = pd.DataFrame(
272-
{
273-
"id": ["subject1"] * 8,
274-
"time": pd.to_datetime(
275-
[
276-
"2020-01-01 00:00:00",
277-
"2020-01-01 00:05:00",
278-
"2020-01-01 00:10:00",
279-
"2020-01-01 00:15:00",
280-
"2020-01-02 00:00:00",
281-
"2020-01-02 00:05:00",
282-
"2020-01-02 00:10:00",
283-
"2020-01-02 00:15:00",
284-
]
285-
),
286-
"gl": [150, 200, 180, 160, 140, 190, 170, 210],
287-
}
288-
)
289-
result_multi = iglu.modd(multi_day_data)
290-
assert len(result_multi) == 1
291-
assert isinstance(result_multi["MODD"].iloc[0], float)
292-
293-
# Test with insufficient data points
294-
small_data = pd.DataFrame(
295-
{
296-
"id": ["subject1"] * 3,
297-
"time": pd.to_datetime(
298-
["2020-01-01 00:00:00", "2020-01-01 00:05:00", "2020-01-01 00:10:00"]
299-
),
300-
"gl": [150, 160, 170],
301-
}
302-
)
303-
result_small = iglu.modd(small_data)
304-
assert len(result_small) == 1
305-
assert isinstance(result_small["MODD"].iloc[0], float)
306-
307-
# Test with lag larger than available data
308-
result_large_lag = iglu.modd(multi_day_data, lag=3)
309-
assert len(result_large_lag) == 1
310-
assert pd.isna(
311-
result_large_lag["MODD"].iloc[0]
312-
) # Should be NaN for insufficient data
141+
with pytest.raises(ValueError):
142+
iglu.modd(empty_data)

0 commit comments

Comments
 (0)