Skip to content

Make timestamp handling timezone-aware #151

@yoid2000

Description

@yoid2000

Currently we fail when dealing with timezone-aware datetime columns.

Here is a hint as to how we might go about it:

import pandas as pd
import numpy as np
from typing import Union
from random import Random

# Assuming these are defined elsewhere in your code
class DataConvertor:
    """Empty stand-in for the convertor base class assumed to be defined elsewhere."""

class ColumnType:
    """Stand-in namespace for column type tags; only the timestamp tag is defined here."""

    TIMESTAMP = "timestamp"

class Interval:
    """Empty stand-in for the interval type assumed to be defined elsewhere.

    NOTE(review): ``_generate_float`` reads ``lower`` and ``upper`` from its
    interval argument, which this placeholder does not define.
    """

class MicrodataValue:
    """Empty stand-in for the microdata value type assumed to be defined elsewhere."""

def _generate_float(interval: Interval, rng: Random) -> float:
    """Draw a float from *rng* uniformly between the interval's bounds.

    Placeholder implementation: reads ``interval.lower`` and
    ``interval.upper`` and passes them to ``rng.uniform``.
    """
    low, high = interval.lower, interval.upper
    return rng.uniform(low, high)

# Reference instant from which TimestampConvertor counts seconds.
# It is timezone-naive: no tz is passed to pd.Timestamp here.
TIMESTAMP_REFERENCE = pd.Timestamp("1800-01-01T00:00:00")

class TimestampConvertor(DataConvertor):
    """Convert between ``pd.Timestamp`` values and float second offsets.

    Offsets are counted in seconds from the module-level
    ``TIMESTAMP_REFERENCE``. Both timezone-naive and timezone-aware
    timestamps are accepted by ``to_float``.
    """

    def column_type(self) -> ColumnType:
        """Return the column type tag handled by this convertor."""
        return ColumnType.TIMESTAMP

    def to_float(self, value: pd.Timestamp) -> float:
        """Return the seconds elapsed from ``TIMESTAMP_REFERENCE`` to *value*.

        pandas cannot subtract a timezone-naive timestamp from a
        timezone-aware one (or vice versa), so whichever side is naive is
        first localized into the other side's timezone. When both sides are
        naive, or both are aware, they are subtracted as they are.
        """
        assert isinstance(value, pd.Timestamp)

        reference = TIMESTAMP_REFERENCE
        if value.tzinfo is not None and reference.tzinfo is None:
            # NOTE(review): localizing the reference into the value's zone
            # means the result depends on that zone's UTC offset at the
            # reference date — confirm this is the intended semantics.
            reference = reference.tz_localize(value.tzinfo)
        elif value.tzinfo is None and reference.tzinfo is not None:
            value = value.tz_localize(reference.tzinfo)

        # Dividing by a one-second Timedelta yields the offset in seconds.
        return float((value - reference) / pd.Timedelta(1, "s"))

    def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
        """Sample a float from *interval* and pair it with its timestamp.

        Returns a ``(timestamp, value)`` tuple where ``value`` is the float
        produced by ``_generate_float`` and ``timestamp`` is
        ``TIMESTAMP_REFERENCE`` advanced by ``int(value)`` seconds (the
        fractional part of ``value`` is dropped by ``int``).
        """
        value = _generate_float(interval, rng)

        # Adding a timedelta keeps the reference's timezone (or lack of one),
        # so no further localization is needed; calling tz_localize on an
        # already timezone-aware timestamp would raise TypeError.
        # NOTE(review): the result therefore never takes the timezone of the
        # original column — confirm that is acceptable.
        timestamp = TIMESTAMP_REFERENCE + np.timedelta64(int(value), "s")

        return (timestamp, value)

# Example usage: convert one timezone-naive and one UTC-aware timestamp.
example = TimestampConvertor()
value_naive = pd.Timestamp("2023-01-01")
value_aware = pd.Timestamp("2023-01-01", tz="UTC")

# Both conversions are expected to complete without raising.
for _sample in (value_naive, value_aware):
    print(example.to_float(_sample))

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions