Source code for caikit.interfaces.ts.data_model.backends.util

# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Internal utilities for supporting backend implementations"""

# Standard
from datetime import datetime
from typing import Union

# Third Party
import numpy as np
import pandas as pd


def timezoneoffset(adatetime: datetime) -> int:
    """Returns the timezone offset (in seconds) for a given datetime object
    relative to the local system's time.

    The offset is computed as the POSIX timestamp of ``adatetime`` minus the
    POSIX timestamp of a second datetime that carries the same calendar and
    clock fields (year through microsecond) but no ``tzinfo``.

    Args:
        adatetime (datetime): a date of interest.

    Returns:
        int: offset in seconds (can be negative)

        NOTE(review): the value is the difference of two ``timestamp()``
        results, so at runtime it is a float rather than an int -- confirm
        what callers expect.
    """
    # Rebuild the same wall-clock reading without any tzinfo; a naive
    # datetime's timestamp() is interpreted in the local system timezone.
    local_wall_clock = datetime(
        adatetime.year,
        adatetime.month,
        adatetime.day,
        adatetime.hour,
        adatetime.minute,
        adatetime.second,
        adatetime.microsecond,
    )
    return adatetime.timestamp() - local_wall_clock.timestamp()
def pd_timestamp_to_seconds(ts) -> float:
    """Extract the seconds-since-epoch representation of the timestamp

    The input is dispatched on its type, in this order:

    * ``pd.Period``: converted with ``to_timestamp()`` and then
      ``timestamp()``.
    * ``np.datetime64``: cast to nanosecond resolution, then to float, and
      divided by 1e9.
    * ``datetime``: the object's own ``timestamp()`` is returned.
    * Python ``int`` / ``float`` and any NumPy integer or floating scalar
      (``np.integer`` / ``np.floating``): already a number of seconds, so it
      is only converted with ``float()``.

    NOTE: The pandas Timestamp.timestamp() function returns a different value
        than Timestamp.to_pydatetime().timestamp()! Since we want this to
        round-trip with python datetime, we want the latter. They both claim
        to be POSIX, so something is missing leap-something!

    NOTE(review): the ``datetime`` branch calls ``ts.timestamp()`` directly.
        pandas documents ``Timestamp`` as a ``datetime`` subclass, so a
        ``pd.Timestamp`` argument presumably goes through pandas' own
        ``timestamp()`` here rather than ``to_pydatetime().timestamp()`` --
        confirm this is the intended behavior given the note above.

    Args:
        ts: the timestamp-like value to convert (see the list above).

    Returns:
        float: seconds since the epoch.

    Raises:
        ValueError: if ``ts`` is not one of the supported types.
    """
    if isinstance(ts, pd.Period):
        return ts.to_timestamp().timestamp()  # no utc shift
    if isinstance(ts, np.datetime64):
        return ts.astype("datetime64[ns]").astype(float) / 1e9
    if isinstance(ts, datetime):
        return ts.timestamp()
    # Use the abstract NumPy scalar bases so every integer/floating width
    # (int8..int64, uint8..uint64, float16..float64, ...) is accepted instead
    # of only a hand-picked subset.
    if isinstance(ts, (int, float, np.integer, np.floating)):
        return float(ts)
    raise ValueError(f"invalid type {type(ts)} for parameter ts.")
def strip_periodic(
    input_df: pd.DataFrame, ts_col_name: Union[str, None] = None, create_copy=True
) -> pd.DataFrame:
    """
    Removes **the first instance** of a periodic timestamp info
    (because spark doesn't like these when constructing a
    pyspark.sql.DataFrame.) If no periodic timestamp values can be found,
    input_df is returned as is. This method is always a no-op if input_df is
    not a native pandas.DataFrame.

    Args:
        input_df (pd.DataFrame): frame to process; any object that is not a
            pandas.DataFrame is handed back untouched.
        ts_col_name (Union[str, None]): name of the column to convert. When
            None, the first column whose dtype is a pd.PeriodDtype is used.
        create_copy (bool): when True the column is replaced on a shallow
            copy of input_df; when False input_df itself is modified.

    Returns:
        pd.DataFrame: the frame with the selected column's values passed
            through ``to_timestamp()`` wherever a value offers that method,
            or input_df itself when no column was selected.
    """
    if not isinstance(input_df, pd.DataFrame):
        return input_df

    # Work out the positional location of the column to convert (-1: none).
    if ts_col_name is None:
        position = next(
            (
                pos
                for pos, dtype in enumerate(input_df.dtypes)
                if type(dtype) is pd.PeriodDtype
            ),
            -1,
        )
    else:
        try:
            position = input_df.columns.to_list().index(ts_col_name)
        except ValueError:
            # The requested column is not part of this frame.
            position = -1

    if position < 0:
        return input_df

    result = input_df.copy(deep=False) if create_copy else input_df

    # Convert value by value so entries without a to_timestamp() method are
    # carried over unchanged.
    converted = []
    for value in result.iloc[:, position]:
        if hasattr(value, "to_timestamp"):
            converted.append(value.to_timestamp())
        else:
            converted.append(value)
    result[result.columns[position]] = converted

    return result