from __future__ import annotations
from datetime import date
from datetime import timedelta
from typing import NamedTuple
from urllib.parse import quote
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from flask_babel import _
from plotly.graph_objects import Figure
from scipy import stats
def _format_label(x: float) -> str:
    """
    Helper to remove ``nan`` values from the labels. If not done, ``nan``s are
    displayed as ``0`` in the calendar plot. Floats are converted to ints.

    :param x: a floating point number which may be ``nan``

    :returns: A string representation of a float/int with 0 decimals and
        ``nan`` represented as ``''`` (an empty string)
    """
    return '' if np.isnan(x) else f'{x:.0f}'


# ``otypes`` is set explicitly so NumPy never has to call the helper on the
# first element to find out the output dtype. Without it, ``np.vectorize``
# refuses to operate on size-0 inputs.
_format_labels = np.vectorize(_format_label, otypes=[str])
class ColumnMapping(NamedTuple):
    """Names of the data frame columns holding each required parameter.

    Every field is a column *name* (a string), not the data itself.

    :param datetime: name of the column storing the date (and possibly
        time) information
    :param temp_mean: name of the column storing the average
        air-temperature
    :param temp_max: name of the column storing the maximum
        air-temperature
    :param temp_min: name of the column storing the minimum
        air-temperature
    :param day_of_year: name of the column storing the day of year number
    """

    datetime: str
    temp_mean: str
    temp_max: str
    temp_min: str
    day_of_year: str
class DataProvider:
    """Base Class for defining a custom data provider. :meth:`get_daily_data`
    and :meth:`get_current_data` need to be overridden.

    :param col_mapping: a :func:`ColumnMapping` mapping the column names
        returned by :meth:`get_daily_data` or :meth:`get_current_data` to
        variables so they can be used later
    :param name: the name of the station that is displayed on the
        website
    :param id: the ID of the station that is used for compiling links. It is
        stored URL-quoted (:func:`urllib.parse.quote`). If multiple
        DataProviders are used, each one must have a unique ``id``.
    :param min_year: the minimum year for which data is available. This is
        used to determine the first year for which a calendar plot is
        created.
    """

    def __init__(
            self,
            col_mapping: ColumnMapping,
            name: str,
            id: str,
            min_year: int,
    ) -> None:
        self.col_mapping = col_mapping
        self.name = name
        # quoted once here so the value can be placed into links as-is
        self.id = quote(id)
        self.min_year = min_year

    def get_daily_data(self, d: date) -> pd.DataFrame:
        """This needs to be implemented and most likely be a database query or
        a file that is read. It might make sense to cache this function. ``d``
        may be used as a cache-key.

        This should return a :func:`pd.DataFrame` with columns containing:

        - date a datetime object
        - mean temperature
        - the day of the year

        The index must be a :func:`pd.DatetimeIndex`

        The column names must match those defined via :attr:`col_mapping`

        :param d: the date for which to prepare data. This will usually be
            today

        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError('getting daily data needs to be implemented')

    def get_current_data(self, d: date) -> pd.DataFrame:
        """This needs to be implemented and most likely be a database query or
        a file that is read. It might make sense to cache this function. ``d``
        may be used as a cache-key.

        This should return a :func:`pd.DataFrame` with columns containing:

        - date (as a datetime object)
        - maximum temperature
        - minimum temperature

        The index must be a :func:`pd.DatetimeIndex`

        The column names must match those defined via :attr:`col_mapping`

        :param d: the date for which to prepare data. This will usually be
            today

        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError(
            'getting current data needs to be implemented',
        )

    def prepare_daily_and_calendar_data(
            self,
            d: date,
            current_avg: float | None = None,
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """
        This gets the daily data from the database and creates the calendar
        plot data. This is separated from :meth:`prepare_data` so it can be
        used via :func:`last_years_calendar`

        For every day of the year of ``d`` the percentile of its mean
        temperature is computed against all complete rows of the previous
        years that fall into a window of :math:`\\pm` 7 days around that day
        of the year.

        :param d: the date for which to prepare data. This will usually be
            today or in this case the first day of the year to prepare the
            calendar data for
        :param current_avg: This is used to add the current day which has no
            entry in the daily data just yet. When working with previous years,
            this should be left as ``None``

        :returns: a tuple of :func:`pd.DataFrame`: ``(daily, calendar_data)``.
            ``daily`` is returned as provided by :meth:`get_daily_data`,
            ``calendar_data`` has one row per month (abbreviated month name)
            and one column per day of the month containing the percentile
        """
        temp_col = self.col_mapping.temp_mean
        doy_col = self.col_mapping.day_of_year

        daily = self.get_daily_data(d)
        # the reference period: complete rows of all years before ``d``
        history = daily.loc[daily.index.year < d.year].dropna()
        history_temp = history[temp_col]
        history_doy = history[doy_col]

        def _calc_perc(day: pd.Timestamp, value: float) -> float:
            # going through actual dates (instead of doy +/- 7) makes the
            # window wrap correctly around the turn of the year
            allowed_doy = pd.date_range(
                start=day - timedelta(days=7),
                end=day + timedelta(days=7),
                periods=15,
            ).day_of_year
            return stats.percentileofscore(
                history_temp[history_doy.isin(allowed_doy)],
                value,
            )

        # explicit copy: rows/columns are added below and must neither leak
        # into ``daily`` nor depend on pandas' view-vs-copy behaviour
        calendar_data: pd.DataFrame = daily.loc[
            daily.index.year == d.year
        ].copy()

        if current_avg is not None:
            # add the current day to the calendar plot. The values are
            # assigned by column name, so this does not depend on the column
            # order (or on additional columns) of the daily data
            calendar_data.loc[pd.Timestamp(d)] = pd.Series({
                temp_col: current_avg,
                doy_col: d.timetuple().tm_yday,
            })

        # assigned positionally (plain array), so no index alignment happens
        calendar_data['perc'] = np.array(
            [
                _calc_perc(day, value)
                for day, value in calendar_data[temp_col].items()
            ],
            dtype=float,
        )

        # fill the year, so the plot always shows the entire year
        days = pd.date_range(
            start=date(d.year, 1, 1),
            end=date(d.year, 12, 31), freq='1D',
            name=self.col_mapping.datetime,
        )
        calendar_data = calendar_data.reindex(days)
        calendar_data['day'] = calendar_data.index.day
        calendar_data['month'] = calendar_data.index.month
        calendar_data['month_name'] = calendar_data.index.strftime('%b')
        # the numeric month is only part of the index to get the rows into
        # calendar order; it is dropped again afterwards
        calendar_data = calendar_data.pivot(
            index=['month', 'month_name'],
            columns='day',
            values='perc',
        ).droplevel('month')
        return (daily, calendar_data)

    def prepare_data(self, d: date) -> PlotData:
        """
        The purpose of this function is to compile a
        :func:`isithot.blueprints.plots.PlotData()` object which is used
        for the creation of all plots.

        :param d: the date for which to prepare data. This will usually be
            today

        :returns: the data needed for creating the plots and texts all
            contained in a :func:`isithot.blueprints.plots.PlotData()`
            object
        """
        temp_col = self.col_mapping.temp_mean
        temp_min_col = self.col_mapping.temp_min
        temp_max_col = self.col_mapping.temp_max

        now = self.get_current_data(d)
        # compile the current data
        today_data = now.loc[now.index >= pd.Timestamp(d)].agg(
            {
                temp_min_col: 'min',
                temp_max_col: 'max',
            },
        )
        # TODO: what if it's the next day and no data is there (yet)
        current_avg = (
            today_data[temp_max_col] + today_data[temp_min_col]
        ) / 2

        daily, calendar_data = self.prepare_daily_and_calendar_data(
            d=d,
            current_avg=current_avg,
        )
        daily = daily.dropna()

        # warming trend for the entire time series: yearly means of all
        # years before the current one. ``drop=True`` replaces the dates by
        # a 0-based position which is used as x for the regression
        first_doy = pd.Timestamp(year=d.year, month=1, day=1)
        trend_overall_data = daily[temp_col].loc[
            daily.index < first_doy
        ].resample('1YE').mean().reset_index(drop=True).dropna()
        trend_overall = stats.linregress(
            x=trend_overall_data.index.values,
            y=trend_overall_data.values,
        )

        # extract data for distribution plots
        allowed_doy = pd.date_range(
            start=d - timedelta(days=7),
            end=(d + timedelta(days=7)),
            periods=15,
        ).day_of_year
        data: pd.DataFrame = daily.loc[
            (daily.index.year < d.year) &
            daily[self.col_mapping.day_of_year].isin(allowed_doy)
        ]

        # warming trend for current time span of the year
        trend_month_data = data[temp_col].resample(
            '1YE',
        ).mean().reset_index(drop=True).dropna()
        trend_month = stats.linregress(
            x=trend_month_data.index.values,
            y=trend_month_data.values,
        )

        current_avg_perc = stats.percentileofscore(
            a=data[temp_col],
            score=current_avg,
        )
        q5 = data[temp_col].quantile(q=0.05)
        q95 = data[temp_col].quantile(q=0.95)
        med = data[temp_col].median()

        return PlotData(
            current_date=d,
            daily=daily,
            now=now,
            toy_data=data,
            trend_overall_data=trend_overall_data,
            trend_month_data=trend_month_data,
            calendar_data=calendar_data,
            trend_overall_slope=trend_overall.slope,
            trend_overall_intercept=trend_overall.intercept,
            trend_month_slope=trend_month.slope,
            trend_month_intercept=trend_month.intercept,
            current_avg=current_avg,
            current_avg_percentile=current_avg_perc,
            q5=q5,
            q95=q95,
            median=med,
        )

    def distrib_fig(self, fig_data: PlotData) -> Figure:
        """
        Creates a figure representing the distribution with 5% and 95%
        percentile and the trends for the time of year and the overall warming
        trend.

        :param fig_data: a :func:`PlotData` object containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on the
            page, defining the plot including all data
        """
        toy_index = fig_data.toy_data.index
        first_day = toy_index.min()
        # all lines run two years past the last observation; the percentile
        # lines additionally start one year before the first observation
        line_end = toy_index.max() + timedelta(days=365*2)
        percentile_start = first_day - timedelta(days=365)

        fig = go.Figure()
        # the dots representing the daily mean temperature
        fig.add_trace(
            go.Scatter(
                x=toy_index,
                y=fig_data.toy_data[self.col_mapping.temp_mean],
                mode='markers',
                name=_('Daily Average Temperature'),
                marker={'size': 5, 'color': 'rgba(0, 0, 0, 0.2)'},
                showlegend=False,
                hovertemplate='<b>%{x|%Y-%m-%d}</b>: %{y:.1f} °C',
            ),
        ).update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            yaxis_title=_('Daily Average Temperature (°C)'),
            template='simple_white',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            yaxis={
                'fixedrange': True,
                'nticks': 10,
            },
            xaxis={
                'fixedrange': True,
                'nticks': 20,
            },
        )
        # the horizontal line indicating the 5% percentile
        fig.add_trace(
            go.Scatter(
                x=[percentile_start, line_end],
                y=[fig_data.q5, fig_data.q5],
                mode='lines+text',
                text=[_('<b>5th percentile: %(q5).1f °C</b>', q5=fig_data.q5)],
                textposition='top right',
                textfont_size=14,
                showlegend=False,
                line={'color': 'black', 'dash': 'dash', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the horizontal line indicating the 95% percentile
        fig.add_trace(
            go.Scatter(
                x=[percentile_start, line_end],
                y=[fig_data.q95, fig_data.q95],
                mode='lines+text',
                showlegend=False,
                text=[_('<b>95th percentile: %(q95).1f °C</b>', q95=fig_data.q95)],  # noqa: E501
                textposition='top right',
                textfont_size=14,
                line={'color': 'black', 'dash': 'dash', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the trend line for this time of the year
        fig.add_trace(
            go.Scatter(
                x=[first_day, line_end],
                y=[
                    fig_data.trend_month_intercept,
                    fig_data.trend_month_intercept +
                    len(fig_data.trend_month_data) *
                    fig_data.trend_month_slope,
                ],
                mode='lines+text',
                showlegend=False,
                text=[
                    _(
                        '<b>Trend for this time of year: '
                        '%(century_trend).1f K/century</b>',
                        century_trend=fig_data.trend_month_slope * 100,
                    ),
                ],
                textposition='bottom right',
                textfont_size=14,
                line={'color': 'red', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the overall trend line across all data. The points are given right
        # to left, so the (single) text label is attached to the right end.
        # NOTE(review): this line starts at ``trend_month_intercept`` and not
        # at ``trend_overall_intercept`` - presumably so both trend lines
        # share their origin and only the slopes are compared; confirm
        fig.add_trace(
            go.Scatter(
                x=[line_end, first_day],
                y=[
                    fig_data.trend_month_intercept +
                    len(fig_data.trend_overall_data) *
                    fig_data.trend_overall_slope,
                    fig_data.trend_month_intercept,
                ],
                mode='lines+text',
                showlegend=False,
                text=[
                    _(
                        '<b>Overall Trend: %(century_trend).1f '
                        'K/century</b>',
                        century_trend=fig_data.trend_overall_slope * 100,
                    ),
                ],
                textposition='top left',
                textfont_size=14,
                line={'color': 'red', 'width': 2, 'dash': 'dash'},
                hoverinfo='none',
            ),
        )
        # the red marker showing today's value
        fig.add_trace(
            go.Scatter(
                x=[fig_data.current_date],
                y=[fig_data.current_avg],
                mode='markers+text',
                marker={
                    'size': 12, 'color': 'red', 'line': {
                        'color': 'rgba(255, 0, 0, 0.5)', 'width': 2,
                    },
                },
                text=[
                    _(
                        '<b>Today: %(cur_avg).1f °C</b>',
                        cur_avg=fig_data.current_avg,
                    ),
                ],
                textfont_size=14,
                textposition='top left',
                showlegend=False,
                hoverinfo='none',
            ),
        )
        return fig

    def hist_fig(self, fig_data: PlotData) -> Figure:
        """
        Creates a figure representing a histogram or more specifically a
        kernel density estimate. This includes lines for the 5% percentile and
        95% percentile as well as the median. A red line for today's value is
        added.

        :param fig_data: a :func:`PlotData` object containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on the
            page, defining the plot including all data
        """
        temps = fig_data.toy_data[self.col_mapping.temp_mean]
        # calculate the kernel density estimation curve
        kde = stats.gaussian_kde(temps.dropna())
        # check the spacing with today's value. If we have a record, the plot
        # may be cut off - adjust this!
        kde_min = temps.min()
        kde_max = temps.max()
        # this ensures that today does not lay outside of the kde curve. Both
        # comparisons are False for ``nan``, so the range is left untouched
        # when there is no value for today
        if fig_data.current_avg < kde_min:
            kde_min = fig_data.current_avg
        elif fig_data.current_avg > kde_max:
            kde_max = fig_data.current_avg

        x_vals = np.linspace(kde_min - 1, kde_max + 1, 200)
        y_vals = kde.evaluate(x_vals)
        # all vertical lines span from the peak of the curve down to zero
        y_max = max(y_vals)

        fig = go.Figure()
        # Create line plot for KDE curve
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode='lines',
                line={'color': 'grey'},
                fill='tozeroy',
                showlegend=False,
                hoverinfo='none',
            ),
        )
        # the vertical lines for the 5% and 95% percentile and the median
        for value in (fig_data.q5, fig_data.q95, fig_data.median):
            fig.add_trace(
                go.Scatter(
                    x=[value, value],
                    y=[y_max, 0],
                    mode='lines',
                    showlegend=False,
                    line={'color': 'black', 'dash': 'dash', 'width': 2},
                    hoverinfo='none',
                ),
            )
        # the vertical red line for today's temperature
        fig.add_trace(
            go.Scatter(
                x=[fig_data.current_avg, fig_data.current_avg],
                y=[y_max, 0],
                mode='lines',
                showlegend=False,
                line={'color': 'red', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the labels of the dashed lines created above, written bottom-up
        # along each line
        percentile_labels = [
            (
                fig_data.q95,
                _('<b> 95th percentile: %(q95).1f °C</b>', q95=fig_data.q95),
            ),
            (
                fig_data.q5,
                _('<b> 5th percentile: %(q5).1f °C</b>', q5=fig_data.q5),
            ),
            (
                fig_data.median,
                _(
                    '<b> 50th percentile: %(med).1f °C</b>',
                    med=fig_data.median,
                ),
            ),
        ]
        # making the plot transparent and adding the annotation for the lines
        # created above
        fig.update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            xaxis_title=_('Daily Average Temperature (°C)'),
            template='simple_white',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            yaxis={'visible': False},
            xaxis={
                'fixedrange': True,
                'nticks': 20,
            },
            annotations=[
                go.layout.Annotation(
                    x=value,
                    y=0,
                    xref='x',
                    yref='y',
                    text=label,
                    showarrow=False,
                    yanchor='bottom',
                    textangle=-90,
                    xshift=-10,
                )
                for value, label in percentile_labels
            ],
        )
        # there might be cases where we don't have data for today, so we cannot
        # annotate the red line (which is not drawn if it is nan)
        if not np.isnan(fig_data.current_avg):
            fig.add_annotation(
                go.layout.Annotation(
                    x=fig_data.current_avg,
                    y=y_max,
                    xref='x',
                    yref='y',
                    text=_(
                        '<b>Today: %(cur_avg).1f °C</b>',
                        cur_avg=fig_data.current_avg,
                    ),
                    showarrow=False,
                    yanchor='top',
                    textangle=-90,
                    xshift=-10,
                ),
            )
        return fig

    def calendar_fig(self, calendar_data: pd.DataFrame) -> Figure:
        """
        Creates a figure representing a calendar plot of the current year
        indicating the percentile of each day as a color and a number.

        :param calendar_data: a :func:`pd.DataFrame` containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on the
            page, defining the plot including all data
        """
        # the numbers written into the cells; ``nan`` becomes an empty string
        text = _format_labels(calendar_data.values)
        fig = px.imshow(
            calendar_data,
            color_continuous_scale='RdBu_r',
            aspect='auto',
            zmax=100,
            zmin=0,
        )
        fig.update_traces(text=text, texttemplate='%{text}')
        fig.update_coloraxes(colorbar={'thickness': 12, 'xpad': 0})
        fig.update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            hovermode=False,
            template='simple_white',
            xaxis={
                'fixedrange': True,
                'tickmode': 'linear',
                'tick0': 0,
                'dtick': 1,
                'title': None,
            },
            yaxis={
                'fixedrange': True,
                'tickmode': 'linear',
                'tick0': 0,
                'dtick': 1,
                'title': None,
            },
        )
        return fig
class PlotData(NamedTuple):
    """
    Container for everything the plots and texts are built from.

    :param current_date: The date for which the data is compiled. This is
        usually today
    :param daily: A pandas dataframe containing all daily data that is
        available in the database
    :param now: The latest data from the station (high resolution raw data)
    :param toy_data: Data for the current time of year (toy). For this a week
        before ``current_date`` and a week after ``current_date`` is extracted
    :param trend_overall_data: (Yearly) data needed to calculate the overall
        trend since the start of the measurements
    :param trend_month_data: Data needed for calculating the trend for the
        current month
    :param calendar_data: Data needed to create a calendar plot for the current
        year
    :param trend_overall_slope: The slope of the line for the overall warming
        trend across all years and times of year
    :param trend_overall_intercept: The intercept of the line for the overall
        warming trend across all years and times of year
    :param trend_month_slope: The slope of the line for the current warming
        trend across all years for the current time of year :math:`\\pm` 7 days
    :param trend_month_intercept: The intercept of the line for the current
        warming trend across all years for the current time of year
        :math:`\\pm` 7 days
    :param current_avg: The current average of today calculated from averaging
        the minimum and maximum temperature
    :param current_avg_percentile: The percentile of ``current_avg``
    :param q5: the 5% percentile for this time of the year
    :param median: the median/50% percentile for this time of the year
    :param q95: the 95% percentile for this time of the year
    """

    current_date: date
    daily: pd.DataFrame
    now: pd.DataFrame
    toy_data: pd.DataFrame
    trend_overall_data: pd.DataFrame
    trend_month_data: pd.DataFrame
    calendar_data: pd.DataFrame
    trend_overall_slope: float
    trend_overall_intercept: float
    trend_month_slope: float
    trend_month_intercept: float
    current_avg: float
    current_avg_percentile: float
    q5: float
    median: float
    q95: float

    @property
    def yes_no(self) -> str:
        """Short yes/no style answer chosen from the percentile of today."""
        pct = self.current_avg_percentile
        # every comparison is False for ``nan``, so a missing percentile
        # falls through all guards to the fallback text at the bottom
        if pct < 5:
            return _('Hell no!')
        if pct < 10:
            return _('No!')
        if pct < 40:
            return _('Nope')
        if pct < 50:
            return _('Not really')
        if pct < 60:
            return _('Yup')
        if pct < 90:
            return _('Yeah!')
        if pct < 95:
            return _('Hell yeah!')
        if pct <= 100:
            return _('Bloody hell yes!')
        return _('not sure, we have no data yet')

    @property
    def avg_compare(self) -> str:
        """Full sentence elaborating on :attr:`yes_no` (same thresholds)."""
        pct = self.current_avg_percentile
        # ``nan`` (no data) fails every comparison and ends at the fallback
        if pct < 5:
            return _("Are you kidding?! It's bloody cold")
        if pct < 10:
            return _("It's actually really cold")
        if pct < 40:
            return _("It's actually kinda cool")
        if pct < 50:
            return _("It's about average")
        if pct < 60:
            return _("It's warmer than average")
        if pct < 90:
            return _("It's quite %(hot_warm)s!", hot_warm=self.hot_warm)
        if pct < 95:
            return _("It's really %(hot_warm)s!", hot_warm=self.hot_warm)
        if pct <= 100:
            return _("It's bloody %(hot_warm)s!", hot_warm=self.hot_warm)
        return _('could be hotter, could be cooler')

    @property
    def hot_warm(self) -> str:
        """Adjective for today: "hot" above a ``current_avg`` of 15, "warm"
        otherwise (including when ``current_avg`` is ``nan``)."""
        return _('hot') if self.current_avg > 15 else _('warm')