Source code for isithot.blueprints.plots

from __future__ import annotations

from datetime import date
from datetime import timedelta
from typing import NamedTuple
from urllib.parse import quote

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from flask_babel import _
from plotly.graph_objects import Figure
from scipy import stats


@np.vectorize
def _format_labels(x: float) -> str:
    """
    Build the text label for a single cell of the calendar plot.

    Missing values must not show up as a number in the plot, so ``nan`` is
    mapped to an empty string. Every other value is rendered without
    decimals.

    :param x: a floating point number which may be ``nan``

    :returns: ``''`` (an empty string) for ``nan``, otherwise the number
        formatted with 0 decimals
    """
    if np.isnan(x):
        return ''
    return f'{x:.0f}'


class ColumnMapping(NamedTuple):
    """Names of the data frame columns that hold each required parameter.

    :param datetime: name of the column storing the date (and maybe time)
        information
    :param temp_mean: name of the column storing the average
        air-temperature
    :param temp_max: name of the column storing the maximum
        air-temperature
    :param temp_min: name of the column storing the minimum
        air-temperature
    :param day_of_year: name of the column storing the day of year number
    """

    datetime: str
    temp_mean: str
    temp_max: str
    temp_min: str
    day_of_year: str
class DataProvider:
    """Base Class for defining a custom data provider.

    :meth:`get_daily_data` and :meth:`get_current_data` need to be
    overridden.

    :param col_mapping: a :func:`ColumnMapping` mapping the column names
        returned by :meth:`get_daily_data` or :meth:`get_current_data` to
        variables so they can be used later
    :param name: the name of the station that is displayed on the website
    :param id: the ID of the station that is used for compiling links. It is
        stored URL-quoted. If multiple DataProviders are used, each one must
        have a unique ``station_id``.
    :param min_year: the minimum year for which data is available. This is
        used to determine the first year for which a calendar plot is
        created.
    """

    def __init__(
            self,
            col_mapping: ColumnMapping,
            name: str,
            id: str,
            min_year: int,
    ) -> None:
        self.col_mapping = col_mapping
        self.name = name
        # the ID ends up in links, hence it is percent-encoded once here
        self.id = quote(id)
        self.min_year = min_year

    def get_daily_data(self, d: date) -> pd.DataFrame:
        """This needs to be implemented and most likely be a database query
        or a file that is read. It might make sense to cache this function.
        ``d`` may be used as a cache-key.

        This should return a :func:`pd.DataFrame` with columns containing:

        - date a datetime object
        - mean temperature
        - the day of the year

        The index must be a :func:`pd.DatetimeIndex`

        The column names must match those defined via :attr:`col_mapping`

        :param d: the date for which to prepare data. This will usually be
            today

        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError('getting daily data needs to be implemented')

    def get_current_data(self, d: date) -> pd.DataFrame:
        """This needs to be implemented and most likely be a database query
        or a file that is read. It might make sense to cache this function.
        ``d`` may be used as a cache-key.

        This should return a :func:`pd.DataFrame` with columns containing:

        - date (as a datetime object)
        - maximum temperature
        - minimum temperature

        The index must be a :func:`pd.DatetimeIndex`

        The column names must match those defined via :attr:`col_mapping`

        :param d: the date for which to prepare data. This will usually be
            today

        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError(
            'getting current data needs to be implemented',
        )

    def prepare_daily_and_calendar_data(
            self,
            d: date,
            current_avg: float | None = None,
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """
        This gets the daily data via :meth:`get_daily_data` and creates the
        calendar plot data. This is separated from :meth:`prepare_data` so it
        can be used via :func:`last_years_calendar`

        For every day of the year of ``d`` that has data, the percentile of
        its mean temperature is computed against all complete rows of
        previous years whose day of year lies within +/- 7 days.

        :param d: the date for which to prepare data. This will usually be
            today or in this case the first day of the year to prepare the
            calendar data for
        :param current_avg: This is used to add the current day which has no
            entry in the daily data just yet. When working with previous
            years, this should be left as ``None``

        :returns: a tuple of :func:`pd.DataFrame`: ``(daily, calendar_data)``.
            ``daily`` is the unmodified frame returned by
            :meth:`get_daily_data`; ``calendar_data`` has one row per month
            (abbreviated month name) and one column per day of month holding
            the percentile (``nan`` where no data is available).
        """
        temp_col = self.col_mapping.temp_mean
        doy_col = self.col_mapping.day_of_year

        daily = self.get_daily_data(d)
        # only complete rows of previous years serve as the reference sample
        reference = daily.loc[daily.index.year < d.year].dropna()

        def _calc_perc(day: pd.Timestamp, temp: float) -> float:
            # the day-of-year numbers are derived from real dates, so the
            # window wraps correctly around the turn of the year
            allowed_doy = pd.date_range(
                start=day - timedelta(days=7),
                end=day + timedelta(days=7),
                periods=15,
            ).day_of_year
            sample = reference.loc[
                reference[doy_col].isin(allowed_doy), temp_col,
            ]
            return stats.percentileofscore(sample, temp)

        # explicit copy: the frame is modified below and ``daily`` (which is
        # returned to the caller as well) must stay untouched
        calendar_data: pd.DataFrame = daily.loc[
            daily.index.year == d.year
        ].copy()

        if current_avg is not None:
            # add the current day to the calendar plot. The row is built by
            # column *name* so it neither depends on the column order of the
            # frame nor breaks when additional columns are present
            today_row = {
                temp_col: current_avg,
                doy_col: d.timetuple().tm_yday,
            }
            calendar_data.loc[pd.Timestamp(d)] = [
                today_row.get(col, np.nan) for col in calendar_data.columns
            ]

        # computed row by row (instead of ``DataFrame.apply``) so that a year
        # without any data simply results in an empty float column
        calendar_data['perc'] = np.array(
            [
                _calc_perc(day, temp)
                for day, temp in calendar_data[temp_col].items()
            ],
            dtype=float,
        )

        # fill the year, so the plot always shows the entire year
        days = pd.date_range(
            start=date(d.year, 1, 1),
            end=date(d.year, 12, 31),
            freq='1D',
            name=self.col_mapping.datetime,
        )
        calendar_data = calendar_data.reindex(days)
        calendar_data['day'] = calendar_data.index.day
        calendar_data['month'] = calendar_data.index.month
        calendar_data['month_name'] = calendar_data.index.strftime('%b')
        # the numeric month is only part of the index to get the rows into
        # calendar order, afterwards only the name is kept
        calendar_data = calendar_data.pivot(
            index=['month', 'month_name'],
            columns='day',
            values='perc',
        ).droplevel('month')
        return (daily, calendar_data)

    def prepare_data(self, d: date) -> PlotData:
        """
        The purpose of this function is to compile a
        :func:`isithot.blueprints.plots.PlotData()` object which is used for
        the creation of all plots.

        :param d: the date for which to prepare data. This will usually be
            today

        :returns: the data needed for creating the plots and texts all
            contained in a :func:`isithot.blueprints.plots.PlotData()` object
        """
        cols = self.col_mapping
        now = self.get_current_data(d)
        # compile the current data
        today_data = now.loc[now.index >= pd.Timestamp(d)].agg(
            {
                cols.temp_min: 'min',
                cols.temp_max: 'max',
            },
        )
        # TODO: what if it's the next day and no data is there (yet)
        current_avg = (
            today_data[cols.temp_max] + today_data[cols.temp_min]
        ) / 2
        daily, calendar_data = self.prepare_daily_and_calendar_data(
            d=d,
            current_avg=current_avg,
        )
        daily = daily.dropna()

        # warming trend for the entire time series. The index is reset
        # *before* dropping empty years, so the remaining index values
        # (0, 1, 2, ...) still count the years since the first one and can
        # be used as x values of the regression
        first_doy = pd.Timestamp(year=d.year, month=1, day=1)
        trend_overall_data = daily[cols.temp_mean].loc[
            daily.index < first_doy
        ].resample('1YE').mean().reset_index(drop=True).dropna()
        trend_overall = stats.linregress(
            x=trend_overall_data.index.values,
            y=trend_overall_data.values,
        )

        # extract data for distribution plots
        allowed_doy = pd.date_range(
            start=d - timedelta(days=7),
            end=(d + timedelta(days=7)),
            periods=15,
        ).day_of_year
        data: pd.DataFrame = daily.loc[
            (daily.index.year < d.year) &
            daily[cols.day_of_year].isin(allowed_doy)
        ]

        # warming trend for current time span of the year
        trend_month_data = data[cols.temp_mean].resample(
            '1YE',
        ).mean().reset_index(drop=True).dropna()
        trend_month = stats.linregress(
            x=trend_month_data.index.values,
            y=trend_month_data.values,
        )

        current_avg_perc = stats.percentileofscore(
            a=data[cols.temp_mean],
            score=current_avg,
        )
        q5 = data[cols.temp_mean].quantile(q=0.05)
        q95 = data[cols.temp_mean].quantile(q=0.95)
        med = data[cols.temp_mean].median()
        return PlotData(
            current_date=d,
            daily=daily,
            now=now,
            toy_data=data,
            trend_overall_data=trend_overall_data,
            trend_month_data=trend_month_data,
            calendar_data=calendar_data,
            trend_overall_slope=trend_overall.slope,
            trend_overall_intercept=trend_overall.intercept,
            trend_month_slope=trend_month.slope,
            trend_month_intercept=trend_month.intercept,
            current_avg=current_avg,
            current_avg_percentile=current_avg_perc,
            q5=q5,
            q95=q95,
            median=med,
        )

    def distrib_fig(self, fig_data: PlotData) -> Figure:
        """
        Creates a figure representing the distribution with 5% and 95%
        percentile and the trends for the time of year and the overall
        warming trend.

        :param fig_data: a :func:`PlotData` object containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on
            the page, defining the plot including all data
        """
        first_day = fig_data.toy_data.index.min()
        # all lines are extended two years beyond the last data point
        line_end = fig_data.toy_data.index.max() + timedelta(days=365*2)
        # the percentile lines additionally start one year before the data
        perc_line_start = first_day - timedelta(days=365)

        fig = go.Figure()
        # the dots representing the daily mean temperature
        fig.add_trace(
            go.Scatter(
                x=fig_data.toy_data.index,
                y=fig_data.toy_data[self.col_mapping.temp_mean],
                mode='markers',
                name=_('Daily Average Temperature'),
                marker={'size': 5, 'color': 'rgba(0, 0, 0, 0.2)'},
                showlegend=False,
                hovertemplate='<b>%{x|%Y-%m-%d}</b>: %{y:.1f} °C',
            ),
        ).update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            yaxis_title=_('Daily Average Temperature (°C)'),
            template='simple_white',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            yaxis={
                'fixedrange': True,
                'nticks': 10,
            },
            xaxis={
                'fixedrange': True,
                'nticks': 20,
            },
        )
        # the horizontal line indicating the 5% percentile
        fig.add_trace(
            go.Scatter(
                x=[perc_line_start, line_end],
                y=[fig_data.q5, fig_data.q5],
                mode='lines+text',
                text=[_('<b>5th percentile: %(q5).1f °C</b>', q5=fig_data.q5)],
                textposition='top right',
                textfont_size=14,
                showlegend=False,
                line={'color': 'black', 'dash': 'dash', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the horizontal line indicating the 95% percentile
        fig.add_trace(
            go.Scatter(
                x=[perc_line_start, line_end],
                y=[fig_data.q95, fig_data.q95],
                mode='lines+text',
                showlegend=False,
                text=[
                    _(
                        '<b>95th percentile: %(q95).1f °C</b>',
                        q95=fig_data.q95,
                    ),
                ],
                textposition='top right',
                textfont_size=14,
                line={'color': 'black', 'dash': 'dash', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the trend line for this time of the year
        fig.add_trace(
            go.Scatter(
                x=[first_day, line_end],
                y=[
                    fig_data.trend_month_intercept,
                    fig_data.trend_month_intercept +
                    len(fig_data.trend_month_data) *
                    fig_data.trend_month_slope,
                ],
                mode='lines+text',
                showlegend=False,
                text=[
                    _(
                        '<b>Trend for this time of year: '
                        '%(century_trend).1f K/century</b>',
                        century_trend=fig_data.trend_month_slope * 100,
                    ),
                ],
                textposition='bottom right',
                textfont_size=14,
                line={'color': 'red', 'width': 3},
                hoverinfo='none',
            ),
        )
        # the overall trend line across all data. The points are given from
        # right to left, so the (single) text label is attached to the right
        # end of the line.
        # NOTE(review): the line is anchored at ``trend_month_intercept``
        # (not ``trend_overall_intercept``) - presumably so both trend lines
        # start at the same point and only their slopes are compared; confirm
        fig.add_trace(
            go.Scatter(
                x=[line_end, first_day],
                y=[
                    fig_data.trend_month_intercept +
                    len(fig_data.trend_overall_data) *
                    fig_data.trend_overall_slope,
                    fig_data.trend_month_intercept,
                ],
                mode='lines+text',
                showlegend=False,
                text=[
                    _(
                        '<b>Overall Trend: %(century_trend).1f '
                        'K/century</b>',
                        century_trend=fig_data.trend_overall_slope * 100,
                    ),
                ],
                textposition='top left',
                textfont_size=14,
                line={'color': 'red', 'width': 2, 'dash': 'dash'},
                hoverinfo='none',
            ),
        )
        # the red marker showing today's value
        fig.add_trace(
            go.Scatter(
                x=[fig_data.current_date],
                y=[fig_data.current_avg],
                mode='markers+text',
                marker={
                    'size': 12,
                    'color': 'red',
                    'line': {
                        'color': 'rgba(255, 0, 0, 0.5)',
                        'width': 2,
                    },
                },
                text=[
                    _(
                        '<b>Today: %(cur_avg).1f °C</b>',
                        cur_avg=fig_data.current_avg,
                    ),
                ],
                textfont_size=14,
                textposition='top left',
                showlegend=False,
                hoverinfo='none',
            ),
        )
        return fig

    def hist_fig(self, fig_data: PlotData) -> Figure:
        """
        Creates a figure representing a histogram or more specifically a
        kernel density estimate. This includes lines for the 5% percentile
        and 95% percentile as well as the median. A red line for today's
        value is added.

        :param fig_data: a :func:`PlotData` object containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on
            the page, defining the plot including all data
        """
        temps = fig_data.toy_data[self.col_mapping.temp_mean]
        # calculate the kernel density estimation curve
        kde = stats.gaussian_kde(temps.dropna())
        # check the spacing with today's value. If we have a record, the plot
        # may be cut off - adjust this!
        kde_min = temps.min()
        kde_max = temps.max()
        # this ensures that today does not lay outside of the kde curve
        # (both comparisons are False if today's value is nan)
        if fig_data.current_avg < kde_min:
            kde_min = fig_data.current_avg
        elif fig_data.current_avg > kde_max:
            kde_max = fig_data.current_avg

        x_vals = np.linspace(kde_min - 1, kde_max + 1, 200)
        y_vals = kde.evaluate(x_vals)
        # all vertical lines span the full height of the curve
        y_max = max(y_vals)

        fig = go.Figure()
        # Create line plot for KDE curve
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode='lines',
                line={'color': 'grey'},
                fill='tozeroy',
                showlegend=False,
                hoverinfo='none',
            ),
        )
        # the vertical line for the 5% percentile
        fig.add_trace(
            go.Scatter(
                x=[fig_data.q5, fig_data.q5],
                y=[y_max, 0],
                mode='lines',
                showlegend=False,
                line={'color': 'black', 'dash': 'dash', 'width': 2},
                hoverinfo='none',
            ),
        )
        # the vertical line for the 95% percentile
        fig.add_trace(
            go.Scatter(
                x=[fig_data.q95, fig_data.q95],
                y=[y_max, 0],
                mode='lines',
                showlegend=False,
                line={'color': 'black', 'dash': 'dash', 'width': 2},
                hoverinfo='none',
            ),
        )
        # the vertical line for the 50%/median percentile
        fig.add_trace(
            go.Scatter(
                x=[fig_data.median, fig_data.median],
                y=[y_max, 0],
                mode='lines',
                showlegend=False,
                line={'color': 'black', 'dash': 'dash', 'width': 2},
                hoverinfo='none',
            ),
        )
        # the vertical red line for today's temperature
        fig.add_trace(
            go.Scatter(
                x=[fig_data.current_avg, fig_data.current_avg],
                y=[y_max, 0],
                mode='lines',
                showlegend=False,
                line={'color': 'red', 'width': 3},
                hoverinfo='none',
            ),
        )
        # making the plot transparent and adding the annotation for the lines
        # created above
        fig.update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            xaxis_title=_('Daily Average Temperature (°C)'),
            template='simple_white',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            yaxis={'visible': False},
            xaxis={
                'fixedrange': True,
                'nticks': 20,
            },
            annotations=[
                go.layout.Annotation(
                    x=fig_data.q95,
                    y=0,
                    xref='x',
                    yref='y',
                    text=_(
                        '<b> 95th percentile: %(q95).1f °C</b>',
                        q95=fig_data.q95,
                    ),
                    showarrow=False,
                    yanchor='bottom',
                    textangle=-90,
                    xshift=-10,
                ),
                go.layout.Annotation(
                    x=fig_data.q5,
                    y=0,
                    xref='x',
                    yref='y',
                    text=_(
                        '<b> 5th percentile: %(q5).1f °C</b>',
                        q5=fig_data.q5,
                    ),
                    showarrow=False,
                    yanchor='bottom',
                    textangle=-90,
                    xshift=-10,
                ),
                go.layout.Annotation(
                    x=fig_data.median,
                    y=0,
                    xref='x',
                    yref='y',
                    text=_(
                        '<b> 50th percentile: %(med).1f °C</b>',
                        med=fig_data.median,
                    ),
                    showarrow=False,
                    yanchor='bottom',
                    textangle=-90,
                    xshift=-10,
                ),
            ],
        )
        # there might be cases where we don't have data for today, so we
        # cannot annotate the red line (which is not drawn if it is nan)
        if not np.isnan(fig_data.current_avg):
            fig.add_annotation(
                go.layout.Annotation(
                    x=fig_data.current_avg,
                    y=y_max,
                    xref='x',
                    yref='y',
                    text=_(
                        '<b>Today: %(cur_avg).1f °C</b>',
                        cur_avg=fig_data.current_avg,
                    ),
                    showarrow=False,
                    yanchor='top',
                    textangle=-90,
                    xshift=-10,
                ),
            )
        return fig

    def calendar_fig(self, calendar_data: pd.DataFrame) -> Figure:
        """
        Creates a figure representing a calendar plot of the current year
        indicating the percentile of each day as a color and a number.

        :param calendar_data: a :func:`pd.DataFrame` containing all data
            necessary for creating the plot

        :returns: a :func:`Figure` object that can be used as a ``json`` on
            the page, defining the plot including all data
        """
        # cell labels: the percentile as an integer, empty where it is nan
        text = _format_labels(calendar_data.values)
        fig = px.imshow(
            calendar_data,
            color_continuous_scale='RdBu_r',
            aspect='auto',
            zmax=100,
            zmin=0,
        )
        fig.update_traces(text=text, texttemplate='%{text}')
        fig.update_coloraxes(colorbar={'thickness': 12, 'xpad': 0})
        fig.update_layout(
            modebar={
                'bgcolor': 'rgba(0,0,0,0)',
                'color': 'rgba(0,0,0,1)',
                'activecolor': 'rgba(0,0,0,0.5)',
            },
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
            hovermode=False,
            template='simple_white',
            xaxis={
                'fixedrange': True,
                'tickmode': 'linear',
                'tick0': 0,
                'dtick': 1,
                'title': None,
            },
            yaxis={
                'fixedrange': True,
                'tickmode': 'linear',
                'tick0': 0,
                'dtick': 1,
                'title': None,
            },
        )
        return fig
class PlotData(NamedTuple):
    """
    Container bundling everything that is needed to render the plots and
    texts.

    :param current_date: The date for which the data is compiled. This is
        usually today
    :param daily: A pandas dataframe containing all daily data that is
        available in the database
    :param now: The latest data from the station (high resolution raw data)
    :param toy_data: Data for the current time of year (toy). For this a week
        before ``current_date`` and a week after ``current_date`` is
        extracted
    :param trend_overall_data: (Yearly) data needed to calculate the overall
        trend since the start of the measurements
    :param trend_month_data: Data needed for calculating the trend for the
        current month
    :param calendar_data: Data needed to create a calendar plot for the
        current year
    :param trend_overall_slope: The slope of the line for the overall warming
        trend across all years and times of year
    :param trend_overall_intercept: The intercept of the line for the overall
        warming trend across all years and times of year
    :param trend_month_slope: The slope of the line for the current warming
        trend across all years for the current time of year :math:`\\pm` 7
        days
    :param trend_month_intercept: The intercept of the line for the current
        warming trend across all years for the current time of year
        :math:`\\pm` 7 days
    :param current_avg: The current average of today calculated from
        averaging the minimum and maximum temperature
    :param current_avg_percentile: The percentile of ``current_avg``
    :param q5: the 5% percentile for this time of the year
    :param median: the median/50% percentile for this time of the year
    :param q95: the 95% percentile for this time of the year
    """

    current_date: date
    daily: pd.DataFrame
    now: pd.DataFrame
    toy_data: pd.DataFrame
    trend_overall_data: pd.DataFrame
    trend_month_data: pd.DataFrame
    calendar_data: pd.DataFrame
    trend_overall_slope: float
    trend_overall_intercept: float
    trend_month_slope: float
    trend_month_intercept: float
    current_avg: float
    current_avg_percentile: float
    q5: float
    median: float
    q95: float

    @property
    def yes_no(self) -> str:
        """short yes/no style answer derived from the percentile of today"""
        perc = self.current_avg_percentile
        # the branches are checked in ascending order, hence only the upper
        # bound of each class needs to be tested. A ``nan`` percentile fails
        # every comparison and ends up in the fallback at the bottom.
        if perc < 5:
            return _('Hell no!')
        if perc < 10:
            return _('No!')
        if perc < 40:
            return _('Nope')
        if perc < 50:
            return _('Not really')
        if perc < 60:
            return _('Yup')
        if perc < 90:
            return _('Yeah!')
        if perc < 95:
            return _('Hell yeah!')
        if perc <= 100:
            return _('Bloody hell yes!')
        return _('not sure, we have no data yet')

    @property
    def avg_compare(self) -> str:
        """full sentence elaborating on the answer given by ``yes_no``"""
        perc = self.current_avg_percentile
        # same classes as in ``yes_no``; ``nan`` (or anything above 100)
        # falls through to the last return
        if perc < 5:
            return _("Are you kidding?! It's bloody cold")
        if perc < 10:
            return _("It's actually really cold")
        if perc < 40:
            return _("It's actually kinda cool")
        if perc < 50:
            return _("It's about average")
        if perc < 60:
            return _("It's warmer than average")
        if perc < 90:
            return _("It's quite %(hot_warm)s!", hot_warm=self.hot_warm)
        if perc < 95:
            return _("It's really %(hot_warm)s!", hot_warm=self.hot_warm)
        if perc <= 100:
            return _("It's bloody %(hot_warm)s!", hot_warm=self.hot_warm)
        return _('could be hotter, could be cooler')

    @property
    def hot_warm(self) -> str:
        """the adjective for today: 'hot' above an average of 15, else
        'warm'"""
        return _('hot') if self.current_avg > 15 else _('warm')