Usage

Installation

To use Top-K Insights, first install it using pip:

(.venv) $ pip install git+https://github.com/Der-Henning/TopK-Insights

Usage Example

import logging

import matplotlib.pyplot as plt
import pandas as pd

from tki import TKI
from tki.aggregators import SumAggregator
from tki.dimensions import (CardinalDimension, NominalDimension,
                            TemporalDimension)
from tki.extractors import (DeltaMeanExtractor, DeltaPrevExtractor,
                            ProportionExtractor, RankExtractor)
from tki.insights import (CorrelationInsight, EvennessInsight,
                        OutstandingFirstInsight, OutstandingLastInsight,
                        TrendInsight)

# Configure root logging and raise the 'tki' logger to INFO.
logging.basicConfig()
logging.getLogger('tki').setLevel(logging.INFO)

# Example data set: cars sold per brand, one tuple of sales figures per year.
# The position inside each tuple corresponds to the position in ``brands``.
brands = ('H', 'T', 'F', 'B')
sales_by_year = {
    2010: (40, 38, 13, 20),
    2011: (35, 34, 10, 18),
    2012: (36, 34, 14, 20),
    2013: (43, 29, 23, 17),
    2014: (58, 36, 27, 19),
}
# Flatten the table into [brand, year, cars sold] rows, year by year.
data = [
    [brand, year, sold]
    for year, sold_per_brand in sales_by_year.items()
    for brand, sold in zip(brands, sold_per_brand)
]
sales = pd.DataFrame(data, columns=['Brand', 'year', 'Cars Sold'])

# Extractor and aggregator classes (passed uninstantiated) and insight
# instances that TKI is configured with.
extractors = {
    RankExtractor, DeltaPrevExtractor, DeltaMeanExtractor, ProportionExtractor,
}
aggregators = {SumAggregator}
insights = {
    OutstandingFirstInsight(),
    OutstandingLastInsight(),
    TrendInsight(),
    EvennessInsight(),
    CorrelationInsight(),
}

# 'Brand' and 'year' are the dimensions; 'Cars Sold' is the measurement.
dimensions = [
    NominalDimension('Brand'),
    TemporalDimension('year', date_format='%Y', freq='1Y'),
]
measurements = [CardinalDimension('Cars Sold')]

tki = TKI(
    sales,
    dimensions=dimensions,
    measurements=measurements,
    extractors=extractors,
    aggregators=aggregators,
    insights=insights,
    depth=3,
    result_size=21,
)
tki.run()

# Plot every insight into its own cell of a fixed 7 x 3 subplot grid
# (21 cells, the same number as ``result_size=21`` in the TKI call).
n_rows, n_cols = 7, 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(25, 40), dpi=80)
for idx, result in enumerate(tki.heap.insights):
    # Fill the grid row by row.
    row, col = divmod(idx, n_cols)
    # Make this cell the current axes so the pyplot calls below target it;
    # ``plt.sca`` is the explicit way to select an existing Axes.
    plt.sca(axes[row][col])
    result.plot()
    plt.title(
        f"{idx + 1}) {type(result.insight).__name__} "
        f"score: {result.impact:.2f} * {result.significance:.2f} "
        f"= {result.score:.2f}\n"
        f"{(result.sibling_group, result.composite_extractor)}")
    # Values of the last index level of the insight's data.
    x_index = result.data.index.get_level_values(result.data.index.names[-1])
    plt.xticks(rotation=0)
    if isinstance(x_index, pd.DatetimeIndex):
        # Label datetime ticks with the year only.
        plt.xticks(range(result.data.index.size), x_index.year)
fig.tight_layout()
# Write the figure to disk and save the TKI object via its own ``save``.
plt.savefig('insights.svg')
tki.save('insights.pkl')