[1]:

import sys
import logging

sys.path.insert(1, '../')

from importlib import reload

# Route every log record (DEBUG and above) to stdout as the bare message text,
# so library logging appears in the cell output.
# `force=True` (Python 3.8+) removes and closes any handlers already attached
# to the root logger before applying this configuration, so re-running the cell
# is safe. It replaces the former `reload(logging)` workaround, which
# re-executed the entire logging module just to make `basicConfig` take effect.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format='%(message)s', force=True)

[2]:

from tsad.pipelines import Pipeline
from tsad.tasks.eda import HighLevelDatasetAnalysisTask, TimeDiscretizationTask, FindNaNTask, EquipmentDowntimeTask
from tsad.tasks.preprocess import ScalingTask, ValueRangeProcessingTask

from tsad.datasets import load_combines

[5]:

# Load the dataset via tsad.datasets.load_combines; later cells pass its
# `.frame` attribute to tasks and pipelines as input.
dataset = load_combines()

Working with a Task without a Pipeline

[6]:

# Run a single task directly, without wrapping it in a Pipeline.
# `fit` returns a pair that is unpacked into a dataframe and a result object.
cdt = HighLevelDatasetAnalysisTask()
cdt_df, cdt_result = cdt.fit(dataset.frame)
# Render the result; the recorded output is a textual summary of the dataset.
cdt_result.show()

'Датасет размером 450, признаков: 4'

'В период с 2023-04-21 13:32:48.228000 по 2023-04-25 23:59:59.999000'

'Общей длительностью 4 days 10:27:11.771000'

float64    4
Name: count, dtype: int64

Описание
Anker     float64
Cut       float64
Go        float64
Uncert    float64
dtype: object

[ ]:

# Second stand-alone task: it is fitted on the dataframe returned by the
# previous task's `fit`.
discretization = TimeDiscretizationTask()
# NOTE(review): `com` is not referenced anywhere else in this notebook -
# confirm the task actually reads this attribute, or whether the line is a leftover.
discretization.com = cdt_result

# Only the result object is kept; the returned dataframe is discarded.
_, discretization_result = discretization.fit(cdt_df)

# The earlier analysis result is attached by hand before displaying;
# presumably `show()` reads it - TODO confirm.
discretization_result.dataset_analysis_result = cdt_result
discretization_result.show()

Working with separate pipelines for groups of tasks

[7]:

# EDA stage: the tasks are listed in the order they are handed to the Pipeline.
eda_tasks = [
    HighLevelDatasetAnalysisTask(),
    TimeDiscretizationTask(freq_tobe='1s'),  # presumably the target sampling frequency - confirm
    FindNaNTask(),
    EquipmentDowntimeTask()
]

# NOTE(review): show=False presumably suppresses per-task result display -
# confirm against the Pipeline documentation.
eda_pipeline = Pipeline(eda_tasks, show=False)
# The fit output is kept because the preprocessing pipeline is fitted on it.
eda_fit_df = eda_pipeline.fit(dataset.frame)

[11]:

# Preprocessing stage, run as a second pipeline on top of the EDA output.
preprocess_tasks = [
    ScalingTask(),
    ValueRangeProcessingTask()
]

# Hand over the results collected by the EDA pipeline; the recorded log shows
# them being added as task parameters (nan_result, downtime_result).
preprocess_pipeline = Pipeline(preprocess_tasks, results=eda_pipeline.results, show=True)
preprocess_fit_df = preprocess_pipeline.fit(eda_fit_df)
# Last expression of the cell, so its return value (an array in the recorded
# output) is displayed.
# NOTE(review): the visible rows of the recorded array are largely NaN -
# presumably gaps introduced by the 1s discretization; confirm this is expected.
preprocess_pipeline.predict(eda_fit_df)

Adding parameter nan_result with type FindNaNResult from Pipeline results.
Adding parameter downtime_result with type EquipmentDowntimeResult from Pipeline results.
Значения вышедшие за интервал будут удалены
Adding parameter vrp_result with type ValueRangeProcessingResult from Pipeline results.
Значения вышедшие за интервал будут удалены

[11]:

array([[ 0.        ,         nan,  0.        ,         nan],
       [        nan,         nan,         nan,         nan],
       [        nan,         nan,         nan,         nan],
       ...,
       [        nan,         nan,         nan,         nan],
       [        nan,         nan,         nan,         nan],
       [ 0.        ,  1.04027283,  0.        , -1.04027283]])

Working with a single Pipeline

[ ]:

# Single end-to-end pipeline: the EDA tasks first, then the preprocessing tasks.
# The list holds the very same task objects that were defined for the
# separate pipelines, not fresh instances.
pipeline = Pipeline([*eda_tasks, *preprocess_tasks])
# Last expression of the cell, so the value returned by `fit` is displayed.
pipeline.fit(dataset.frame)