[1]:
import sys
import logging

# Put the parent directory on the module search path at index 1 — presumably so
# the `tsad` package imported in the next cell resolves to the local checkout
# (TODO confirm). The membership guard keeps re-runs of this cell from stacking
# duplicate '../' entries in sys.path.
if '../' not in sys.path:
    sys.path.insert(1, '../')

from importlib import reload

# Send every log record (DEBUG and above) to stdout as the bare message text, so
# library log messages show up in the cell output.
# force=True (Python 3.8+) removes and closes any handlers already attached to the
# root logger before installing this one, so the configuration applies even when the
# root logger was set up earlier. It replaces the `reload(logging)` workaround, which
# re-created the root logger and left every logger object created before the reload
# attached to the old, unconfigured root.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format='%(message)s', force=True)
[2]:
from tsad.pipelines import Pipeline
from tsad.tasks.eda import HighLevelDatasetAnalysisTask, TimeDiscretizationTask, FindNaNTask, EquipmentDowntimeTask
from tsad.tasks.preprocess import ScalingTask, ValueRangeProcessingTask

from tsad.datasets import load_combines
[5]:
# Load the combines dataset via tsad.datasets.load_combines. The cells below pass
# its `.frame` attribute to tasks and pipelines as the input data.
dataset = load_combines()

Working with a single Task without a Pipeline

[6]:
# Run one task directly, without wrapping it in a Pipeline.
cdt = HighLevelDatasetAnalysisTask()
# fit() returns a pair, unpacked here as (cdt_df, cdt_result); both are reused by
# the time-discretization cell below.
cdt_df, cdt_result = cdt.fit(dataset.frame)
# Render the analysis result (the dataset summary printed under this cell).
cdt_result.show()
'Датасет размером 450, признаков: 4'
'В период с 2023-04-21 13:32:48.228000 по 2023-04-25 23:59:59.999000'
'Общей длительностью 4 days 10:27:11.771000'
float64    4
Name: count, dtype: int64
Описание
Anker     float64
Cut       float64
Go        float64
Uncert    float64
dtype: object
[ ]:
# Run the time-discretization task by hand on the output of the previous task.
discretization = TimeDiscretizationTask()
# NOTE(review): `com` is not referenced anywhere else in this notebook — confirm
# that the task actually reads this attribute (it may be a leftover or a typo).
discretization.com = cdt_result

# Bind the first element of the returned pair to a descriptive name instead of `_`:
# in IPython `_` holds the last cell output, and assigning to it stops IPython
# from updating it for the rest of the session.
discretization_df, discretization_result = discretization.fit(cdt_df)

# Attach the high-level analysis result to the discretization result before
# displaying it. NOTE(review): presumably show() reads this attribute — confirm.
discretization_result.dataset_analysis_result = cdt_result
discretization_result.show()

Working with separate pipelines for groups of tasks

[7]:
# EDA stage: the task list handed to the first pipeline. The same list is reused
# by the combined pipeline in the last cell.
# NOTE(review): freq_tobe='1s' presumably sets the target sampling period to one
# second — confirm against TimeDiscretizationTask.
eda_tasks = [
    HighLevelDatasetAnalysisTask(),
    TimeDiscretizationTask(freq_tobe='1s'),
    FindNaNTask(),
    EquipmentDowntimeTask()
]

# NOTE(review): show=False presumably suppresses per-task result display — confirm.
eda_pipeline = Pipeline(eda_tasks, show=False)
# Keep the fit output: it is the input of the preprocessing pipeline below.
eda_fit_df = eda_pipeline.fit(dataset.frame)
[11]:
# Preprocessing stage, run as its own pipeline on top of the EDA output.
preprocess_tasks = [
    ScalingTask(),
    ValueRangeProcessingTask()
]

# Hand over the results collected by the EDA pipeline; the log printed under this
# cell reports them being added as task parameters ("Adding parameter ... from
# Pipeline results.").
preprocess_pipeline = Pipeline(preprocess_tasks, results=eda_pipeline.results, show=True)
preprocess_fit_df = preprocess_pipeline.fit(eda_fit_df)
# Last expression of the cell: the predict() output is displayed, not stored.
preprocess_pipeline.predict(eda_fit_df)
Adding parameter nan_result with type FindNaNResult from Pipeline results.
Adding parameter downtime_result with type EquipmentDowntimeResult from Pipeline results.
Значения вышедшие за интервал будут удалены
Adding parameter vrp_result with type ValueRangeProcessingResult from Pipeline results.
Значения вышедшие за интервал будут удалены
[11]:
array([[ 0.        ,         nan,  0.        ,         nan],
       [        nan,         nan,         nan,         nan],
       [        nan,         nan,         nan,         nan],
       ...,
       [        nan,         nan,         nan,         nan],
       [        nan,         nan,         nan,         nan],
       [ 0.        ,  1.04027283,  0.        , -1.04027283]])

Working with a single Pipeline

[ ]:
# One pipeline that chains the EDA tasks followed by the preprocessing tasks.
# NOTE(review): the list concatenation reuses the very task instances that were
# already fitted through eda_pipeline and preprocess_pipeline in the cells above;
# confirm that re-fitting the same objects is intended, or build fresh task
# instances for this pipeline.
pipeline = Pipeline(eda_tasks + preprocess_tasks)
# Last expression of the cell: whatever fit() returns is displayed as the output.
pipeline.fit(dataset.frame)