[1]:
import sys
import logging

# Put the parent directory of this notebook on the import path (index 1 keeps
# sys.path[0] first) — presumably so the local `tsad` checkout is importable.
sys.path.insert(1, '../')

# No longer needed by this cell; the import is kept in case other cells rely on `reload`.
from importlib import reload

# Route every log record (DEBUG and above) to stdout as a bare message so it
# shows up in the cell output.
# basicConfig() does nothing when the root logger already has handlers, which may
# be the case inside a running kernel. force=True (Python 3.8+) removes and closes
# those handlers first; it replaces the former reload(logging) workaround, which
# re-created the module's root logger and left previously created loggers
# attached to the old one.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format='%(message)s', force=True)
[2]:
from tsad.pipelines import Pipeline
from tsad.tasks.eda import HighLevelDatasetAnalysisTask, TimeDiscretizationTask, FindNaNTask, EquipmentDowntimeTask
from tsad.tasks.preprocess import ScalingTask, ValueRangeProcessingTask
from tsad.datasets import load_combines
[5]:
# Load the example dataset through tsad.datasets.load_combines; the cells below
# read the data from its `.frame` attribute.
dataset = load_combines()
Working with a Task without a Pipeline
[6]:
# Run a single task directly, without wrapping it in a Pipeline.
cdt = HighLevelDatasetAnalysisTask()
# fit() returns a (frame, result) pair; both are reused by the next cell.
cdt_df, cdt_result = cdt.fit(dataset.frame)
# Print the analysis summary as the cell output.
cdt_result.show()
'Датасет размером 450, признаков: 4'
'В период с 2023-04-21 13:32:48.228000 по 2023-04-25 23:59:59.999000'
'Общей длительностью 4 days 10:27:11.771000'
float64 4
Name: count, dtype: int64
Описание
Anker float64
Cut float64
Go float64
Uncert float64
dtype: object
[ ]:
# Chain a second stand-alone task onto the output of the previous cell.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run in the recorded session — confirm it still works.
discretization = TimeDiscretizationTask()
# NOTE(review): `com` is not referenced anywhere else in this notebook — confirm
# that TimeDiscretizationTask actually reads this attribute, or whether it is a
# leftover/typo.
discretization.com = cdt_result
# Only the result object is needed here; the returned frame is discarded.
_, discretization_result = discretization.fit(cdt_df)
# The previous task's result is attached by hand — presumably what a Pipeline
# would otherwise pass along automatically; verify against the Pipeline code.
discretization_result.dataset_analysis_result = cdt_result
discretization_result.show()
Working with separate pipelines for groups of tasks
[7]:
# EDA stage: the tasks are listed in the order they are handed to the Pipeline.
# This list is reused later to build the combined pipeline.
eda_tasks = [
HighLevelDatasetAnalysisTask(),
# presumably resamples to a one-second frequency — TODO confirm what freq_tobe means
TimeDiscretizationTask(freq_tobe='1s'),
FindNaNTask(),
EquipmentDowntimeTask()
]
# show=False: presumably suppresses the per-task result display (the saved run
# shows no output for this cell).
eda_pipeline = Pipeline(eda_tasks, show=False)
# Keep the value returned by fit(): it is the input of the preprocessing pipeline.
eda_fit_df = eda_pipeline.fit(dataset.frame)
[11]:
# Preprocessing stage, run as a second pipeline on top of the EDA output.
# This list is reused later to build the combined pipeline.
preprocess_tasks = [
ScalingTask(),
ValueRangeProcessingTask()
]
# Hand over the results collected by the EDA pipeline; the log output of this
# cell reports nan_result and downtime_result being added "from Pipeline results".
preprocess_pipeline = Pipeline(preprocess_tasks, results=eda_pipeline.results, show=True)
preprocess_fit_df = preprocess_pipeline.fit(eda_fit_df)
# Last expression of the cell, so the returned array is displayed as the cell output.
preprocess_pipeline.predict(eda_fit_df)
Adding parameter nan_result with type FindNaNResult from Pipeline results.
Adding parameter downtime_result with type EquipmentDowntimeResult from Pipeline results.
Значения вышедшие за интервал будут удалены
Adding parameter vrp_result with type ValueRangeProcessingResult from Pipeline results.
Значения вышедшие за интервал будут удалены
[11]:
array([[ 0. , nan, 0. , nan],
[ nan, nan, nan, nan],
[ nan, nan, nan, nan],
...,
[ nan, nan, nan, nan],
[ nan, nan, nan, nan],
[ 0. , 1.04027283, 0. , -1.04027283]])
Working with a single Pipeline
[ ]:
# Combine the EDA and preprocessing tasks into one end-to-end pipeline and fit
# it on the raw frame.
# NOTE(review): this reuses the task instances already fitted by eda_pipeline
# and preprocess_pipeline — confirm tasks can safely be re-fitted, otherwise
# build fresh instances here.
pipeline = Pipeline(eda_tasks + preprocess_tasks)
pipeline.fit(dataset.frame)