mirror of
https://github.com/clearml/clearml
synced 2025-03-12 22:58:59 +00:00
Add inference step to pipeline example (#765)
* Fix docs * Update pipeline example
This commit is contained in:
parent
447714eaa4
commit
01dc126167
@ -26,8 +26,10 @@ def step_one(pickle_data_url: str, extra: int = 43):
|
||||
# notice all package imports inside the function will be automatically logged as
|
||||
# required packages for the pipeline execution step.
|
||||
# Specifying `return_values` makes sure the function step can return an object to the pipeline logic
|
||||
# In this case, the returned tuple will be stored as an artifact named "processed_data"
|
||||
@PipelineDecorator.component(return_values=['processed_data'], cache=True, task_type=TaskTypes.data_processing)
|
||||
# In this case, the returned tuple will be stored as an artifact named "X_train, X_test, y_train, y_test"
|
||||
@PipelineDecorator.component(
|
||||
return_values=['X_train, X_test, y_train, y_test'], cache=True, task_type=TaskTypes.data_processing
|
||||
)
|
||||
def step_two(data_frame, test_size=0.2, random_state=42):
|
||||
print('step_two')
|
||||
# make sure we have pandas for this step, we need it to use the data_frame
|
||||
@ -36,7 +38,8 @@ def step_two(data_frame, test_size=0.2, random_state=42):
|
||||
y = data_frame['target']
|
||||
X = data_frame[(c for c in data_frame.columns if c != 'target')]
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=test_size, random_state=random_state)
|
||||
X, y, test_size=test_size, random_state=random_state
|
||||
)
|
||||
|
||||
return X_train, X_test, y_train, y_test
|
||||
|
||||
@ -47,21 +50,26 @@ def step_two(data_frame, test_size=0.2, random_state=42):
|
||||
# Specifying `return_values` makes sure the function step can return an object to the pipeline logic
|
||||
# In this case, the returned object will be stored as an artifact named "model"
|
||||
@PipelineDecorator.component(return_values=['model'], cache=True, task_type=TaskTypes.training)
|
||||
def step_three(data):
|
||||
def step_three(X_train, y_train):
|
||||
print('step_three')
|
||||
# make sure we have pandas for this step, we need it to use the data_frame
|
||||
import pandas as pd # noqa
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from clearml import Task
|
||||
X_train, X_test, y_train, y_test = data
|
||||
model = LogisticRegression(solver='liblinear', multi_class='auto')
|
||||
model.fit(X_train, y_train)
|
||||
score = model.score(X_test,y_test)
|
||||
# Get current step's Task
|
||||
task = Task.current_task()
|
||||
task.get_logger().report_single_value(name='accuracy',value=score)
|
||||
return model
|
||||
|
||||
# Make the following function an independent pipeline component step
|
||||
# notice all package imports inside the function will be automatically logged as
|
||||
# required packages for the pipeline execution step
|
||||
# Specifying `return_values` makes sure the function step can return an object to the pipeline logic
|
||||
# In this case, the returned object will be stored as an artifact named "accuracy"
|
||||
@PipelineDecorator.component(return_values=['accuracy'], cache=True, task_type=TaskTypes.qc)
|
||||
def step_four(model, X_data, Y_data):
|
||||
from sklearn.linear_model import LogisticRegression # noqa
|
||||
from sklearn.metrics import accuracy_score
|
||||
Y_pred = model.predict(X_data)
|
||||
return accuracy_score(Y_data, Y_pred, normalize=True)
|
||||
|
||||
# The actual pipeline execution context
|
||||
# notice that all pipeline component function calls are actually executed remotely
|
||||
@ -80,14 +88,21 @@ def executing_pipeline(pickle_url, mock_parameter='mock'):
|
||||
# When actually passing the `data_frame` object into a new step,
|
||||
# It waits for the creating step/function (`step_one`) to complete the execution
|
||||
print('launch step two')
|
||||
processed_data = step_two(data_frame)
|
||||
X_train, X_test, y_train, y_test = step_two(data_frame)
|
||||
|
||||
print('launch step three')
|
||||
model = step_three(processed_data)
|
||||
model = step_three(X_train, y_train)
|
||||
|
||||
# Notice since we are "printing" the `model` object,
|
||||
# we actually deserialize the object from the third step, and thus wait for the third step to complete.
|
||||
print('pipeline completed with model: {}'.format(model))
|
||||
print('returned model: {}'.format(model))
|
||||
|
||||
print('launch step four')
|
||||
accuracy = 100 * step_four(model, X_data=X_test, Y_data=y_test)
|
||||
|
||||
# Notice since we are "printing" the `accuracy` object,
|
||||
# we actually deserialize the object from the fourth step, and thus wait for the fourth step to complete.
|
||||
print(f"Accuracy={accuracy}%")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
Reference in New Issue
Block a user