Azure Machine Learning SDK v2 for Python

Client library for managing Azure ML resources: workspaces, jobs, models, data, and compute.

Installation

pip install azure-ai-ml

Environment Variables

AZURE_SUBSCRIPTION_ID=<your-subscription-id>
AZURE_RESOURCE_GROUP=<your-resource-group>
AZURE_ML_WORKSPACE_NAME=<your-workspace-name>

Authentication

import os

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build a client scoped to one workspace. The subscription, resource group and
# workspace name are read from the variables listed under "Environment
# Variables"; os.environ[...] raises KeyError if one of them is not set.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"],
    resource_group_name=os.environ["AZURE_RESOURCE_GROUP"],
    workspace_name=os.environ["AZURE_ML_WORKSPACE_NAME"],
)

From Config File

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Uses config.json in current directory or parent
ml_client = MLClient.from_config(
    credential=DefaultAzureCredential(),
)

Workspace Management

Create Workspace

from azure.ai.ml.entities import Workspace

# Describe the workspace to create: its name, region, display metadata and tags.
ws = Workspace(
    name="my-workspace",
    location="eastus",
    display_name="My Workspace",
    description="ML workspace for experiments",
    tags={"purpose": "demo"},
)

# Start the create operation and call .result() on what it returns.
ml_client.workspaces.begin_create(ws).result()

List Workspaces

# Print each listed workspace as "<name>: <location>".
for ws in ml_client.workspaces.list():
    print(f"{ws.name}: {ws.location}")

Data Assets

from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Register a file
my_data = Data(
    name="my-dataset",
    version="1",
    path="azureml://datastores/workspaceblobstore/paths/data/train.csv",
    type=AssetTypes.URI_FILE,
    description="Training data",
)

ml_client.data.create_or_update(my_data)

# Register a folder
my_data = Data(
    name="my-folder-dataset",
    version="1",
    path="azureml://datastores/workspaceblobstore/paths/data/",
    type=AssetTypes.URI_FOLDER,
)

ml_client.data.create_or_update(my_data)

Model Registry

from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Describe the model to register: the local ./model/ directory, as version "1"
# of "my-model".
model = Model(
    name="my-model",
    version="1",
    path="./model/",
    type=AssetTypes.CUSTOM_MODEL,
    description="My trained model",
)

ml_client.models.create_or_update(model)

List Models

# Print every entry listed under the name "my-model" as "<name> v<version>".
for model in ml_client.models.list(name="my-model"):
    print(f"{model.name} v{model.version}")

Compute

Create Compute Cluster

from azure.ai.ml.entities import AmlCompute

# Cluster definition: between 0 and 4 Standard_DS3_v2 instances, with idle
# scale-down configured (see "Best Practices" on reducing compute costs).
cluster = AmlCompute(
    name="cpu-cluster",
    type="amlcompute",
    size="Standard_DS3_v2",
    min_instances=0,
    max_instances=4,
    idle_time_before_scale_down=120,
)

# Start the create-or-update operation and call .result() on what it returns.
ml_client.compute.begin_create_or_update(cluster).result()

List Compute

# Print each listed compute target as "<name>: <type>".
for compute in ml_client.compute.list():
    print(f"{compute.name}: {compute.type}")

Jobs

Command Job

from azure.ai.ml import command, Input

# Command job: runs train.py from ./src on "cpu-cluster". The ${{inputs.*}}
# placeholders in the command string refer to the keys of `inputs`.
job = command(
    code="./src",
    command="python train.py --data ${{inputs.data}} --lr ${{inputs.learning_rate}}",
    inputs={
        # Declared as uri_folder, so reference the asset registered with
        # AssetTypes.URI_FOLDER ("my-dataset" is registered as a single file).
        "data": Input(type="uri_folder", path="azureml:my-folder-dataset:1"),
        "learning_rate": 0.01,
    },
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    compute="cpu-cluster",
    display_name="training-job",
)

returned_job = ml_client.jobs.create_or_update(job)
print(f"Job URL: {returned_job.studio_url}")

Monitor Job

# Stream the submitted job's logs to monitor its progress.
ml_client.jobs.stream(returned_job.name)

Pipelines

from azure.ai.ml import dsl, Input, Output
from azure.ai.ml.entities import Pipeline


# NOTE: prep_component and train_component are not defined in this snippet;
# they must already be in scope before training_pipeline is called.
@dsl.pipeline(
    compute="cpu-cluster",
    description="Training pipeline",
)
def training_pipeline(data_input):
    # Step 1: run the preparation component on the pipeline input.
    prep_step = prep_component(data=data_input)
    # Step 2: train on the preparation step's "output_data" output.
    train_step = train_component(
        data=prep_step.outputs.output_data,
        learning_rate=0.01,
    )
    # Expose the training step's "model" output as the pipeline output "model".
    return {"model": train_step.outputs.model}


pipeline = training_pipeline(
    # Declared as uri_folder, so reference the asset registered with
    # AssetTypes.URI_FOLDER ("my-dataset" is registered as a single file).
    data_input=Input(type="uri_folder", path="azureml:my-folder-dataset:1"),
)

pipeline_job = ml_client.jobs.create_or_update(pipeline)

Environments

Create Custom Environment

from azure.ai.ml.entities import Environment

# Custom environment: the given base image plus the local conda specification
# file ./environment.yml.
env = Environment(
    name="my-env",
    version="1",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="./environment.yml",
)

ml_client.environments.create_or_update(env)

Datastores

List Datastores

# Print each listed datastore as "<name>: <type>".
for ds in ml_client.datastores.list():
    print(f"{ds.name}: {ds.type}")

Get Default Datastore

# Fetch the workspace's default datastore and print its name.
default_ds = ml_client.datastores.get_default()
print(f"Default: {default_ds.name}")

MLClient Operations

| Property | Operations |
| --- | --- |
| workspaces | begin_create, get, list, begin_delete |
| jobs | create_or_update, get, list, stream, begin_cancel |
| models | create_or_update, get, list, archive |
| data | create_or_update, get, list |
| compute | begin_create_or_update, get, list, begin_delete |
| environments | create_or_update, get, list |
| datastores | create_or_update, get, list, get_default |
| components | create_or_update, get, list |

Best Practices

Use versioning for data, models, and environments
Configure idle scale-down to reduce compute costs
Use environments for reproducible training
Stream job logs to monitor progress
Register models after successful training jobs
Use pipelines for multi-step workflows
Tag resources for organization and cost tracking

azure-ai-ml-py

Safety Notice

Copy this and send it to your AI assistant to learn

Uses config.json in current directory or parent

Register a file

Source Transparency

Related Skills

github-issue-creator

azure-observability

azure-appconfiguration-java

copilot-sdk