Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,19 @@
"from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n",
"\n",
"\n",
"\n",
"# Register dataset in SageMaker AI Registry\n",
"# This creates a versioned dataset that can be referenced by ARN\n",
"# Provide a source (it can be local file path or S3 URL)\n",
"# Provide a source: a local file path or your own S3 URI pointing to a JSONL file.\n",
"# The dataset must be in JSONL format with each line containing a JSON object\n",
"# with 'prompt' and 'completion' fields. See the SageMaker documentation for details:\n",
"# https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-sft.html\n",
"#\n",
"# Replace the placeholder below with your own S3 URI or local file path:\n",
"MY_DATASET_S3_URI = \"s3://<your-bucket>/<path-to-your-dataset>.jsonl\" # TODO: replace with your dataset URI\n",
"\n",
"dataset = DataSet.create(\n",
" name=\"demo-1\",\n",
" source=\"s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl\"\n",
" source=MY_DATASET_S3_URI\n",
")\n",
"\n",
"print(f\"Dataset ARN: {dataset.arn}\")"
Expand Down Expand Up @@ -163,7 +169,7 @@
" mlflow_experiment_name=\"test-finetuned-models-exp\", \n",
" mlflow_run_name=\"test-finetuned-models-run\", \n",
" training_dataset=dataset.arn, \n",
" s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
" s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n",
" accept_eula=True\n",
")\n"
]
Expand Down Expand Up @@ -378,7 +384,7 @@
" mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n",
" mlflow_run_name=\"test-finetuned-models-run\", # Optional[str]\n",
" training_dataset=dataset.arn, # Optional: dataset ARN\n",
" s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
" s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n",
")\n"
]
},
Expand Down Expand Up @@ -439,7 +445,7 @@
" mlflow_experiment_name=\"test-nova-finetuned-models-exp\", \n",
" mlflow_run_name=\"test-nova-finetuned-models-run\", \n",
" training_dataset=\"arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1\",\n",
" s3_output_path=\"s3://mc-flows-sdk-testing-us-east-1/output/\"\n",
" s3_output_path=\"s3://<your-bucket>/output/\" # TODO: replace with your S3 output path\n",
")\n"
]
},
Expand Down Expand Up @@ -487,4 +493,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Loading