diff --git a/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb b/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb index 946debc7d7..48e5122d4d 100644 --- a/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb +++ b/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb @@ -85,13 +85,19 @@ "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", "\n", "\n", - "\n", "# Register dataset in SageMaker AI Registry\n", "# This creates a versioned dataset that can be referenced by ARN\n", - "# Provide a source (it can be local file path or S3 URL)\n", + "# Provide a source: a local file path or your own S3 URI pointing to a JSONL file.\n", + "# The dataset must be in JSONL format with each line containing a JSON object\n", + "# with 'prompt' and 'completion' fields. See the SageMaker documentation for details:\n", + "# https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-sft.html\n", + "#\n", + "# Replace the placeholder below with your own S3 URI or local file path:\n", + "MY_DATASET_S3_URI = \"s3://<your-bucket>/<path-to-dataset>.jsonl\" # TODO: replace with your dataset URI\n", + "\n", "dataset = DataSet.create(\n", " name=\"demo-1\",\n", - " source=\"s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl\"\n", + " source=MY_DATASET_S3_URI\n", ")\n", "\n", "print(f\"Dataset ARN: {dataset.arn}\")" @@ -163,7 +169,7 @@ " mlflow_experiment_name=\"test-finetuned-models-exp\", \n", " mlflow_run_name=\"test-finetuned-models-run\", \n", " training_dataset=dataset.arn, \n", - " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", + " s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n", " accept_eula=True\n", ")\n" ] @@ -378,7 +384,7 @@ " mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-finetuned-models-run\", # 
Optional[str]\n", " training_dataset=dataset.arn, #Optional[]\n", - " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", + " s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n", ")\n" ] }, @@ -439,7 +445,7 @@ " mlflow_experiment_name=\"test-nova-finetuned-models-exp\", \n", " mlflow_run_name=\"test-nova-finetuned-models-run\", \n", " training_dataset=\"arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1\",\n", - " s3_output_path=\"s3://mc-flows-sdk-testing-us-east-1/output/\"\n", + " s3_output_path=\"s3://<your-bucket>/output/\" # TODO: replace with your S3 output path\n", ")\n" ] }, @@ -487,4 +493,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file