diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb index 07c3c7f4..f41a03f4 100644 --- a/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb +++ b/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb @@ -28,16 +28,19 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "from getpass import getpass\n", "\n", - "from nemo_microservices import NeMoMicroservices\n", - "from nemo_microservices.beta.data_designer import (\n", + "from nemo_microservices.data_designer.essentials import (\n", + " CategorySamplerParams,\n", " DataDesignerConfigBuilder,\n", - " DataDesignerClient,\n", - ")\n", - "from nemo_microservices.beta.data_designer.config import columns as C\n", - "from nemo_microservices.beta.data_designer.config import params as P" + " LLMTextColumnConfig,\n", + " NeMoDataDesignerClient,\n", + " PersonSamplerParams,\n", + " SamplerColumnConfig,\n", + " SamplerType,\n", + " SubcategorySamplerParams,\n", + " UniformSamplerParams,\n", + ")" ] }, { @@ -51,7 +54,7 @@ "- If you have an instance of data designer running locally, you can connect to it as follows\n", "\n", " ```python\n", - " data_designer_client = DataDesignerClient(client=NeMoMicroservices(base_url=\"http://localhost:8080\"))\n", + " data_designer_client = NeMoDataDesignerClient(base_url=\"http://localhost:8080\")\n", " ```\n" ] }, @@ -76,11 +79,9 @@ "metadata": {}, "outputs": [], "source": [ - "data_designer_client = DataDesignerClient(\n", - " client=NeMoMicroservices(\n", - " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", - " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", - " )\n", + "data_designer_client = NeMoDataDesignerClient(\n", + " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", + " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", ")" ] }, @@ -149,10 +150,10 @@ "outputs": [], "source": [ "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_category\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\n", " \"Electronics\",\n", " \"Clothing\",\n", @@ -165,10 +166,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_subcategory\",\n", - " type=P.SamplerType.SUBCATEGORY,\n", - " params=P.SubcategorySamplerParams(\n", + " sampler_type=SamplerType.SUBCATEGORY,\n", + " params=SubcategorySamplerParams(\n", " category=\"product_category\",\n", " values={\n", " \"Electronics\": [\n", @@ -212,10 +213,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"target_age_range\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]\n", " ),\n", " )\n", @@ -240,27 +241,27 @@ "source": [ "# This column will sample synthetic person data based on statistics from the US Census.\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"customer\",\n", - " type=P.SamplerType.PERSON,\n", - " params=P.PersonSamplerParams(age_range=[18, 70]),\n", + " sampler_type=SamplerType.PERSON,\n", + " params=PersonSamplerParams(age_range=[18, 70]),\n", " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"number_of_stars\",\n", - " type=P.SamplerType.UNIFORM,\n", - " params=P.UniformSamplerParams(low=1, high=5),\n", + " sampler_type=SamplerType.UNIFORM,\n", + " params=UniformSamplerParams(low=1, high=5),\n", " convert_to=\"int\",\n", " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"review_style\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n", " weights=[1, 2, 2, 1],\n", " ),\n", @@ -292,7 +293,7 @@ "outputs": [], "source": [ "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"product_name\",\n", " prompt=(\n", " \"Come up with a creative product name for a product in the '{{ product_category }}' category, focusing \"\n", @@ -310,7 +311,7 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"customer_review\",\n", " prompt=(\n", " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", @@ -342,7 +343,7 @@ "metadata": {}, "outputs": [], "source": [ - "preview = data_designer_client.preview(config_builder, verbose_logging=True)" + "preview = data_designer_client.preview(config_builder)" ] }, { diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb index 55a6acf6..cc59e447 100644 --- a/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb +++ b/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb @@ -34,13 +34,17 @@ "source": [ "from getpass import getpass\n", "\n", - "from nemo_microservices import NeMoMicroservices\n", - "from nemo_microservices.beta.data_designer import (\n", + "from nemo_microservices.data_designer.essentials import (\n", + " CategorySamplerParams,\n", " DataDesignerConfigBuilder,\n", - " DataDesignerClient,\n", - ")\n", - "from nemo_microservices.beta.data_designer.config import columns as C\n", - "from nemo_microservices.beta.data_designer.config import params as P" + " ExpressionColumnConfig,\n", + " LLMStructuredColumnConfig,\n", + " NeMoDataDesignerClient,\n", + " PersonSamplerParams,\n", + " SamplerColumnConfig,\n", + " SamplerType,\n", + " SubcategorySamplerParams,\n", + ")" ] }, { @@ -54,7 +58,7 @@ "- If you have an instance of data designer running locally, you can connect to it as follows\n", "\n", " ```python\n", - " data_designer_client = DataDesignerClient(client=NeMoMicroservices(base_url=\"http://localhost:8080\"))\n", + " data_designer_client = NeMoDataDesignerClient(base_url=\"http://localhost:8080\")\n", " ```\n" ] }, @@ -79,11 +83,9 @@ "metadata": {}, "outputs": [], "source": [ - "data_designer_client = DataDesignerClient(\n", - " client=NeMoMicroservices(\n", - " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", - " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", - " )\n", + "data_designer_client = NeMoDataDesignerClient(\n", + " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", + " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", ")" ] }, @@ -189,18 +191,20 @@ "metadata": {}, "outputs": [], "source": [ - "# Since we often just want a few attributes from Person objects, we can use\n", - "# Data Designer's `with_person_samplers` method to create multiple person samplers\n", - "# at once and drop the person object columns from the final dataset.\n", - "config_builder.with_person_samplers(\n", - " {\"customer\": P.PersonSamplerParams(age_range=[18, 65])}\n", + "# This column will sample synthetic person data based on statistics from the US Census.\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"customer\",\n", + " sampler_type=SamplerType.PERSON,\n", + " params=PersonSamplerParams(age_range=[18, 70]),\n", + " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_category\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\n", " \"Electronics\",\n", " \"Clothing\",\n", @@ -213,10 +217,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_subcategory\",\n", - " type=P.SamplerType.SUBCATEGORY,\n", - " params=P.SubcategorySamplerParams(\n", + " sampler_type=SamplerType.SUBCATEGORY,\n", + " params=SubcategorySamplerParams(\n", " category=\"product_category\",\n", " values={\n", " \"Electronics\": [\n", @@ -260,10 +264,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"target_age_range\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]\n", " ),\n", " )\n", @@ -273,34 +277,34 @@ "# we also show how we can we use conditional params to set the values for the sampler if a given condition is met\n", "# in this example, we set the review style to rambling if the target age range is 18-25\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"review_style\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n", - " weights=[1, 2, 2, 1],\n", - " conditional_params={\n", - " \"target_age_range == '18-25'\": P.CategorySamplerParams(values=[\"rambling\"]),\n", - " }\n", + " weights=[1, 2, 2, 1]\n", " ),\n", + " conditional_params={\n", + " \"target_age_range == '18-25'\": CategorySamplerParams(values=[\"rambling\"]),\n", + " }\n", " )\n", ")\n", "\n", "# We can create new columns using Jinja expressions that reference\n", "# existing columns, including attributes of nested objects.\n", "config_builder.add_column(\n", - " C.ExpressionColumn(\n", + " ExpressionColumnConfig(\n", " name=\"customer_name\", expr=\"{{ customer.first_name }} {{ customer.last_name }}\"\n", " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.ExpressionColumn(name=\"customer_age\", expr=\"{{ customer.age }}\")\n", + " ExpressionColumnConfig(name=\"customer_age\", expr=\"{{ customer.age }}\")\n", ")\n", "\n", - "# Add an `LLMStructuredColumn` column to generate structured outputs.\n", + "# Add a column to generate structured outputs.\n", "config_builder.add_column(\n", - " C.LLMStructuredColumn(\n", + " LLMStructuredColumnConfig(\n", " name=\"product\",\n", " prompt=(\n", " \"Create a product in the '{{ product_category }}' category, focusing on products \"\n", @@ -315,7 +319,7 @@ "# Another powerful feature we can use is the ability to use conditional statements in our prompt using Jinja expressions\n", "# in this example, we add additional conditions to the prompt based on the target age range\n", "config_builder.add_column(\n", - " C.LLMStructuredColumn(\n", + " LLMStructuredColumnConfig(\n", " name=\"customer_review\",\n", " prompt=(\n", " \"Your task is to write a review for the following product:\\n\\n\"\n", @@ -333,10 +337,7 @@ " output_format=ProductReview,\n", " model_alias=model_alias,\n", " )\n", - ")\n", - "\n", - "# Let's add an evaluation report to our dataset.\n", - "config_builder.with_evaluation_report().validate()" + ")" ] }, { @@ -347,9 +348,7 @@ "\n", "- Iteration is key to generating high-quality synthetic data.\n", "\n", - "- Use the `preview` method to generate 10 records for inspection.\n", - "\n", - "- Setting `verbose_logging=True` prints logs within each task of the generation process.\n" + "- Use the `preview` method to generate 10 records for inspection.\n" ] }, { @@ -358,7 +357,7 @@ "metadata": {}, "outputs": [], "source": [ - "preview = data_designer_client.preview(config_builder, verbose_logging=True)" + "preview = data_designer_client.preview(config_builder)" ] }, { @@ -381,6 +380,16 @@ "preview.dataset" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# See analysis report on the preview dataset\n", + "preview.analysis.to_report()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb index 4df72ec8..75d1127d 100644 --- a/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb +++ b/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb @@ -34,14 +34,11 @@ "source": [ "from getpass import getpass\n", "\n", - "from nemo_microservices import NeMoMicroservices\n", - "from nemo_microservices.beta.data_designer import (\n", + "from nemo_microservices.data_designer.essentials import (\n", " DataDesignerConfigBuilder,\n", - " DataDesignerClient,\n", - ")\n", - "\n", - "from nemo_microservices.beta.data_designer.config import columns as C\n", - "from nemo_microservices.beta.data_designer.config import params as P" + " NeMoDataDesignerClient,\n", + " SeedDatasetReference,\n", + ")" ] }, { @@ -55,7 +52,7 @@ "- If you have an instance of data designer running locally, you can connect to it as follows\n", "\n", " ```python\n", - " data_designer_client = DataDesignerClient(client=NeMoMicroservices(base_url=\"http://localhost:8080\"))\n", + " data_designer_client = NeMoDataDesignerClient(base_url=\"http://localhost:8080\")\n", " ```\n" ] }, @@ -80,11 +77,9 @@ "metadata": {}, "outputs": [], "source": [ - "data_designer_client = DataDesignerClient(\n", - " client=NeMoMicroservices(\n", - " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", - " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", - " )\n", + "data_designer_client = NeMoDataDesignerClient(\n", + " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", + " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", ")" ] }, @@ -152,16 +147,13 @@ "metadata": {}, "outputs": [], "source": [ - "# The repo_id and filename arguments follow the Hugging Face Hub API format.\n", - "# Passing the dataset_path argument signals that we need to upload the dataset\n", - "# to the datastore. Note we need to pass in the datastore's endpoint, which\n", - "# must match the endpoint in the docker-compose file.\n", + "# The dataset argument follows Hugging Face Hub API format for datasets.\n", "config_builder.with_seed_dataset(\n", - " repo_id=\"gretelai/symptom_to_diagnosis\",\n", - " filename=\"train.jsonl\",\n", + " dataset_reference=SeedDatasetReference(\n", + " dataset=\"gretelai/symptom_to_diagnosis/train.jsonl\",\n", + " datastore_settings={\"endpoint\": \"https://huggingface.co\"}\n", + " ),\n", " sampling_strategy=\"shuffle\",\n", - " with_replacement=False,\n", - " datastore={\"endpoint\": \"https://huggingface.co\"}\n", ")" ] }, @@ -173,23 +165,7 @@ "\n", "- We set the seed dataset using the `with_seed_dataset` method.\n", "\n", - "- We use the `shuffle` sampling strategy, which shuffles the seed dataset before sampling.\n", - "\n", - "- We set `with_replacement=False`, which limits our max number of records to 853, which is the number of records in the seed dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Since we often just want a few attributes from Person objects, we can use\n", - "# Data Designer's `with_person_samplers` method to create multiple person samplers\n", - "# at once and drop the person object columns from the final dataset.\n", - "\n", - "# Empty dictionaries mean use default settings for the person samplers.\n", - "config_builder.with_person_samplers({\"patient_sampler\": {}, \"doctor_sampler\": {}})" + "- We use the `shuffle` sampling strategy, which shuffles the seed dataset before sampling.\n" ] }, { @@ -206,51 +182,68 @@ "# familiar with the required arguments for each type.\n", "\n", "config_builder.add_column(\n", + " name=\"patient_sampler\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"person\",\n", + " params={},\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"doctor_sampler\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"person\",\n", + " params={},\n", + ")\n", + "\n", + "config_builder.add_column(\n", " name=\"patient_id\",\n", - " type=\"uuid\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"uuid\",\n", " params={\"prefix\": \"PT-\", \"short_form\": True, \"uppercase\": True},\n", ")\n", "\n", "config_builder.add_column(\n", " name=\"first_name\",\n", - " type=\"expression\",\n", + " column_type=\"expression\",\n", " expr=\"{{ patient_sampler.first_name}} \",\n", ")\n", "\n", "config_builder.add_column(\n", " name=\"last_name\",\n", - " type=\"expression\",\n", + " column_type=\"expression\",\n", " expr=\"{{ patient_sampler.last_name }}\",\n", ")\n", "\n", "\n", "config_builder.add_column(\n", - " name=\"dob\", type=\"expression\", expr=\"{{ patient_sampler.birth_date }}\"\n", + " name=\"dob\", column_type=\"expression\", expr=\"{{ patient_sampler.birth_date }}\"\n", ")\n", "\n", "\n", "config_builder.add_column(\n", " name=\"patient_email\",\n", - " type=\"expression\",\n", + " column_type=\"expression\",\n", " expr=\"{{ patient_sampler.email_address }}\",\n", ")\n", "\n", "\n", "config_builder.add_column(\n", " name=\"symptom_onset_date\",\n", - " type=\"datetime\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"datetime\",\n", " params={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"},\n", ")\n", "\n", "config_builder.add_column(\n", " name=\"date_of_visit\",\n", - " type=\"timedelta\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"timedelta\",\n", " params={\"dt_min\": 1, \"dt_max\": 30, \"reference_column_name\": \"symptom_onset_date\"},\n", ")\n", "\n", "config_builder.add_column(\n", " name=\"physician\",\n", - " type=\"expression\",\n", + " column_type=\"expression\",\n", " expr=\"Dr. {{ doctor_sampler.last_name }}\",\n", ")\n", "\n", @@ -292,7 +285,7 @@ "metadata": {}, "outputs": [], "source": [ - "preview = data_designer_client.preview(config_builder, num_records=2, verbose_logging=True)" + "preview = data_designer_client.preview(config_builder, num_records=2)" ] }, { diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/4-custom-model-configs.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/4-custom-model-configs.ipynb index 660bed43..d37164f0 100644 --- a/nemo/NeMo-Data-Designer/intro-tutorials/4-custom-model-configs.ipynb +++ b/nemo/NeMo-Data-Designer/intro-tutorials/4-custom-model-configs.ipynb @@ -32,16 +32,23 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "from getpass import getpass\n", "\n", - "from nemo_microservices import NeMoMicroservices\n", - "from nemo_microservices.beta.data_designer import (\n", + "from nemo_microservices.data_designer.essentials import (\n", + " CategorySamplerParams,\n", " DataDesignerConfigBuilder,\n", - " DataDesignerClient,\n", - ")\n", - "from nemo_microservices.beta.data_designer.config import columns as C\n", - "from nemo_microservices.beta.data_designer.config import params as P" + " InferenceParameters,\n", + " LLMTextColumnConfig,\n", + " ModelConfig,\n", + " NeMoDataDesignerClient,\n", + " PersonSamplerParams,\n", + " SamplerColumnConfig,\n", + " SamplerType,\n", + " SubcategorySamplerParams,\n", + " UniformDistribution,\n", + " UniformDistributionParams,\n", + " UniformSamplerParams\n", + ")" ] }, { @@ -55,7 +62,7 @@ "- If you have an instance of data designer running locally, you can connect to it as follows\n", "\n", " ```python\n", - " data_designer_client = DataDesignerClient(client=NeMoMicroservices(base_url=\"http://localhost:8080\"))\n", + " data_designer_client = NeMoDataDesignerClient(base_url=\"http://localhost:8080\")\n", " ```\n" ] }, @@ -80,11 +87,9 @@ "metadata": {}, "outputs": [], "source": [ - "data_designer_client = DataDesignerClient(\n", - " client=NeMoMicroservices(\n", - " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", - " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", - " )\n", + "data_designer_client = NeMoDataDesignerClient(\n", + " base_url=\"https://ai.api.nvidia.com/v1/nemo/dd\",\n", + " default_headers={\"Authorization\": f\"Bearer {api_key}\"} # auto-generated API KEY\n", ")" ] }, @@ -135,24 +140,23 @@ "source": [ "config_builder = DataDesignerConfigBuilder(\n", " model_configs = [\n", - " P.ModelConfig(\n", + " ModelConfig(\n", " alias=model_alias_static_temp,\n", " model=model_id,\n", - " inference_parameters=P.InferenceParameters(\n", + " inference_parameters=InferenceParameters(\n", " max_tokens=1024,\n", " temperature=0.0,\n", " top_p=0.95,\n", " timeout=120\n", " ),\n", - " is_reasoner=True\n", " ),\n", - " P.ModelConfig(\n", + " ModelConfig(\n", " alias=model_alias_variable_temp,\n", " model=model_id,\n", - " inference_parameters=P.InferenceParameters(\n", + " inference_parameters=InferenceParameters(\n", " max_tokens=1024,\n", - " temperature=P.UniformDistribution(\n", - " params=P.UniformDistributionParams(\n", + " temperature=UniformDistribution(\n", + " params=UniformDistributionParams(\n", " low=0.5,\n", " high=0.9\n", " )\n", @@ -160,7 +164,6 @@ " top_p=0.95,\n", " timeout=120\n", " ),\n", - " is_reasoner=True\n", " ),\n", " ]\n", ")" @@ -184,10 +187,10 @@ "outputs": [], "source": [ "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_category\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\n", " \"Electronics\",\n", " \"Clothing\",\n", @@ -200,10 +203,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"product_subcategory\",\n", - " type=P.SamplerType.SUBCATEGORY,\n", - " params=P.SubcategorySamplerParams(\n", + " sampler_type=SamplerType.SUBCATEGORY,\n", + " params=SubcategorySamplerParams(\n", " category=\"product_category\",\n", " values={\n", " \"Electronics\": [\n", @@ -247,10 +250,10 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"target_age_range\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]\n", " ),\n", " )\n", @@ -275,27 +278,27 @@ "source": [ "# This column will sample synthetic person data based on statistics from the US Census.\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"customer\",\n", - " type=P.SamplerType.PERSON,\n", - " params=P.PersonSamplerParams(age_range=[18, 70]),\n", + " sampler_type=SamplerType.PERSON,\n", + " params=PersonSamplerParams(age_range=[18, 70]),\n", " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"number_of_stars\",\n", - " type=P.SamplerType.UNIFORM,\n", - " params=P.UniformSamplerParams(low=1, high=5),\n", + " sampler_type=SamplerType.UNIFORM,\n", + " params=UniformSamplerParams(low=1, high=5),\n", " convert_to=\"int\",\n", " )\n", ")\n", "\n", "config_builder.add_column(\n", - " C.SamplerColumn(\n", + " SamplerColumnConfig(\n", " name=\"review_style\",\n", - " type=P.SamplerType.CATEGORY,\n", - " params=P.CategorySamplerParams(\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n", " weights=[1, 2, 2, 1],\n", " ),\n", @@ -321,7 +324,7 @@ "outputs": [], "source": [ "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"product_name\",\n", " prompt=(\n", " \"Come up with a creative product name for a product in the '{{ product_category }}' category, focusing \"\n", @@ -339,7 +342,7 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"customer_review_base\",\n", " prompt=(\n", " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", @@ -353,7 +356,7 @@ "\n", "\n", "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"customer_review_set_2\",\n", " prompt=(\n", " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", @@ -366,7 +369,7 @@ ")\n", "\n", "config_builder.add_column(\n", - " C.LLMTextColumn(\n", + " LLMTextColumnConfig(\n", " name=\"customer_review_set_3\",\n", " prompt=(\n", " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", @@ -396,7 +399,7 @@ "metadata": {}, "outputs": [], "source": [ - "preview = data_designer_client.preview(config_builder, num_records=3, verbose_logging=True)" + "preview = data_designer_client.preview(config_builder, num_records=3)" ] }, {