From 63cc68eeaecf2095fa5dd115692063170b485531 Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 30 Jun 2025 01:28:07 +0000 Subject: [PATCH 1/3] init commit --- .github/workflows/python-ec2-genesis-test.yml | 129 +++++++++++++++ .../python/langchain_service/Dockerfile | 11 ++ .../langchain_service/ec2-requirements.txt | 13 ++ .../python/langchain_service/server.py | 148 ++++++++++++++++++ .../genai/generate_traffic.sh | 126 +++++++++++++++ terraform/python/ec2/langchain/main.tf | 82 ++++++++++ terraform/python/ec2/langchain/user_data.sh | 25 +++ terraform/python/ec2/langchain/variables.tf | 14 ++ 8 files changed, 548 insertions(+) create mode 100644 .github/workflows/python-ec2-genesis-test.yml create mode 100644 sample-apps/python/langchain_service/Dockerfile create mode 100644 sample-apps/python/langchain_service/ec2-requirements.txt create mode 100644 sample-apps/python/langchain_service/server.py create mode 100644 sample-apps/traffic-generator/genai/generate_traffic.sh create mode 100644 terraform/python/ec2/langchain/main.tf create mode 100644 terraform/python/ec2/langchain/user_data.sh create mode 100644 terraform/python/ec2/langchain/variables.tf diff --git a/.github/workflows/python-ec2-genesis-test.yml b/.github/workflows/python-ec2-genesis-test.yml new file mode 100644 index 000000000..281dee75f --- /dev/null +++ b/.github/workflows/python-ec2-genesis-test.yml @@ -0,0 +1,129 @@ +## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +## SPDX-License-Identifier: Apache-2.0 + +name: Python EC2 LangChain Service Deployment +on: + push: + branches: + - gen-ai-sample-app + +permissions: + id-token: write + contents: read + +env: + E2E_TEST_AWS_REGION: 'us-west-2' + # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }} + # E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }} + E2E_TEST_ACCOUNT_ID: 571600841604 + E2E_TEST_ROLE_NAME: github + METRIC_NAMESPACE: genesis + LOG_GROUP_NAME: test/genesis + TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE} + +jobs: + python-ec2-adot-langchain: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + + - name: Initiate Gradlew Daemon + uses: ./.github/workflows/actions/execute_and_retry + continue-on-error: true + with: + command: "./gradlew :validator:build" + cleanup: "./gradlew clean" + max_retry: 3 + sleep_time: 60 + + - name: Generate testing id + run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}-${RANDOM}" >> $GITHUB_ENV + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} + aws-region: ${{ env.E2E_TEST_AWS_REGION }} + + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + + - name: Initiate Terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/langchain && terraform init && terraform validate" + cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" + max_retry: 6 + sleep_time: 60 + + - name: Deploy LangChain service via terraform + working-directory: terraform/python/ec2/langchain + run: | + terraform apply -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=s3://sigv4perfresults/langchain-service.zip" + + - name: Get deployment info + working-directory: terraform/python/ec2/langchain + run: | + echo "INSTANCE_IP=$(terraform output public_ip)" >> $GITHUB_ENV + echo "INSTANCE_ID=$(terraform output instance_id)" >> $GITHUB_ENV + + - name: Test LangChain service + run: | + echo "Testing LangChain service at ${{ env.INSTANCE_IP }}:8000" + curl -f http://${{ env.INSTANCE_IP }}:8000/health + curl -f http://${{ env.INSTANCE_IP }}:8000/ + + - name: Generate traffic + run: | + cd sample-apps/traffic-generator/genai + chmod +x generate_traffic.sh + ./generate_traffic.sh http://${{ env.INSTANCE_IP }}:8000 + + - name: Validate generated logs + run: ./gradlew validator:run --args='-c python/ec2/default/log-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }}' + + - name: Validate generated metrics + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/default/metric-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }}' + + - name: Validate generated traces + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/default/trace-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --account-id ${{ env.E2E_TEST_ACCOUNT_ID }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }}' + + - name: Cleanup + if: always() + continue-on-error: true + working-directory: terraform/python/ec2/langchain + run: | + terraform destroy -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=s3://sigv4perfresults/langchain-service.zip" \ No newline at end of file diff --git a/sample-apps/python/langchain_service/Dockerfile b/sample-apps/python/langchain_service/Dockerfile new file mode 100644 index 000000000..c77ba9668 --- /dev/null +++ b/sample-apps/python/langchain_service/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY . . + +RUN pip install --no-cache-dir -r ec2-requirements.txt + +EXPOSE 8000 + +CMD ["opentelemetry-instrument", "python", "server.py"] \ No newline at end of file diff --git a/sample-apps/python/langchain_service/ec2-requirements.txt b/sample-apps/python/langchain_service/ec2-requirements.txt new file mode 100644 index 000000000..87f34f891 --- /dev/null +++ b/sample-apps/python/langchain_service/ec2-requirements.txt @@ -0,0 +1,13 @@ +langchain +langchain-community +opentelemetry-sdk +opentelemetry-api +opentelemetry-semantic-conventions +python-dotenv +openlit +botocore +setuptools +boto3 +./aws_opentelemetry_distro_genai_beta-0.1.0b8-py3-none-any.whl +fastapi +uvicorn[standard] \ No newline at end of file diff --git a/sample-apps/python/langchain_service/server.py b/sample-apps/python/langchain_service/server.py new file mode 100644 index 000000000..cf50b7c18 --- /dev/null +++ b/sample-apps/python/langchain_service/server.py @@ -0,0 +1,148 @@ +import os +from typing import Dict, List +from dotenv import load_dotenv +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from langchain_aws import ChatBedrock +from langchain.prompts import ChatPromptTemplate +from langchain.chains import LLMChain +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from openinference.instrumentation.langchain import LangChainInstrumentor + +# Load environment variables +load_dotenv() + +# Set up OpenTelemetry with BOTH exporters +tracer_provider = TracerProvider() + +# Add Console exporter +console_exporter = ConsoleSpanExporter() +console_processor = BatchSpanProcessor(console_exporter) +tracer_provider.add_span_processor(console_processor) + +# Add OTLP exporter +otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces") +otlp_processor = BatchSpanProcessor(otlp_exporter) +tracer_provider.add_span_processor(otlp_processor) + +# Set as global provider +trace.set_tracer_provider(tracer_provider) + +# Instrument LangChain with OpenInference +LangChainInstrumentor().instrument(tracer_provider=tracer_provider) + +# Initialize FastAPI app +app = FastAPI(title="LangChain Bedrock OpenInference API", version="1.0.0") + +# Initialize the LLM with AWS Bedrock +llm = ChatBedrock( + model_id="anthropic.claude-3-haiku-20240307-v1:0", + model_kwargs={ + "temperature": 0.7, + "max_tokens": 500 + }, + region_name=os.getenv("AWS_DEFAULT_REGION", "us-west-2") +) + +# Create a prompt template +prompt = ChatPromptTemplate.from_template( + "You are a helpful assistant. The user says: {input}. Provide a helpful response." +) + +# Create a chain +chain = LLMChain(llm=llm, prompt=prompt) + +# Request models +class ChatRequest(BaseModel): + message: str + +class BatchChatRequest(BaseModel): + messages: List[str] + +class ChatResponse(BaseModel): + response: str + +class BatchChatResponse(BaseModel): + responses: List[Dict[str, str]] + +# Sample prompts for testing +SAMPLE_PROMPTS = [ + "What is the capital of France?", + "How do I make a cup of coffee?", + "What are the benefits of exercise?", + "Explain quantum computing in simple terms", + "What's the best way to learn programming?" +] + +@app.get("/") +async def root(): + return { + "message": "LangChain Bedrock OpenInference API is running!", + "endpoints": { + "/chat": "Single message chat endpoint", + "/batch": "Batch message processing endpoint", + "/sample": "Run sample prompts" + } + } + +@app.post("/chat", response_model=ChatResponse) +async def chat(request: ChatRequest): + """ + Chat endpoint that processes a single user message through AWS Bedrock + """ + try: + # Process the input through the chain + result = await chain.ainvoke({"input": request.message}) + return ChatResponse(response=result["text"]) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/batch", response_model=BatchChatResponse) +async def batch_chat(request: BatchChatRequest): + """ + Batch endpoint that processes multiple messages + """ + try: + responses = [] + for message in request.messages: + result = await chain.ainvoke({"input": message}) + responses.append({ + "message": message, + "response": result["text"] + }) + return BatchChatResponse(responses=responses) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/sample", response_model=BatchChatResponse) +async def run_samples(): + """ + Run the predefined sample prompts + """ + try: + responses = [] + for prompt in SAMPLE_PROMPTS: + result = await chain.ainvoke({"input": prompt}) + responses.append({ + "message": prompt, + "response": result["text"] + }) + return BatchChatResponse(responses=responses) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/health") +async def health(): + """Health check endpoint""" + return {"status": "healthy", "llm": "AWS Bedrock Claude 3 Haiku"} + +if __name__ == "__main__": + import uvicorn + print("Starting FastAPI server with AWS Bedrock and OpenInference instrumentation...") + print("Make sure AWS credentials are configured") + print("Server will run on http://localhost:8000") + print("API docs available at http://localhost:8000/docs") + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/sample-apps/traffic-generator/genai/generate_traffic.sh b/sample-apps/traffic-generator/genai/generate_traffic.sh new file mode 100644 index 000000000..ff6fe0425 --- /dev/null +++ b/sample-apps/traffic-generator/genai/generate_traffic.sh @@ -0,0 +1,126 @@ +#!/bin/bash + +# Configuration +SERVER_URL="${SERVER_URL:-http://localhost:8000}" +ENDPOINT="${SERVER_URL}/chat" +DELAY_SECONDS="${DELAY_SECONDS:-3600}" # Default 1 hour (3600 seconds) between requests +NUM_REQUESTS="${NUM_REQUESTS:-0}" # 0 means infinite +TIMEOUT="${TIMEOUT:-30}" # Request timeout in seconds + +# Color codes for output +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Array of sample messages +MESSAGES=( + "What is the weather like today?" + "Tell me a joke" + "How do I make a cup of coffee?" + "What are the benefits of exercise?" + "Explain quantum computing in simple terms" + "What's the capital of France?" + "How do I learn programming?" + "What are some healthy breakfast ideas?" + "Tell me about artificial intelligence" + "How can I improve my productivity?" + "What's the difference between a list and a tuple in Python?" + "Explain the concept of microservices" + "What are some best practices for API design?" + "How does machine learning work?" + "What's the purpose of unit testing?" +) + +# Function to send a request +send_request() { + local message="$1" + local request_num="$2" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + echo -e "${YELLOW}[$timestamp] Request #$request_num${NC}" + echo "Message: \"$message\"" + + # Send request with timeout + response=$(curl -s -X POST "$ENDPOINT" \ + -H "Content-Type: application/json" \ + -d "{\"message\": \"$message\"}" \ + -m "$TIMEOUT" \ + -w "\nHTTP_STATUS:%{http_code}\nTIME_TOTAL:%{time_total}") + + # Extract HTTP status and response time + http_status=$(echo "$response" | grep "HTTP_STATUS:" | cut -d: -f2) + time_total=$(echo "$response" | grep "TIME_TOTAL:" | cut -d: -f2) + body=$(echo "$response" | sed '/HTTP_STATUS:/d' | sed '/TIME_TOTAL:/d') + + if [ "$http_status" = "200" ]; then + echo -e "${GREEN}✓ Success${NC} (${time_total}s)" + echo "Response: $body" + else + echo -e "${RED}✗ Error: HTTP $http_status${NC}" + if [ -n "$body" ]; then + echo "Response: $body" + fi + fi + echo "---" +} + +# Trap Ctrl+C to exit gracefully +trap 'echo -e "\n${YELLOW}Traffic generation stopped by user${NC}"; exit 0' INT + +# Main execution +echo -e "${GREEN}Starting traffic generation to $ENDPOINT${NC}" +echo "Configuration:" +echo " - Delay between requests: ${DELAY_SECONDS}s" +echo " - Request timeout: ${TIMEOUT}s" +echo " - Number of requests: ${NUM_REQUESTS} (0 = infinite)" +echo " - Requests per minute: ~$((60 / DELAY_SECONDS))" +echo -e "${YELLOW}Press Ctrl+C to stop${NC}" +echo "==================================" + +# Check if server is reachable +echo "Checking server health..." +health_check=$(curl -s -o /dev/null -w "%{http_code}" "$SERVER_URL/health" -m 5) +if [ "$health_check" != "200" ]; then + echo -e "${RED}Warning: Server health check failed (HTTP $health_check)${NC}" + echo "Make sure the server is running at $SERVER_URL" + read -p "Continue anyway? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +count=0 +start_time=$(date +%s) + +while true; do + # Select a random message + random_index=$((RANDOM % ${#MESSAGES[@]})) + message="${MESSAGES[$random_index]}" + + # Increment counter + count=$((count + 1)) + + # Send the request + send_request "$message" "$count" + + # Check if we've reached the limit + if [ "$NUM_REQUESTS" -gt 0 ] && [ "$count" -ge "$NUM_REQUESTS" ]; then + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo -e "${GREEN}Completed $count requests in ${duration}s${NC}" + break + fi + + # Show progress every 10 requests + if [ $((count % 10)) -eq 0 ]; then + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + rate=$(echo "scale=2; $count / $elapsed * 60" | bc 2>/dev/null || echo "N/A") + echo -e "${YELLOW}Progress: $count requests sent, Rate: ${rate} req/min${NC}" + fi + + # Wait before next request + sleep "$DELAY_SECONDS" +done \ No newline at end of file diff --git a/terraform/python/ec2/langchain/main.tf b/terraform/python/ec2/langchain/main.tf new file mode 100644 index 000000000..93e5a6d93 --- /dev/null +++ b/terraform/python/ec2/langchain/main.tf @@ -0,0 +1,82 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region +} + +data "aws_ami" "amazon_linux" { + most_recent = true + owners = ["amazon"] + filter { + name = "name" + values = ["amzn2-ami-hvm-*-x86_64-gp2"] + } +} + +data "aws_vpc" "default" { + default = true +} + +data "aws_subnets" "default" { + filter { + name = "vpc-id" + values = [data.aws_vpc.default.id] + } +} + +resource "aws_security_group" "langchain_sg" { + name_prefix = "langchain-${var.test_id}" + vpc_id = data.aws_vpc.default.id + + ingress { + from_port = 8000 + to_port = 8000 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_instance" "langchain_instance" { + ami = data.aws_ami.amazon_linux.id + instance_type = "t3.medium" + subnet_id = data.aws_subnets.default.ids[0] + vpc_security_group_ids = [aws_security_group.langchain_sg.id] + + user_data = base64encode(templatefile("${path.module}/user_data.sh", { + service_zip_url = var.service_zip_url + aws_region = var.aws_region + })) + + tags = { + Name = "langchain-service-${var.test_id}" + } +} + +output "instance_id" { + value = aws_instance.langchain_instance.id +} + +output "public_ip" { + value = aws_instance.langchain_instance.public_ip +} \ No newline at end of file diff --git a/terraform/python/ec2/langchain/user_data.sh b/terraform/python/ec2/langchain/user_data.sh new file mode 100644 index 000000000..baafe2667 --- /dev/null +++ b/terraform/python/ec2/langchain/user_data.sh @@ -0,0 +1,25 @@ +#!/bin/bash +yum update -y +yum install -y docker python3.12 python3.12-pip unzip + +systemctl start docker +systemctl enable docker + +# Download and setup LangChain service +mkdir -p /app +cd /app +aws s3 cp ${service_zip_url} langchain-service.zip +unzip langchain-service.zip + +# Install dependencies +pip3.12 install -r ec2-requirements.txt + +# Run with OTEL instrumentation +export OTEL_PYTHON_DISTRO=aws_distro +export OTEL_PYTHON_CONFIGURATOR=aws_configurator +export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +export OTEL_EXPORTER_OTLP_LOGS_HEADERS="x-aws-log-group=test/genesis,x-aws-log-stream=default,x-aws-metric-namespace=genesis" +export OTEL_RESOURCE_ATTRIBUTES="service.name=langchain-traceloop-app" +export AGENT_OBSERVABILITY_ENABLED="true" + +opentelemetry-instrument python3.12 server.py \ No newline at end of file diff --git a/terraform/python/ec2/langchain/variables.tf b/terraform/python/ec2/langchain/variables.tf new file mode 100644 index 000000000..069c415e5 --- /dev/null +++ b/terraform/python/ec2/langchain/variables.tf @@ -0,0 +1,14 @@ +variable "aws_region" { + description = "AWS region" + type = string +} + +variable "test_id" { + description = "Test ID for resource naming" + type = string +} + +variable "service_zip_url" { + description = "S3 URL for the LangChain service zip file" + type = string +} \ No newline at end of file From f1e683e72368a7e99fc6053fffb586ca8b7bde8b Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 9 Jul 2025 14:43:35 +0000 Subject: [PATCH 2/3] add python gen ai release test --- .github/workflows/python-ec2-genai-test.yml | 179 ++++++++++++++++++ .github/workflows/python-ec2-genesis-test.yml | 129 ------------- .../ec2-requirements.txt | 4 +- .../server.py | 42 +--- .../python/langchain_service/Dockerfile | 11 -- .../genai/generate_traffic.sh | 8 +- terraform/python/ec2/adot-genai/main.tf | 129 +++++++++++++ terraform/python/ec2/adot-genai/variables.tf | 38 ++++ terraform/python/ec2/langchain/main.tf | 82 -------- terraform/python/ec2/langchain/user_data.sh | 25 --- terraform/python/ec2/langchain/variables.tf | 14 -- .../src/main/java/com/amazon/aoc/App.java | 4 + .../PredefinedExpectedTemplate.java | 5 + .../java/com/amazon/aoc/models/Context.java | 2 + .../amazon/aoc/validators/CWLogValidator.java | 65 +++++-- .../aoc/validators/CWMetricValidator.java | 19 ++ .../amazon/aoc/validators/TraceValidator.java | 20 +- .../python/ec2/adot-genai/genai-log.mustache | 36 ++++ .../ec2/adot-genai/genai-metric.mustache | 4 + .../ec2/adot-genai/genai-trace.mustache | 20 ++ .../python/ec2/adot-genai/log-validation.yml | 6 + .../ec2/adot-genai/metric-validation.yml | 6 + .../ec2/adot-genai/trace-validation.yml | 6 + 23 files changed, 529 insertions(+), 325 deletions(-) create mode 100644 .github/workflows/python-ec2-genai-test.yml delete mode 100644 .github/workflows/python-ec2-genesis-test.yml rename sample-apps/python/{langchain_service => genai_service}/ec2-requirements.txt (65%) rename sample-apps/python/{langchain_service => genai_service}/server.py (72%) delete mode 100644 sample-apps/python/langchain_service/Dockerfile create mode 100644 terraform/python/ec2/adot-genai/main.tf create mode 100644 terraform/python/ec2/adot-genai/variables.tf delete mode 100644 terraform/python/ec2/langchain/main.tf delete mode 100644 terraform/python/ec2/langchain/user_data.sh delete mode 100644 terraform/python/ec2/langchain/variables.tf create mode 100644 validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache create mode 100644 validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache create mode 100644 validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache create mode 100644 validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml create mode 100644 validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml create mode 100644 validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml diff --git a/.github/workflows/python-ec2-genai-test.yml b/.github/workflows/python-ec2-genai-test.yml new file mode 100644 index 000000000..527c3c397 --- /dev/null +++ b/.github/workflows/python-ec2-genai-test.yml @@ -0,0 +1,179 @@ +## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +## SPDX-License-Identifier: Apache-2.0 + +name: Python EC2 LangChain Service Deployment +on: + push: + branches: + - origin/gen-ai-sample-app + +permissions: + id-token: write + contents: read + +env: + E2E_TEST_AWS_REGION: 'us-west-2' + # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }} + # E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }} + E2E_TEST_ACCOUNT_ID: 571600841604 + E2E_TEST_ROLE_NAME: github + METRIC_NAMESPACE: genesis + LOG_GROUP_NAME: test/genesis + TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE} + SAMPLE_APP_ZIP: s3://sigv4perfresults/langchain-service.zip + +jobs: + python-ec2-adot-genai: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + + # - name: Set Get ADOT Wheel command environment variable + # run: | + # if [ "${{ github.event.repository.name }}" = "aws-otel-python-instrumentation" ]; then + # # Reusing the adot-main-build-staging-jar bucket to store the python wheel file + # echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # elif [ "${{ env.OTEL_SOURCE }}" == "pypi" ]; then + # echo GET_ADOT_WHEEL_COMMAND="sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # else + # latest_release_version=$(curl -sL https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest | grep -oP '/releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -n 1) + # echo "The latest version is $latest_release_version" + # echo GET_ADOT_WHEEL_COMMAND="wget -O ${{ env.ADOT_WHEEL_NAME }} https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest/download/aws_opentelemetry_distro-$latest_release_version-py3-none-any.whl \ + # && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # fi + + - name: Initiate Gradlew Daemon + uses: ./.github/workflows/actions/execute_and_retry + continue-on-error: true + with: + command: "./gradlew :validator:build" + cleanup: "./gradlew clean" + max_retry: 3 + sleep_time: 60 + + - name: Generate testing id + run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}-${RANDOM}" >> $GITHUB_ENV + + - name: Generate XRay and W3C trace ID + run: | + ID_1="$(printf '%08x' $(date +%s))" + ID_2="$(openssl rand -hex 12)" + W3C_TRACE_ID="${ID_1}${ID_2}" + XRAY_TRACE_ID="1-${ID_1}-${ID_2}" + echo "XRAY_TRACE_ID=${XRAY_TRACE_ID}" >> $GITHUB_ENV + echo "W3C_TRACE_ID=${W3C_TRACE_ID}" >> $GITHUB_ENV + echo "Generated XRay Trace ID: ${XRAY_TRACE_ID}" + echo "Generated W3C Trace ID: ${W3C_TRACE_ID}" + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} + aws-region: ${{ env.E2E_TEST_AWS_REGION }} + + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + + - name: Initiate Terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/adot-genai && terraform init && terraform validate" + cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" + max_retry: 6 + + - name: Deploy service via terraform + working-directory: terraform/python/ec2/adot-genai + run: | + terraform apply -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=${{ env.SAMPLE_APP_ZIP }}" + + - name: Get deployment info + working-directory: terraform/python/ec2/adot-genai + run: | + echo "INSTANCE_IP=$(terraform output langchain_service_public_ip)" >> $GITHUB_ENV + echo "INSTANCE_ID=$(terraform output langchain_service_instance_id)" >> $GITHUB_ENV + + - name: Wait for Gen AI Chatbot service to be ready + run: | + echo "Waiting for service to be ready at http://${{ env.INSTANCE_IP }}:8000" + for i in {1..60}; do + if curl -f -s "http://${{ env.INSTANCE_IP }}:8000/health" > /dev/null 2>&1; then + echo "Service is ready!" + break + fi + echo "Attempt $i: Service not ready yet, waiting 10 seconds..." + sleep 10 + done + + # Final check + if ! curl -f -s "http://${{ env.INSTANCE_IP }}:8000/health" > /dev/null 2>&1; then + echo "Service failed to become ready after 10 minutes" + exit 1 + fi + + - name: Generate traffic + run: | + cd sample-apps/traffic-generator/genai + chmod +x generate_traffic.sh + export SERVER_URL="http://${{ env.INSTANCE_IP }}:8000" + export NUM_REQUESTS="5" + export DELAY_SECONDS="5" + export TIMEOUT="30" + export TRACE_ID="Root=${XRAY_TRACE_ID};Parent=$(openssl rand -hex 8);Sampled=1" + ./generate_traffic.sh + + - name: Validate generated logs + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/log-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }} + --trace-id ${{ env.W3C_TRACE_ID }}' + + - name: Validate generated traces + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/trace-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }} + --trace-id ${{ env.XRAY_TRACE_ID }}' + + - name: Wait for metrics to be published + if: (success() || failure()) && !cancelled() + run: | + echo "Waiting 60 seconds to ensure EMF metrics are published to CloudWatch" + sleep 60 + + - name: Validate generated metrics + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/metric-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }}' + + - name: Cleanup + if: always() + continue-on-error: true + working-directory: terraform/python/ec2/adot-genai + run: | + terraform destroy -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=${{ env.SAMPLE_APP_ZIP }}" diff --git a/.github/workflows/python-ec2-genesis-test.yml b/.github/workflows/python-ec2-genesis-test.yml deleted file mode 100644 index 281dee75f..000000000 --- a/.github/workflows/python-ec2-genesis-test.yml +++ /dev/null @@ -1,129 +0,0 @@ -## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -## SPDX-License-Identifier: Apache-2.0 - -name: Python EC2 LangChain Service Deployment -on: - push: - branches: - - gen-ai-sample-app - -permissions: - id-token: write - contents: read - -env: - E2E_TEST_AWS_REGION: 'us-west-2' - # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }} - # E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }} - E2E_TEST_ACCOUNT_ID: 571600841604 - E2E_TEST_ROLE_NAME: github - METRIC_NAMESPACE: genesis - LOG_GROUP_NAME: test/genesis - TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE} - -jobs: - python-ec2-adot-langchain: - runs-on: ubuntu-latest - timeout-minutes: 30 - steps: - - uses: actions/checkout@v4 - - - name: Initiate Gradlew Daemon - uses: ./.github/workflows/actions/execute_and_retry - continue-on-error: true - with: - command: "./gradlew :validator:build" - cleanup: "./gradlew clean" - max_retry: 3 - sleep_time: 60 - - - name: Generate testing id - run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}-${RANDOM}" >> $GITHUB_ENV - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} - aws-region: ${{ env.E2E_TEST_AWS_REGION }} - - - name: Set up terraform - uses: ./.github/workflows/actions/execute_and_retry - with: - command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" - post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list - && sudo apt update && sudo apt install terraform' - sleep_time: 60 - - - name: Initiate Terraform - uses: ./.github/workflows/actions/execute_and_retry - with: - command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/langchain && terraform init && terraform validate" - cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" - max_retry: 6 - sleep_time: 60 - - - name: Deploy LangChain service via terraform - working-directory: terraform/python/ec2/langchain - run: | - terraform apply -auto-approve \ - -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="service_zip_url=s3://sigv4perfresults/langchain-service.zip" - - - name: Get deployment info - working-directory: terraform/python/ec2/langchain - run: | - echo "INSTANCE_IP=$(terraform output public_ip)" >> $GITHUB_ENV - echo "INSTANCE_ID=$(terraform output instance_id)" >> $GITHUB_ENV - - - name: Test LangChain service - run: | - echo "Testing LangChain service at ${{ env.INSTANCE_IP }}:8000" - curl -f http://${{ env.INSTANCE_IP }}:8000/health - curl -f http://${{ env.INSTANCE_IP }}:8000/ - - - name: Generate traffic - run: | - cd sample-apps/traffic-generator/genai - chmod +x generate_traffic.sh - ./generate_traffic.sh http://${{ env.INSTANCE_IP }}:8000 - - - name: Validate generated logs - run: ./gradlew validator:run --args='-c python/ec2/default/log-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.INSTANCE_IP }}:8000 - --region ${{ env.E2E_TEST_AWS_REGION }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP_NAME }} - --service-name langchain-traceloop-app - --instance-id ${{ env.INSTANCE_ID }}' - - - name: Validate generated metrics - if: (success() || failure()) && !cancelled() - run: ./gradlew validator:run --args='-c python/ec2/default/metric-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.INSTANCE_IP }}:8000 - --region ${{ env.E2E_TEST_AWS_REGION }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --service-name langchain-traceloop-app - --instance-id ${{ env.INSTANCE_ID }}' - - - name: Validate generated traces - if: (success() || failure()) && !cancelled() - run: ./gradlew validator:run --args='-c python/ec2/default/trace-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.INSTANCE_IP }}:8000 - --region ${{ env.E2E_TEST_AWS_REGION }} - --account-id ${{ env.E2E_TEST_ACCOUNT_ID }} - --service-name langchain-traceloop-app - --instance-id ${{ env.INSTANCE_ID }}' - - - name: Cleanup - if: always() - continue-on-error: true - working-directory: terraform/python/ec2/langchain - run: | - terraform destroy -auto-approve \ - -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="service_zip_url=s3://sigv4perfresults/langchain-service.zip" \ No newline at end of file diff --git a/sample-apps/python/langchain_service/ec2-requirements.txt b/sample-apps/python/genai_service/ec2-requirements.txt similarity index 65% rename from sample-apps/python/langchain_service/ec2-requirements.txt rename to sample-apps/python/genai_service/ec2-requirements.txt index 87f34f891..9b270e251 100644 --- a/sample-apps/python/langchain_service/ec2-requirements.txt +++ b/sample-apps/python/genai_service/ec2-requirements.txt @@ -1,6 +1,8 @@ langchain langchain-community +langchain_aws opentelemetry-sdk +openinference-instrumentation-langchain opentelemetry-api opentelemetry-semantic-conventions python-dotenv @@ -8,6 +10,6 @@ openlit botocore setuptools boto3 -./aws_opentelemetry_distro_genai_beta-0.1.0b8-py3-none-any.whl +aws_opentelemetry_distro_genai_beta fastapi uvicorn[standard] \ No newline at end of file diff --git a/sample-apps/python/langchain_service/server.py b/sample-apps/python/genai_service/server.py similarity index 72% rename from sample-apps/python/langchain_service/server.py rename to sample-apps/python/genai_service/server.py index cf50b7c18..28572c7b7 100644 --- a/sample-apps/python/langchain_service/server.py +++ b/sample-apps/python/genai_service/server.py @@ -82,13 +82,12 @@ async def root(): return { "message": "LangChain Bedrock OpenInference API is running!", "endpoints": { - "/chat": "Single message chat endpoint", - "/batch": "Batch message processing endpoint", - "/sample": "Run sample prompts" + "/ai-chat": "Single message chat endpoint", + "/hello": "Simple hello endpoint" } } -@app.post("/chat", response_model=ChatResponse) +@app.post("/ai-chat", response_model=ChatResponse) async def chat(request: ChatRequest): """ Chat endpoint that processes a single user message through AWS Bedrock @@ -100,40 +99,6 @@ async def chat(request: ChatRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -@app.post("/batch", response_model=BatchChatResponse) -async def batch_chat(request: BatchChatRequest): - """ - Batch endpoint that processes multiple messages - """ - try: - responses = [] - for message in request.messages: - result = await chain.ainvoke({"input": message}) - responses.append({ - "message": message, - "response": result["text"] - }) - return BatchChatResponse(responses=responses) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.get("/sample", response_model=BatchChatResponse) -async def run_samples(): - """ - Run the predefined sample prompts - """ - try: - responses = [] - for prompt in SAMPLE_PROMPTS: - result = await chain.ainvoke({"input": prompt}) - responses.append({ - "message": prompt, - "response": result["text"] - }) - return BatchChatResponse(responses=responses) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - @app.get("/health") async def health(): """Health check endpoint""" @@ -145,4 +110,5 @@ async def health(): print("Make sure AWS credentials are configured") print("Server will run on http://localhost:8000") print("API docs available at http://localhost:8000/docs") + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/sample-apps/python/langchain_service/Dockerfile b/sample-apps/python/langchain_service/Dockerfile deleted file mode 100644 index c77ba9668..000000000 --- a/sample-apps/python/langchain_service/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.12-slim - -WORKDIR /app - -COPY . . - -RUN pip install --no-cache-dir -r ec2-requirements.txt - -EXPOSE 8000 - -CMD ["opentelemetry-instrument", "python", "server.py"] \ No newline at end of file diff --git a/sample-apps/traffic-generator/genai/generate_traffic.sh b/sample-apps/traffic-generator/genai/generate_traffic.sh index ff6fe0425..6d2e2666a 100644 --- a/sample-apps/traffic-generator/genai/generate_traffic.sh +++ b/sample-apps/traffic-generator/genai/generate_traffic.sh @@ -2,7 +2,7 @@ # Configuration SERVER_URL="${SERVER_URL:-http://localhost:8000}" -ENDPOINT="${SERVER_URL}/chat" +ENDPOINT="${SERVER_URL}/ai-chat" DELAY_SECONDS="${DELAY_SECONDS:-3600}" # Default 1 hour (3600 seconds) between requests NUM_REQUESTS="${NUM_REQUESTS:-0}" # 0 means infinite TIMEOUT="${TIMEOUT:-30}" # Request timeout in seconds @@ -41,9 +41,15 @@ send_request() { echo -e "${YELLOW}[$timestamp] Request #$request_num${NC}" echo "Message: \"$message\"" + # Use environment variables or defaults for headers + local trace_id_header="${TRACE_ID:-Root=1-5759e988-bd862e3fe1be46a994272793;Parent=53995c3f42cd8ad8;Sampled=1}" + + echo "Using Trace ID: $trace_id_header" + # Send request with timeout response=$(curl -s -X POST "$ENDPOINT" \ -H "Content-Type: application/json" \ + -H "X-Amzn-Trace-Id: $trace_id_header" \ -d "{\"message\": \"$message\"}" \ -m "$TIMEOUT" \ -w "\nHTTP_STATUS:%{http_code}\nTIME_TOTAL:%{time_total}") diff --git a/terraform/python/ec2/adot-genai/main.tf b/terraform/python/ec2/adot-genai/main.tf new file mode 100644 index 000000000..a789da78f --- /dev/null +++ b/terraform/python/ec2/adot-genai/main.tf @@ -0,0 +1,129 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region +} + +resource "aws_default_vpc" "default" { + tags = { + Name = "Default VPC" + } +} + +resource "tls_private_key" "ssh_key" { + algorithm = "RSA" + rsa_bits = 4096 +} + +resource "aws_key_pair" "aws_ssh_key" { + key_name = "instance_key-${var.test_id}" + public_key = tls_private_key.ssh_key.public_key_openssh +} + +locals { + ssh_key_name = aws_key_pair.aws_ssh_key.key_name + private_key_content = tls_private_key.ssh_key.private_key_pem +} + +data "aws_ami" "ami" { + owners = ["amazon"] + most_recent = true + filter { + name = "name" + values = ["al20*-ami-minimal-*-${var.cpu_architecture}"] + } + filter { + name = "state" + values = ["available"] + } + filter { + name = "architecture" + values = [var.cpu_architecture] + } + filter { + name = "image-type" + values = ["machine"] + } + + filter { + name = "root-device-name" + values = ["/dev/xvda"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +resource "aws_instance" "main_service_instance" { + ami = data.aws_ami.ami.id + instance_type = var.cpu_architecture == "x86_64" ? "t3.medium" : "t4g.medium" + key_name = local.ssh_key_name + iam_instance_profile = "APP_SIGNALS_EC2_TEST_ROLE" + vpc_security_group_ids = [aws_default_vpc.default.default_security_group_id] + associate_public_ip_address = true + instance_initiated_shutdown_behavior = "terminate" + + metadata_options { + http_tokens = "required" + } + + root_block_device { + volume_size = 5 + } + + user_data = base64encode(<<-EOF +#!/bin/bash +yum update -y +yum install -y python3.12 python3.12-pip unzip + +mkdir -p /app +cd /app +aws s3 cp ${var.service_zip_url} langchain-service.zip +unzip langchain-service.zip + +# Having issues installing dependencies from ec2-requirements.txt as these dependencies are quite large and cause timeouts/memory issues on EC2, manually installing instead +python3.12 -m pip install fastapi uvicorn[standard] --no-cache-dir +python3.12 -m pip install boto3 botocore setuptools --no-cache-dir +python3.12 -m pip install opentelemetry-api opentelemetry-sdk opentelemetry-semantic-conventions --no-cache-dir +python3.12 -m pip install langchain langchain-community langchain_aws --no-cache-dir +python3.12 -m pip install python-dotenv openlit --no-cache-dir +python3.12 -m pip install openinference-instrumentation-langchain aws_opentelemetry_distro_genai_beta --no-cache-dir + +export AWS_REGION=${var.aws_region} +export OTEL_PROPAGATORS=tracecontext,xray,baggage +export OTEL_PYTHON_DISTRO=aws_distro +export OTEL_PYTHON_CONFIGURATOR=aws_configurator +export OTEL_EXPORTER_OTLP_LOGS_HEADERS="x-aws-log-group=test/genesis,x-aws-log-stream=default,x-aws-metric-namespace=genesis" +export OTEL_RESOURCE_ATTRIBUTES="service.name=langchain-traceloop-app" +export AGENT_OBSERVABILITY_ENABLED="true" + +nohup opentelemetry-instrument python3.12 server.py > /var/log/langchain-service.log 2>&1 & +EOF + ) + + tags = { + Name = "langchain-service-${var.test_id}" + } +} + +output "langchain_service_instance_id" { + value = aws_instance.main_service_instance.id +} + +output "langchain_service_public_ip" { + value = aws_instance.main_service_instance.public_ip +} \ No newline at end of file diff --git a/terraform/python/ec2/adot-genai/variables.tf b/terraform/python/ec2/adot-genai/variables.tf new file mode 100644 index 000000000..364ddb2bd --- /dev/null +++ b/terraform/python/ec2/adot-genai/variables.tf @@ -0,0 +1,38 @@ +# ------------------------------------------------------------------------ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ------------------------------------------------------------------------- + +variable "aws_region" { + default = "us-west-2" +} + +variable "test_id" { + default = "dummy-123" +} + +variable "service_zip_url" { + description = "S3 URL for the service zip file" +} + +variable "language_version" { + default = "3.12" +} + +variable "cpu_architecture" { + default = "x86_64" +} + +variable "user" { + default = "ec2-user" +} \ No newline at end of file diff --git a/terraform/python/ec2/langchain/main.tf b/terraform/python/ec2/langchain/main.tf deleted file mode 100644 index 93e5a6d93..000000000 --- a/terraform/python/ec2/langchain/main.tf +++ /dev/null @@ -1,82 +0,0 @@ -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 5.0" - } - } -} - -provider "aws" { - region = var.aws_region -} - -data "aws_ami" "amazon_linux" { - most_recent = true - owners = ["amazon"] - filter { - name = "name" - values = ["amzn2-ami-hvm-*-x86_64-gp2"] - } -} - -data "aws_vpc" "default" { - default = true -} - -data "aws_subnets" "default" { - filter { - name = "vpc-id" - values = [data.aws_vpc.default.id] - } -} - -resource "aws_security_group" "langchain_sg" { - name_prefix = "langchain-${var.test_id}" - vpc_id = data.aws_vpc.default.id - - ingress { - from_port = 8000 - to_port = 8000 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 22 - to_port = 22 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } - - egress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } -} - -resource "aws_instance" "langchain_instance" { - ami = data.aws_ami.amazon_linux.id - instance_type = "t3.medium" - subnet_id = data.aws_subnets.default.ids[0] - vpc_security_group_ids = [aws_security_group.langchain_sg.id] - - user_data = base64encode(templatefile("${path.module}/user_data.sh", { - service_zip_url = var.service_zip_url - aws_region = var.aws_region - })) - - tags = { - Name = "langchain-service-${var.test_id}" - } -} - -output "instance_id" { - value = aws_instance.langchain_instance.id -} - -output "public_ip" { - value = aws_instance.langchain_instance.public_ip -} \ No newline at end of file diff --git a/terraform/python/ec2/langchain/user_data.sh b/terraform/python/ec2/langchain/user_data.sh deleted file mode 100644 index baafe2667..000000000 --- a/terraform/python/ec2/langchain/user_data.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -yum update -y -yum install -y docker python3.12 python3.12-pip unzip - -systemctl start docker -systemctl enable docker - -# Download and setup LangChain service -mkdir -p /app -cd /app -aws s3 cp ${service_zip_url} langchain-service.zip -unzip langchain-service.zip - -# Install dependencies -pip3.12 install -r ec2-requirements.txt - -# Run with OTEL instrumentation -export OTEL_PYTHON_DISTRO=aws_distro -export OTEL_PYTHON_CONFIGURATOR=aws_configurator -export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf -export OTEL_EXPORTER_OTLP_LOGS_HEADERS="x-aws-log-group=test/genesis,x-aws-log-stream=default,x-aws-metric-namespace=genesis" -export OTEL_RESOURCE_ATTRIBUTES="service.name=langchain-traceloop-app" -export AGENT_OBSERVABILITY_ENABLED="true" - -opentelemetry-instrument python3.12 server.py \ No newline at end of file diff --git a/terraform/python/ec2/langchain/variables.tf b/terraform/python/ec2/langchain/variables.tf deleted file mode 100644 index 069c415e5..000000000 --- a/terraform/python/ec2/langchain/variables.tf +++ /dev/null @@ -1,14 +0,0 @@ -variable "aws_region" { - description = "AWS region" - type = string -} - -variable "test_id" { - description = "Test ID for resource naming" - type = string -} - -variable "service_zip_url" { - description = "S3 URL for the LangChain service zip file" - type = string -} \ No newline at end of file diff --git a/validator/src/main/java/com/amazon/aoc/App.java b/validator/src/main/java/com/amazon/aoc/App.java index 5074eccf4..674cfb2ef 100644 --- a/validator/src/main/java/com/amazon/aoc/App.java +++ b/validator/src/main/java/com/amazon/aoc/App.java @@ -157,6 +157,9 @@ public class App implements Callable { defaultValue = "defaultDnsName") private String privateDnsName; + @CommandLine.Option(names = {"--trace-id"}) + private String traceId; + private static final String TEST_CASE_DIM_KEY = "testcase"; private static final String CANARY_NAMESPACE = "Otel/Canary"; private static final String CANARY_METRIC_NAME = "Success"; @@ -196,6 +199,7 @@ public Integer call() throws Exception { context.setInstanceAmi(this.instanceAmi); context.setInstanceId(this.instanceId); context.setPrivateDnsName(this.privateDnsName); + context.setTraceId(this.traceId); log.info(context); diff --git a/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java b/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java index dc3427945..5a206444f 100644 --- a/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java +++ b/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java @@ -246,6 +246,11 @@ public enum PredefinedExpectedTemplate implements FileConfig { /** Python EC2 ADOT SigV4 Log Exporter Test Case Validation */ PYTHON_EC2_ADOT_OTLP_LOG("/expected-data-template/python/ec2/adot-aws-otlp/application-log.mustache"), + /** Python EC2 ADOT Gen AI Test Case Validation */ + PYTHON_EC2_ADOT_GENAI_LOG("/expected-data-template/python/ec2/adot-genai/genai-log.mustache"), + PYTHON_EC2_ADOT_GENAI_TRACE("/expected-data-template/python/ec2/adot-genai/genai-trace.mustache"), + PYTHON_EC2_ADOT_GENAI_METRIC("/expected-data-template/python/ec2/adot-genai/genai-metric.mustache"), + /** Python K8S Test Case Validations */ PYTHON_K8S_OUTGOING_HTTP_CALL_LOG("/expected-data-template/python/k8s/outgoing-http-call-log.mustache"), PYTHON_K8S_OUTGOING_HTTP_CALL_METRIC("/expected-data-template/python/k8s/outgoing-http-call-metric.mustache"), diff --git a/validator/src/main/java/com/amazon/aoc/models/Context.java b/validator/src/main/java/com/amazon/aoc/models/Context.java index 6e607591a..6f5db130c 100644 --- a/validator/src/main/java/com/amazon/aoc/models/Context.java +++ b/validator/src/main/java/com/amazon/aoc/models/Context.java @@ -67,6 +67,8 @@ public class Context { private String privateDnsName; + private String traceId; + private ECSContext ecsContext; private CloudWatchContext cloudWatchContext; diff --git a/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java b/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java index 49af8d396..dec8d1dda 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java @@ -27,6 +27,8 @@ import com.github.wnameless.json.flattener.FlattenMode; import com.github.wnameless.json.flattener.JsonFlattener; import com.github.wnameless.json.flattener.JsonifyArrayList; + +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -68,7 +70,23 @@ public void validate() throws Exception { Map actualLog; if (isAwsOtlpLog(expectedAttributes)) { - actualLog = this.getActualAwsOtlpLog(); + String otlpLogFilterPattern = String.format( + "{ ($.resource.attributes.['service.name'] = \"%s\") && ($.body = \"This is a custom log for validation testing\") }", + context.getServiceName() + ); + String addTraceIdFilter = (context.getTraceId() != null ? "&& ($.traceId = \"" + context.getTraceId() + "\") " : ""); + String genAILogFilterPattern = String.format( + "{ ($.resource.attributes.['service.name'] = \"%s\") " + + "&& ($.body.output.messages[0].role = \"assistant\") " + + "&& ($.body.input.messages[0].role = \"user\") " + + "&& ($.body.output.messages[1] NOT EXISTS) " + + "&& ($.body.input.messages[1] NOT EXISTS) " + + "%s" + + "}", + context.getServiceName(), + addTraceIdFilter + ); + actualLog = this.getActualAwsOtlpLog(Arrays.asList(otlpLogFilterPattern, genAILogFilterPattern)); } else { String operation = (String) expectedAttributes.get("Operation"); String remoteService = (String) expectedAttributes.get("RemoteService"); @@ -153,9 +171,12 @@ private JsonifyArrayList> getExpectedAttributes() throws Exc private boolean isAwsOtlpLog(Map expectedAttributes) { // OTLP SigV4 logs have 'body' as a top-level attribute - return expectedAttributes.containsKey("body") && - expectedAttributes.containsKey("severityNumber") && - expectedAttributes.containsKey("severityText"); + boolean hasBodyKey = expectedAttributes.keySet().stream() + .anyMatch(key -> key.startsWith("body")); + + return expectedAttributes.containsKey("severityNumber") && + expectedAttributes.containsKey("severityText") && + hasBodyKey; } private Map getActualLog( @@ -234,25 +255,33 @@ private Map getActualOtelSpanLog(String operation, String remote return JsonFlattener.flattenAsMap(retrievedLogs.get(0).getMessage()); } - private Map getActualAwsOtlpLog() throws Exception { - String filterPattern= String.format( - "{ ($.resource.attributes.['service.name'] = \"%s\") && ($.body = \"This is a custom log for validation testing\") }", - context.getServiceName() - ); - log.info("Filter Pattern for OTLP Log Search: " + filterPattern); + private Map getActualAwsOtlpLog(List filterPatterns) throws Exception { + log.info("Filter patterns {}", filterPatterns); - List retrievedLogs = - this.cloudWatchService.filterLogs( - context.getLogGroup(), - filterPattern, - System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5), - 10); + List retrievedLogs = null; + + for (String pattern : filterPatterns) { + log.info("Attempting filter Pattern for OTLP Log Search: {}", pattern); + + retrievedLogs = this.cloudWatchService.filterLogs( + context.getLogGroup(), + pattern, + System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5), + 10); + + if (retrievedLogs != null && !retrievedLogs.isEmpty()) { + log.info("Found logs for filter pattern {}", pattern); + break; + } + } if (retrievedLogs == null || retrievedLogs.isEmpty()) { - throw new BaseException(ExceptionCode.EMPTY_LIST); + throw new BaseException(ExceptionCode.EMPTY_LIST); } - return JsonFlattener.flattenAsMap(retrievedLogs.get(0).getMessage()); + return new JsonFlattener(retrievedLogs.get(0).getMessage()) + .withFlattenMode(FlattenMode.KEEP_ARRAYS) + .flattenAsMap(); } @Override diff --git a/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java b/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java index 3553bc1a8..2c5b01fbb 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java @@ -91,6 +91,14 @@ public void validate() throws Exception { RetryHelper.retry( maxRetryCount, () -> { + + // Special handling for Genesis path - just check if any metrics exists in namespace + // since ADOT will just capture any OTel Metrics emitted from the instrumentation library used + // and convert them into EMF metrics, it's impossible to create a validation template for this. + if (validationConfig.getHttpPath().contains("ai-chat")) { + validateAnyMetricExists(); + return; + } // We will query the Service, RemoteService, and RemoteTarget dimensions to ensure we // get all metrics from all aggregations, specifically the [RemoteService] aggregation. List serviceNames = @@ -210,6 +218,17 @@ private void compareMetricLists(List expectedMetricList, List ac matchAny.stream().findAny().get(), actualMetricSnapshot)); } } + + private void validateAnyMetricExists() throws Exception { + // This will grab all metrics from last 3 hours + // See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html + List allMetricsInNamespace = cloudWatchService.listMetrics(context.getMetricNamespace(), null, null, null); + log.info("Found {} metrics in namespace {}", allMetricsInNamespace.size(), context.getMetricNamespace()); + if (allMetricsInNamespace.isEmpty()) { + throw new BaseException(ExceptionCode.EXPECTED_METRIC_NOT_FOUND, "No metrics found in namespace: " + context.getMetricNamespace()); + } + log.info("validation is passed for path {}", validationConfig.getHttpPath()); + } private List listMetricFromCloudWatch( CloudWatchService cloudWatchService, diff --git a/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java b/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java index 21b762b9e..fa16dff4b 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java @@ -147,15 +147,15 @@ private Map getTrace() throws Exception { validationConfig.getHttpMethod().toUpperCase(), validationConfig.getHttpPath())); } + + if (validationConfig.getHttpPath().contains("ai-chat")) { + return this.getTraceById(Collections.singletonList(context.getTraceId())); + } + log.info("Trace Filter: {}", traceFilter); List retrieveTraceLists = xrayService.searchTraces(traceFilter); List traceIdLists = Collections.singletonList(retrieveTraceLists.get(0).getId()); - List retrievedTraceList = xrayService.listTraceByIds(traceIdLists); - - if (retrievedTraceList == null || retrievedTraceList.isEmpty()) { - throw new BaseException(ExceptionCode.EMPTY_LIST); - } - return this.flattenDocument(retrievedTraceList.get(0).getSegments()); + return getTraceById(traceIdLists); } private Map flattenDocument(List segmentList) { @@ -190,6 +190,14 @@ private Map flattenDocument(List segmentList) { return JsonFlattener.flattenAsMap(segmentsJson.toString()); } + private Map getTraceById(List traceIdLists) throws Exception { + List retrievedTraceList = xrayService.listTraceByIds(traceIdLists); + if (retrievedTraceList == null || retrievedTraceList.isEmpty()) { + throw new BaseException(ExceptionCode.EMPTY_LIST); + } + return this.flattenDocument(retrievedTraceList.get(0).getSegments()); + } + // This method will get the stored traces private Map getStoredTrace() throws Exception { Map flattenedJsonMapForStoredTraces = null; diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache new file mode 100644 index 000000000..1eb79a202 --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache @@ -0,0 +1,36 @@ +[{ + "resource": { + "attributes": { + "aws.local.service": "{{serviceName}}", + "aws.service.type": "gen_ai_agent", + "service.name": "{{serviceName}}" + } + }, + "scope": { + "name": "openinference.instrumentation.langchain" + }, + "severityNumber": "^[0-9]+$", + "severityText": ".*", + "body": { + "output": { + "messages": [ + { + "content": "^.+$", + "role": "assistant" + } + ] + }, + "input": { + "messages": [ + { + "content": "^.+$", + "role": "user" + } + ] + } + }, + "attributes": { + "event.name": "openinference.instrumentation.langchain" + }, + "traceId": "{{traceId}}" +}] \ No newline at end of file diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache new file mode 100644 index 000000000..04d120344 --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache @@ -0,0 +1,4 @@ +- + metricName: ANY_VALUE + namespace: {{metricNamespace}} + dimensions: [] \ No newline at end of file diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache new file mode 100644 index 000000000..bbad8e1ae --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache @@ -0,0 +1,20 @@ +[{ + "name": "^{{serviceName}}$", + "trace_id": "^{{traceId}}$", + "http": { + "request": { + "url": "^.*/ai-chat$", + "method": "^POST$" + } + }, + "aws": { + "service.type": "^gen_ai_agent$" + }, + "annotations": { + "aws.local.service": "^{{serviceName}}$", + "aws.local.operation": "^POST /ai-chat$" + }, + "metadata": { + "service.name": "^{{serviceName}}$" + } +}] \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml new file mode 100644 index 000000000..92c64fd06 --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "cw-log" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedLogStructureTemplate: "PYTHON_EC2_ADOT_GENAI_LOG" \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml new file mode 100644 index 000000000..2fd94b30a --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "cw-metric" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedMetricTemplate: "PYTHON_EC2_ADOT_GENAI_METRIC" \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml new file mode 100644 index 000000000..34a3573c4 --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "trace" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedTraceTemplate: "PYTHON_EC2_ADOT_GENAI_TRACE" \ No newline at end of file From 53637ad53b8397bfa457ee8f4c9defc45bf16e3e Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 9 Jul 2025 14:50:59 +0000 Subject: [PATCH 3/3] test --- .github/workflows/python-ec2-genai-test.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-ec2-genai-test.yml b/.github/workflows/python-ec2-genai-test.yml index 527c3c397..7a8d14116 100644 --- a/.github/workflows/python-ec2-genai-test.yml +++ b/.github/workflows/python-ec2-genai-test.yml @@ -3,9 +3,20 @@ name: Python EC2 LangChain Service Deployment on: - push: - branches: - - origin/gen-ai-sample-app + workflow_call: + inputs: + caller-workflow-name: + required: true + type: string + cpu-architecture: + description: "Permitted values: x86_64 or arm64" + required: false + type: string + default: "x86_64" + staging-wheel-name: + required: false + default: 'aws-opentelemetry-distro' + type: string permissions: id-token: write @@ -16,6 +27,7 @@ env: # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }} # E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }} E2E_TEST_ACCOUNT_ID: 571600841604 + ADOT_WHEEL_NAME: ${{ inputs.staging-wheel-name }} E2E_TEST_ROLE_NAME: github METRIC_NAMESPACE: genesis LOG_GROUP_NAME: test/genesis