// ./knowledge-base/cdk/lib/historical-research-stack.ts

import * as cdk from "aws-cdk-lib";
import { Construct } from "constructs";
import * as s3 from "aws-cdk-lib/aws-s3";
import * as s3n from "aws-cdk-lib/aws-s3-notifications";
import * as dynamodb from "aws-cdk-lib/aws-dynamodb";
import * as sqs from "aws-cdk-lib/aws-sqs";
import * as lambda from "aws-cdk-lib/aws-lambda";
import * as ec2 from "aws-cdk-lib/aws-ec2";
import * as iam from "aws-cdk-lib/aws-iam";
import * as opensearch from "aws-cdk-lib/aws-opensearchservice";
import * as apigateway from "aws-cdk-lib/aws-apigateway";
import * as cloudfront from "aws-cdk-lib/aws-cloudfront";
import * as origins from "aws-cdk-lib/aws-cloudfront-origins";
import * as lambdaEventSources from "aws-cdk-lib/aws-lambda-event-sources";
import * as secretsmanager from "aws-cdk-lib/aws-secretsmanager";
import * as certificatemanager from "aws-cdk-lib/aws-certificatemanager";
import * as wafv2 from "aws-cdk-lib/aws-wafv2";
import { Duration, RemovalPolicy } from "aws-cdk-lib";
import * as path from "path";
import * as lambdaNodejs from "aws-cdk-lib/aws-lambda-nodejs";
import { ALLOWED_IPS } from "./allowed-ips";

/**
 * Props for {@link HistoricalResearchStack}.
 */
interface HistoricalResearchStackProps extends cdk.StackProps {
  /**
   * ARN of the WAF WebACL, passed in from the WafStack in us-east-1;
   * used for the CloudFront IP restriction.
   *
   * Declared `readonly` because stack props are configuration inputs and
   * should not be reassigned after they are handed to the stack.
   */
  readonly webAclArn: string;
}

export class HistoricalResearchStack extends cdk.Stack {
  constructor(
    scope: Construct,
    id: string,
    props: HistoricalResearchStackProps,
  ) {
    super(scope, id, props);

    // ========================================
    // VPC reference (reuses an existing VPC)
    // ========================================
    const vpc = ec2.Vpc.fromLookup(this, "ExistingVPC", {
      vpcId: "vpc-08d84efb87d052cf9",
    });

    // Imports an existing subnet by its ID under the given construct ID.
    const importSubnetById = (constructId: string, subnetId: string) =>
      ec2.Subnet.fromSubnetId(this, constructId, subnetId);

    const privateSubnetA = importSubnetById(
      "PrivateSubnetA",
      "subnet-0ebcb5a9bc54d1bd1",
    );
    const privateSubnetC = importSubnetById(
      "PrivateSubnetC",
      "subnet-03fa4782f20ba49ec",
    );

    // ========================================
    // Security Groups
    // ========================================

    // Creates a security group in the existing VPC. `groupName` is used
    // verbatim as the securityGroupName.
    const newSecurityGroup = (
      constructId: string,
      groupName: string,
      description: string,
    ) =>
      new ec2.SecurityGroup(this, constructId, {
        vpc,
        description,
        securityGroupName: groupName,
      });

    const processingLambdaSG = newSecurityGroup(
      "ProcessingLambdaSG",
      "historical-research-processing-lambda-sg",
      "Security group for processing Lambda functions (ImageAnalyzer, EmbeddingGenerator)",
    );

    const searchLambdaSG = newSecurityGroup(
      "SearchLambdaSG",
      "historical-research-search-lambda-sg",
      "Security group for search Lambda function",
    );

    const vpcEndpointSG = newSecurityGroup(
      "VPCEndpointSG",
      "historical-research-vpc-endpoint-sg",
      "Security group for VPC Endpoint (API Gateway)",
    );

    // Inbound rule for the VPC endpoint SG: HTTPS (443) from the search Lambda SG.
    vpcEndpointSG.addIngressRule(
      searchLambdaSG,
      ec2.Port.tcp(443),
      "Allow from search consumers",
    );

    const openSearchSG = newSecurityGroup(
      "OpenSearchSG",
      "historical-research-opensearch-sg",
      "Security group for OpenSearch domain",
    );

    // Allow HTTPS (443) access to OpenSearch from both Lambda security groups.
    const openSearchIngressPeers = [
      { peer: processingLambdaSG, description: "Allow from processing Lambda" },
      { peer: searchLambdaSG, description: "Allow from search Lambda" },
    ];
    for (const { peer, description } of openSearchIngressPeers) {
      openSearchSG.addIngressRule(peer, ec2.Port.tcp(443), description);
    }

    // Existing security group for the NHK AI Platform, imported by ID.
    const nhkAIPlatformSG = ec2.SecurityGroup.fromSecurityGroupId(
      this,
      "NHKAIPlatformSG",
      "sg-0faff8d9c30c94dc0",
    );

    // ========================================
    // S3 Buckets
    // ========================================

    // Settings shared by every bucket below: S3-managed encryption, all
    // public access blocked, and RETAIN as the removal policy.
    const privateBucketDefaults = {
      encryption: s3.BucketEncryption.S3_MANAGED,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      removalPolicy: RemovalPolicy.RETAIN,
    };

    // Image storage bucket.
    // CORS is configured via a cfn escape hatch once the CloudFront URL is known.
    const imagesBucket = new s3.Bucket(this, "ImagesBucket", {
      ...privateBucketDefaults,
      bucketName: "historical-research-images",
      versioned: true,
      lifecycleRules: [
        {
          // Noncurrent object versions expire after 90 days.
          id: "DeleteOldVersions",
          noncurrentVersionExpiration: Duration.days(90),
        },
      ],
    });

    // Frontend hosting bucket.
    const webBucket = new s3.Bucket(this, "WebBucket", {
      ...privateBucketDefaults,
      bucketName: "historical-research-web",
    });

    // Prompt management bucket.
    const promptsBucket = new s3.Bucket(this, "PromptsBucket", {
      ...privateBucketDefaults,
      bucketName: "historical-research-prompts",
      versioned: true,
    });

    // PDF upload bucket (stores the original PDF files).
    // The pdf-splitter Lambda is started by this bucket's ObjectCreated events.
    const pdfBucket = new s3.Bucket(this, "PdfBucket", {
      ...privateBucketDefaults,
      bucketName: "historical-research-pdfs",
      // CORS is allowed because objects are PUT directly via presigned URLs.
      cors: [
        {
          allowedOrigins: ["https://historical-research-dev.xmc.nhk.or.jp"],
          allowedMethods: [s3.HttpMethods.PUT],
          allowedHeaders: ["*"], // allow all headers the browser may send
          maxAge: 3000,
        },
      ],
    });

    // ========================================
    // DynamoDB Tables
    // ========================================

    // Builds a string-typed key attribute definition for the given name.
    const stringKey = (name: string) => ({
      name,
      type: dynamodb.AttributeType.STRING,
    });

    // Processing status table, keyed by page_id, with a TTL attribute "ttl".
    const processingStatusTable = new dynamodb.Table(
      this,
      "ProcessingStatusTable",
      {
        tableName: "historical-research-processing-status",
        partitionKey: stringKey("page_id"),
        timeToLiveAttribute: "ttl",
        billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
        pointInTimeRecovery: true,
        removalPolicy: RemovalPolicy.RETAIN,
      },
    );

    // GSI keyed by document_id, sorted by page_number.
    processingStatusTable.addGlobalSecondaryIndex({
      indexName: "document_id-index",
      partitionKey: stringKey("document_id"),
      sortKey: { name: "page_number", type: dynamodb.AttributeType.NUMBER },
    });

    // GSI keyed by status, sorted by updated_at.
    processingStatusTable.addGlobalSecondaryIndex({
      indexName: "status-index",
      partitionKey: stringKey("status"),
      sortKey: stringKey("updated_at"),
    });

    // Books table (document-level metadata), keyed by document_id.
    const booksTable = new dynamodb.Table(this, "BooksTable", {
      tableName: "historical-research-books",
      partitionKey: stringKey("document_id"),
      billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
      pointInTimeRecovery: true,
      removalPolicy: RemovalPolicy.RETAIN,
    });

    // ========================================
    // SQS Queues
    // ========================================

    // DLQ for Image Analysis: receives messages that were received
    // maxReceiveCount times on the main queue without being deleted.
    const imageAnalysisDLQ = new sqs.Queue(this, "ImageAnalysisDLQ", {
      queueName: "historical-research-image-analysis-dlq",
      retentionPeriod: Duration.days(14),
      visibilityTimeout: Duration.seconds(1800),
    });

    // Image Analysis Queue (event source of the ImageAnalyzer Lambda).
    //
    // The visibility timeout is 6x the consumer's 15-minute function timeout,
    // as AWS recommends for SQS event sources. When it merely equals the
    // function timeout, a long-running or throttled/retried invocation can
    // outlive the visibility window, so the message is redelivered while it
    // is still being processed and its receive count moves toward the DLQ.
    const imageAnalysisQueue = new sqs.Queue(this, "ImageAnalysisQueue", {
      queueName: "historical-research-image-analysis-queue",
      visibilityTimeout: Duration.minutes(90),
      retentionPeriod: Duration.days(4),
      deadLetterQueue: {
        queue: imageAnalysisDLQ,
        maxReceiveCount: 3,
      },
    });

    // DLQ for Embedding Generation: receives messages that were received
    // maxReceiveCount times on the main queue without being deleted.
    const embeddingDLQ = new sqs.Queue(this, "EmbeddingDLQ", {
      queueName: "historical-research-embedding-dlq",
      retentionPeriod: Duration.days(14),
      visibilityTimeout: Duration.seconds(1800),
    });

    // Embedding Generation Queue.
    //
    // The visibility timeout is 6x the EmbeddingGenerator Lambda's 5-minute
    // function timeout, as AWS recommends for SQS event sources. When it
    // merely equals the function timeout, a long-running or throttled/retried
    // invocation can outlive the visibility window, so the message is
    // redelivered while it is still being processed.
    const embeddingQueue = new sqs.Queue(this, "EmbeddingQueue", {
      queueName: "historical-research-embedding-queue",
      visibilityTimeout: Duration.minutes(30),
      retentionPeriod: Duration.days(4),
      deadLetterQueue: {
        queue: embeddingDLQ,
        maxReceiveCount: 3,
      },
    });

    // Dead-letter queue for TOC extraction; keeps failed messages for 14 days.
    const tocExtractionDLQ = new sqs.Queue(this, "TocExtractionDLQ", {
      queueName: "historical-research-toc-extraction-dlq",
      visibilityTimeout: Duration.minutes(30),
      retentionPeriod: Duration.days(14),
    });

    // TOC extraction work queue; a message is moved to the DLQ above after
    // it has been received 3 times.
    // NOTE(review): the consumer of this queue is not visible in this part of
    // the file — confirm its timeout fits within the 10-minute visibility window.
    const tocExtractionQueue = new sqs.Queue(this, "TocExtractionQueue", {
      queueName: "historical-research-toc-extraction-queue",
      retentionPeriod: Duration.days(4),
      visibilityTimeout: Duration.seconds(600),
      deadLetterQueue: {
        maxReceiveCount: 3,
        queue: tocExtractionDLQ,
      },
    });

    // ========================================
    // Secrets Manager reference
    // ========================================

    // Existing secret imported by its complete ARN (including the random
    // suffix); nothing is created in this stack.
    const liteLLMApiKeyArn =
      "arn:aws:secretsmanager:ap-northeast-1:903877990773:secret:nhk_ai_api_key_lite_llm-Yymln7";
    const liteLLMApiKey = secretsmanager.Secret.fromSecretCompleteArn(
      this,
      "LiteLLMApiKey",
      liteLLMApiKeyArn,
    );

    // ========================================
    // OpenSearch Domain
    // ========================================
    const openSearchDomain = new opensearch.Domain(this, "OpenSearchDomain", {
      domainName: "historical-research-pages",
      version: opensearch.EngineVersion.OPENSEARCH_2_11,
      removalPolicy: RemovalPolicy.RETAIN,

      // Sizing: a single m5.large.search data node on a gp3 volume,
      // with Multi-AZ standby disabled.
      capacity: {
        multiAzWithStandbyEnabled: false,
        dataNodes: 1,
        dataNodeInstanceType: "m5.large.search",
      },
      ebs: {
        volumeType: ec2.EbsDeviceVolumeType.GP3,
        volumeSize: 20,
      },

      // Networking: placed in one subnet of the existing VPC, reachable only
      // through the OpenSearch security group.
      vpc,
      vpcSubnets: [{ subnets: [privateSubnetA] }],
      securityGroups: [openSearchSG],

      // Encryption in transit and at rest.
      enforceHttps: true,
      nodeToNodeEncryption: true,
      encryptionAtRest: { enabled: true },
    });

    // The OpenSearch access policy is set for specific roles after the
    // Lambda functions have been defined.

    // ========================================
    // Lambda Layers (shared libraries)
    // ========================================

    // Directory packaged as the layer asset, resolved relative to this file.
    const commonLayerAssetDir = path.join(
      __dirname,
      "../../lambda/layers/common",
    );
    const commonLayer = new lambda.LayerVersion(this, "CommonLayer", {
      description: "Common utilities for Historical Research Lambda functions",
      compatibleRuntimes: [lambda.Runtime.PYTHON_3_11],
      code: lambda.Code.fromAsset(commonLayerAssetDir),
    });

    // ========================================
    // Lambda Functions
    // ========================================

    // Environment variables shared by the Lambda functions below.
    const commonEnvVars = {
      // S3 buckets
      IMAGES_BUCKET: imagesBucket.bucketName,
      PDF_BUCKET: pdfBucket.bucketName,
      PROMPTS_BUCKET: promptsBucket.bucketName,

      // DynamoDB tables
      PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
      BOOKS_TABLE: booksTable.tableName,

      // OpenSearch
      OPENSEARCH_ENDPOINT: openSearchDomain.domainEndpoint,

      // SQS queues
      IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
      EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
      TOC_EXTRACTION_QUEUE_URL: tocExtractionQueue.queueUrl,

      // LiteLLM: only the secret's ARN is passed here, not the key itself.
      LITELLM_BASE_URL: "https://api2.ai.dev.nhk.jp",
      LITELLM_MODEL: "claude-sonnet-4-5",
      LITELLM_API_KEY_SECRET_ARN: liteLLMApiKey.secretArn,
    };

    // 1. Upload Handler Lambda (no VPC configuration).
    const uploadHandlerAssetDir = path.join(
      __dirname,
      "../../lambda/upload-handler",
    );
    const uploadHandler = new lambda.Function(this, "UploadHandler", {
      functionName: "historical-research-upload-handler",
      // Code and runtime
      code: lambda.Code.fromAsset(uploadHandlerAssetDir),
      handler: "handler.lambda_handler",
      runtime: lambda.Runtime.PYTHON_3_11,
      layers: [commonLayer],
      // Resources
      memorySize: 512,
      timeout: Duration.seconds(30),
      environment: commonEnvVars,
    });

    // 2. Image Analyzer Lambda (runs inside the VPC)

    // Upper bound on parallel image-analysis executions. Used both as the
    // function's reserved concurrency and as the SQS event source's maximum
    // concurrency so the two limits cannot drift apart.
    const imageAnalyzerConcurrency = 10;

    const imageAnalyzer = new lambda.Function(this, "ImageAnalyzer", {
      functionName: "historical-research-image-analyzer",
      runtime: lambda.Runtime.PYTHON_3_11,
      handler: "handler.lambda_handler",
      code: lambda.Code.fromAsset(
        path.join(__dirname, "../../lambda/image-analyzer"),
      ),
      timeout: Duration.minutes(15),
      memorySize: 2048,
      environment: commonEnvVars,
      layers: [commonLayer],
      vpc,
      vpcSubnets: {
        subnets: [privateSubnetA, privateSubnetC],
      },
      securityGroups: [processingLambdaSG, nhkAIPlatformSG],
      reservedConcurrentExecutions: imageAnalyzerConcurrency,
    });

    // SQS trigger: one message per invocation, no batching window.
    //
    // maxConcurrency stops the SQS event source from invoking more concurrent
    // executions than the function's reserved concurrency allows. Without it,
    // excess invocations are throttled, the messages return to the queue, and
    // each return counts toward the queue's maxReceiveCount, so unprocessed
    // messages can end up in the DLQ.
    imageAnalyzer.addEventSource(
      new lambdaEventSources.SqsEventSource(imageAnalysisQueue, {
        batchSize: 1,
        maxBatchingWindow: Duration.seconds(0),
        maxConcurrency: imageAnalyzerConcurrency,
      }),
    );

    // 3. Embedding Generator Lambda (VPC内)
    const embeddingGenerator = new lambda.Function(this, "EmbeddingGenerator", {
      functionName: "historical-research-embedding-generator",
      runtime: lambda.Runtime.PYTHON_3_11,
      handler: "handler.lambda_handler",
      code: lambda.Code.fromAsset(
        path.join(__dirname, "../../lambda/embedding-generator"),
      ),
      timeout: Duration.minutes(5),
      memorySize: 1024,
      environment: commonEnvVars,
      layers: [commonLayer],
      vpc,
      vpcSubnets: {
        subnets: [privateSubnetA, privateSubnetC],
      },
      securityGroups: [processingLambdaSG, nhkAIPlatformSG],