./knowledge-base/cdk/lib/historical-research-stack.js

"use strict";
/**
 * Module-interop helper (has the shape of a compiler-emitted re-export shim).
 *
 * Exposes `source[key]` on `target` under the name `alias` (or `key` when
 * `alias` is undefined). Reuses an already-installed helper from `this` when
 * one exists; otherwise picks an implementation once, depending on whether
 * `Object.create` is available.
 *
 * @param {object} target - Object receiving the binding.
 * @param {object} source - Object the value is read from.
 * @param {string} key - Property name on `source`.
 * @param {string} [alias] - Property name on `target`; defaults to `key`.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy path: a plain one-time copy of the current value.
        return function (target, source, key, alias) {
            target[alias === undefined ? key : alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);

        // Decide whether the source descriptor can be reused verbatim or
        // whether a forwarding getter has to be installed instead.
        let needsGetter;
        if (!descriptor) {
            needsGetter = true;
        } else if ("get" in descriptor) {
            // Accessors are only reused when the source is flagged __esModule.
            needsGetter = !source.__esModule;
        } else {
            // Data properties that may still change are read through a getter.
            needsGetter = descriptor.writable || descriptor.configurable;
        }

        if (needsGetter) {
            descriptor = { enumerable: true, get: () => source[key] };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
/**
 * Module-interop helper: attaches `moduleValue` to `namespace` as its
 * `default` property.
 *
 * Reuses an already-installed helper from `this` when one exists. When
 * `Object.create` is available the property is defined as enumerable via
 * `Object.defineProperty` (and is therefore read-only); otherwise it is a
 * plain assignment.
 *
 * @param {object} namespace - Object receiving the `default` property.
 * @param {*} moduleValue - Value to expose as `default`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, moduleValue) {
            const defaultDescriptor = { enumerable: true, value: moduleValue };
            Object.defineProperty(namespace, "default", defaultDescriptor);
        };
    }
    // Legacy path: ordinary writable property.
    return function (namespace, moduleValue) {
        namespace.default = moduleValue;
    };
})();
/**
 * Module-interop helper: turns the result of `require()` into a
 * namespace-style object.
 *
 * Reuses an already-installed helper from `this` when one exists.
 * - A truthy module flagged `__esModule` is returned unchanged.
 * - Otherwise a new object is built: every own property name of the module
 *   except "default" is bound onto it with `__createBinding`, and the module
 *   itself is attached as `default` with `__setModuleDefault`.
 *
 * @param {*} mod - The loaded module (may be null/undefined).
 * @returns {*} `mod` itself, or the freshly built namespace object.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily: the first call swaps this function for the real lister
    // (Object.getOwnPropertyNames, or a for-in based fallback).
    let listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            const names = [];
            for (const name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names.push(name);
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };

    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const exportName of listOwnKeys(mod)) {
                // "default" is handled separately below.
                if (exportName === "default") {
                    continue;
                }
                __createBinding(namespace, mod, exportName);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.HistoricalResearchStack = void 0;
const cdk = __importStar(require("aws-cdk-lib"));
const s3 = __importStar(require("aws-cdk-lib/aws-s3"));
const s3n = __importStar(require("aws-cdk-lib/aws-s3-notifications"));
const dynamodb = __importStar(require("aws-cdk-lib/aws-dynamodb"));
const sqs = __importStar(require("aws-cdk-lib/aws-sqs"));
const lambda = __importStar(require("aws-cdk-lib/aws-lambda"));
const ec2 = __importStar(require("aws-cdk-lib/aws-ec2"));
const iam = __importStar(require("aws-cdk-lib/aws-iam"));
const opensearch = __importStar(require("aws-cdk-lib/aws-opensearchservice"));
const apigateway = __importStar(require("aws-cdk-lib/aws-apigateway"));
const cloudfront = __importStar(require("aws-cdk-lib/aws-cloudfront"));
const origins = __importStar(require("aws-cdk-lib/aws-cloudfront-origins"));
const lambdaEventSources = __importStar(require("aws-cdk-lib/aws-lambda-event-sources"));
const secretsmanager = __importStar(require("aws-cdk-lib/aws-secretsmanager"));
const certificatemanager = __importStar(require("aws-cdk-lib/aws-certificatemanager"));
const wafv2 = __importStar(require("aws-cdk-lib/aws-wafv2"));
const aws_cdk_lib_1 = require("aws-cdk-lib");
const path = __importStar(require("path"));
const allowed_ips_1 = require("./allowed-ips");
class HistoricalResearchStack extends cdk.Stack {
    constructor(scope, id, props) {
        super(scope, id, props);
        // ========================================
        // VPC参照(既存VPCを使用)
        // ========================================
        const vpc = ec2.Vpc.fromLookup(this, "ExistingVPC", {
            vpcId: "vpc-08d84efb87d052cf9",
        });
        const privateSubnetA = ec2.Subnet.fromSubnetId(this, "PrivateSubnetA", "subnet-0ebcb5a9bc54d1bd1");
        const privateSubnetC = ec2.Subnet.fromSubnetId(this, "PrivateSubnetC", "subnet-03fa4782f20ba49ec");
        // ========================================
        // Security Groups
        // ========================================
        const processingLambdaSG = new ec2.SecurityGroup(this, "ProcessingLambdaSG", {
            vpc,
            description: "Security group for processing Lambda functions (ImageAnalyzer, EmbeddingGenerator)",
            securityGroupName: "historical-research-processing-lambda-sg",
        });
        const searchLambdaSG = new ec2.SecurityGroup(this, "SearchLambdaSG", {
            vpc,
            description: "Security group for search Lambda function",
            securityGroupName: "historical-research-search-lambda-sg",
        });
        const vpcEndpointSG = new ec2.SecurityGroup(this, "VPCEndpointSG", {
            vpc,
            description: "Security group for VPC Endpoint (API Gateway)",
            securityGroupName: "historical-research-vpc-endpoint-sg",
        });
        // VPC Endpoint SGのインバウンドルール
        vpcEndpointSG.addIngressRule(searchLambdaSG, ec2.Port.tcp(443), "Allow from search consumers");
        const openSearchSG = new ec2.SecurityGroup(this, "OpenSearchSG", {
            vpc,
            description: "Security group for OpenSearch domain",
            securityGroupName: "historical-research-opensearch-sg",
        });
        // OpenSearchへのアクセス許可
        openSearchSG.addIngressRule(processingLambdaSG, ec2.Port.tcp(443), "Allow from processing Lambda");
        openSearchSG.addIngressRule(searchLambdaSG, ec2.Port.tcp(443), "Allow from search Lambda");
        // 既存のNHK AI Platform用SG
        const nhkAIPlatformSG = ec2.SecurityGroup.fromSecurityGroupId(this, "NHKAIPlatformSG", "sg-0faff8d9c30c94dc0");
        // ========================================
        // S3 Buckets
        // ========================================
        // 画像保存バケット
        const imagesBucket = new s3.Bucket(this, "ImagesBucket", {
            bucketName: "historical-research-images",
            versioned: true,
            encryption: s3.BucketEncryption.S3_MANAGED,
            blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
            // CORS は CloudFront URL 確定後に cfn escape hatch で設定
            lifecycleRules: [
                {
                    id: "DeleteOldVersions",
                    noncurrentVersionExpiration: aws_cdk_lib_1.Duration.days(90),
                },
            ],
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
        });
        // フロントエンド配信バケット
        const webBucket = new s3.Bucket(this, "WebBucket", {
            bucketName: "historical-research-web",
            encryption: s3.BucketEncryption.S3_MANAGED,
            blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
        });
        // プロンプト管理バケット
        const promptsBucket = new s3.Bucket(this, "PromptsBucket", {
            bucketName: "historical-research-prompts",
            versioned: true,
            encryption: s3.BucketEncryption.S3_MANAGED,
            blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
        });
        // PDFアップロード用バケット(原始 PDF ファイルを平時保存)
        // pdf-splitter Lambdaがこのバケットの ObjectCreated イベントで起動する
        const pdfBucket = new s3.Bucket(this, "PdfBucket", {
            bucketName: "historical-research-pdfs",
            encryption: s3.BucketEncryption.S3_MANAGED,
            blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
            // Presigned URLで直接 PUT するため CORS を許可
            cors: [
                {
                    allowedMethods: [s3.HttpMethods.PUT],
                    allowedOrigins: [`https://historical-research-dev.xmc.nhk.or.jp`],
                    allowedHeaders: ["*"], // 全ヘッダー許可(ブラウザが送るヘッダーをすべて許容)
                    maxAge: 3000,
                },
            ],
        });
        // ========================================
        // DynamoDB Table
        // ========================================
        const processingStatusTable = new dynamodb.Table(this, "ProcessingStatusTable", {
            tableName: "historical-research-processing-status",
            partitionKey: { name: "page_id", type: dynamodb.AttributeType.STRING },
            billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
            pointInTimeRecovery: true,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
            timeToLiveAttribute: "ttl",
        });
        // GSI for document_id
        processingStatusTable.addGlobalSecondaryIndex({
            indexName: "document_id-index",
            partitionKey: {
                name: "document_id",
                type: dynamodb.AttributeType.STRING,
            },
            sortKey: { name: "page_number", type: dynamodb.AttributeType.NUMBER },
        });
        // GSI for status
        processingStatusTable.addGlobalSecondaryIndex({
            indexName: "status-index",
            partitionKey: { name: "status", type: dynamodb.AttributeType.STRING },
            sortKey: { name: "updated_at", type: dynamodb.AttributeType.STRING },
        });
        // Books table(文献レベルのメタデータ管理)
        const booksTable = new dynamodb.Table(this, "BooksTable", {
            tableName: "historical-research-books",
            partitionKey: {
                name: "document_id",
                type: dynamodb.AttributeType.STRING,
            },
            billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
            pointInTimeRecovery: true,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
        });
        // ========================================
        // SQS Queues
        // ========================================
        // DLQ for Image Analysis
        const imageAnalysisDLQ = new sqs.Queue(this, "ImageAnalysisDLQ", {
            queueName: "historical-research-image-analysis-dlq",
            retentionPeriod: aws_cdk_lib_1.Duration.days(14),
            visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
        });
        // Image Analysis Queue
        const imageAnalysisQueue = new sqs.Queue(this, "ImageAnalysisQueue", {
            queueName: "historical-research-image-analysis-queue",
            visibilityTimeout: aws_cdk_lib_1.Duration.minutes(15),
            retentionPeriod: aws_cdk_lib_1.Duration.days(4),
            deadLetterQueue: {
                queue: imageAnalysisDLQ,
                maxReceiveCount: 3,
            },
        });
        // DLQ for Embedding Generation
        const embeddingDLQ = new sqs.Queue(this, "EmbeddingDLQ", {
            queueName: "historical-research-embedding-dlq",
            retentionPeriod: aws_cdk_lib_1.Duration.days(14),
            visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
        });
        // Embedding Generation Queue
        const embeddingQueue = new sqs.Queue(this, "EmbeddingQueue", {
            queueName: "historical-research-embedding-queue",
            visibilityTimeout: aws_cdk_lib_1.Duration.minutes(5),
            retentionPeriod: aws_cdk_lib_1.Duration.days(4),
            deadLetterQueue: {
                queue: embeddingDLQ,
                maxReceiveCount: 3,
            },
        });
        // DLQ for TOC Extraction
        const tocExtractionDLQ = new sqs.Queue(this, "TocExtractionDLQ", {
            queueName: "historical-research-toc-extraction-dlq",
            retentionPeriod: aws_cdk_lib_1.Duration.days(14),
            visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
        });
        // TOC Extraction Queue
        const tocExtractionQueue = new sqs.Queue(this, "TocExtractionQueue", {
            queueName: "historical-research-toc-extraction-queue",
            visibilityTimeout: aws_cdk_lib_1.Duration.minutes(10),
            retentionPeriod: aws_cdk_lib_1.Duration.days(4),
            deadLetterQueue: {
                queue: tocExtractionDLQ,
                maxReceiveCount: 3,
            },
        });
        // ========================================
        // Secrets Manager参照
        // ========================================
        const liteLLMApiKey = secretsmanager.Secret.fromSecretCompleteArn(this, "LiteLLMApiKey", "arn:aws:secretsmanager:ap-northeast-1:903877990773:secret:nhk_ai_api_key_lite_llm-Yymln7");
        // ========================================
        // OpenSearch Domain
        // ========================================
        const openSearchDomain = new opensearch.Domain(this, "OpenSearchDomain", {
            domainName: "historical-research-pages",
            version: opensearch.EngineVersion.OPENSEARCH_2_11,
            capacity: {
                dataNodes: 1,
                dataNodeInstanceType: "m5.large.search",
                multiAzWithStandbyEnabled: false,
            },
            ebs: {
                volumeSize: 20,
                volumeType: ec2.EbsDeviceVolumeType.GP3,
            },
            vpc,
            vpcSubnets: [
                {
                    subnets: [privateSubnetA],
                },
            ],
            securityGroups: [openSearchSG],
            enforceHttps: true,
            nodeToNodeEncryption: true,
            encryptionAtRest: {
                enabled: true,
            },
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
        });
        // OpenSearch アクセスポリシーは Lambda 関数定義後に特定ロールで設定
        // ========================================
        // Lambda Layers(共通ライブラリ)
        // ========================================
        const commonLayer = new lambda.LayerVersion(this, "CommonLayer", {
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/layers/common")),
            compatibleRuntimes: [lambda.Runtime.PYTHON_3_11],
            description: "Common utilities for Historical Research Lambda functions",
        });
        // ========================================
        // Lambda Functions
        // ========================================
        // 共通環境変数
        const commonEnvVars = {
            IMAGES_BUCKET: imagesBucket.bucketName,
            PDF_BUCKET: pdfBucket.bucketName,
            PROMPTS_BUCKET: promptsBucket.bucketName,
            PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
            OPENSEARCH_ENDPOINT: openSearchDomain.domainEndpoint,
            IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
            EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
            TOC_EXTRACTION_QUEUE_URL: tocExtractionQueue.queueUrl,
            LITELLM_BASE_URL: "https://api2.ai.dev.nhk.jp",
            LITELLM_MODEL: "claude-sonnet-4-5",
            LITELLM_API_KEY_SECRET_ARN: liteLLMApiKey.secretArn,
            BOOKS_TABLE: booksTable.tableName,
        };
        // 1. Upload Handler Lambda
        const uploadHandler = new lambda.Function(this, "UploadHandler", {
            functionName: "historical-research-upload-handler",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/upload-handler")),
            timeout: aws_cdk_lib_1.Duration.seconds(30),
            memorySize: 512,
            environment: commonEnvVars,
            layers: [commonLayer],
        });
        // 2. Image Analyzer Lambda (VPC内)
        const imageAnalyzer = new lambda.Function(this, "ImageAnalyzer", {
            functionName: "historical-research-image-analyzer",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/image-analyzer")),
            timeout: aws_cdk_lib_1.Duration.minutes(15),
            memorySize: 2048,
            environment: commonEnvVars,
            layers: [commonLayer],
            vpc,
            vpcSubnets: {
                subnets: [privateSubnetA, privateSubnetC],
            },
            securityGroups: [processingLambdaSG, nhkAIPlatformSG],
            reservedConcurrentExecutions: 10,
        });
        // SQSトリガー設定
        imageAnalyzer.addEventSource(new lambdaEventSources.SqsEventSource(imageAnalysisQueue, {
            batchSize: 1,
            maxBatchingWindow: aws_cdk_lib_1.Duration.seconds(0),
        }));
        // 3. Embedding Generator Lambda (VPC内)
        const embeddingGenerator = new lambda.Function(this, "EmbeddingGenerator", {
            functionName: "historical-research-embedding-generator",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/embedding-generator")),
            timeout: aws_cdk_lib_1.Duration.minutes(5),
            memorySize: 1024,
            environment: commonEnvVars,
            layers: [commonLayer],
            vpc,
            vpcSubnets: {
                subnets: [privateSubnetA, privateSubnetC],
            },
            securityGroups: [processingLambdaSG, nhkAIPlatformSG],
        });
        // SQSトリガー設定
        embeddingGenerator.addEventSource(new lambdaEventSources.SqsEventSource(embeddingQueue, {
            batchSize: 1,
            maxBatchingWindow: aws_cdk_lib_1.Duration.seconds(0),
        }));
        // 4. Search API Lambda (VPC内)
        const searchAPI = new lambda.Function(this, "SearchAPI", {
            functionName: "historical-research-search-api",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/search-api")),
            timeout: aws_cdk_lib_1.Duration.seconds(30),
            memorySize: 1024,
            environment: commonEnvVars,
            layers: [commonLayer],
            vpc,
            vpcSubnets: {
                subnets: [privateSubnetA, privateSubnetC],
            },
            securityGroups: [searchLambdaSG, nhkAIPlatformSG],
        });
        // 5. Bulk Processor Lambda
        const bulkProcessor = new lambda.Function(this, "BulkProcessor", {
            functionName: "historical-research-bulk-processor",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/bulk-processor")),
            timeout: aws_cdk_lib_1.Duration.minutes(15),
            memorySize: 512,
            environment: commonEnvVars,
            layers: [commonLayer],
        });
        // 6. DLQ Processor Lambda
        const dlqProcessor = new lambda.Function(this, "DLQProcessor", {
            functionName: "historical-research-dlq-processor",
            runtime: lambda.Runtime.PYTHON_3_11,
            handler: "handler.lambda_handler",
            code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/dlq-processor")),
            timeout: aws_cdk_lib_1.Duration.minutes(5),
            memorySize: 256,
            environment: {
                IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
                EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
                PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
            },
            layers: [commonLayer],
        });
        // DLQトリガー設定
        dlqProcessor.addEventSource(new lambdaEventSources.SqsEventSource(imageAnalysisDLQ, {
            batchSize: 1,
        }));
        dlqProcessor.addEventSource(new lambdaEventSources.SqsEventSource(embeddingDLQ, {
            batchSize: 1,
        }));
        // 7. PDF Splitter Lambda
        // PyMuPDF は Docker で事前ビルド済み(lambda/pdf-splitter/ にバイナリ同梱)
        // build_pdf_lambda.sh で生成: fitz/, pymupdf/, PyMuPDF-*.dist-info/