./knowledge-base/cdk/lib/historical-research-stack.js
"use strict";
/**
 * TypeScript interop helper: exposes property `k` of module object `m` on
 * object `o` under the name `k2` (`k` is used when `k2` is undefined).
 *
 * An implementation already attached to `this` is reused when present.
 * Otherwise, when `Object.create` exists, the property is installed with
 * `Object.defineProperty`. The source's own descriptor is reused only when it
 * is an accessor on a module flagged `__esModule`, or a data property that is
 * neither writable nor configurable; in every other case (including a missing
 * property) an enumerable getter that reads `m[k]` on each access is installed.
 * Without `Object.create`, the current value is copied by plain assignment.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Fallback path: copy the current value only.
        return function (target, source, key, alias) {
            var exportName = alias === undefined ? key : alias;
            target[exportName] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var reuseDescriptor;
        if (!descriptor) {
            reuseDescriptor = false;
        } else if ("get" in descriptor) {
            // Accessor property: only trusted as-is on an ES module namespace.
            reuseDescriptor = Boolean(source.__esModule);
        } else {
            // Data property: reusable only when it can never change.
            reuseDescriptor = !(descriptor.writable || descriptor.configurable);
        }
        if (!reuseDescriptor) {
            // Forward reads to the source so later updates stay visible.
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                }
            };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
/**
 * TypeScript interop helper: stores `v` as the `default` member of object `o`.
 *
 * An implementation already attached to `this` is reused when present.
 * Otherwise, when `Object.create` exists, `default` is defined through
 * `Object.defineProperty` as an enumerable property (writable and configurable
 * are left at their `defineProperty` defaults); without `Object.create` it is
 * set by plain assignment.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            var descriptor = { enumerable: true, value: value };
            Object.defineProperty(namespace, "default", descriptor);
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
/**
 * TypeScript interop helper behind `import * as ns from "..."`.
 *
 * An implementation already attached to `this` is reused when present.
 * Otherwise the returned function behaves as follows:
 *  - a truthy `mod` flagged `__esModule` is returned unchanged;
 *  - for any other value a fresh object is built: every own property name of
 *    `mod` except "default" is bound onto it via `__createBinding` (skipped
 *    when `mod` is null/undefined), then `mod` itself is stored as its
 *    `default` via `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Key-listing function; selected on first use and cached afterwards.
    var listKeys = null;
    function ownKeysOf(obj) {
        if (listKeys === null) {
            listKeys = Object.getOwnPropertyNames || function (o) {
                // Fallback for engines lacking Object.getOwnPropertyNames.
                var names = [];
                for (var prop in o) {
                    if (Object.prototype.hasOwnProperty.call(o, prop)) {
                        names[names.length] = prop;
                    }
                }
                return names;
            };
        }
        return listKeys(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = ownKeysOf(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key === "default") {
                    // "default" is set separately below to point at `mod` itself.
                    continue;
                }
                __createBinding(namespace, mod, key);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.HistoricalResearchStack = void 0;
const cdk = __importStar(require("aws-cdk-lib"));
const s3 = __importStar(require("aws-cdk-lib/aws-s3"));
const s3n = __importStar(require("aws-cdk-lib/aws-s3-notifications"));
const dynamodb = __importStar(require("aws-cdk-lib/aws-dynamodb"));
const sqs = __importStar(require("aws-cdk-lib/aws-sqs"));
const lambda = __importStar(require("aws-cdk-lib/aws-lambda"));
const ec2 = __importStar(require("aws-cdk-lib/aws-ec2"));
const iam = __importStar(require("aws-cdk-lib/aws-iam"));
const opensearch = __importStar(require("aws-cdk-lib/aws-opensearchservice"));
const apigateway = __importStar(require("aws-cdk-lib/aws-apigateway"));
const cloudfront = __importStar(require("aws-cdk-lib/aws-cloudfront"));
const origins = __importStar(require("aws-cdk-lib/aws-cloudfront-origins"));
const lambdaEventSources = __importStar(require("aws-cdk-lib/aws-lambda-event-sources"));
const secretsmanager = __importStar(require("aws-cdk-lib/aws-secretsmanager"));
const certificatemanager = __importStar(require("aws-cdk-lib/aws-certificatemanager"));
const wafv2 = __importStar(require("aws-cdk-lib/aws-wafv2"));
const aws_cdk_lib_1 = require("aws-cdk-lib");
const path = __importStar(require("path"));
const allowed_ips_1 = require("./allowed-ips");
class HistoricalResearchStack extends cdk.Stack {
constructor(scope, id, props) {
super(scope, id, props);
// ========================================
// VPC参照(既存VPCを使用)
// ========================================
const vpc = ec2.Vpc.fromLookup(this, "ExistingVPC", {
vpcId: "vpc-08d84efb87d052cf9",
});
const privateSubnetA = ec2.Subnet.fromSubnetId(this, "PrivateSubnetA", "subnet-0ebcb5a9bc54d1bd1");
const privateSubnetC = ec2.Subnet.fromSubnetId(this, "PrivateSubnetC", "subnet-03fa4782f20ba49ec");
// ========================================
// Security Groups
// ========================================
const processingLambdaSG = new ec2.SecurityGroup(this, "ProcessingLambdaSG", {
vpc,
description: "Security group for processing Lambda functions (ImageAnalyzer, EmbeddingGenerator)",
securityGroupName: "historical-research-processing-lambda-sg",
});
const searchLambdaSG = new ec2.SecurityGroup(this, "SearchLambdaSG", {
vpc,
description: "Security group for search Lambda function",
securityGroupName: "historical-research-search-lambda-sg",
});
const vpcEndpointSG = new ec2.SecurityGroup(this, "VPCEndpointSG", {
vpc,
description: "Security group for VPC Endpoint (API Gateway)",
securityGroupName: "historical-research-vpc-endpoint-sg",
});
// VPC Endpoint SGのインバウンドルール
vpcEndpointSG.addIngressRule(searchLambdaSG, ec2.Port.tcp(443), "Allow from search consumers");
const openSearchSG = new ec2.SecurityGroup(this, "OpenSearchSG", {
vpc,
description: "Security group for OpenSearch domain",
securityGroupName: "historical-research-opensearch-sg",
});
// OpenSearchへのアクセス許可
openSearchSG.addIngressRule(processingLambdaSG, ec2.Port.tcp(443), "Allow from processing Lambda");
openSearchSG.addIngressRule(searchLambdaSG, ec2.Port.tcp(443), "Allow from search Lambda");
// 既存のNHK AI Platform用SG
const nhkAIPlatformSG = ec2.SecurityGroup.fromSecurityGroupId(this, "NHKAIPlatformSG", "sg-0faff8d9c30c94dc0");
// ========================================
// S3 Buckets
// ========================================
// 画像保存バケット
const imagesBucket = new s3.Bucket(this, "ImagesBucket", {
bucketName: "historical-research-images",
versioned: true,
encryption: s3.BucketEncryption.S3_MANAGED,
blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
// CORS は CloudFront URL 確定後に cfn escape hatch で設定
lifecycleRules: [
{
id: "DeleteOldVersions",
noncurrentVersionExpiration: aws_cdk_lib_1.Duration.days(90),
},
],
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
});
// フロントエンド配信バケット
const webBucket = new s3.Bucket(this, "WebBucket", {
bucketName: "historical-research-web",
encryption: s3.BucketEncryption.S3_MANAGED,
blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
});
// プロンプト管理バケット
const promptsBucket = new s3.Bucket(this, "PromptsBucket", {
bucketName: "historical-research-prompts",
versioned: true,
encryption: s3.BucketEncryption.S3_MANAGED,
blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
});
// PDFアップロード用バケット(原始 PDF ファイルを平時保存)
// pdf-splitter Lambdaがこのバケットの ObjectCreated イベントで起動する
const pdfBucket = new s3.Bucket(this, "PdfBucket", {
bucketName: "historical-research-pdfs",
encryption: s3.BucketEncryption.S3_MANAGED,
blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
// Presigned URLで直接 PUT するため CORS を許可
cors: [
{
allowedMethods: [s3.HttpMethods.PUT],
allowedOrigins: [`https://historical-research-dev.xmc.nhk.or.jp`],
allowedHeaders: ["*"], // 全ヘッダー許可(ブラウザが送るヘッダーをすべて許容)
maxAge: 3000,
},
],
});
// ========================================
// DynamoDB Table
// ========================================
const processingStatusTable = new dynamodb.Table(this, "ProcessingStatusTable", {
tableName: "historical-research-processing-status",
partitionKey: { name: "page_id", type: dynamodb.AttributeType.STRING },
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
pointInTimeRecovery: true,
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
timeToLiveAttribute: "ttl",
});
// GSI for document_id
processingStatusTable.addGlobalSecondaryIndex({
indexName: "document_id-index",
partitionKey: {
name: "document_id",
type: dynamodb.AttributeType.STRING,
},
sortKey: { name: "page_number", type: dynamodb.AttributeType.NUMBER },
});
// GSI for status
processingStatusTable.addGlobalSecondaryIndex({
indexName: "status-index",
partitionKey: { name: "status", type: dynamodb.AttributeType.STRING },
sortKey: { name: "updated_at", type: dynamodb.AttributeType.STRING },
});
// Books table(文献レベルのメタデータ管理)
const booksTable = new dynamodb.Table(this, "BooksTable", {
tableName: "historical-research-books",
partitionKey: {
name: "document_id",
type: dynamodb.AttributeType.STRING,
},
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
pointInTimeRecovery: true,
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
});
// ========================================
// SQS Queues
// ========================================
// DLQ for Image Analysis
const imageAnalysisDLQ = new sqs.Queue(this, "ImageAnalysisDLQ", {
queueName: "historical-research-image-analysis-dlq",
retentionPeriod: aws_cdk_lib_1.Duration.days(14),
visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
});
// Image Analysis Queue
const imageAnalysisQueue = new sqs.Queue(this, "ImageAnalysisQueue", {
queueName: "historical-research-image-analysis-queue",
visibilityTimeout: aws_cdk_lib_1.Duration.minutes(15),
retentionPeriod: aws_cdk_lib_1.Duration.days(4),
deadLetterQueue: {
queue: imageAnalysisDLQ,
maxReceiveCount: 3,
},
});
// DLQ for Embedding Generation
const embeddingDLQ = new sqs.Queue(this, "EmbeddingDLQ", {
queueName: "historical-research-embedding-dlq",
retentionPeriod: aws_cdk_lib_1.Duration.days(14),
visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
});
// Embedding Generation Queue
const embeddingQueue = new sqs.Queue(this, "EmbeddingQueue", {
queueName: "historical-research-embedding-queue",
visibilityTimeout: aws_cdk_lib_1.Duration.minutes(5),
retentionPeriod: aws_cdk_lib_1.Duration.days(4),
deadLetterQueue: {
queue: embeddingDLQ,
maxReceiveCount: 3,
},
});
// DLQ for TOC Extraction
const tocExtractionDLQ = new sqs.Queue(this, "TocExtractionDLQ", {
queueName: "historical-research-toc-extraction-dlq",
retentionPeriod: aws_cdk_lib_1.Duration.days(14),
visibilityTimeout: aws_cdk_lib_1.Duration.seconds(1800),
});
// TOC Extraction Queue
const tocExtractionQueue = new sqs.Queue(this, "TocExtractionQueue", {
queueName: "historical-research-toc-extraction-queue",
visibilityTimeout: aws_cdk_lib_1.Duration.minutes(10),
retentionPeriod: aws_cdk_lib_1.Duration.days(4),
deadLetterQueue: {
queue: tocExtractionDLQ,
maxReceiveCount: 3,
},
});
// ========================================
// Secrets Manager参照
// ========================================
const liteLLMApiKey = secretsmanager.Secret.fromSecretCompleteArn(this, "LiteLLMApiKey", "arn:aws:secretsmanager:ap-northeast-1:903877990773:secret:nhk_ai_api_key_lite_llm-Yymln7");
// ========================================
// OpenSearch Domain
// ========================================
const openSearchDomain = new opensearch.Domain(this, "OpenSearchDomain", {
domainName: "historical-research-pages",
version: opensearch.EngineVersion.OPENSEARCH_2_11,
capacity: {
dataNodes: 1,
dataNodeInstanceType: "m5.large.search",
multiAzWithStandbyEnabled: false,
},
ebs: {
volumeSize: 20,
volumeType: ec2.EbsDeviceVolumeType.GP3,
},
vpc,
vpcSubnets: [
{
subnets: [privateSubnetA],
},
],
securityGroups: [openSearchSG],
enforceHttps: true,
nodeToNodeEncryption: true,
encryptionAtRest: {
enabled: true,
},
removalPolicy: aws_cdk_lib_1.RemovalPolicy.RETAIN,
});
// OpenSearch アクセスポリシーは Lambda 関数定義後に特定ロールで設定
// ========================================
// Lambda Layers(共通ライブラリ)
// ========================================
const commonLayer = new lambda.LayerVersion(this, "CommonLayer", {
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/layers/common")),
compatibleRuntimes: [lambda.Runtime.PYTHON_3_11],
description: "Common utilities for Historical Research Lambda functions",
});
// ========================================
// Lambda Functions
// ========================================
// 共通環境変数
const commonEnvVars = {
IMAGES_BUCKET: imagesBucket.bucketName,
PDF_BUCKET: pdfBucket.bucketName,
PROMPTS_BUCKET: promptsBucket.bucketName,
PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
OPENSEARCH_ENDPOINT: openSearchDomain.domainEndpoint,
IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
TOC_EXTRACTION_QUEUE_URL: tocExtractionQueue.queueUrl,
LITELLM_BASE_URL: "https://api2.ai.dev.nhk.jp",
LITELLM_MODEL: "claude-sonnet-4-5",
LITELLM_API_KEY_SECRET_ARN: liteLLMApiKey.secretArn,
BOOKS_TABLE: booksTable.tableName,
};
// 1. Upload Handler Lambda
const uploadHandler = new lambda.Function(this, "UploadHandler", {
functionName: "historical-research-upload-handler",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/upload-handler")),
timeout: aws_cdk_lib_1.Duration.seconds(30),
memorySize: 512,
environment: commonEnvVars,
layers: [commonLayer],
});
// 2. Image Analyzer Lambda (VPC内)
const imageAnalyzer = new lambda.Function(this, "ImageAnalyzer", {
functionName: "historical-research-image-analyzer",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/image-analyzer")),
timeout: aws_cdk_lib_1.Duration.minutes(15),
memorySize: 2048,
environment: commonEnvVars,
layers: [commonLayer],
vpc,
vpcSubnets: {
subnets: [privateSubnetA, privateSubnetC],
},
securityGroups: [processingLambdaSG, nhkAIPlatformSG],
reservedConcurrentExecutions: 10,
});
// SQSトリガー設定
imageAnalyzer.addEventSource(new lambdaEventSources.SqsEventSource(imageAnalysisQueue, {
batchSize: 1,
maxBatchingWindow: aws_cdk_lib_1.Duration.seconds(0),
}));
// 3. Embedding Generator Lambda (VPC内)
const embeddingGenerator = new lambda.Function(this, "EmbeddingGenerator", {
functionName: "historical-research-embedding-generator",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/embedding-generator")),
timeout: aws_cdk_lib_1.Duration.minutes(5),
memorySize: 1024,
environment: commonEnvVars,
layers: [commonLayer],
vpc,
vpcSubnets: {
subnets: [privateSubnetA, privateSubnetC],
},
securityGroups: [processingLambdaSG, nhkAIPlatformSG],
});
// SQSトリガー設定
embeddingGenerator.addEventSource(new lambdaEventSources.SqsEventSource(embeddingQueue, {
batchSize: 1,
maxBatchingWindow: aws_cdk_lib_1.Duration.seconds(0),
}));
// 4. Search API Lambda (VPC内)
const searchAPI = new lambda.Function(this, "SearchAPI", {
functionName: "historical-research-search-api",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/search-api")),
timeout: aws_cdk_lib_1.Duration.seconds(30),
memorySize: 1024,
environment: commonEnvVars,
layers: [commonLayer],
vpc,
vpcSubnets: {
subnets: [privateSubnetA, privateSubnetC],
},
securityGroups: [searchLambdaSG, nhkAIPlatformSG],
});
// 5. Bulk Processor Lambda
const bulkProcessor = new lambda.Function(this, "BulkProcessor", {
functionName: "historical-research-bulk-processor",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/bulk-processor")),
timeout: aws_cdk_lib_1.Duration.minutes(15),
memorySize: 512,
environment: commonEnvVars,
layers: [commonLayer],
});
// 6. DLQ Processor Lambda
const dlqProcessor = new lambda.Function(this, "DLQProcessor", {
functionName: "historical-research-dlq-processor",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(path.join(__dirname, "../../lambda/dlq-processor")),
timeout: aws_cdk_lib_1.Duration.minutes(5),
memorySize: 256,
environment: {
IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
},
layers: [commonLayer],
});
// DLQトリガー設定
dlqProcessor.addEventSource(new lambdaEventSources.SqsEventSource(imageAnalysisDLQ, {
batchSize: 1,
}));
dlqProcessor.addEventSource(new lambdaEventSources.SqsEventSource(embeddingDLQ, {
batchSize: 1,
}));
// 7. PDF Splitter Lambda
// PyMuPDF は Docker で事前ビルド済み(lambda/pdf-splitter/ にバイナリ同梱)
// build_pdf_lambda.sh で生成: fitz/, pymupdf/, PyMuPDF-*.dist-info/