// ./knowledge-base/cdk/lib/historical-research-stack.ts
import * as cdk from "aws-cdk-lib";
import { Construct } from "constructs";
import * as s3 from "aws-cdk-lib/aws-s3";
import * as s3n from "aws-cdk-lib/aws-s3-notifications";
import * as dynamodb from "aws-cdk-lib/aws-dynamodb";
import * as sqs from "aws-cdk-lib/aws-sqs";
import * as lambda from "aws-cdk-lib/aws-lambda";
import * as ec2 from "aws-cdk-lib/aws-ec2";
import * as iam from "aws-cdk-lib/aws-iam";
import * as opensearch from "aws-cdk-lib/aws-opensearchservice";
import * as apigateway from "aws-cdk-lib/aws-apigateway";
import * as cloudfront from "aws-cdk-lib/aws-cloudfront";
import * as origins from "aws-cdk-lib/aws-cloudfront-origins";
import * as lambdaEventSources from "aws-cdk-lib/aws-lambda-event-sources";
import * as secretsmanager from "aws-cdk-lib/aws-secretsmanager";
import * as certificatemanager from "aws-cdk-lib/aws-certificatemanager";
import * as wafv2 from "aws-cdk-lib/aws-wafv2";
import { Duration, RemovalPolicy } from "aws-cdk-lib";
import * as path from "path";
import * as lambdaNodejs from "aws-cdk-lib/aws-lambda-nodejs";
import { ALLOWED_IPS } from "./allowed-ips";
/**
 * Props accepted by {@link HistoricalResearchStack}, extending the standard CDK stack props.
 */
interface HistoricalResearchStackProps extends cdk.StackProps {
/** WAF WebACL ARN (passed in from the WafStack in us-east-1; used for CloudFront IP restriction). */
webAclArn: string;
}
export class HistoricalResearchStack extends cdk.Stack {
constructor(
scope: Construct,
id: string,
props: HistoricalResearchStackProps,
) {
super(scope, id, props);
// ========================================
// VPC reference (uses an existing VPC)
// ========================================
// Resolve the pre-existing VPC by its hard-coded ID rather than creating a new one.
const vpc = ec2.Vpc.fromLookup(this, "ExistingVPC", {
vpcId: "vpc-08d84efb87d052cf9",
});
// Import two existing subnets by ID. The "private" / "A" / "C" labels come from the
// identifiers only — NOTE(review): confirm these are private subnets in separate AZs.
const privateSubnetA = ec2.Subnet.fromSubnetId(
this,
"PrivateSubnetA",
"subnet-0ebcb5a9bc54d1bd1",
);
const privateSubnetC = ec2.Subnet.fromSubnetId(
this,
"PrivateSubnetC",
"subnet-03fa4782f20ba49ec",
);
// ========================================
// Security Groups
// ========================================
// Every ingress rule in this section opens TCP 443, so the Port is built once.
const httpsIngressPort = ec2.Port.tcp(443);

// Security group for the processing Lambdas (ImageAnalyzer, EmbeddingGenerator).
const processingLambdaSG = new ec2.SecurityGroup(this, "ProcessingLambdaSG", {
  vpc,
  securityGroupName: "historical-research-processing-lambda-sg",
  description:
    "Security group for processing Lambda functions (ImageAnalyzer, EmbeddingGenerator)",
});

// Security group for the search Lambda.
const searchLambdaSG = new ec2.SecurityGroup(this, "SearchLambdaSG", {
  vpc,
  securityGroupName: "historical-research-search-lambda-sg",
  description: "Security group for search Lambda function",
});

// Security group for the API Gateway VPC endpoint.
const vpcEndpointSG = new ec2.SecurityGroup(this, "VPCEndpointSG", {
  vpc,
  securityGroupName: "historical-research-vpc-endpoint-sg",
  description: "Security group for VPC Endpoint (API Gateway)",
});

// Inbound rule for the VPC endpoint: HTTPS from the search Lambda's security group.
vpcEndpointSG.addIngressRule(
  searchLambdaSG,
  httpsIngressPort,
  "Allow from search consumers",
);

// Security group for the OpenSearch domain.
const openSearchSG = new ec2.SecurityGroup(this, "OpenSearchSG", {
  vpc,
  securityGroupName: "historical-research-opensearch-sg",
  description: "Security group for OpenSearch domain",
});

// Grant HTTPS access to OpenSearch from each client security group, in order.
const openSearchClients = [
  [processingLambdaSG, "Allow from processing Lambda"],
  [searchLambdaSG, "Allow from search Lambda"],
] as const;
for (const [clientSG, ruleDescription] of openSearchClients) {
  openSearchSG.addIngressRule(clientSG, httpsIngressPort, ruleDescription);
}

// Existing security group for the NHK AI Platform, imported by ID.
const nhkAIPlatformSG = ec2.SecurityGroup.fromSecurityGroupId(
  this,
  "NHKAIPlatformSG",
  "sg-0faff8d9c30c94dc0",
);
// ========================================
// S3 Buckets
// ========================================
// Settings common to the four buckets declared below: S3-managed encryption,
// all public access blocked, and the bucket retained when removed from the stack.
const retainedPrivateBucketProps = {
  encryption: s3.BucketEncryption.S3_MANAGED,
  blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
  removalPolicy: RemovalPolicy.RETAIN,
};

// Image storage bucket. Versioned; noncurrent versions expire after 90 days.
// CORS is configured later via a cfn escape hatch once the CloudFront URL is known.
const imagesBucket = new s3.Bucket(this, "ImagesBucket", {
  ...retainedPrivateBucketProps,
  bucketName: "historical-research-images",
  versioned: true,
  lifecycleRules: [
    {
      id: "DeleteOldVersions",
      noncurrentVersionExpiration: Duration.days(90),
    },
  ],
});

// Bucket that serves the front-end assets.
const webBucket = new s3.Bucket(this, "WebBucket", {
  ...retainedPrivateBucketProps,
  bucketName: "historical-research-web",
});

// Bucket that holds the managed prompts (versioned).
const promptsBucket = new s3.Bucket(this, "PromptsBucket", {
  ...retainedPrivateBucketProps,
  bucketName: "historical-research-prompts",
  versioned: true,
});

// PDF upload bucket (stores the original PDF files).
// The pdf-splitter Lambda is triggered by this bucket's ObjectCreated events.
const pdfBucket = new s3.Bucket(this, "PdfBucket", {
  ...retainedPrivateBucketProps,
  bucketName: "historical-research-pdfs",
  // CORS is allowed because clients PUT directly using a presigned URL.
  cors: [
    {
      allowedOrigins: [`https://historical-research-dev.xmc.nhk.or.jp`],
      allowedMethods: [s3.HttpMethods.PUT],
      // Allow all headers so that whatever the browser sends is accepted.
      allowedHeaders: ["*"],
      maxAge: 3000,
    },
  ],
});
// ========================================
// DynamoDB Tables
// ========================================
// Builds a STRING-typed key attribute; most keys in this section are strings.
const stringAttribute = (name: string): dynamodb.Attribute => ({
  name,
  type: dynamodb.AttributeType.STRING,
});

// Settings shared by both tables: on-demand billing, point-in-time recovery,
// and the table retained when removed from the stack.
const onDemandRetainedTableProps = {
  billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
  pointInTimeRecovery: true,
  removalPolicy: RemovalPolicy.RETAIN,
};

// Per-page processing status, keyed by page_id, with TTL on the "ttl" attribute.
const processingStatusTable = new dynamodb.Table(this, "ProcessingStatusTable", {
  ...onDemandRetainedTableProps,
  tableName: "historical-research-processing-status",
  partitionKey: stringAttribute("page_id"),
  timeToLiveAttribute: "ttl",
});

// Global secondary indexes on the status table, added in declaration order:
// one by document_id (sorted by page_number), one by status (sorted by updated_at).
const processingStatusIndexes: dynamodb.GlobalSecondaryIndexProps[] = [
  {
    indexName: "document_id-index",
    partitionKey: stringAttribute("document_id"),
    sortKey: { name: "page_number", type: dynamodb.AttributeType.NUMBER },
  },
  {
    indexName: "status-index",
    partitionKey: stringAttribute("status"),
    sortKey: stringAttribute("updated_at"),
  },
];
for (const indexProps of processingStatusIndexes) {
  processingStatusTable.addGlobalSecondaryIndex(indexProps);
}

// Books table (document-level metadata), keyed by document_id.
const booksTable = new dynamodb.Table(this, "BooksTable", {
  ...onDemandRetainedTableProps,
  tableName: "historical-research-books",
  partitionKey: stringAttribute("document_id"),
});
// ========================================
// SQS Queues
// ========================================
// Creates a dead-letter queue. All DLQs here share 14-day retention and a
// 1800-second visibility timeout.
const createDeadLetterQueue = (
  constructId: string,
  queueName: string,
): sqs.Queue =>
  new sqs.Queue(this, constructId, {
    queueName,
    retentionPeriod: Duration.days(14),
    visibilityTimeout: Duration.seconds(1800),
  });

// Creates a work queue with 4-day retention that redrives to `deadLetterQueue`
// with maxReceiveCount 3. Only the visibility timeout differs between queues.
const createWorkQueue = (
  constructId: string,
  queueName: string,
  visibilityTimeout: Duration,
  deadLetterQueue: sqs.Queue,
): sqs.Queue =>
  new sqs.Queue(this, constructId, {
    queueName,
    visibilityTimeout,
    retentionPeriod: Duration.days(4),
    deadLetterQueue: {
      queue: deadLetterQueue,
      maxReceiveCount: 3,
    },
  });

// Image analysis: DLQ first, then the work queue that redrives to it.
const imageAnalysisDLQ = createDeadLetterQueue(
  "ImageAnalysisDLQ",
  "historical-research-image-analysis-dlq",
);
const imageAnalysisQueue = createWorkQueue(
  "ImageAnalysisQueue",
  "historical-research-image-analysis-queue",
  Duration.minutes(15),
  imageAnalysisDLQ,
);

// Embedding generation.
const embeddingDLQ = createDeadLetterQueue(
  "EmbeddingDLQ",
  "historical-research-embedding-dlq",
);
const embeddingQueue = createWorkQueue(
  "EmbeddingQueue",
  "historical-research-embedding-queue",
  Duration.minutes(5),
  embeddingDLQ,
);

// TOC extraction.
const tocExtractionDLQ = createDeadLetterQueue(
  "TocExtractionDLQ",
  "historical-research-toc-extraction-dlq",
);
const tocExtractionQueue = createWorkQueue(
  "TocExtractionQueue",
  "historical-research-toc-extraction-queue",
  Duration.minutes(10),
  tocExtractionDLQ,
);
// ========================================
// Secrets Manager reference
// ========================================
// Import the existing LiteLLM API key secret by its complete ARN; only its ARN is
// read in the visible code (see LITELLM_API_KEY_SECRET_ARN in the shared env vars).
const liteLLMApiKey = secretsmanager.Secret.fromSecretCompleteArn(
this,
"LiteLLMApiKey",
"arn:aws:secretsmanager:ap-northeast-1:903877990773:secret:nhk_ai_api_key_lite_llm-Yymln7",
);
// ========================================
// OpenSearch Domain
// ========================================
// OpenSearch 2.11 domain inside the VPC: a single m5.large.search data node with a
// 20 GiB gp3 volume, Multi-AZ-with-standby disabled, placed in privateSubnetA only.
// HTTPS is enforced, node-to-node and at-rest encryption are enabled, and the
// domain is retained when removed from the stack.
const openSearchDomain = new opensearch.Domain(this, "OpenSearchDomain", {
domainName: "historical-research-pages",
version: opensearch.EngineVersion.OPENSEARCH_2_11,
capacity: {
dataNodes: 1,
dataNodeInstanceType: "m5.large.search",
multiAzWithStandbyEnabled: false,
},
ebs: {
volumeSize: 20,
volumeType: ec2.EbsDeviceVolumeType.GP3,
},
vpc,
vpcSubnets: [
{
// Exactly one subnet is selected, matching the single data node above.
subnets: [privateSubnetA],
},
],
// Network access is limited to the peers allowed on openSearchSG.
securityGroups: [openSearchSG],
enforceHttps: true,
nodeToNodeEncryption: true,
encryptionAtRest: {
enabled: true,
},
removalPolicy: RemovalPolicy.RETAIN,
});
// The OpenSearch access policy is configured after the Lambda functions are defined, scoped to specific roles.
// ========================================
// Lambda Layers (shared libraries)
// ========================================
// Layer built from lambda/layers/common (resolved relative to this file) and
// declared compatible with the Python 3.11 runtime.
const commonLayer = new lambda.LayerVersion(this, "CommonLayer", {
code: lambda.Code.fromAsset(
path.join(__dirname, "../../lambda/layers/common"),
),
compatibleRuntimes: [lambda.Runtime.PYTHON_3_11],
description: "Common utilities for Historical Research Lambda functions",
});
// ========================================
// Lambda Functions
// ========================================
// Environment variables shared by the Lambda functions; the same object is passed
// as `environment` to each function defined below.
const commonEnvVars = {
IMAGES_BUCKET: imagesBucket.bucketName,
PDF_BUCKET: pdfBucket.bucketName,
PROMPTS_BUCKET: promptsBucket.bucketName,
PROCESSING_STATUS_TABLE: processingStatusTable.tableName,
OPENSEARCH_ENDPOINT: openSearchDomain.domainEndpoint,
IMAGE_ANALYSIS_QUEUE_URL: imageAnalysisQueue.queueUrl,
EMBEDDING_QUEUE_URL: embeddingQueue.queueUrl,
TOC_EXTRACTION_QUEUE_URL: tocExtractionQueue.queueUrl,
LITELLM_BASE_URL: "https://api2.ai.dev.nhk.jp",
LITELLM_MODEL: "claude-sonnet-4-5",
// Only the secret's ARN is exposed here, not the secret value itself.
LITELLM_API_KEY_SECRET_ARN: liteLLMApiKey.secretArn,
BOOKS_TABLE: booksTable.tableName,
};
// 1. Upload Handler Lambda
// Python 3.11 function packaged from lambda/upload-handler with a 30-second timeout
// and 512 MB of memory. No `vpc` is passed, so unlike the processing functions
// below it is not attached to the VPC.
const uploadHandler = new lambda.Function(this, "UploadHandler", {
functionName: "historical-research-upload-handler",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(
path.join(__dirname, "../../lambda/upload-handler"),
),
timeout: Duration.seconds(30),
memorySize: 512,
environment: commonEnvVars,
layers: [commonLayer],
});
// 2. Image Analyzer Lambda (runs inside the VPC)
// Single source of truth for how many ImageAnalyzer instances may run at once.
// It is used both as the function's reserved concurrency and as the SQS event
// source's maximum concurrency so the two limits cannot drift apart.
const imageAnalyzerConcurrency = 10;

// Python 3.11 function packaged from lambda/image-analyzer. The 15-minute timeout
// equals the image analysis queue's visibility timeout. It runs in both private
// subnets with the processing and NHK AI Platform security groups attached.
const imageAnalyzer = new lambda.Function(this, "ImageAnalyzer", {
  functionName: "historical-research-image-analyzer",
  runtime: lambda.Runtime.PYTHON_3_11,
  handler: "handler.lambda_handler",
  code: lambda.Code.fromAsset(
    path.join(__dirname, "../../lambda/image-analyzer"),
  ),
  timeout: Duration.minutes(15),
  memorySize: 2048,
  environment: commonEnvVars,
  layers: [commonLayer],
  vpc,
  vpcSubnets: {
    subnets: [privateSubnetA, privateSubnetC],
  },
  securityGroups: [processingLambdaSG, nhkAIPlatformSG],
  reservedConcurrentExecutions: imageAnalyzerConcurrency,
});

// SQS trigger: one message per invocation, with no batching delay.
imageAnalyzer.addEventSource(
  new lambdaEventSources.SqsEventSource(imageAnalysisQueue, {
    batchSize: 1,
    maxBatchingWindow: Duration.seconds(0),
    // Cap the event source at the function's concurrency limit. Without this the
    // SQS pollers can scale past the reserved concurrency; throttled invocations
    // return their messages to the queue, and each return counts toward the
    // queue's maxReceiveCount, so healthy messages can be pushed to the DLQ
    // under load.
    maxConcurrency: imageAnalyzerConcurrency,
  }),
);
// 3. Embedding Generator Lambda (VPC内)
const embeddingGenerator = new lambda.Function(this, "EmbeddingGenerator", {
functionName: "historical-research-embedding-generator",
runtime: lambda.Runtime.PYTHON_3_11,
handler: "handler.lambda_handler",
code: lambda.Code.fromAsset(
path.join(__dirname, "../../lambda/embedding-generator"),
),
timeout: Duration.minutes(5),
memorySize: 1024,
environment: commonEnvVars,
layers: [commonLayer],
vpc,
vpcSubnets: {
subnets: [privateSubnetA, privateSubnetC],
},
securityGroups: [processingLambdaSG, nhkAIPlatformSG],