diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4cbc175 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,84 @@ +# build via (docker root = project root): +# docker build -t absaoss/eventgate:latest . +# +# build [with customizations] via (docker root = project root): +# docker build -t absaoss/eventgate:latest \ +# --build-arg TRUSTED_SSL_CERTS=./myTrustedCertsStorage \ +# --build-arg SASL_SSL_ARTIFACTS=./mySaslSslCredentials \ +# . +# +# run locally via: +# docker run --platform=linux/amd64 -p 9000:8080 absaoss/eventgate:latest & +# +# test via (provide payload): +# curl http://localhost:9000/2015-03-31/functions/function/invocations -d "{payload}" +# +# Deploy to AWS Lambda via ACR + +FROM public.ecr.aws/lambda/python:3.11 + +# Directory with TRUSTED certs in PEM format +ARG TRUSTED_SSL_CERTS=./trusted_certs +# Artifacts for kerberized sasl_ssl +ARG SASL_SSL_ARTIFACTS=./sasl_ssl_artifacts + +# Trusted certs +COPY $TRUSTED_SSL_CERTS /opt/certs/ + +RUN \ + echo "######################################################" && \ + echo "### Import trusted certs before doing anything else ###" && \ + echo "######################################################" && \ + for FILE in `ls /opt/certs/*.pem`; \ + do cat $FILE >> /etc/pki/tls/certs/ca-bundle.crt ; done && \ + echo "###############################################" && \ + echo "### Install ###" && \ + echo "### -> Basics ###" && \ + echo "### -> GCC (some makefiles require cmd which)###" && \ + echo "### -> dependencies for kerberos SASL_SSL ###" && \ + echo "##############################################" && \ + yum install -y \ + wget tar xz bzip2-devel zlib-devel \ + which make gcc gcc-c++ \ + libffi-devel cyrus-sasl-devel cyrus-sasl-gssapi openssl-devel krb5-workstation && \ + echo "#################" && \ + echo "### librdkafka ###" && \ + echo "#################" && \ + mkdir -p /tmp/env-install-workdir/librdkafka && \ + cd /tmp/env-install-workdir/librdkafka && \ + wget 
https://github.com/edenhill/librdkafka/archive/v2.4.0.tar.gz && \ + tar -xf v2.4.0.tar.gz && \ + cd /tmp/env-install-workdir/librdkafka/librdkafka-2.4.0 && \ + ./configure && make && make install && \ + echo "######################" && \ + echo "### confluent-kafka ###" && \ + echo "######################" && \ + mkdir -p /tmp/env-install-workdir/confluent-kafka && \ + cd /tmp/env-install-workdir/confluent-kafka && \ + wget https://github.com/confluentinc/confluent-kafka-python/archive/v2.4.0.tar.gz && \ + tar -xf v2.4.0.tar.gz && \ + cd /tmp/env-install-workdir/confluent-kafka/confluent-kafka-python-2.4.0 && \ + CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/opt" python setup.py install && \ + echo "##############" && \ + echo "### cleanup ###" && \ + echo "##############" && \ + cd /root && \ + rm -rf /tmp/env-install-workdir && \ + echo "###################" && \ + echo "### pip installs ###" && \ + echo "###################" && \ + pip install requests==2.31.0 urllib3==1.26.18 cryptography jsonschema PyJWT + +# Lambda and SASL_SSL_Artifacts +COPY $SASL_SSL_ARTIFACTS /opt/sasl_ssl_artifacts/ +COPY src/event_gate_lambda.py $LAMBDA_TASK_ROOT +COPY conf $LAMBDA_TASK_ROOT/conf + +# Mark librdkafka to LD_LIBRARY_PATH +# Kerberos default CCACHE override due to KEYRING issues +ENV \ + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib \ + KRB5CCNAME=FILE:/tmp/krb5cc + +# Set lambda entry point as CMD +CMD ["event_gate_lambda.lambda_handler"] diff --git a/README.md b/README.md index b0be3ac..0210d9f 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ assumes AWS Deployment with API Gateway exposure of endpoint ## Lambda itself -Hearth of the solution lies in the Src folder +Hearth of the solution lives in the Src folder ## API POST 🔒 method is guarded by JWT token in standard header "bearer" @@ -29,33 +29,73 @@ POST 🔒 method is guarded by JWT token in standard header "bearer" ## Config There are 3 configs for this solution (in conf folder) -- config.json - - this one 
needs to live in the conf folder - - defines where are other resources/configs -- access.json - - this one could be local or in AWS S3 - - defines who has access to post to individual topics -- topics.json - - this one could be local or in AWS S3 - - defines schema of the topics, as well as enumerates those - + - config.json + - this one needs to live in the conf folder + - defines where are other resources/configs + - for SASL_SSL also points to required secrets + - access.json + - this one could be local or in AWS S3 + - defines who has access to post to individual topics + - topics.json + - this one could be local or in AWS S3 + - defines schema of the topics, as well as enumerates those ## Terraform Deplyoment + Whole solution expects to be deployed as lambda in AWS, there are prepared terraform scripts to make initial deplyoment, and can be found in "terraform" folder -Resulting lambda_function zip file needs to be uploaded to aws s3 bucket (since direct upload of zip likes to fail, might be related to poor network though) +### Zip lambda + +Designated for use without authentication towards kafka -All that is needed afterwards is supplementing variables for - - aws_region - - vpc_id - - vpc_endpoint - - resource prefix - all terraform resources would be prefixed my this prefix, usefull when mixed-in with something else - - lambda_source_bucket - the bucket where "lambda_function.zip" is already uploaded - - lambda_role_arn - the role for the lambda, should be able to make HTTP calls to wherever kafka server lives - - lambda_vpc_subnet_ids + - create **zip** archive `scripts/prepare.deplyoment.sh` + - upload **zip** to **S3** + - provide terraform variables with tfvars + - `aws_region` + - `vpc_id` + - `vpc_endpoint` + - `resource prefix` + - all terraform resources would be prefixed by this + - `lambda_role_arn ` + - the role for the lambda + - should be able to make HTTP calls to wherever kafka server lives + - `lambda_vpc_subnet_ids`
+ - `lambda_package_type` + - `Zip` + - `lambda_src_s3_bucket ` + - the bucket where **zip** is already uploaded + - `lambda_src_s3_key` + - name of already uploaded **zip** + - `lambda_src_ecr_image` + - ignored + - `terraform apply` -Once tfvars are supplied, go terraform apply and you are done +### Containerized lambda + +Designated for use with kerberized SASL_SSL authentication towards kafka, as it requires custom librdkafka compilation + + - build docker (**[follow comments at the top of Dockerfile](./Dockerfile)**) + - upload docker **image** to **ECR** + - provide terraform variables with tfvars + - `aws_region` + - `vpc_id` + - `vpc_endpoint` + - `resource prefix` + - all terraform resources would be prefixed by this + - `lambda_role_arn ` + - the role for the lambda + - should be able to make HTTP calls to wherever kafka server lives + - `lambda_vpc_subnet_ids` + - `lambda_package_type` + - `Image` + - `lambda_src_s3_bucket ` + - ignored + - `lambda_src_s3_key` + - ignored + - `lambda_src_ecr_image` + - already uploaded **image** in **ECR** + - `terraform apply` ## Scripts Useful scripts for dev and Deployment @@ -65,5 +105,5 @@ Jupyter notebook, with one cell for lambda initialization and one cell per metho Obviously using it requires correct configs to be in place (PUBLIC key is being loaded during initilization) ### Preapare Deployment -shell script for fetching pithon requirements and ziping it together with sources and config into lambda archive -it needs to be uploaded to s3 bucket first before running the terraform +Shell script for fetching python requirements and zipping it together with sources and config into lambda archive +it needs to be uploaded to s3 bucket first before running the terraform.
diff --git a/conf/config.json b/conf/config.json index 06d37b8..6dbc4ef 100644 --- a/conf/config.json +++ b/conf/config.json @@ -1,7 +1,7 @@ { - "accessConfig": "s3:///access.json", - "topicsConfig": "s3:///topics.json", - "tokenProviderUrl": "https://", - "tokenPublicKeyUrl": "https://", - "kafkaBootstrapServer": "localhost:9092" + "access_config": "s3:///access.json", + "topics_config": "s3:///topics.json", + "token_provider_url": "https://", + "token_public_key_url": "https://", + "kafka_bootstrap_server": "localhost:9092" } \ No newline at end of file diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index 1cf876c..fc43dee 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -42,32 +42,47 @@ aws_session = boto3.Session() aws_s3 = aws_session.resource('s3', verify=False) -if CONFIG["topicsConfig"].startswith("s3://"): - name_parts = CONFIG["topicsConfig"].split('/') +if CONFIG["topics_config"].startswith("s3://"): + name_parts = CONFIG["topics_config"].split('/') bucket_name = name_parts[2] bucket_object = "/".join(name_parts[3:]) TOPICS = json.loads(aws_s3.Bucket(bucket_name).Object(bucket_object).get()["Body"].read().decode("utf-8")) else: - with open(CONFIG["topicsConfig"], "r") as file: + with open(CONFIG["topics_config"], "r") as file: TOPICS = json.load(file) -if CONFIG["accessConfig"].startswith("s3://"): - name_parts = CONFIG["accessConfig"].split('/') +if CONFIG["access_config"].startswith("s3://"): + name_parts = CONFIG["access_config"].split('/') bucket_name = name_parts[2] bucket_object = "/".join(name_parts[3:]) ACCESS = json.loads(aws_s3.Bucket(bucket_name).Object(bucket_object).get()["Body"].read().decode("utf-8")) else: - with open(CONFIG["accessConfig"], "r") as file: + with open(CONFIG["access_config"], "r") as file: ACCESS = json.load(file) -TOKEN_PROVIDER_URL = CONFIG["tokenProviderUrl"] +TOKEN_PROVIDER_URL = CONFIG["token_provider_url"] logger.info("Loaded configs") -token_public_key_encoded = 
requests.get(CONFIG["tokenPublicKeyUrl"], verify=False).json()["key"] +token_public_key_encoded = requests.get(CONFIG["token_public_key_url"], verify=False).json()["key"] TOKEN_PUBLIC_KEY = serialization.load_der_public_key(base64.b64decode(token_public_key_encoded)) logger.info("Loaded token public key") -kafka_producer = Producer({"bootstrap.servers": CONFIG["kafkaBootstrapServer"]}) +producer_config = {"bootstrap.servers": CONFIG["kafka_bootstrap_server"]} +if "kafka_sasl_kerberos_principal" in CONFIG and "kafka_ssl_key_path" in CONFIG: + producer_config.update({ + "security.protocol": "SASL_SSL", + "sasl.mechanism": "GSSAPI", + "sasl.kerberos.service.name": "kafka", + "sasl.kerberos.keytab": CONFIG["kafka_sasl_kerberos_keytab_path"], + "sasl.kerberos.principal": CONFIG["kafka_sasl_kerberos_principal"], + "ssl.ca.location": CONFIG["kafka_ssl_ca_path"], + "ssl.certificate.location": CONFIG["kafka_ssl_cert_path"], + "ssl.key.location": CONFIG["kafka_ssl_key_path"], + "ssl.key.password": CONFIG["kafka_ssl_key_password"] + }) + logger.info("producer will use SASL_SSL") + +kafka_producer = Producer(producer_config) logger.info("Initialized kafka producer") def kafkaWrite(topicName, message): diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 2cc94b5..adb2b58 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -12,12 +12,19 @@ resource "aws_vpc_security_group_egress_rule" "allow_all_traffic_ipv4" { } resource "aws_lambda_function" "event_gate_lambda" { - s3_bucket = var.lambda_source_bucket - s3_key = "lambda_function.zip" function_name = "${var.resource_prefix}event-gate-lambda" role = var.lambda_role_arn - handler = "event_gate_lambda.lambda_handler" - runtime = "python3.12" + architectures = ["x86_64"] + runtime = "python3.11" + package_type = var.lambda_package_type + + s3_bucket = var.lambda_package_type == "Zip" ? var.lambda_src_s3_bucket : null + s3_key = var.lambda_package_type == "Zip" ? 
var.lambda_src_s3_key : null + handler = var.lambda_package_type == "Zip" ? "event_gate_lambda.lambda_handler" : null + source_code_hash = var.lambda_package_type == "Zip" ? filebase64sha256("s3://${var.lambda_src_s3_bucket}/${var.lambda_src_s3_key}") : null + + image_uri = var.lambda_package_type == "Image" ? var.lambda_src_ecr_image : null + vpc_config { subnet_ids = var.lambda_vpc_subnet_ids security_group_ids = [aws_security_group.event_gate_sg.id] diff --git a/terraform/variables.tf b/terraform/variables.tf index c749624..495eb7a 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -4,4 +4,7 @@ variable "vpc_endpoint" {} variable "resource_prefix" {} variable "lambda_role_arn" {} variable "lambda_vpc_subnet_ids" {} -variable "lambda_source_bucket" {} +variable "lambda_package_type" {} +variable "lambda_src_s3_bucket" {} +variable "lambda_src_s3_key" {} +variable "lambda_src_ecr_image" {}