I was recently asked to estimate the effort involved in implementing an outbound file copy interface between an on-premises proprietary application and public cloud storage.
A number of items needed clarification first: the nature of the connectivity between the on-premises environment and the cloud, security and encryption requirements, the expected frequency of transfers, the anticipated file sizes, and so on. To get a feel for the moving parts, I put together a small proof of concept consisting of the following steps:
- deploy a RHEL compute instance on Google Cloud Platform (GCP)
o run latest OS updates
o obtain and install the AWS Command Line Interface binaries
o supply credentials needed to connect to a specific AWS account (the credentials file format is shown below)
o install pip, Python 3.6 and boto3
- deploy a Python script to transfer files onto S3
- create a new custom S3 bucket on AWS
- run the script on the GCP instance to pick up the contents of a file from /var/log and transfer it into the specified S3 bucket
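The AWS credentials referenced above are supplied as a standard shared credentials file, which ends up under ~/.aws/ on the instance later on; a minimal version looks like this, with the actual keys redacted:

[default]
aws_access_key_id = AKIA...
aws_secret_access_key = ...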
provider "aws" {
region = var.aws-region
access_key = file("../Credentials/AWS/access_key.txt")
secret_key = file("../Credentials/AWS/secret_key.txt")
}
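# Note: hard-coding keys read from local files is fine for a quick test, but the
# AWS provider can also pick up credentials from the AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY environment variables or from ~/.aws/credentials,
# which keeps secrets out of the Terraform configuration.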
provider "google" {
credentials = file("../Credentials/GCP/redhat-295420-769720b20d99.json")
project = "redhat-295420"
region = var.gcp-region
}
# Terraform plugin for creating random ids
resource "random_id" "instance_id" {
  byte_length = 8
}

# A single Compute Engine instance
resource "google_compute_instance" "default" {
  name         = "rhel-vm-${random_id.instance_id.hex}"
  machine_type = "f1-micro"
  zone         = "us-west1-a"

  boot_disk {
    initialize_params {
      image = "rhel-7-v20201112"
    }
  }
resource "random_id" "instance_id" {
byte_length = 8
}
# A single Compute Engine instance
resource "google_compute_instance" "default" {
name = "rhel-vm-${random_id.instance_id.hex}"
machine_type = "f1-micro"
zone = "us-west1-a"
boot_disk {
initialize_params {
image = "rhel-7-v20201112"
}
}
  # Apply the latest OS updates on new instances
  metadata_startup_script = "yum -y update"

  network_interface {
    network = "default"

    access_config {
      // Include this section to give the VM an external IP address
    }
  }
  # Copy over the script that installs the prerequisites
  provisioner "file" {
    source      = "bootstrap.sh"
    destination = "/tmp/bootstrap.sh"
  }

  # Copy over the Python script that performs the S3 transfer
  provisioner "file" {
    source      = "upload_file.py"
    destination = "/tmp/upload_file.py"
  }

  # Copy over the AWS credentials file
  provisioner "file" {
    source      = "credentials"
    destination = "/tmp/credentials"
  }

  # Run the bootstrap script, first stripping any carriage returns
  # that a Windows editor might have added
  provisioner "remote-exec" {
    inline = [
      "chmod +x /tmp/upload_file.py",
      "tr -d '\r' < /tmp/bootstrap.sh > /tmp/scriptClean.sh",
      "chmod +x /tmp/scriptClean.sh",
      "sh /tmp/scriptClean.sh ${var.log_file_name} ${var.bucket_name}",
    ]
  }
  connection {
    type        = "ssh"
    user        = var.gcp_user_name
    private_key = file("../Credentials/GCP/legacy_key")
    # use self to refer to this instance's own attributes inside its block
    host        = self.network_interface.0.access_config.0.nat_ip
  }
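  # Hypothetical addition (not part of the original config): the file and
  # remote-exec provisioners can only connect over SSH if the matching public
  # key is present on the instance; one common way is to push it via instance
  # metadata, assuming a legacy_key.pub exists alongside the private key.
  metadata = {
    ssh-keys = "${var.gcp_user_name}:${file("../Credentials/GCP/legacy_key.pub")}"
  }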
}
# Reserve a public IP address for the compute instance to use
resource "google_compute_address" "static" {
  name = "vm-public-address"
}

# An output exposing the external IP address of the instance
output "ip" {
  value = google_compute_instance.default.network_interface.0.access_config.0.nat_ip
}
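The configuration above also references several input variables that are not shown in the listing; a minimal variables.tf along the lines below is needed for it to run. The variable names come from the references above, while the defaults and descriptions are only illustrative assumptions:

# variables.tf (sketch; defaults are illustrative assumptions)
variable "aws-region" {
  default = "us-west-2"
}

variable "gcp-region" {
  default = "us-west1"
}

variable "bucket_name" {
  description = "Name of the target S3 bucket"
}

variable "log_file_name" {
  description = "Name of the file under /var/log to transfer"
}

variable "gcp_user_name" {
  description = "SSH user on the GCP instance"
}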
awsS3.tf
resource "aws_s3_bucket" "import_file_store" {
bucket = var.bucket_name
acl = "private"
tags = {
Name = "Source"
Environment = "GCP"
}
}
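With the Terraform files, the two scripts and the credential files in place, the standard Terraform workflow drives the whole test end to end; the apply step creates the S3 bucket and the GCP instance, runs the provisioners, and prints the instance's external IP from the output block:

terraform init    # download the aws, google and random providers
terraform plan    # review the resources to be created
terraform apply   # create the bucket and VM, then run the provisioners and the transfer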
upload_file.py
import logging
import boto3
import sys
from botocore.exceptions import ClientError
def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True


def main():
    file_path_name = sys.argv[1]
    bucket = sys.argv[2]
    object_name = file_path_name.split('/')[-1]
    if file_path_name != "" and bucket != "":
        upload_file(file_path_name, bucket, object_name)


if __name__ == "__main__":
    main()
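For reference, the script can also be invoked by hand with a file path and a bucket name; the values below are placeholders:

python3 upload_file.py /tmp/messages my-import-file-store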
bootstrap.sh
#!/bin/bash
# Put the AWS credentials file where boto3 and the AWS CLI expect it
mkdir $HOME/.aws
mv /tmp/credentials $HOME/.aws/.
export PATH="$HOME/.local/bin:$PATH"
# Install Python 3, pip, the AWS CLI v2 and boto3
sudo yum -y install python3
curl -O "https://bootstrap.pypa.io/get-pip.py"
python3 get-pip.py --user
sudo yum -y install unzip
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install
pip install boto3
# Copy the requested log file ($1) to a readable location and upload it to the bucket ($2)
sudo cat /var/log/$1 > /tmp/$1
python3 /tmp/upload_file.py "/tmp/$1" "$2"
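Once the apply completes, the transfer can be verified with the AWS CLI from any machine that has the same credentials; the bucket name below is a placeholder:

aws s3 ls s3://my-import-file-store/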
See more here: https://github.com/aleks-rodionov/terraform
The upload script was based on the AWS boto3 example: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
A more comprehensive transfer example featuring multi-part upload, a callback routine and more is available here: https://docs.aws.amazon.com/code-samples/latest/catalog/code-catalog-python-example_code-s3.html