AWS Glue를 이용한 RDS to S3 데이터 마이그레이션 Python 코드 예시

2020-09-04

.

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql.functions import *
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.types import *

# Obtain the SparkContext (getOrCreate reuses an existing one if present)
# and wrap it in a GlueContext, which the read/write calls below go through.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)


# Source: load the "store" table from the PostgreSQL database on RDS over JDBC.
# The placeholders (host, [DataBaseName], user, password) must be replaced
# with real values before the job is run.
# NOTE(review): the credentials are embedded in plain text in the script;
# consider passing them in as job parameters or via a Glue connection.
rds_connection_options = {
    "url": "jdbc:postgresql://XXXXXXXXX.XXXXXXXXXXXX.ap-northeast-2.rds.amazonaws.com:6001/[DataBaseName]",
    "dbtable": "store",
    "user": "xxxx",
    "password": "yyyyyyy",
}
datasource = glueContext.create_dynamic_frame.from_options(
    connection_type="postgresql",
    connection_options=rds_connection_options,
)


# Sink: write the `datasource` frame to S3 in Parquet format under the
# given bucket/prefix (placeholders must be replaced with real values).
s3_target_options = {"path": "s3://[bucketname]/[prefix]/"}
glueContext.write_dynamic_frame.from_options(
    frame=datasource,
    connection_type="s3",
    connection_options=s3_target_options,
    format="parquet",
)