AWS Glue를 이용한 RDS → S3 데이터 마이그레이션 Python 코드 예시
2020-09-04
.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql.functions import *
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.types import *
# Glue job script: copy one PostgreSQL table to S3 as parquet.
#
# Steps:
#   1. Obtain (or reuse) the SparkContext and wrap it in a GlueContext.
#   2. Read the source table over JDBC into a DynamicFrame.
#   3. Write that DynamicFrame to an S3 prefix in parquet format.

sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)

# JDBC settings for the source table.
# The bracketed / x-ed out values look like placeholders to be replaced
# before running -- TODO confirm against the target environment.
# NOTE(review): credentials are inlined here; consider supplying them from
# outside the script (e.g. job arguments or a secret store).
rds_connection_options = {
    "url": "jdbc:postgresql://XXXXXXXXX.XXXXXXXXXXXX.ap-northeast-2.rds.amazonaws.com:6001/[DataBaseName]",
    "dbtable": "store",
    "user": "xxxx",
    "password": "yyyyyyy",
}

# Destination prefix in S3 for the exported files.
s3_sink_options = {"path": "s3://[bucketname]/[prefix]/"}

# Load the "store" table into a DynamicFrame.
datasource = glueContext.create_dynamic_frame.from_options(
    connection_type="postgresql",
    connection_options=rds_connection_options,
)

# Persist the frame to S3 as parquet.
glueContext.write_dynamic_frame.from_options(
    frame=datasource,
    connection_type="s3",
    connection_options=s3_sink_options,
    format="parquet",
)