# Write data from Cosmos DB (Cassandra API) to PostgreSQL with Databricks
# --- Connection-related settings for the Cosmos DB Cassandra API endpoint ---
# SECURITY NOTE(review): the account key below is committed in plain text.
# Rotate this key and load it from a secret store (e.g. a Databricks secret
# scope via dbutils.secrets.get) instead of hard-coding it here.
import os

host = "cosmosdbiotdrone.cassandra.cosmos.azure.com"
port = 10350  # Cosmos DB Cassandra API always listens on 10350
ssl_enabled = True  # Cosmos DB requires TLS on all connections
username = "cosmosdbiotdrone"  # Cassandra API username == Cosmos account name
# Environment-variable override; the (insecure) literal stays as the fallback
# so existing notebook runs behave exactly as before.
password = os.environ.get(
    "COSMOS_CASSANDRA_KEY",
    "XrSAyYKEhnmTFidHgKLCNhhluZ45dDYe1lCXwROGcDUDK1y9IykZbctenifJB0oB8bvUVPUrdAvPACDbUgC5Kg==",
)

# Throughput-related...adjust as needed
batch_size = 1  # rows per write batch; small batches avoid Cosmos DB batch-size limits
remote_connections_per_executor = 10
concurrent_writes = 1000
concurrent_reads = 512
grouping_buffer_size = 1000
keep_alive_ms = 600000000  # ~166 h — keep connections open across long jobs
# Usage example: push all connector settings into the active Spark session.
# Every value is stringified because spark.conf.set expects string values.
_cassandra_settings = {
    "spark.cassandra.connection.host": host,
    "spark.cassandra.connection.port": port,
    "spark.cassandra.connection.ssl.enabled": ssl_enabled,
    "spark.cassandra.auth.username": username,
    "spark.cassandra.auth.password": password,
    "spark.cassandra.output.batch.size.rows": batch_size,
    "spark.cassandra.connection.remoteConnectionsPerExecutor": remote_connections_per_executor,
    "spark.cassandra.output.concurrent.writes": concurrent_writes,
    "spark.cassandra.concurrent.reads": concurrent_reads,
    "spark.cassandra.output.batch.grouping.buffer.size": grouping_buffer_size,
    "spark.cassandra.connection.keep_alive_ms": keep_alive_ms,
}
for _key, _value in _cassandra_settings.items():
    spark.conf.set(_key, str(_value))
# Source table coordinates inside the Cosmos DB Cassandra account.
cfg = {"table": "dronetable", "keyspace": "dronedb"}

# Spark connector: read the whole table into a DataFrame.
reader = spark.read.format("org.apache.spark.sql.cassandra")
df = reader.options(**cfg).load()
# --- Target: Azure Database for PostgreSQL over JDBC ---
jdbcHostname = "db-training-chris2.postgres.database.azure.com"
jdbcPort = "5432"
jdbcDatabase = "drone"

# SECURITY NOTE(review): the database password below is committed in plain
# text. Rotate it and load it from a secret store; the environment-variable
# override keeps the old behaviour whenever the variable is unset.
import os

properties = {
    "user": "dbadmin@db-training-chris2",  # Azure PG requires user@servername
    "password": os.environ.get("POSTGRES_PASSWORD", "freelance!123"),
    "driver": "org.postgresql.Driver",
}
url = "jdbc:postgresql://{0}:{1}/{2}".format(jdbcHostname, jdbcPort, jdbcDatabase)
# Write the DataFrame to the "drone" table over JDBC.
# NOTE(review): mode="overwrite" drops and recreates the target table on
# every run, discarding existing rows and indexes — confirm this is intended
# (use mode="append" to preserve existing data).
df.write \
.jdbc(url=url, table="drone", mode="overwrite", properties=properties)
# Last updated