public class InsertIntoHiveTable
extends org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
implements scala.Product, scala.Serializable
This class is mostly a mess, for legacy reasons (since it evolved in organic ways and had to follow Hive's internal implementations closely, which were themselves a mess too). Please don't blame Reynold for this! He was just moving code around!

In the future we should converge the write path for Hive with the normal data source write path, as defined in org.apache.spark.sql.execution.datasources.FileFormatWriter.
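A minimal sketch, assuming a Hive-enabled session and an existing Hive-serde table named db.tbl (both placeholders): an INSERT against such a table is typically planned through this node, which can then be located in the plan with the inherited collect method. The exact plan shape varies by Spark version, so treat this as an illustration rather than a guarantee.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.execution.InsertIntoHiveTable

val spark = SparkSession.builder()
  .appName("inspect-insert-into-hive-table")
  .enableHiveSupport()
  .getOrCreate()

// Commands run eagerly, but the query execution remains available for inspection.
val qe = spark.sql("INSERT INTO TABLE db.tbl SELECT * FROM src").queryExecution

// Depending on the Spark version and enabled conversion rules, the node shows up
// in the analyzed (or optimized) logical plan once the Hive analysis rules have run.
qe.analyzed.collect { case i: InsertIntoHiveTable => i }.foreach { i =>
  println(s"table=${i.table.identifier} partition=${i.partition} overwrite=${i.overwrite}")
}
```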
param: table the metadata of the table.
param: partition a map from each partition key to its partition value (optional). If a partition value is None, a dynamic partition insert will be performed for that key. For example, INSERT INTO tbl PARTITION (a=1, b=2) AS ... would have Map('a' -> Some('1'), 'b' -> Some('2')), and INSERT INTO tbl PARTITION (a=1, b) AS ... would have Map('a' -> Some('1'), 'b' -> None) (see the partition-spec sketch below the constructor summary).
param: query the logical plan representing the data to write.
param: overwrite whether to overwrite the existing table or partitions.
param: ifPartitionNotExists if true, only write if the partition does not exist.
Only valid for static partitions.

Constructor and Description |
---|
InsertIntoHiveTable(org.apache.spark.sql.catalyst.catalog.CatalogTable table, scala.collection.immutable.Map<String,scala.Option<String>> partition, org.apache.spark.sql.catalyst.plans.logical.LogicalPlan query, boolean overwrite, boolean ifPartitionNotExists, scala.collection.Seq<String> outputColumnNames) |
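The partition-spec examples from the class description map onto plain Scala values as follows. This is a sketch with placeholder values only; the table and query arguments are omitted because they require a full CatalogTable and a child plan.

```scala
// Partition specs exactly as this node receives them: partition key -> optional value.

// INSERT INTO tbl PARTITION (a=1, b=2) ...  -> fully static spec
val staticSpec: Map[String, Option[String]] =
  Map("a" -> Some("1"), "b" -> Some("2"))

// INSERT INTO tbl PARTITION (a=1, b) ...    -> `b` is dynamic, so its value is None
val mixedSpec: Map[String, Option[String]] =
  Map("a" -> Some("1"), "b" -> None)

// Remaining constructor flags:
//   overwrite            -> true for INSERT OVERWRITE, false for INSERT INTO
//   ifPartitionNotExists -> true for INSERT OVERWRITE ... IF NOT EXISTS, which is
//                           only allowed with a fully static partition spec
```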
Modifier and Type | Method and Description
---|---
static org.apache.spark.sql.catalyst.expressions.AttributeSeq | allAttributes()
static org.apache.spark.sql.catalyst.trees.TreeNode<?> | apply(int number)
static String | argString()
static String | asCode()
static org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker | basicWriteJobStatsTracker(org.apache.hadoop.conf.Configuration hadoopConf)
abstract static boolean | canEqual(Object that)
static PlanType | canonicalized()
static scala.collection.Seq<org.apache.spark.sql.catalyst.plans.logical.LogicalPlan> | children()
static boolean | childrenResolved()
static <B> scala.collection.Seq<B> | collect(scala.PartialFunction<BaseType,B> pf)
static <B> scala.Option<B> | collectFirst(scala.PartialFunction<BaseType,B> pf)
static scala.collection.Seq<BaseType> | collectLeaves()
static org.apache.spark.sql.internal.SQLConf | conf()
static org.apache.spark.sql.catalyst.expressions.ExpressionSet | constraints()
static scala.collection.immutable.Set<org.apache.spark.sql.catalyst.trees.TreeNode<?>> | containsChild()
static void | createdTempDir_$eq(scala.Option<org.apache.hadoop.fs.Path> x$1)
static scala.Option<org.apache.hadoop.fs.Path> | createdTempDir()
scala.Option<org.apache.hadoop.fs.Path> | createdTempDir()
void | deleteExternalTmpPath(org.apache.hadoop.conf.Configuration hadoopConf)
abstract static boolean | equals(Object that)
String | executionId()
static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Expression> | expressions()
static boolean | fastEquals(org.apache.spark.sql.catalyst.trees.TreeNode<?> other)
static scala.Option<BaseType> | find(scala.Function1<BaseType,Object> f)
static <A> scala.collection.Seq<A> | flatMap(scala.Function1<BaseType,scala.collection.TraversableOnce<A>> f)
static void | foreach(scala.Function1<BaseType,scala.runtime.BoxedUnit> f)
static void | foreachUp(scala.Function1<BaseType,scala.runtime.BoxedUnit> f)
static scala.collection.mutable.StringBuilder | generateTreeString(int depth, scala.collection.Seq<Object> lastChildren, scala.collection.mutable.StringBuilder builder, boolean verbose, String prefix, boolean addSuffix)
static String | generateTreeString$default$5()
static boolean | generateTreeString$default$6()
org.apache.hadoop.fs.Path | getExternalScratchDir(java.net.URI extURI, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
org.apache.hadoop.fs.Path | getExternalTmpPath(SparkSession sparkSession, org.apache.hadoop.conf.Configuration hadoopConf, org.apache.hadoop.fs.Path path)
org.apache.hadoop.fs.Path | getExtTmpPathRelTo(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
org.apache.hadoop.fs.Path | getStagingDir(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
static int | hashCode()
boolean | ifPartitionNotExists()
static org.apache.spark.sql.catalyst.expressions.AttributeSet | inputSet()
static void | invalidateStatsCache()
static boolean | isStreaming()
static BaseType | makeCopy(Object[] newArgs)
static <A> scala.collection.Seq<A> | map(scala.Function1<BaseType,A> f)
static BaseType | mapChildren(scala.Function1<BaseType,BaseType> f)
static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> | mapExpressions(scala.Function1<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> f)
static scala.Option<Object> | maxRows()
static scala.Option<Object> | maxRowsPerPartition()
static scala.collection.immutable.Map<String,org.apache.spark.sql.execution.metric.SQLMetric> | metrics()
static org.apache.spark.sql.catalyst.expressions.AttributeSet | missingInput()
org.apache.hadoop.fs.Path | newVersionExternalTempPath(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
static String | nodeName()
static String | numberedTreeString()
org.apache.hadoop.fs.Path | oldVersionExternalTempPath(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String scratchDir)
static org.apache.spark.sql.catalyst.trees.Origin | origin()
static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> | output()
scala.collection.Seq<String> | outputColumnNames()
static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> | outputColumns()
static org.apache.spark.sql.catalyst.expressions.AttributeSet | outputSet()
boolean | overwrite()
static BaseType | p(int number)
scala.collection.immutable.Map<String,scala.Option<String>> | partition()
static String | prettyJson()
static void | printSchema()
static org.apache.spark.sql.catalyst.expressions.AttributeSet | producedAttributes()
abstract static int | productArity()
abstract static Object | productElement(int n)
static scala.collection.Iterator<Object> | productIterator()
static String | productPrefix()
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | query()
static org.apache.spark.sql.catalyst.expressions.AttributeSet | references()
static void | refresh()
static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> | resolve(scala.collection.Seq<String> nameParts, scala.Function2<String,String,Object> resolver)
static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> | resolve(StructType schema, scala.Function2<String,String,Object> resolver)
static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> | resolveChildren(scala.collection.Seq<String> nameParts, scala.Function2<String,String,Object> resolver)
static boolean | resolved()
static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> | resolveQuoted(String name, scala.Function2<String,String,Object> resolver)
scala.collection.Seq<Row> | run(SparkSession sparkSession, org.apache.spark.sql.execution.SparkPlan child) Inserts all the rows in the table into Hive.
static boolean | sameResult(PlanType other)
scala.collection.immutable.Set<String> | saveAsHiveFile(SparkSession sparkSession, org.apache.spark.sql.execution.SparkPlan plan, org.apache.hadoop.conf.Configuration hadoopConf, org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc fileSinkConf, String outputLocation, scala.collection.immutable.Map<scala.collection.immutable.Map<String,String>,String> customPartitionLocations, scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> partitionAttributes)
static StructType | schema()
static String | schemaString()
static int | semanticHash()
static String | simpleString()
static org.apache.spark.sql.catalyst.plans.logical.Statistics | stats()
static scala.collection.Seq<PlanType> | subqueries()
org.apache.spark.sql.catalyst.catalog.CatalogTable | table()
static String | toJSON()
static String | toString()
static BaseType | transform(scala.PartialFunction<BaseType,BaseType> rule)
static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> | transformAllExpressions(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
static BaseType | transformDown(scala.PartialFunction<BaseType,BaseType> rule)
static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> | transformExpressions(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> | transformExpressionsDown(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> | transformExpressionsUp(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
static BaseType | transformUp(scala.PartialFunction<BaseType,BaseType> rule)
static String | treeString()
static String | treeString(boolean verbose, boolean addSuffix)
static boolean | treeString$default$2()
static String | verboseString()
static String | verboseStringWithSuffix()
static BaseType | withNewChildren(scala.collection.Seq<BaseType> newChildren)
childrenResolved, constraints, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, invalidateStatsCache, isStreaming, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, maxRows, maxRowsPerPartition, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$internal$Logging$$log_, org$apache$spark$sql$catalyst$plans$logical$LogicalPlan$$name$1, org$apache$spark$sql$catalyst$plans$logical$LogicalPlan$$resolveAsColumn, org$apache$spark$sql$catalyst$plans$logical$LogicalPlan$$resolveAsTableColumn, refresh, resolve, resolve, resolve, resolveChildren, resolved, resolveQuoted, statePrefix, stats, statsCache_$eq, statsCache, validConstraints, verboseStringWithSuffix
allAttributes, canEvaluate, canEvaluateWithinJoin, canonicalized, conf, doCanonicalize, expressions, innerChildren, inputSet, isCanonicalizedPlan, mapExpressions, missingInput, normalizeExprId, normalizePredicates, org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1, org$apache$spark$sql$catalyst$plans$QueryPlan$$seqToExpressions$1, output, outputSet, printSchema, producedAttributes, references, replaceAlias, sameResult, schema, schemaString, semanticHash, simpleString, splitConjunctivePredicates, splitDisjunctivePredicates, subqueries, transformAllExpressions, transformExpressions, transformExpressionsDown, transformExpressionsUp, verboseString
apply, argString, asCode, children, collect, collectFirst, collectLeaves, containsChild, fastEquals, find, flatMap, foreach, foreachUp, generateTreeString, generateTreeString$default$5, generateTreeString$default$6, hashCode, jsonFields, makeCopy, map, mapChildren, mapProductIterator, nodeName, numberedTreeString, org$apache$spark$sql$catalyst$trees$TreeNode$$allChildren, org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1, org$apache$spark$sql$catalyst$trees$TreeNode$$getNodeNumbered, org$apache$spark$sql$catalyst$trees$TreeNode$$parseToJson, origin, otherCopyArgs, p, prettyJson, productIterator, productPrefix, stringArgs, toJSON, toString, transform, transformDown, transformUp, treeString, treeString, treeString$default$2, withNewChildren
basicWriteJobStatsTracker, children, metrics, outputColumns
productArity, productElement, productIterator, productPrefix
initializeLogging, log_
public InsertIntoHiveTable(org.apache.spark.sql.catalyst.catalog.CatalogTable table, scala.collection.immutable.Map<String,scala.Option<String>> partition, org.apache.spark.sql.catalyst.plans.logical.LogicalPlan query, boolean overwrite, boolean ifPartitionNotExists, scala.collection.Seq<String> outputColumnNames)
public abstract static boolean canEqual(Object that)
public abstract static boolean equals(Object that)
public abstract static Object productElement(int n)
public abstract static int productArity()
public static scala.collection.Iterator<Object> productIterator()
public static String productPrefix()
public static org.apache.spark.sql.catalyst.trees.Origin origin()
public static scala.collection.immutable.Set<org.apache.spark.sql.catalyst.trees.TreeNode<?>> containsChild()
public static int hashCode()
public static boolean fastEquals(org.apache.spark.sql.catalyst.trees.TreeNode<?> other)
public static scala.Option<BaseType> find(scala.Function1<BaseType,Object> f)
public static void foreach(scala.Function1<BaseType,scala.runtime.BoxedUnit> f)
public static void foreachUp(scala.Function1<BaseType,scala.runtime.BoxedUnit> f)
public static <A> scala.collection.Seq<A> map(scala.Function1<BaseType,A> f)
public static <A> scala.collection.Seq<A> flatMap(scala.Function1<BaseType,scala.collection.TraversableOnce<A>> f)
public static <B> scala.collection.Seq<B> collect(scala.PartialFunction<BaseType,B> pf)
public static scala.collection.Seq<BaseType> collectLeaves()
public static <B> scala.Option<B> collectFirst(scala.PartialFunction<BaseType,B> pf)
public static BaseType withNewChildren(scala.collection.Seq<BaseType> newChildren)
public static BaseType transform(scala.PartialFunction<BaseType,BaseType> rule)
public static BaseType transformDown(scala.PartialFunction<BaseType,BaseType> rule)
public static BaseType transformUp(scala.PartialFunction<BaseType,BaseType> rule)
public static BaseType mapChildren(scala.Function1<BaseType,BaseType> f)
public static BaseType makeCopy(Object[] newArgs)
public static String nodeName()
public static String argString()
public static String toString()
public static String treeString()
public static String treeString(boolean verbose, boolean addSuffix)
public static String numberedTreeString()
public static org.apache.spark.sql.catalyst.trees.TreeNode<?> apply(int number)
public static BaseType p(int number)
public static scala.collection.mutable.StringBuilder generateTreeString(int depth, scala.collection.Seq<Object> lastChildren, scala.collection.mutable.StringBuilder builder, boolean verbose, String prefix, boolean addSuffix)
public static String asCode()
public static String toJSON()
public static String prettyJson()
public static boolean treeString$default$2()
public static String generateTreeString$default$5()
public static boolean generateTreeString$default$6()
public static org.apache.spark.sql.internal.SQLConf conf()
public static org.apache.spark.sql.catalyst.expressions.AttributeSet outputSet()
public static org.apache.spark.sql.catalyst.expressions.AttributeSet references()
public static org.apache.spark.sql.catalyst.expressions.AttributeSet inputSet()
public static org.apache.spark.sql.catalyst.expressions.AttributeSet producedAttributes()
public static org.apache.spark.sql.catalyst.expressions.AttributeSet missingInput()
public static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> transformExpressions(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
public static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> transformExpressionsDown(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
public static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> transformExpressionsUp(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
public static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> mapExpressions(scala.Function1<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> f)
public static org.apache.spark.sql.catalyst.plans.QueryPlan<PlanType> transformAllExpressions(scala.PartialFunction<org.apache.spark.sql.catalyst.expressions.Expression,org.apache.spark.sql.catalyst.expressions.Expression> rule)
public static final scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Expression> expressions()
public static StructType schema()
public static String schemaString()
public static void printSchema()
public static String simpleString()
public static String verboseString()
public static scala.collection.Seq<PlanType> subqueries()
public static final PlanType canonicalized()
public static final boolean sameResult(PlanType other)
public static final int semanticHash()
public static org.apache.spark.sql.catalyst.expressions.AttributeSeq allAttributes()
public static org.apache.spark.sql.catalyst.plans.logical.Statistics stats()
public static final void invalidateStatsCache()
public static org.apache.spark.sql.catalyst.expressions.ExpressionSet constraints()
public static boolean isStreaming()
public static String verboseStringWithSuffix()
public static scala.Option<Object> maxRows()
public static scala.Option<Object> maxRowsPerPartition()
public static boolean resolved()
public static boolean childrenResolved()
public static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> resolve(StructType schema, scala.Function2<String,String,Object> resolver)
public static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> resolveChildren(scala.collection.Seq<String> nameParts, scala.Function2<String,String,Object> resolver)
public static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> resolve(scala.collection.Seq<String> nameParts, scala.Function2<String,String,Object> resolver)
public static scala.Option<org.apache.spark.sql.catalyst.expressions.NamedExpression> resolveQuoted(String name, scala.Function2<String,String,Object> resolver)
public static void refresh()
public static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> output()
public static final scala.collection.Seq<org.apache.spark.sql.catalyst.plans.logical.LogicalPlan> children()
public static scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> outputColumns()
public static scala.collection.immutable.Map<String,org.apache.spark.sql.execution.metric.SQLMetric> metrics()
public static org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker basicWriteJobStatsTracker(org.apache.hadoop.conf.Configuration hadoopConf)
public static scala.Option<org.apache.hadoop.fs.Path> createdTempDir()
public static void createdTempDir_$eq(scala.Option<org.apache.hadoop.fs.Path> x$1)
public org.apache.spark.sql.catalyst.catalog.CatalogTable table()
public scala.collection.immutable.Map<String,scala.Option<String>> partition()
public org.apache.spark.sql.catalyst.plans.logical.LogicalPlan query()
public boolean overwrite()
public boolean ifPartitionNotExists()
public scala.collection.Seq<String> outputColumnNames()
public scala.collection.Seq<Row> run(SparkSession sparkSession, org.apache.spark.sql.execution.SparkPlan child)
Inserts all the rows in the table into Hive. Row objects are properly serialized with the org.apache.hadoop.hive.serde2.SerDe and the org.apache.hadoop.mapred.OutputFormat provided by the table definition.
Parameters:
sparkSession - (undocumented)
child - (undocumented)
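For context, the statements below are the kind of SQL that reaches run() when executed against a Hive-serde table. This is a sketch with placeholder table names (logs, staging_logs) and assumes a Hive-enabled SparkSession bound to spark.

```scala
// A Hive-serde table in the default text format; Parquet/ORC Hive tables may be
// converted to the data source write path instead of this command.
spark.sql("CREATE TABLE IF NOT EXISTS logs (msg STRING) PARTITIONED BY (dt STRING)")

// Static partition spec: partition = Map("dt" -> Some("2024-01-01")), overwrite = false.
spark.sql("INSERT INTO TABLE logs PARTITION (dt='2024-01-01') VALUES ('hello')")

// Dynamic partition spec: partition = Map("dt" -> None), overwrite = true.
// Fully dynamic specs typically require the Hive setting below.
spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict")
spark.sql("INSERT OVERWRITE TABLE logs PARTITION (dt) SELECT msg, dt FROM staging_logs")
```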
public scala.Option<org.apache.hadoop.fs.Path> createdTempDir()
public scala.collection.immutable.Set<String> saveAsHiveFile(SparkSession sparkSession, org.apache.spark.sql.execution.SparkPlan plan, org.apache.hadoop.conf.Configuration hadoopConf, org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc fileSinkConf, String outputLocation, scala.collection.immutable.Map<scala.collection.immutable.Map<String,String>,String> customPartitionLocations, scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> partitionAttributes)
public org.apache.hadoop.fs.Path getExternalTmpPath(SparkSession sparkSession, org.apache.hadoop.conf.Configuration hadoopConf, org.apache.hadoop.fs.Path path)
public void deleteExternalTmpPath(org.apache.hadoop.conf.Configuration hadoopConf)
public org.apache.hadoop.fs.Path oldVersionExternalTempPath(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String scratchDir)
public org.apache.hadoop.fs.Path newVersionExternalTempPath(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
public org.apache.hadoop.fs.Path getExtTmpPathRelTo(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
public org.apache.hadoop.fs.Path getExternalScratchDir(java.net.URI extURI, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
public org.apache.hadoop.fs.Path getStagingDir(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.conf.Configuration hadoopConf, String stagingDir)
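The temp-path helpers above manage Hive-style scratch and staging directories for in-flight write output. As a rough sketch (assuming the standard Hive configuration keys; the exact directory layout depends on the Hive version in use), the relevant settings can be read from the Hadoop configuration these methods receive:

```scala
import org.apache.hadoop.conf.Configuration

// The Hadoop/Hive configuration as the helpers above receive it.
val hadoopConf: Configuration = spark.sessionState.newHadoopConf()

// Newer staging-directory scheme: a ".hive-staging" directory is created next to
// the table or partition location.
val stagingDir: String = hadoopConf.get("hive.exec.stagingdir", ".hive-staging")

// Older scratch-directory scheme: temporary data goes under a shared scratch dir.
val scratchDir: String = hadoopConf.get("hive.exec.scratchdir", "/tmp/hive")

println(s"stagingDir=$stagingDir scratchDir=$scratchDir")
```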
public String executionId()