public class SparkUtils extends Object
Constructor and Description |
---|
SparkUtils() |
Modifier and Type | Method and Description |
---|---|
static Class<? extends org.apache.hadoop.io.compress.CompressionCodec> |
getCompressionCodeClass(String compressionCodecClass) |
static <T> T |
readObjectFromFile(String path,
Class<T> type,
org.apache.hadoop.conf.Configuration hadoopConfig)
Read an object from HDFS (or local) using default Java object serialization
|
static <T> T |
readObjectFromFile(String path,
Class<T> type,
org.apache.spark.api.java.JavaSparkContext sc)
Read an object from HDFS (or local) using default Java object serialization
|
static <T> T |
readObjectFromFile(String path,
Class<T> type,
org.apache.spark.SparkContext sc)
Read an object from HDFS (or local) using default Java object serialization
|
static String |
readStringFromFile(String path,
org.apache.hadoop.conf.Configuration hadoopConfig)
Read a UTF-8 format String from HDFS (or local)
|
static String |
readStringFromFile(String path,
org.apache.spark.api.java.JavaSparkContext sc)
Read a UTF-8 format String from HDFS (or local)
|
static String |
readStringFromFile(String path,
org.apache.spark.SparkContext sc)
Read a UTF-8 format String from HDFS (or local)
|
static void |
registerKryoClasses(org.apache.spark.SparkConf conf)
Register the DataVec writable classes for Kryo
|
static <T> List<org.apache.spark.api.java.JavaRDD<T>> |
splitData(SplitStrategy splitStrategy,
org.apache.spark.api.java.JavaRDD<T> data,
long seed) |
static void |
writeAnalysisHTMLToFile(String outputPath,
DataAnalysis dataAnalysis,
org.apache.spark.api.java.JavaSparkContext sc)
Write a DataAnalysis to HDFS (or locally) as an HTML file
|
static void |
writeObjectToFile(String path,
Object toWrite,
org.apache.hadoop.conf.Configuration hadoopConfig)
Write an object to HDFS (or local) using default Java object serialization
|
static void |
writeObjectToFile(String path,
Object toWrite,
org.apache.spark.api.java.JavaSparkContext sc)
Write an object to HDFS (or local) using default Java object serialization
|
static void |
writeObjectToFile(String path,
Object toWrite,
org.apache.spark.SparkContext sc)
Write an object to HDFS (or local) using default Java object serialization
|
static void |
writeSchema(String outputPath,
Schema schema,
org.apache.spark.api.java.JavaSparkContext sc)
Write a schema to an HDFS (or local) file in a human-readable format
|
static void |
writeStringToFile(String path,
String toWrite,
org.apache.hadoop.conf.Configuration hadoopConfig)
Write a String to a file (on HDFS or local) in UTF-8 format
|
static void |
writeStringToFile(String path,
String toWrite,
org.apache.spark.api.java.JavaSparkContext sc)
Write a String to a file (on HDFS or local) in UTF-8 format
|
static void |
writeStringToFile(String path,
String toWrite,
org.apache.spark.SparkContext sc)
Write a String to a file (on HDFS or local) in UTF-8 format
|
static void |
writeWritablesToFile(String outputPath,
String delim,
List<List<Writable>> writables,
org.apache.spark.api.java.JavaSparkContext sc)
Write a set of writables (or, sequence) to HDFS (or, locally).
|
public static <T> List<org.apache.spark.api.java.JavaRDD<T>> splitData(SplitStrategy splitStrategy, org.apache.spark.api.java.JavaRDD<T> data, long seed)
public static void writeStringToFile(String path, String toWrite, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
path
- Path to write to
toWrite
- String to write
sc
- Spark context
IOException
public static void writeStringToFile(String path, String toWrite, org.apache.spark.SparkContext sc) throws IOException
path
- Path to write to
toWrite
- String to write
sc
- Spark context
IOException
public static void writeStringToFile(String path, String toWrite, org.apache.hadoop.conf.Configuration hadoopConfig) throws IOException
path
- Path to write to
toWrite
- String to write
hadoopConfig
- Hadoop configuration, for example from SparkContext.hadoopConfiguration()
IOException
public static String readStringFromFile(String path, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
path
- Path to read the string from
sc
- Spark context
IOException
public static String readStringFromFile(String path, org.apache.spark.SparkContext sc) throws IOException
path
- Path to read the string from
sc
- Spark context
IOException
public static String readStringFromFile(String path, org.apache.hadoop.conf.Configuration hadoopConfig) throws IOException
path
- Path to read the string from
hadoopConfig
- Hadoop configuration, for example from SparkContext.hadoopConfiguration()
IOException
public static void writeObjectToFile(String path, Object toWrite, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
path
- Path to write the object to
toWrite
- Object to write
sc
- Spark context
IOException
public static void writeObjectToFile(String path, Object toWrite, org.apache.spark.SparkContext sc) throws IOException
path
- Path to write the object to
toWrite
- Object to write
sc
- Spark context
IOException
public static void writeObjectToFile(String path, Object toWrite, org.apache.hadoop.conf.Configuration hadoopConfig) throws IOException
path
- Path to write the object to
toWrite
- Object to write
hadoopConfig
- Hadoop configuration, for example from SparkContext.hadoopConfiguration()
IOException
public static <T> T readObjectFromFile(String path, Class<T> type, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
T
- Type of the object to read
path
- File to read
type
- Class of the object to read
sc
- Spark context
IOException
public static <T> T readObjectFromFile(String path, Class<T> type, org.apache.spark.SparkContext sc) throws IOException
T
- Type of the object to read
path
- File to read
type
- Class of the object to read
sc
- Spark context
IOException
public static <T> T readObjectFromFile(String path, Class<T> type, org.apache.hadoop.conf.Configuration hadoopConfig) throws IOException
T
- Type of the object to read
path
- File to read
type
- Class of the object to read
hadoopConfig
- Hadoop configuration, for example from SparkContext.hadoopConfiguration()
IOException
public static void writeSchema(String outputPath, Schema schema, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
outputPath
- Output path to write to
schema
- Schema to write
sc
- Spark context
IOException
public static void writeAnalysisHTMLToFile(String outputPath, DataAnalysis dataAnalysis, org.apache.spark.api.java.JavaSparkContext sc)
outputPath
- Output path
dataAnalysis
- Analysis to generate HTML file for
sc
- Spark context
public static void writeWritablesToFile(String outputPath, String delim, List<List<Writable>> writables, org.apache.spark.api.java.JavaSparkContext sc) throws IOException
outputPath
- Path to write the output
delim
- Delimiter
writables
- data to write
sc
- Spark context
IOException
public static void registerKryoClasses(org.apache.spark.SparkConf conf)
Copyright © 2020. All rights reserved.