The steps for this recipe are as follows:
- Import the required libraries and set the storage-account configuration values:
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
from sparkdl import DeepImageFeaturizer
from pyspark.ml.evaluation import \
MulticlassClassificationEvaluator
from pyspark.sql.functions import lit
import pickle
import mlflow
# Azure Blob Storage credentials used to build the wasbs:// paths below.
# NOTE(review): these are placeholders — substitute your real account name
# and key. Prefer loading the key from a secret store (e.g. Azure Key Vault
# / Databricks secrets) rather than hard-coding it in the notebook.
storage_account_name = "Your Storage Account Name"
storage_account_access_key = "Your Key"
- Read the data:
# Build the wasbs:// folder URIs for the two image classes, then load each
# folder with Spark's "image" data source, tagging every row with a binary
# label column: 0 for safe-driver images, 1 for unsafe-driver images.
safe_images = ("wasbs://unsafedrivers@" + storage_account_name
               + ".blob.core.windows.net/safe/")
safe_df = (spark.read.format('image')
           .load(safe_images)
           .withColumn("label", lit(0)))

unsafe_images = ("wasbs://unsafedrivers@" + storage_account_name
                 + ".blob.core.windows.net/unsafe/")
unsafe_df = (spark.read.format('image')
             .load(unsafe_images)
             .withColumn("label", lit(1)))
- Query the data...