Tensorflow Developer Certificate in 2022: Zero to Mastery
May 2022
udemy.com/course/tensorflow-developer-certificate-machine-learning-zero-to-mastery
https://github.com/mrdbourke/tensorflow-deep-learning
00. Getting started with TensorFlow: A guide to the fundamentals
https://github.com/mrdbourke/tensorflow-deep-learning/blob/main/00_tensorflow_fundamentals.ipynb
scalar = tf.constant(7)
scalar.ndim # 0
vector = tf.constant([10, 10])
vector.ndim # 1
matrix = tf.constant([[10, 7], [7, 10]])
matrix.ndim # 2
tf.size(matrix) # <tf.Tensor: shape=(), dtype=int32, numpy=4>
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
matrix.astype(tf.float16)
tf.cast(matrix, dtype=tf.float16)
np.array(matrix)
matrix
tensor = tf.constant(
[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]], [[13, 14, 15], [16, 17, 18]]]
)
tensor.ndim # 3
# tf.Variable creates a tensor whose values can be changed after creation
changeable_tensor = tf.Variable([10, 7])
# Overwrite the element at index 0 with 7 via assign()
changeable_tensor[0].assign(7)
I = tf.Variable(np.arange(0, 5))
# assign() replaces the variable's values in place
I.assign([0, 1, 2, 3, 50])
# assign_add() adds the given values to the current ones in place
I.assign_add([10, 10, 10, 10, 10])
random_1 = tf.random.Generator.from_seed(42)
random_1 = random_1.normal(shape=(3, 2))
tf.random.shuffle(matrix, seed=42)
ones = tf.ones(shape=(3, 2))
zeros = tf.zeros(shape=(3, 2))
import numpy as np
numpy_A = np.arange(1, 25, dtype=np.int32)
A = tf.constant(numpy_A, shape=[2, 4, 3])
Get the first 2 items of each dimension
tensor[:2, :2, :2]
Add and remove a dimension
tensor[..., tf.newaxis]
tf.expand_dims(tensor, axis=-1)
tf.squeeze(tensor)
math
tf.multiply(tensor, 10)
Y = tf.constant([[7, 8], [9, 10], [11, 12]])
tf.matmul(Y, tf.reshape(Y, shape=(2, 3)))
tf.transpose(Y)
tf.matmul(a=Y, b=Y, transpose_a=False, transpose_b=True)
tf.tensordot(tf.transpose(Y), Y, axes=1)
import numpy as np
# 50 random integers in [0, 100) to demonstrate aggregation ops
E = tf.constant(np.random.randint(low=0, high=100, size=50))
# Smallest value in the tensor
tf.reduce_min(E)
# reduce_std does not accept integer tensors, so cast to a float dtype first
tf.math.reduce_std(tf.cast(E, dtype=tf.float16))
# Index of the largest value (uses E; the previous `F` was never defined in these notes)
tf.argmax(E)
one hot encoding
some_list = [0, 1, 2, 3]
tf.one_hot(some_list, depth=4)
tf.one_hot(some_list, depth=4, on_value="We're live!", off_value="Offline")
function
@tf.function
def tf_function(x, y):
    """Return ``x ** 2 + y``; the ``tf.function`` decorator wraps it as a TensorFlow function."""
    return x ** 2 + y


# Example inputs for the call below (x and y were previously undefined here)
x = tf.constant(np.arange(0, 10))
y = tf.constant(np.arange(10, 20))
tf_function(x, y)
Finding GPUs
print(tf.config.list_physical_devices('GPU'))
01: Neural Network Regression with TensorFlow
from sklearn.compose import make_column_transformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
# Create features (using tensors)
X = tf.constant([-7.0, -4.0, -1.0, 2.0, 5.0, 8.0, 11.0, 14.0])
# Create labels (using tensors)
y = tf.constant([3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0])
# Set random seed
tf.random.set_seed(42)
# Create a model using the Sequential API
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
# Compile the model
model.compile(
loss=tf.keras.losses.mae, # mae is short for mean absolute error
optimizer=tf.keras.optimizers.SGD(), # SGD is short for stochastic gradient descent
metrics=["mae"],
)
# Fit the model
# model.fit(tf.expand_dims(X, axis=-1), y, epochs=5)
model.fit(tf.expand_dims(X, axis=-1), y, epochs=100)
# Build a larger dataset: 50 evenly spaced feature values from -100 to 96
X = np.arange(-100, 100, 4)
# Labels follow the same pattern as the earlier toy data (each label is feature + 10)
y = X + 10
# Cut both arrays at index 40: first 40 examples (80%) train, last 10 (20%) test
X_train, X_test = np.split(X, [40])
y_train, y_test = np.split(y, [40])
plt.figure(figsize=(10, 7))
# Plot training data in blue
plt.scatter(X_train, y_train, c='b', label='Training data')
# Plot test data in green
plt.scatter(X_test, y_test, c='g', label='Testing data')
# Show the legend
plt.legend();
# Set random seed
tf.random.set_seed(42)
# Create a model (same as above)
model = tf.keras.Sequential([
tf.keras.layers.Dense(1, input_shape=[1]) # define the input_shape to our model
])
# Set random seed
tf.random.set_seed(42)
# Create a model (same as above)
model = tf.keras.Sequential(
[tf.keras.layers.Dense(1, input_shape=[1])] # define the input_shape to our model
)
# Compile model (same as above)
model.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.SGD(), metrics=["mae"]
)
# Fit the model to the training data
model.fit(X_train, y_train, epochs=100, verbose=0) # verbose controls how much gets output
Pretty model viz
from tensorflow.keras.utils import plot_model
plot_model(model, show_shapes=True)
# Make predictions
y_preds = model.predict(X_test)
def plot_predictions(
    train_data=X_train,
    train_labels=y_train,
    test_data=X_test,
    test_labels=y_test,
    predictions=y_preds,
):
    """
    Plot training data, test data and predictions on one scatter plot.

    The defaults are bound to the module-level X_train, y_train, X_test,
    y_test and y_preds as they exist when this function is defined; pass
    arguments explicitly if those names are reassigned later.

    Args:
        train_data: x-values of the training examples (drawn in blue).
        train_labels: y-values of the training examples.
        test_data: x-values of the test examples (drawn in green).
        test_labels: y-values of the test examples.
        predictions: predicted y-values for ``test_data`` (drawn in red).
    """
    plt.figure(figsize=(10, 7))
    # Plot training data in blue
    plt.scatter(train_data, train_labels, c="b", label="Training data")
    # Plot test data in green
    plt.scatter(test_data, test_labels, c="g", label="Testing data")
    # Plot the predictions in red (predictions were made on the test data)
    plt.scatter(test_data, predictions, c="r", label="Predictions")
    # Show the legend
    plt.legend()
plot_predictions(
train_data=X_train,
train_labels=y_train,
test_data=X_test,
test_labels=y_test,
predictions=y_preds,
)
Use model.evaluate(), which returns the loss of the model as well as any metrics set up during the compile step.
# Evaluate the model on the test set
model.evaluate(X_test, y_test)
TensorFlow also has built in functions for MSE and MAE.
# Calculate the MAE with TensorFlow's built-in function
mae = tf.metrics.mean_absolute_error(
y_true=y_test, y_pred=y_preds.squeeze()
) # use squeeze() to make y_pred the same shape as y_true
# Manual version: returns the same as tf.metrics.mean_absolute_error()
tf.reduce_mean(tf.abs(y_test-y_preds.squeeze()))
# Calculate the MSE
# NOTE: the names `mae` and `mse` assigned in this snippet are rebound to
# helper functions by the `def mae` / `def mse` definitions that follow.
mse = tf.metrics.mean_squared_error(y_true=y_test, y_pred=y_preds.squeeze())
def mae(y_test, y_pred):
    """
    Calculate the mean absolute error between y_test and y_pred.

    Thin wrapper that passes both arguments straight to
    tf.metrics.mean_absolute_error and returns its result.
    """
    return tf.metrics.mean_absolute_error(y_test, y_pred)
def mse(y_test, y_pred):
    """
    Calculate the mean squared error between y_test and y_pred.

    Thin wrapper that passes both arguments straight to
    tf.metrics.mean_squared_error and returns its result.
    """
    return tf.metrics.mean_squared_error(y_test, y_pred)
Get more data - get more examples for your model to train on (more opportunities to learn patterns).
Make your model larger (use a more complex model) - this might come in the form of more layers or more hidden units in each layer.
Train for longer - give your model more of a chance to find the patterns in the data.
model_1 - same as original model, 1 layer, trained for 100 epochs.
model_2 - 2 layers, trained for 100 epochs.
model_3 - 2 layers, trained for 500 epochs.
model_1
tf.random.set_seed(42)
# Replicate original model
model_1 = tf.keras.Sequential([tf.keras.layers.Dense(1)])
# Compile the model
model_1.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.SGD(), metrics=["mae"]
)
# Fit the model
model_1.fit(tf.expand_dims(X_train, axis=-1), y_train, epochs=100)
# Make and plot predictions for model_1
y_preds_1 = model_1.predict(X_test)
plot_predictions(predictions=y_preds_1)
# Calculate model_1 metrics
mae_1 = mae(y_test, y_preds_1.squeeze()).numpy()
mse_1 = mse(y_test, y_preds_1.squeeze()).numpy()
model_2 (add an extra dense layer)
tf.random.set_seed(42)
# Replicate model_1 and add an extra layer
model_2 = tf.keras.Sequential(
[tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)] # add a second layer
)
# Compile the model
model_2.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.SGD(), metrics=["mae"]
)
# Fit the model
model_2.fit(
tf.expand_dims(X_train, axis=-1), y_train, epochs=100, verbose=0
)
# Make and plot predictions for model_2
y_preds_2 = model_2.predict(X_test)
plot_predictions(predictions=y_preds_2)
# Calculate model_2 metrics
mae_2 = mae(y_test, y_preds_2.squeeze()).numpy()
mse_2 = mse(y_test, y_preds_2.squeeze()).numpy()
model_3 (same as model_2 but 500 epochs)
tf.random.set_seed(42)
# Replicate model_2
model_3 = tf.keras.Sequential([tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)])
# Compile the model
model_3.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.SGD(), metrics=["mae"]
)
# Fit the model (this time for 500 epochs, not 100)
model_3.fit(
tf.expand_dims(X_train, axis=-1), y_train, epochs=500, verbose=0
)
# Make and plot predictions for model_3
y_preds_3 = model_3.predict(X_test)
plot_predictions(predictions=y_preds_3)
Note: our model might have trained for too long, which resulted in worse results.
mae_3 = mae(y_test, y_preds_3.squeeze()).numpy()
mse_3 = mse(y_test, y_preds_3.squeeze()).numpy()
compare model results
# One row per experiment: [name, MAE, MSE]
model_results = [
    ["model_1", mae_1, mse_1],
    ["model_2", mae_2, mse_2],
    ["model_3", mae_3, mse_3],  # third column is the MSE (was mistakenly mae_3)
]
# Tabulate the results so the models can be compared side by side
all_results = pd.DataFrame(model_results, columns=["model", "mae", "mse"])
You can track experiments with TensorBoard (https://tensorboard.dev/) or Weights & Biases (https://wandb.ai/site).
Save a model
save using model.save()
A model can be saved in one of two formats:
The SavedModel format (default).
The HDF5 format.
# Save a model using the SavedModel format
model_2.save('best_model_SavedModel_format')
# Check it out - outputs a protobuf binary file (.pb) as well as other files
!ls best_model_SavedModel_format
# Save a model using the HDF5 format
model_2.save("best_model_HDF5_format.h5") # note the addition of '.h5' on the end
# Check it out
!ls best_model_HDF5_format.h5
Load model
# Load a model from the SavedModel format
loaded_saved_model = tf.keras.models.load_model("best_model_SavedModel_format")
loaded_saved_model.summary()
Test it out
# Compare model_2 with the SavedModel version (should return True)
model_2_preds = model_2.predict(X_test)
saved_model_preds = loaded_saved_model.predict(X_test)
mae(y_test, saved_model_preds.squeeze()).numpy() == mae(y_test, model_2_preds.squeeze()).numpy()
# Load a model from the HDF5 format
loaded_h5_model = tf.keras.models.load_model("best_model_HDF5_format.h5")
loaded_h5_model.summary()
# Compare model_2 with the loaded HDF5 version (should return True)
h5_model_preds = loaded_h5_model.predict(X_test)
mae(y_test, h5_model_preds.squeeze()).numpy() == mae(y_test, model_2_preds.squeeze()).numpy()
Download the model (or any file) from Google Colab
from google.colab import files
files.download("best_model_HDF5_format.h5")
Larger example
https://www.kaggle.com/datasets/mirichoi0218/insurance
insurance = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")
# Turn all categories into numbers
insurance_one_hot = pd.get_dummies(insurance)
# Create X & y values
X = insurance_one_hot.drop("charges", axis=1)
y = insurance_one_hot["charges"]
# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
) # set random state for reproducible splits
# Set random seed
tf.random.set_seed(42)
# Create a new model (same as model_2)
insurance_model = tf.keras.Sequential(
[tf.keras.layers.Dense(1), tf.keras.layers.Dense(1)]
)
# Compile the model
insurance_model.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.SGD(), metrics=["mae"]
)
# Fit the model
insurance_model.fit(X_train, y_train, epochs=100)
# Check the results of the insurance model
insurance_model.evaluate(X_test, y_test)
To try to improve the model, make three changes: increase the number of layers (2 -> 3), increase the number of units in each layer (except for the output layer), and change the optimizer (from SGD to Adam).
# Set random seed
tf.random.set_seed(42)
# Add an extra layer and increase number of units
insurance_model_2 = tf.keras.Sequential(
[
tf.keras.layers.Dense(100), # 100 units
tf.keras.layers.Dense(10), # 10 units
tf.keras.layers.Dense(1), # 1 unit (important for output layer)
]
)
# Compile the model
insurance_model_2.compile(
loss=tf.keras.losses.mae,
optimizer=tf.keras.optimizers.Adam(), # Adam works but SGD doesn't
metrics=["mae"],
)
# Fit the model and save the history (we can plot this)
history = insurance_model_2.fit(X_train, y_train, epochs=100, verbose=0)
For many problems, the Adam optimizer is a great starting choice http://karpathy.github.io/2019/04/25/recipe/
# Plot history (also known as a loss curve)
pd.DataFrame(history.history).plot()
plt.ylabel("loss")
plt.xlabel("epochs");
When to stop? A common method is to set your model training for a very long time (e.g. 1000s of epochs) but set it up with an EarlyStopping callback, so that training stops automatically once the model stops improving.
# Try training for a little longer (100 more epochs)
history_2 = insurance_model_2.fit(X_train, y_train, epochs=100, verbose=0)
# Evaluate after 200 total epochs and keep the MAE: insurance_model_2_mae is
# compared with insurance_model_3_mae later, and it must be computed here while
# X_test is still the one-hot encoded test split this model was trained on.
insurance_model_2_loss, insurance_model_2_mae = insurance_model_2.evaluate(X_test, y_test)
Preprocessing data (normalization and standardization)
A common practice when working with neural networks is to make sure all of the data you pass to them is in the range 0 to 1 (normalization). This is done by subtracting the minimum value and then dividing by the maximum value minus the minimum: x' = (x - min) / (max - min). This is also referred to as min-max scaling.
The alternative is standardization, which converts all of your data to unit variance and 0 mean. It is done by subtracting the mean value from the target feature and then dividing it by the standard deviation.
Turning all of your data to numbers (a neural network can't handle strings).
Making sure your data is in the right shape (verifying input and output shapes).
With neural networks you'll tend to favor normalization
see https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing-data https://towardsdatascience.com/scale-standardize-or-normalize-with-scikit-learn-6ccc7d176a02
# Create column transformer (this will help us normalize/preprocess our data)
ct = make_column_transformer(
(MinMaxScaler(), ["age", "bmi", "children"]), # get all values between 0 and 1
(OneHotEncoder(handle_unknown="ignore"), ["sex", "smoker", "region"]),
)
# Create X & y
X = insurance.drop("charges", axis=1)
y = insurance["charges"]
# Build our train and test sets (use random state to ensure same split as before)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
# Fit column transformer on the training data only (doing so on test data would result in data leakage)
ct.fit(X_train)
# Transform training and test data with normalization (MinMaxScaler) and one-hot encoding (OneHotEncoder)
X_train_normal = ct.transform(X_train)
X_test_normal = ct.transform(X_test)
# Set random seed
tf.random.set_seed(42)
# Build the model (3 layers, 100, 10, 1 units)
insurance_model_3 = tf.keras.Sequential(
[tf.keras.layers.Dense(100), tf.keras.layers.Dense(10), tf.keras.layers.Dense(1)]
)
# Compile the model
insurance_model_3.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.Adam(), metrics=["mae"]
)
# Fit the model for 200 epochs (same as insurance_model_2)
insurance_model_3.fit(X_train_normal, y_train, epochs=200, verbose=0)
# Evaluate 3rd model
insurance_model_3_loss, insurance_model_3_mae = insurance_model_3.evaluate(X_test_normal, y_test)
# Compare modelling results from non-normalized data and normalized data
insurance_model_2_mae, insurance_model_3_mae
Other data to test
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.boston_housing.load_data(
path="boston_housing.npz", test_split=0.2, seed=113
)
# Set random seed
tf.random.set_seed(42)
# Build the model (3 layers, 100, 10, 1 units)
boston_model_1 = tf.keras.Sequential(
[tf.keras.layers.Dense(100), tf.keras.layers.Dense(10), tf.keras.layers.Dense(1)]
)
# Compile the model
boston_model_1.compile(
loss=tf.keras.losses.mae, optimizer=tf.keras.optimizers.Adam(), metrics=["mae"]
)
# Fit the model for 200 epochs and keep the returned history for plotting
boston_model_1_history = boston_model_1.fit(X_train, y_train, epochs=200, verbose=0)
# Evaluate the model on the test set
boston_model_1.evaluate(X_test, y_test)
# Plot history (also known as a loss curve)
pd.DataFrame(boston_model_1_history.history).plot()
plt.ylabel("loss")
plt.xlabel("epochs");
02: Neural Network Classification with TensorFlow
Binary classification uses sigmoid for output activation, binary cross entropy for loss
Multiclass classification uses softmax for output activation , categorical cross entropy for loss