Convert your MNIST model from TensorFlow to ONNX and run it on UbiOps twice as fast¶
ONNX is an open format that is used to represent various machine learning models. It can also function as a model compression technique. In this tutorial we will show you how to convert a TensorFlow-based image classification model to ONNX and run it on UbiOps using the ONNX Runtime. We will show that this allows you to run an inference job twice as fast!
First, let's install our dependencies and connect to UbiOps.
!pip install tensorflow==2.10 tf2onnx==1.13.0 tqdm==4.64.1 'ubiops>=3.12.0' 'protobuf>=3.19.4'
Then connect to the UbiOps API:
API_TOKEN = 'Token ' # Fill in your token here
PROJECT_NAME = '' # Fill in your project name here
DEPLOYMENT_NAME = 'tf-vs-onnx-test'
import ubiops
import shutil
import random, glob
import time
from datetime import datetime, timedelta
from tqdm import tqdm
configuration = ubiops.Configuration(host="https://api.ubiops.com/v2.1")
configuration.api_key['Authorization'] = API_TOKEN
client = ubiops.ApiClient(configuration)
api = ubiops.CoreApi(client)
api.service_status()
Converting the model¶
We first download an h5 model from our public online bucket, then save it as a SavedModel. Lastly, we convert it to an ONNX model using the tf2onnx package. If everything worked correctly you should end up with the ONNX model at mnist_deployment_onnx_package/mnist.onnx.
import os
import urllib.request
import zipfile
from tensorflow.keras.models import load_model
#Get bucket from online repo
bucket_name = "ubiops"
file_path = "demo-helper-files/cnn.zip"
# Create the URL for the file
url = f"https://storage.googleapis.com/{bucket_name}/{file_path}"
# Download the zip archive to a local file called "cnn"
urllib.request.urlretrieve(url, "cnn")
# Extract the model file into the cnn_dir folder
with zipfile.ZipFile("cnn", 'r') as zip_ref:
    zip_ref.extractall('cnn_dir')
model = load_model("cnn_dir/cnn.h5")
#Save as a SavedModel to the mnist_model directory
!mkdir mnist_model
model.save("mnist_model")
Preparing the comparison¶
The next step is to create two deployment versions: one with the original TensorFlow-based runtime and one with the ONNX model running on the ONNX Runtime.
The following code copies the TensorFlow model and writes the requirements.txt and deployment.py files to the mnist_deployment_package directory.
!mkdir mnist_deployment_package
# Copy the TensorFlow model to the deployment package
import shutil
shutil.copy('cnn_dir/cnn.h5', 'mnist_deployment_package/cnn.h5')
%%writefile ./mnist_deployment_package/requirements.txt
# This file contains package requirements for the deployment
# installed via PIP. Installed before deployment initialization
tensorflow==2.10
imageio==2.26.0
h5py==3.8.0
numpy==1.24.1
Pillow==9.4.0
%%writefile ./mnist_deployment_package/deployment.py
"""
The file containing the deployment code is required to be called 'deployment.py' and should contain the 'Deployment'
class and 'request' method.
"""
import os
from tensorflow.keras.models import load_model
from imageio import imread
import numpy as np
class Deployment:
def __init__(self, base_directory, context):
print("Initialising deployment")
weights = os.path.join(base_directory, "cnn.h5")
self.model = load_model(weights)
def request(self, data):
print("Processing request")
x = imread(data['image'])
# convert to a 4D tensor to feed into our model
x = x.reshape(1, 28, 28, 1)
x = x.astype(np.float32) / 255
out = self.model.predict(x)
# here we set our output parameters in the form of a json
return {'prediction': int(np.argmax(out)), 'probability': float(np.max(out))}
Now build a second deployment package that hosts the ONNX model.
!mkdir mnist_deployment_onnx_package
#Convert the model from SavedModel format to onnx, and store inside the ONNX deployment package
!python3 -m tf2onnx.convert --saved-model mnist_model --opset 13 --output mnist_deployment_onnx_package/mnist.onnx
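Optionally, you can verify the converted model locally by comparing its output to the original Keras model on a random input. This sanity check assumes you have the onnxruntime package installed in your local environment (it is not part of the pip install at the top of this notebook):
import numpy as np
import onnxruntime as rt
# Run both the ONNX model and the original Keras model on the same random MNIST-shaped input
dummy_input = np.random.rand(1, 28, 28, 1).astype(np.float32)
sess = rt.InferenceSession("mnist_deployment_onnx_package/mnist.onnx")
onnx_output = sess.run(None, {sess.get_inputs()[0].name: dummy_input})[0]
keras_output = model.predict(dummy_input)
print("Outputs match:", np.allclose(onnx_output, keras_output, atol=1e-4))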
%%writefile ./mnist_deployment_onnx_package/deployment.py
"""
The file containing the deployment code is required to be called 'deployment.py' and should contain the 'Deployment'
class and 'request' method.
"""
import os
import onnxruntime as rt
from imageio import imread
import numpy as np
class Deployment:
    def __init__(self, base_directory, context):
        print("Initialising deployment")
        # Load the ONNX model from the deployment package directory
        self.sess = rt.InferenceSession(os.path.join(base_directory, "mnist.onnx"))
        self.input_name = self.sess.get_inputs()[0].name
def request(self, data):
x = imread(data['image'])
# convert to a 4D tensor to feed into our model
x = x.reshape(1, 28, 28, 1)
x = x.astype(np.float32) / 255
print("Prediction being made")
prediction = self.sess.run(None, {self.input_name: x})[0]
return {'prediction': int(np.argmax(prediction)), 'probability': float(np.max(prediction))}
%%writefile ./mnist_deployment_onnx_package/requirements.txt
# This file contains package requirements for the deployment
# installed via PIP. Installed before deployment initialization
onnx==1.12.0
onnxruntime==1.12.0
imageio==2.26.0
numpy==1.24.1
Now that the deployment packages are created, you can upload them to UbiOps. We will make one deployment with two versions: one running TensorFlow and one running ONNX.
mnist_template = ubiops.DeploymentCreate(
name=DEPLOYMENT_NAME,
description='A deployment to classify handwritten digits.',
input_type='structured',
output_type='structured',
input_fields=[
{'name': 'image', 'data_type': 'file'}
],
output_fields=[
{'name': 'prediction', 'data_type': 'int'},
{'name': 'probability', 'data_type': 'double'}
]
)
mnist_deployment = api.deployments_create(project_name=PROJECT_NAME, data=mnist_template)
print(mnist_deployment)
version_template = ubiops.DeploymentVersionCreate(
version="onnx",
environment='python3-10',
instance_type_group_name='1024 MB + 0.25 vCPU',
maximum_instances=1,
minimum_instances=0,
maximum_idle_time=1800, # = 30 minutes
request_retention_mode='full', # input/output of requests will be stored
request_retention_time=3600 # requests will be stored for 1 hour
)
version = api.deployment_versions_create(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
data=version_template
)
# Zip the deployment package
shutil.make_archive('mnist_deployment_onnx_package', 'zip', '.', 'mnist_deployment_onnx_package')
upload_response = api.revisions_file_upload(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="onnx",
file='mnist_deployment_onnx_package.zip'
)
print(upload_response)
version_template = ubiops.DeploymentVersionCreate(
version="tf",
environment='python3-10',
instance_type_group_name='1024 MB + 0.25 vCPU',
maximum_instances=1,
minimum_instances=0,
maximum_idle_time=1800, # = 30 minutes
request_retention_mode='full', # input/output of requests will be stored
request_retention_time=3600 # requests will be stored for 1 hour
)
version = api.deployment_versions_create(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
data=version_template
)
# Zip the deployment package
shutil.make_archive('mnist_deployment_package', 'zip', '.', 'mnist_deployment_package')
upload_response = api.revisions_file_upload(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="tf",
file='mnist_deployment_package.zip'
)
print(upload_response)
Let's wait until the deployment versions are built before we continue.
ubiops.utils.wait_for_deployment_version(client = api.api_client,
project_name = PROJECT_NAME,
deployment_name = DEPLOYMENT_NAME,
version = "onnx")
ubiops.utils.wait_for_deployment_version(client = api.api_client,
project_name = PROJECT_NAME,
deployment_name = DEPLOYMENT_NAME,
version = "tf")
print("Deployments are ready")
Benchmarking¶
If everything went well, there should now be a deployment in UbiOps with two versions. We can compare the average request time by sending both versions the same list of 100 images (one image per request).
import urllib.request
import zipfile
#Get dummy data from our online bucket
bucket_name = "ubiops"
file_path = "demo-helper-files/mnist_png.zip"
# Create the URL for the file
url = f"https://storage.googleapis.com/{bucket_name}/{file_path}"
urllib.request.urlretrieve(url, "mnist_png.zip")
with zipfile.ZipFile("mnist_png.zip", 'r') as zip_ref:
zip_ref.extractall('./')
pattern = "mnist_png/testing/*/*.png" # (or "*.*")
filenames = random.choices(glob.glob(pattern),k=100)
print(filenames)
Now we create one large batch of request data that we can send to both deployment versions in one go.
batch_request_data = []
for image_file in tqdm(filenames):
# First upload the image
file_uri = ubiops.utils.upload_file(client, PROJECT_NAME, image_file)
# Make a request using the file URI as input.
data = {'image': file_uri}
batch_request_data.append(data)
requests_onnx = api.batch_deployment_version_requests_create(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="onnx",
data=batch_request_data
)
requests_onnx_ids = [request_onnx.id for request_onnx in requests_onnx]
requests_tf = api.batch_deployment_version_requests_create(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="tf",
data=batch_request_data
)
requests_tf_ids = [request_tf.id for request_tf in requests_tf]
Then we wait until all requests are finished.
import time
while True:
requests_onnx = api.deployment_version_requests_batch_get(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="onnx",
data=requests_onnx_ids
)
requests_tf = api.deployment_version_requests_batch_get(
project_name=PROJECT_NAME,
deployment_name=DEPLOYMENT_NAME,
version="tf",
data=requests_tf_ids
)
# Calculate the percentage of completed requests
onnx_completed_pct = sum(req.status == "completed" for req in requests_onnx) / len(requests_onnx) * 100 if requests_onnx else 0
tf_completed_pct = sum(req.status == "completed" for req in requests_tf) / len(requests_tf) * 100 if requests_tf else 0
print(f"ONNX Completed Percentage: {onnx_completed_pct:.2f}%")
print(f"TensorFlow Completed Percentage: {tf_completed_pct:.2f}%")
if onnx_completed_pct == 100 and tf_completed_pct == 100:
break
time.sleep(1)
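Optionally, you can inspect the output of a single finished request, for example the first ONNX request. This snippet assumes the request ids collected from the batch creation above:
example_request = api.deployment_version_requests_get(
    project_name=PROJECT_NAME,
    deployment_name=DEPLOYMENT_NAME,
    version="onnx",
    request_id=requests_onnx_ids[0]
)
# The output fields ('prediction' and 'probability') should be available in the result attribute
print(example_request.result)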
Comparing the results¶
Now that the requests are finished, we can look at the results. You can do that either by looking at the 'Metrics' tab of the UbiOps WebApp or by running the following piece of code.
Note that it can take up to two minutes before metrics become available through our API, so it might be required to sleep a bit longer:
time.sleep(60)
#First get the version ids so that we can filter the relevant metrics
tf_version_id = api.deployment_versions_get(PROJECT_NAME,DEPLOYMENT_NAME, "tf").id
onnx_version_id = api.deployment_versions_get(PROJECT_NAME,DEPLOYMENT_NAME, "onnx").id
print(f"Tensorflow deployment version id: {tf_version_id}")
print(f"ONNX deployment version id: {onnx_version_id}")
tf_time_series = api.time_series_data_list(
project_name=PROJECT_NAME,
metric = "deployments.request_duration",
start_date=str((datetime.today()- timedelta(days=1)).isoformat()),
end_date=str(datetime.today().isoformat()),
aggregation_period = 60*60*24, # seconds/day
labels = f"deployment_version_id:{tf_version_id}"
)
print(f"Average Tensorflow request duration: {tf_time_series.data_points[-1].value}s ")
onnx_time_series = api.time_series_data_list(
project_name=PROJECT_NAME,
metric = "deployments.request_duration",
start_date=str((datetime.today()- timedelta(days=1)).isoformat()),
end_date=str(datetime.today().isoformat()),
aggregation_period = 60*60*24, # seconds/day
labels = f"deployment_version_id:{onnx_version_id}"
)
print(f"Average ONNX request duration :{onnx_time_series.data_points[-1].value}s")
Cleaning up¶
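If you also want to remove the test deployment from your project, you can delete it before closing the client. This step is optional and assumes you no longer need the deployment:
api.deployments_delete(project_name=PROJECT_NAME, deployment_name=DEPLOYMENT_NAME)
Finally, close the connection to the UbiOps API.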
api.client_close()