ONNX models on CPU and GPU¶
Download notebook View source code
# --- Notebook configuration ------------------------------------------------
# NOTE(review): the API token appears to require the literal 'Token ' prefix
# (it is passed unchanged as the Authorization header below) — confirm against
# the UbiOps authentication docs.
API_TOKEN = 'Token ' # Fill in your token here
PROJECT_NAME = ' ' # Fill in your project name here
# Name of the deployment created by the imported package (IMPORT_LINK below);
# it is expected to expose a "cpu" and a "gpu" version.
DEPLOYMENT_NAME = 'onnx-cpu-gpu'
# Prebuilt deployment package containing the ONNX model for both CPU and GPU.
IMPORT_LINK = "https://storage.googleapis.com/ubiops/deployment_exports/onnx-cpu-gpu-export.zip"
import glob
import random
import shutil
import time
import urllib.request
from datetime import datetime, timedelta, timezone

import ubiops
from tqdm import tqdm
# Build an authenticated UbiOps API client and verify that the token is
# valid by asking for the service status.  `client` and `api` are reused
# throughout the rest of this notebook.
api_config = ubiops.Configuration(host="https://api.ubiops.com/v2.1")
api_config.api_key['Authorization'] = API_TOKEN

client = ubiops.ApiClient(api_config)
api = ubiops.CoreApi(client)
api.service_status()
Getting the models on UbiOps¶
# Import the prebuilt deployment package into the project.  With
# skip_confirmation=True the import is applied immediately instead of
# waiting for a manual confirmation step.
import_response = api.imports_create(
    PROJECT_NAME,
    import_link=IMPORT_LINK,
    skip_confirmation=True,  # bool (optional)
)
print(import_response)
Benchmarking¶
# Download and unpack test images (Imagenette validation set).
urllib.request.urlretrieve("https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-320.tgz", "imagenette2-320.tgz")
shutil.unpack_archive("imagenette2-320.tgz", "./")

# Take a random selection of 100 *distinct* images.
# Bug fix: random.choices samples WITH replacement, so the benchmark could
# send the same image several times; random.sample draws without replacement.
# min() guards against a ValueError if fewer than 100 files match the pattern.
pattern = "imagenette2-320/val/*/*.JPEG" # (or "*.*")
all_images = glob.glob(pattern)
filenames = random.sample(all_images, k=min(100, len(all_images)))
print(len(filenames))
# Actual benchmarking.
# Poll every 5 seconds until every version of the deployment reports
# status 'available'.
ready = False
while not ready:  # See if deployments are ready
    time.sleep(5)
    response = api.deployment_versions_list(project_name=PROJECT_NAME,
                                            deployment_name=DEPLOYMENT_NAME)
    ready = all(d.status == 'available' for d in response)
    if not ready:
        # Bug fix: the original printed "Deployments are NOT ready" on every
        # iteration — including the final one, where all versions had just
        # become available — producing a contradictory "NOT ready"/"ready"
        # pair of lines.  Only report while we are actually still waiting.
        print("Deployments are NOT ready")
print("Deployments are ready")
print("Uploading test images and making requests")
# Upload every test image to the project's file storage and collect the
# resulting file URIs, so that all images can be sent to each deployment
# version in one big batch request.
data = []
for image_file in tqdm(filenames):
    # First upload the image
    file_uri = ubiops.utils.upload_file(client, PROJECT_NAME, image_file)
    # Make a request using the file uri as input.
    data.append({'image': file_uri})
    time.sleep(.05) # Let's not crash the api

# Send the identical batch to both versions (GPU first, then CPU) so their
# timings can be compared on exactly the same inputs.
for version_name in ("gpu", "cpu"):
    api.batch_deployment_version_requests_create(
        project_name=PROJECT_NAME,
        deployment_name=DEPLOYMENT_NAME,
        version=version_name,
        data=data,
    )
print("Done")
Now go to the UbiOps logging page and take a look at the logs of both deployments. You should see a number printed in the logs: the average time that a single inference takes. You can then compare it to the output of the code below, which shows the average request time. Note that the two values differ: the average request time also includes overhead such as downloading and uploading the images.
def _average_compute_time(version_name):
    """Return the latest daily average compute time (s) for one version.

    The CPU and GPU versions need the exact same metrics query, so the
    previously duplicated `deployment_versions_get` + `metrics_get` calls
    are folded into this helper.

    :param str version_name: deployment version to query ("cpu" or "gpu")
    :return: value of the most recent "compute" data point over the last day
    """
    version_id = api.deployment_versions_get(
        PROJECT_NAME, DEPLOYMENT_NAME, version_name).id
    # Bug fix: the timestamps end in 'Z' (declaring UTC), but
    # datetime.today() returns naive *local* time, which skewed the queried
    # window by the local UTC offset.  Use an aware UTC timestamp instead.
    now = datetime.now(timezone.utc)
    metrics = api.metrics_get(
        project_name=PROJECT_NAME,
        object_type="deployment_version",
        object_id=version_id,
        metric="compute",
        interval="day",
        start_date=(now - timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ'),
        end_date=now.strftime('%Y-%m-%dT%H:%M:%SZ'),
    )
    # The API returns data points oldest-first; report the most recent one.
    return metrics[-1].value


print("Average request time (s)")
print(f"CPU: {_average_compute_time('cpu')}")
print(f"GPU: {_average_compute_time('gpu')}")
Cleaning up¶
# Close the connection to the UbiOps API now that all requests are done,
# releasing the client's underlying HTTP resources.
client.close()