# The server URL specifies the endpoint of your server running the ResNet
# model with the name "resnet" and using the predict interface.
SERVER_URL = 'http://localhost:8501/v1/models/resnet:predict'

# ... (setup elided here; the code below relies on it having imported
# `requests` and defined `predict_request`, the body that gets POSTed)

# Send a few actual requests and time the average latency.
total_time = 0
num_requests = 10
# `range` instead of `xrange`: `xrange` exists only on Python 2 and raises
# NameError on Python 3, while `range` works on both for a loop this small.
for _ in range(num_requests):
    response = requests.post(SERVER_URL, data=predict_request)
    # Abort on an HTTP error status rather than timing a failed request.
    response.raise_for_status()
    # Accumulate the per-request elapsed time, in seconds.
    total_time += response.elapsed.total_seconds()
    # Keep the first entry of the 'predictions' list; after the loop this
    # holds the value from the last response.
    prediction = response.json()['predictions'][0]

# Report the predicted class from the last response and the mean latency,
# converted from seconds to milliseconds.
print('Prediction class: {}, avg latency: {} ms'.format(
    prediction['classes'], (total_time*1000)/num_requests))