"""Benchmark ResNet-101 inference latency: eager PyTorch vs. a torch2trt engine.

For each (backend, input shape) case the script runs 1000 timed forward
passes on the GPU, prints every per-call latency in milliseconds, and then
prints the average for that case.

Measurement notes:
* Timed calls run under ``torch.no_grad()`` so autograd bookkeeping is not
  part of the measured latency.
* ``time.perf_counter()`` is used because it is a monotonic, high-resolution
  clock intended for measuring short intervals.
* The GPU is synchronized before the first timed call so that work queued
  earlier (e.g. warm-up calls) is not charged to the first sample, and after
  every call so that the measured interval covers the kernels' execution.
"""
import time
from typing import Callable, List, Sequence

import torch
import torch2trt
import torchvision
from ptflops import get_model_complexity_info  # noqa: F401  (not used below; kept in case other code relies on it)

DEVICE = "cuda:0"
TIMED_ITERATIONS = 1000
TRT_MAX_BATCH_SIZE = 6
TRT_WORKSPACE_BYTES = int(4e9)


def _random_batch(shape: Sequence[int]) -> torch.Tensor:
    """Return a float32 tensor of random normal values with ``shape`` on the GPU."""
    return torch.randn(*shape, device=DEVICE, dtype=torch.float32)


def _to_tensorrt(model: torch.nn.Module, batch: torch.Tensor):
    """Convert ``model`` with torch2trt (FP16 disabled), using ``batch`` as the example input."""
    return torch2trt.torch2trt(
        model,
        inputs=[batch],
        fp16_mode=False,
        max_batch_size=TRT_MAX_BATCH_SIZE,
        max_workspace_size=TRT_WORKSPACE_BYTES,
    )


def _time_inference(
    run: Callable[[torch.Tensor], object],
    batch: torch.Tensor,
    *,
    iterations: int = TIMED_ITERATIONS,
    warmup: int = 0,
) -> List[float]:
    """Call ``run(batch)`` repeatedly and return each call's latency in milliseconds.

    Args:
        run: The callable under test (the eager model or the converted module).
        batch: Input tensor passed to every call.
        iterations: Number of timed calls; each latency is also printed.
        warmup: Number of untimed calls issued before timing starts.

    Returns:
        One latency value (ms) per timed call, in call order.
    """
    latencies_ms = []
    with torch.no_grad():
        for _ in range(warmup):
            run(batch)
        # Drain anything still queued on the GPU so it is not billed to the
        # first timed sample.
        torch.cuda.synchronize()
        for _ in range(iterations):
            start = time.perf_counter()
            run(batch)
            # CUDA launches are asynchronous; wait for completion before
            # reading the clock.
            torch.cuda.synchronize()
            elapsed_ms = 1000 * (time.perf_counter() - start)
            latencies_ms.append(elapsed_ms)
            print("{}".format(elapsed_ms))
    return latencies_ms


def _mean(samples: Sequence[float], skip: int = 0) -> float:
    """Return the arithmetic mean of ``samples`` after dropping the first ``skip`` values."""
    kept = samples[skip:]
    return sum(kept) / len(kept)


def _run_case(
    model: torch.nn.Module,
    backend: str,
    shape: Sequence[int],
    *,
    warmup: int = 0,
    skip: int = 0,
) -> float:
    """Benchmark one (backend, shape) case, print its summary line, and return the average.

    Args:
        model: The eager model; it is converted first when ``backend`` is ``"trt"``.
        backend: ``"trt"`` to time a torch2trt conversion of ``model``,
            ``"torch"`` to time ``model`` directly.
        shape: Input tensor shape.
        warmup: Untimed calls issued before measuring.
        skip: Number of leading timed samples excluded from the average.

    Returns:
        The average latency in milliseconds over the retained samples.
    """
    batch = _random_batch(shape)
    run = _to_tensorrt(model, batch) if backend == "trt" else model
    average_ms = _mean(_time_inference(run, batch, warmup=warmup), skip)
    # The label is derived from the actual shape so it cannot drift from the
    # tensor being measured.
    label = " ".join(str(dim) for dim in shape)
    print("resnet_{} with {} {}".format(backend, label, average_ms))
    return average_ms


def main() -> None:
    """Run every benchmark case in the script's original order."""
    model = torchvision.models.resnet101(pretrained=True).cuda()
    model.eval()

    # NOTE(review): the warm-up / discard settings below are carried over
    # unchanged from the original script and differ between cases: only the
    # first case drops its first 500 samples and only the second issues
    # warm-up calls. Averages of the remaining cases therefore include any
    # first-call start-up cost -- confirm whether that asymmetry is intended.
    _run_case(model, "trt", (6, 3, 928, 1600), skip=500)
    _run_case(model, "torch", (1, 3, 224, 224), warmup=1000)
    _run_case(model, "torch", (6, 3, 224, 224))
    _run_case(model, "torch", (1, 3, 928, 1600))
    _run_case(model, "torch", (6, 3, 928, 1600))
    _run_case(model, "trt", (1, 3, 224, 224))
    _run_case(model, "trt", (6, 3, 224, 224))
    _run_case(model, "trt", (1, 3, 928, 1600))


if __name__ == "__main__":
    main()