Issue
I am getting a runtime error saying that the inputs and the weights must be on the same device. I made sure that my model and my input are on the same device, yet I cannot get rid of the error. From what I have read, this means my input data is not on the GPU. Since the image is the input in this case, I tried
img = torch.from_numpy(img).to(device)
and
pred = model(img)[0].to(device)
but no luck. Please let me know what can be done.
Here's the code:
source = '0'
webcam = source == '0'
image_size = 640
imgsz = check_img_size(image_size)

# Load the model
filepath = 'weights/mask.pt'
# device = torch.device('cpu')
device = select_device()
# half = device.type != 'cpu'
model = attempt_load(filepath, map_location=device)
model.to(device).eval()
# if half:
#     model.half()

# Second-stage classifier
classify = False
if classify:
    modelc = torch_utils.load_classifier(name='resnet101', n=2)
    modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['modelc'])
    modelc.to(device).eval()

vid_path, vid_writer = None, None
if webcam:
    view_img = True
    cudnn.benchmark = True
    dataset = LoadStreams(source, img_size=imgsz)

names = model.module.names if hasattr(model, 'module') else model.names
print(names)

def process_image(image):
    h, w = image.shape[:2]
    desired_size = 416
    ratio = desired_size / w
    print("Ratio", ratio)
    img = cv2.resize(image, (0, 0), fx=ratio, fy=ratio)
    h, w = img.shape[:2]
    img = cv2.copyMakeBorder(img, int((416 - h) / 2), int((416 - h) / 2), 0, 0, cv2.BORDER_CONSTANT)  # pad to 416x416
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    return img

def classify(image):
    # device = torch.device("cpu")
    # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # im = Image.fromarray(image)
    img = process_image(image)
    print('Image processed')
    # img = image.unsqueeze_(0)
    # img = image.float()
    pred = model(img)[0]
    # Apply NMS
    pred = non_max_suppression(pred, 0.4, 0.5, classes=[0, 1, 2], agnostic=None)
    if classify:
        pred = apply_classifier(pred, modelc, img, im0s)
    print("1 ", pred)
    model.eval()
    model.cpu()
    classification = torch.cat(pred)[:, -1]
    if len(classification) == 0:
        return None
    index = int(classification[0])
    print(names[index])
    return names[index]

def detect(frame):
    source = '0'
    webcam = source == '0'
    image_size = 640
    imgsz = check_img_size(image_size)
    # Load model
    file_path = 'weights/yolov5s.pt'
    # device = torch.device('cpu')
    device = select_device()
    # half = device.type != 'cpu'
    model = attempt_load(file_path, map_location=device)
    model.to(device).eval()
    # if half:
    #     model.half()
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[75, 125, 2]]
    img = process_image(frame)
    pred = model(img)[0]
    pred = non_max_suppression(pred, 0.4, 0.5, classes=[0], agnostic=None)
    if classify:
        pred = apply_classifier(pred, modelc, img, im0s)
    gn = torch.tensor(frame.shape)[[1, 0, 1, 0]]
    for i, det in enumerate(pred):
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
        for *xyxy, conf, cls in reversed(det):
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            label = '%s %.2f' % (names[int(cls)], conf)
            if label is not None:
                if (label.split())[0] == 'person':
                    plot_one_box(xyxy, frame, label=label, color=colors[0], line_thickness=1)  # utils.general
Here's the main code:
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' thresholds
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        print('Loading feature extraction model')
        modeldir = './models/'
        facenet.load_model(modeldir)

        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        classifier_filename = './myclassifier/my_classifier.pkl'
        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)
            print('load classifier file-> %s' % type(class_names))

        HumanNames = class_names
        video_capture = cv2.VideoCapture(0)
        c = 0
        print('Start!')
        prevTime = 0
        while True:
            ret, frame = video_capture.read()
            # frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
            curTime = time.time()  # calc fps
            timeF = frame_interval
            if (c % timeF == 0):
                find_results = []
                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                # print('Bounding Boxes: ', bounding_boxes, 'Shape: ', bounding_boxes.shape, 'nrof_faces:: ', nrof_faces)
                # print('Detected_FaceNum: %d' % nrof_faces)
                if nrof_faces > 0:
                    detect(frame)
                    label = classify(frame)
                    if label == "a":
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            # print("Embeddings:::::")
                            # print(emb_array)
                            # print("Embeddings:::::")
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is out of range!')
                                continue
                            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            cropped[0] = facenet.flip(cropped[0], False)
                            scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                            scaled[0] = cv2.resize(scaled[0], (input_image_size, input_image_size),
                                                   interpolation=cv2.INTER_CUBIC)
                            scaled[0] = facenet.prewhiten(scaled[0])
                            scaled_reshape.append(scaled[0].reshape(-1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
(Edit) Error:
Traceback (most recent call last):
  File "realtime.py", line 105, in <module>
    label = classify(frame)
  File "yolov5-master\myutils.py", line 117, in classify
    pred = model(img)[0]
  File "C:\Users\Anuj\anaconda3\envs\py36\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "yolov5-master\models\yolo.py", line 122, in forward
    return self.forward_once(x, profile)  # single-scale inference, train
  File "yolov5-master\models\yolo.py", line 138, in forward_once
    x = m(x)  # run
  File "C:\Users\Anuj\anaconda3\envs\py36\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "yolov5-master\models\common.py", line 94, in forward
    return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
  File "C:\Users\Anuj\anaconda3\envs\py36\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "yolov5-master\models\common.py", line 38, in fuseforward
    return self.act(self.conv(x))
  File "C:\Users\Anuj\anaconda3\envs\py36\lib\site-packages\torch\nn\modules\conv.py", line 419, in forward
    return self._conv_forward(input, self.weight)
  File "C:\Users\Anuj\anaconda3\envs\py36\lib\site-packages\torch\nn\modules\conv.py", line 416, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
Solution
You need to send the input tensor to your device, not the result of the forward pass:
pred = model(img.to(device))[0]
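To confirm a mismatch like this before the forward pass, you can compare the input's device with the model's. A minimal sketch, reusing the model and img names from the code above:

# A module's device can be read off any of its parameters.
param_device = next(model.parameters()).device
print(img.device, param_device)  # these must match, e.g. both cuda:0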
As a side note, evaluating x.to(device) as a bare expression has no effect on the location of the tensor: Tensor.to returns a new tensor and leaves x where it was. Instead, reassign the tensor with x = x.to(device):
>>> x = torch.ones(1)
>>> x.to(device)
tensor([1.], device='cuda:0')
>>> x.is_cuda
False
This is not true for nn.Module: module parameters are moved in place, so calling model.to(device) will suffice.
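A minimal sketch of that in-place behaviour, reusing the device from the snippet above and assuming a CUDA device is available:

>>> import torch.nn as nn
>>> model = nn.Linear(2, 2)               # parameters start on the CPU
>>> model.to(device)                      # moves the parameters in place
Linear(in_features=2, out_features=2, bias=True)
>>> next(model.parameters()).is_cuda      # True without reassigning model
True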
Edit - in classify you are sending the model back to the CPU after you call it on img. Since you're calling classify in a loop, the first forward pass will work, while the following calls won't.
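In other words, a minimal fix is to drop the model.cpu() call so the weights stay on the same device as the input. A sketch of classify with that change, reusing process_image, model, non_max_suppression, and names from the question's code:

def classify(image):
    img = process_image(image)  # process_image already moves the tensor to `device`
    pred = model(img)[0]
    pred = non_max_suppression(pred, 0.4, 0.5, classes=[0, 1, 2], agnostic=None)
    # No model.cpu() here: moving the weights off the GPU would make the
    # next call fail, since the input tensor would still be on the GPU.
    classification = torch.cat(pred)[:, -1]
    if len(classification) == 0:
        return None
    return names[int(classification[0])]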
Answered By - Ivan