Predicting an object with a pretrained model is not working

Discussion:

(too old to reply)

marc nicole

2024-07-30 18:18:42 UTC

Hello all,

I want to detect an object by giving an image as input, and have my model
predict its label. I have trained a model using TensorFlow on an annotated
database in which the target object to predict was added to the classes of
the pretrained model. The code I am using is the following, where I pass the
image of the target object as input and want to get the prediction as output:

class MultiObjectDetection():
    """Wrap a YoloTinyNet and post-process its raw grid predictions.

    The network output is decoded into per-class bounding boxes by
    ``process_predicts`` and de-duplicated by ``non_max_suppress``.
    """

    def __init__(self, classes_name):
        """Build the network for the given ordered list of class names.

        Args:
            classes_name: sequence of class labels; the position of a label
                is the class index used when decoding predictions.
        """
        self._classes_name = classes_name
        self._num_classes = len(classes_name)

        self._common_params = {'image_size': 448,
                               'num_classes': self._num_classes,
                               'batch_size': 1}
        self._net_params = {'cell_size': 7, 'boxes_per_cell': 2,
                            'weight_decay': 0.0005}
        self._net = YoloTinyNet(self._common_params, self._net_params,
                                test=True)

    def predict_object(self, image):
        """Return the network's inference output for ``image``."""
        predicts = self._net.inference(image)
        return predicts

    def process_predicts(self, resized_img, predicts, thresh=0.2):
        """
        Decode the raw predictions for one image into per-class boxes.

        For every grid cell the (box, class) pair with the highest score
        ``box_confidence * class_score`` is selected; it is reported only if
        that score is strictly greater than ``thresh``.

        Args:
            resized_img: the resized source image; only ``shape[0]`` (rows)
                and ``shape[1]`` (columns) are read, to clip the boxes.
            predicts: model output indexed as ``[0, row, col, channel]``.
                Channels are read as: ``num_classes`` class scores, then
                ``boxes_per_cell`` box confidences, then 4 values per box
                used as (xcenter offset, ycenter offset, width, height).
            thresh: minimum score for a box to be reported.
        Return:
            predicts_dict: {"stick": [[x1, y1, x2, y2, score], [...]]}.
            No check is made that the decoded width/height are positive.
        """
        cls_num = self._num_classes
        bbx_per_cell = self._net_params["boxes_per_cell"]
        cell_size = self._net_params["cell_size"]
        img_size = self._common_params["image_size"]
        # Rows are the vertical extent, columns the horizontal one.
        img_h, img_w = resized_img.shape[0], resized_img.shape[1]

        p_classes = np.reshape(predicts[0, :, :, 0:cls_num],
                               (cell_size, cell_size, 1, cls_num))
        # One confidence per bounding box of each cell.
        C = np.reshape(predicts[0, :, :, cls_num:cls_num + bbx_per_cell],
                       (cell_size, cell_size, bbx_per_cell, 1))
        # (row, col, box, [xcenter, ycenter, w, h]); reshaped once, up front.
        coordinate = np.reshape(predicts[0, :, :, cls_num + bbx_per_cell:],
                                (cell_size, cell_size, bbx_per_cell, 4))

        # Score of every class for every box:
        # shape (cell_size, cell_size, boxes_per_cell, num_classes).
        P = C * p_classes
        cell_pixels = 1.0 * img_size / cell_size

        predicts_dict = {}
        for i in range(cell_size):
            for j in range(cell_size):
                # Search only this cell's scores. Taking the argmax over a
                # zero-padded full grid would select a *different* cell
                # whenever all of this cell's scores are <= 0.
                cell_scores = P[i, j]
                box_idx, class_num = np.unravel_index(
                    np.argmax(cell_scores), cell_scores.shape)
                score = cell_scores[box_idx, class_num]
                if not score > thresh:
                    continue

                xcenter, ycenter, w, h = coordinate[i, j, box_idx]

                # Offsets are relative to the cell; sizes to the whole image.
                xcenter = (j + xcenter) * cell_pixels
                ycenter = (i + ycenter) * cell_pixels
                w = w * img_size
                h = h * img_size

                # Clip each edge independently so a box that sticks out of
                # the image is cropped rather than shifted.
                xmin = max(0.0, xcenter - w / 2.0)
                ymin = max(0.0, ycenter - h / 2.0)
                xmax = min(img_w, xcenter + w / 2.0)
                ymax = min(img_h, ycenter + h / 2.0)

                class_name = self._classes_name[class_num]
                predicts_dict.setdefault(class_name, []).append(
                    [int(xmin), int(ymin), int(xmax), int(ymax),
                     float(score)])

        return predicts_dict

    def non_max_suppress(self, predicts_dict, threshold=0.5):
        """
        Apply non-maximum suppression to the predicted bounding boxes.

        Boxes are processed per class in descending score order; a box is
        dropped when its IoU with an already kept box of the same class is
        greater than ``threshold``. The input dict is updated in place.

        Args:
            predicts_dict: {"stick": [[x1, y1, x2, y2, score], [...]]}.
            threshold: IoU threshold.
        Return:
            predicts_dict processed by non-maximum suppression (the same
            dict object; every value is a list of lists of floats).
        """
        # Snapshot the items because values are reassigned while looping.
        for object_name, bbox in list(predicts_dict.items()):
            # ``np.float`` was removed in NumPy 1.24; use an explicit dtype.
            bbox_array = np.array(bbox, dtype=np.float64)
            if bbox_array.size == 0:
                # Nothing to suppress; column slicing below would fail.
                predicts_dict[object_name] = []
                continue

            x1, y1, x2, y2, scores = (bbox_array[:, 0], bbox_array[:, 1],
                                      bbox_array[:, 2], bbox_array[:, 3],
                                      bbox_array[:, 4])
            # Inclusive pixel convention: a box from 0 to 10 spans 11 pixels.
            areas = (x2 - x1 + 1) * (y2 - y1 + 1)
            order = scores.argsort()[::-1]
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                rest = order[1:]
                xx1 = np.maximum(x1[i], x1[rest])
                yy1 = np.maximum(y1[i], y1[rest])
                xx2 = np.minimum(x2[i], x2[rest])
                yy2 = np.minimum(y2[i], y2[rest])
                inter = (np.maximum(0.0, xx2 - xx1 + 1)
                         * np.maximum(0.0, yy2 - yy1 + 1))
                iou = inter / (areas[i] + areas[rest] - inter)
                # ``iou`` is indexed relative to ``rest``, hence the +1.
                indexs = np.where(iou <= threshold)[0]
                order = order[indexs + 1]
            predicts_dict[object_name] = bbox_array[keep].tolist()
        return predicts_dict

# Driver script: restore the trained detector, run it on one image and
# print the detections.

# Ordered class labels; the position of a label is its class index.
class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle",
               "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person",
               "pottedplant", "sheep", "sofa", "train", "tvmonitor",
               "small_ball"]
# NOTE(review): the "-0" suffix looks like a checkpoint written at global
# step 0, i.e. before any training step ran -- confirm that this is the
# trained checkpoint; scores above 1 and boxes far outside the image are
# what one would expect from untrained weights.
modelFile = ('models\\train\\model.ckpt-0')
track_object = "small_ball"
print("object detection and tracking...")

# The constructor takes only the class names.
multiObjectDetect = MultiObjectDetection(class_names)
image = tf.placeholder(tf.float32, (1, 448, 448, 3))
object_predicts = multiObjectDetect.predict_object(image)

sess = tf.Session()
saver = tf.train.Saver(multiObjectDetect._net.trainable_collection)

saver.restore(sess, modelFile)

# The input is a single still image, so inference runs once rather than in
# an endless loop.
src_img = cv2.imread("./weirdobject.jpg")
if src_img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("./weirdobject.jpg")
resized_img = cv2.resize(src_img, (448, 448))

# BGR -> RGB, then scale pixel values from [0, 255] to [-1, 1].
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
np_img = np_img.astype(np.float32)
np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))

np_predict = sess.run(object_predicts, feed_dict={image: np_img})
predicts_dict = multiObjectDetect.process_predicts(resized_img, np_predict)
predicts_dict = multiObjectDetect.non_max_suppress(predicts_dict)

print ("predict dict = ", predicts_dict)
# The detector reports every class that clears the threshold; pick out the
# class of interest explicitly.
print (track_object, "detections = ", predicts_dict.get(track_object, []))

The problem with this code is that the predicts_dict returns:

predict dict = {'sheep': [[233.0, 92.0, 448.0, -103.0,
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]], 'horse': [[374.0, 33.0,
282.0, 448.0, 5.277851581573486], [135.0, 688.0, -33.0, -14.0,
3.5144259929656982], [1.0, 117.0, 112.0, -138.0, 2.656987190246582]],
'bicycle': [[461.0, 781.0, 154.0, -381.0, 5.918102741241455], [70.0,
344.0, 391.0, -138.0, 3.031444787979126], [378.0, 497.0, 46.0, 149.0,
2.7629122734069824], [541.0, 583.0, 69.0, 307.0, 2.7170517444610596],
[323.0, 22.0, 336.0, 448.0, 1.608760952949524]], 'bottle': [[390.0,
218.0, -199.0, 448.0, 4.582971096038818], [0.0, 0.0, 448.0, -410.0,
0.9097045063972473]], 'sofa': [[346.0, 102.0, 323.0, -38.0,
2.371835947036743]], 'dog': [[319.0, 254.0, -282.0, 373.0,
4.022889137268066]], 'cat': [[63.0, -195.0, 365.0, -92.0,
3.5134828090667725]], 'person': [[22.0, -122.0, 154.0, 448.0,
3.927537441253662], [350.0, 155.0, -36.0, -445.0, 2.679833173751831],
[119.0, 416.0, -43.0, 292.0, 0.9529445171356201], [251.0, 445.0,
225.0, 188.0, 0.9001350402832031]], 'train': [[329.0, 485.0, -24.0,
-235.0, 2.7050414085388184], [483.0, 362.0, 237.0, -86.0,
2.555817127227783], [13.0, 365.0, 373.0, 448.0, 0.6229299902915955]],
'small_ball': [[217.0, 737.0, 448.0, -315.0, 1.739920973777771],
[117.0, 283.0, 153.0, 122.0, 1.5690066814422607]], 'boat': [[164.0,
805.0, 34.0, -169.0, 4.972668170928955], [0.0, 0.0, 397.0, 69.0,
2.353729486465454], [302.0, 605.0, 15.0, -22.0, 2.0259625911712646]],
'aeroplane': [[470.0, 616.0, -305.0, -37.0, 3.431873321533203], [0.0,
0.0, 448.0, -72.0, 2.836672306060791]], 'bus': [[0.0, 0.0, -101.0,
-280.0, 1.2078320980072021]], 'pottedplant': [[620.0, -268.0, -124.0,
418.0, 2.158564805984497], [0.0, 0.0, 448.0, -779.0,
1.6623022556304932]], 'tvmonitor': [[0.0, 0.0, 448.0, 85.0,
3.238999128341675], [240.0, 772.0, 200.0, 91.0, 1.7443398237228394],
[546.0, 155.0, 448.0, 448.0, 1.1334525346755981], [107.0, 441.0,
432.0, 219.0, 0.5971617698669434]], 'chair': [[470.0, -187.0, 106.0,
235.0, 3.8548083305358887], [524.0, 740.0, -103.0, 99.0,
3.636549234390259], [0.0, 0.0, 275.0, -325.0, 3.0997846126556396],
[711.0, -231.0, -146.0, 392.0, 2.205275535583496]], 'diningtable':
[[138.0, -310.0, 111.0, 448.0, 4.660728931427002], [317.0, -66.0,
313.0, 6.0, 4.535496234893799], [0.0, 0.0, -41.0, 175.0,
1.8571208715438843], [21.0, -92.0, 76.0, 172.0, 1.2035608291625977],
[0.0, 0.0, 448.0, -250.0, 1.00322687625885]], 'car': [[312.0, 232.0,
132.0, 309.0, 3.205225706100464], [514.0, -76.0, 218.0, 448.0,
1.4289973974227905], [0.0, 0.0, 448.0, 142.0, 0.7124998569488525]]}

While I expect the dict to contain only the small_ball key.

How is that possible? Where is the prediction output? How can I fix the code?

Thomas Passin

2024-07-30 19:25:39 UTC

Permalink

Post by marc nicole
Hello all,
I want to predict an object by given as input an image and want to have my
model be able to predict the label. I have trained a model using tensorflow
based on annotated database where the target object to predict was added to
the pretrained model. the code I am using is the following where I set the
self._classes_name = classes_name
self._num_classes = len(classes_name)
self._num_classes,
'batch_size':1}
self._net_params = {'cell_size': 7, 'boxes_per_cell':2,
'weight_decay': 0.0005}
self._net = YoloTinyNet(self._common_params, self._net_params,
test=True)
predicts = self._net.inference(image)
return predicts
"""
process the predicts of object detection with one image input.
resized_img: resized source image.
predicts: output of the model.
thresh: thresh of bounding box confidence.
predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
"""
cls_num = self._num_classes
bbx_per_cell = self._net_params["boxes_per_cell"]
cell_size = self._net_params["cell_size"]
img_size = self._common_params["image_size"]
p_classes = predicts[0, :, :, 0:cls_num]
C = predicts[0, :, :, cls_num:cls_num+bbx_per_cell] # two
bounding boxes in one cell.
coordinate = predicts[0, :, :, cls_num+bbx_per_cell:] # all
bounding boxes position.
p_classes = np.reshape(p_classes, (cell_size, cell_size, 1, cls_num))
C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))
P = C * p_classes # confidencefor all classes of all bounding
boxes (cell_size, cell_size, bounding_box_num, class_num) = (7, 7, 2,
1).
predicts_dict = {}
temp_data = np.zeros_like(P, np.float32)
temp_data[i, j, :, :] = P[i, j, :, :]
position = np.argmax(temp_data) # refer to the class
num (with maximum confidence) for every bounding box.
index = np.unravel_index(position, P.shape)
class_num = index[-1]
coordinate = np.reshape(coordinate, (cell_size,
cell_size, bbx_per_cell, 4)) # (cell_size, cell_size,
bbox_num_per_cell, coordinate)[xmin, ymin, xmax, ymax]
max_coordinate = coordinate[index[0], index[1], index[2], :]
xcenter = max_coordinate[0]
ycenter = max_coordinate[1]
w = max_coordinate[2]
h = max_coordinate[3]
xcenter = (index[1] + xcenter) * (1.0*img_size /cell_size)
ycenter = (index[0] + ycenter) * (1.0*img_size /cell_size)
w = w * img_size
h = h * img_size
xmin = 0 if (xcenter - w/2.0 < 0) else (xcenter - w/2.0)
ymin = 0 if (xcenter - w/2.0 < 0) else (ycenter - h/2.0)
xmax = resized_img.shape[0] if (xmin + w) >
resized_img.shape[0] else (xmin + w)
ymax = resized_img.shape[1] if (ymin + h) >
resized_img.shape[1] else (ymin + h)
class_name = self._classes_name[class_num]
predicts_dict.setdefault(class_name, [])
predicts_dict[class_name].append([int(xmin),
int(ymin), int(xmax), int(ymax), P[index]])
return predicts_dict
"""
implement non-maximum supression on predict bounding boxes.
predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
threshhold: iou threshold
predicts_dict processed by non-maximum suppression
"""
bbox_array = np.array(bbox, dtype=np.float)
x1, y1, x2, y2, scores = bbox_array[:,0], bbox_array[:,1],
bbox_array[:,2], bbox_array[:,3], bbox_array[:,4]
areas = (x2-x1+1) * (y2-y1+1)
order = scores.argsort()[::-1]
keep = []
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
inter = np.maximum(0.0, xx2-xx1+1) * np.maximum(0.0, yy2-yy1+1)
iou = inter/(areas[i]+areas[order[1:]]-inter)
indexs = np.where(iou<=threshold)[0]
order = order[indexs+1]
bbox = bbox_array[keep]
predicts_dict[object_name] = bbox.tolist()
predicts_dict = predicts_dict
return predicts_dict
class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor",
"small_ball"]
modelFile = ('models\\train\\model.ckpt-0')
track_object = "small_ball"print("object detection and tracking...")
multiObjectDetect = MultiObjectDetection(IP, class_names)
image = tf.placeholder(tf.float32, (1, 448, 448, 3))
object_predicts = multiObjectDetect.predict_object(image)
sess = tf.Session()
saver = tf.train.Saver(multiObjectDetect._net.trainable_collection)
saver.restore(sess, modelFile)
src_img = cv2.imread("./weirdobject.jpg")
resized_img = cv2.resize(src_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
np_img = np_img.astype(np.float32)
np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))
np_predict = sess.run(object_predicts, feed_dict={image: np_img})
predicts_dict = multiObjectDetect.process_predicts(resized_img, np_predict)
predicts_dict = multiObjectDetect.non_max_suppress(predicts_dict)
print ("predict dict = ", predicts_dict)
predict dict = {'sheep': [[233.0, 92.0, 448.0, -103.0,
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]], 'horse': [[374.0, 33.0,
282.0, 448.0, 5.277851581573486], [135.0, 688.0, -33.0, -14.0,
3.5144259929656982], [1.0, 117.0, 112.0, -138.0, 2.656987190246582]],
'bicycle': [[461.0, 781.0, 154.0, -381.0, 5.918102741241455], [70.0,
344.0, 391.0, -138.0, 3.031444787979126], [378.0, 497.0, 46.0, 149.0,
2.7629122734069824], [541.0, 583.0, 69.0, 307.0, 2.7170517444610596],
[323.0, 22.0, 336.0, 448.0, 1.608760952949524]], 'bottle': [[390.0,
218.0, -199.0, 448.0, 4.582971096038818], [0.0, 0.0, 448.0, -410.0,
0.9097045063972473]], 'sofa': [[346.0, 102.0, 323.0, -38.0,
2.371835947036743]], 'dog': [[319.0, 254.0, -282.0, 373.0,
4.022889137268066]], 'cat': [[63.0, -195.0, 365.0, -92.0,
3.5134828090667725]], 'person': [[22.0, -122.0, 154.0, 448.0,
3.927537441253662], [350.0, 155.0, -36.0, -445.0, 2.679833173751831],
[119.0, 416.0, -43.0, 292.0, 0.9529445171356201], [251.0, 445.0,
225.0, 188.0, 0.9001350402832031]], 'train': [[329.0, 485.0, -24.0,
-235.0, 2.7050414085388184], [483.0, 362.0, 237.0, -86.0,
2.555817127227783], [13.0, 365.0, 373.0, 448.0, 0.6229299902915955]],
'small_ball': [[217.0, 737.0, 448.0, -315.0, 1.739920973777771],
[117.0, 283.0, 153.0, 122.0, 1.5690066814422607]], 'boat': [[164.0,
805.0, 34.0, -169.0, 4.972668170928955], [0.0, 0.0, 397.0, 69.0,
2.353729486465454], [302.0, 605.0, 15.0, -22.0, 2.0259625911712646]],
'aeroplane': [[470.0, 616.0, -305.0, -37.0, 3.431873321533203], [0.0,
0.0, 448.0, -72.0, 2.836672306060791]], 'bus': [[0.0, 0.0, -101.0,
-280.0, 1.2078320980072021]], 'pottedplant': [[620.0, -268.0, -124.0,
418.0, 2.158564805984497], [0.0, 0.0, 448.0, -779.0,
1.6623022556304932]], 'tvmonitor': [[0.0, 0.0, 448.0, 85.0,
3.238999128341675], [240.0, 772.0, 200.0, 91.0, 1.7443398237228394],
[546.0, 155.0, 448.0, 448.0, 1.1334525346755981], [107.0, 441.0,
432.0, 219.0, 0.5971617698669434]], 'chair': [[470.0, -187.0, 106.0,
235.0, 3.8548083305358887], [524.0, 740.0, -103.0, 99.0,
3.636549234390259], [0.0, 0.0, 275.0, -325.0, 3.0997846126556396],
[[138.0, -310.0, 111.0, 448.0, 4.660728931427002], [317.0, -66.0,
313.0, 6.0, 4.535496234893799], [0.0, 0.0, -41.0, 175.0,
1.8571208715438843], [21.0, -92.0, 76.0, 172.0, 1.2035608291625977],
[0.0, 0.0, 448.0, -250.0, 1.00322687625885]], 'car': [[312.0, 232.0,
132.0, 309.0, 3.205225706100464], [514.0, -76.0, 218.0, 448.0,
1.4289973974227905], [0.0, 0.0, 448.0, 142.0, 0.7124998569488525]]}
WHile I expect only the dict to contain the small_ball key
How's that is possible? where's the prediction output?How to fix the code?

Without trying to figure out all that code, why would you expect only
results for a single key? An ML system is going to compute
probabilities and parameters for all objects it knows about (presumably
subject to some threshold).

marc nicole

2024-07-30 20:49:21 UTC

Permalink

OK, but how's the probability of small_ball greater than others? I can't
find it anyway, what's its value?

Le mar. 30 juil. 2024 à 21:37, Thomas Passin via Python-list <

Post by marc nicole

Post by marc nicole
Hello all,
I want to predict an object by given as input an image and want to have

Post by marc nicole
model be able to predict the label. I have trained a model using

tensorflow

Post by marc nicole
based on annotated database where the target object to predict was added

Post by marc nicole
the pretrained model. the code I am using is the following where I set

the

Post by marc nicole
self._classes_name = classes_name
self._num_classes = len(classes_name)
self._num_classes,
'batch_size':1}
self._net_params = {'cell_size': 7, 'boxes_per_cell':2,
'weight_decay': 0.0005}
self._net = YoloTinyNet(self._common_params, self._net_params,
test=True)
predicts = self._net.inference(image)
return predicts
"""
process the predicts of object detection with one image input.
resized_img: resized source image.
predicts: output of the model.
thresh: thresh of bounding box confidence.
predicts_dict: {"stick": [[x1, y1, x2, y2, scores1],

[...]]}.

Post by marc nicole
"""
cls_num = self._num_classes
bbx_per_cell = self._net_params["boxes_per_cell"]
cell_size = self._net_params["cell_size"]
img_size = self._common_params["image_size"]
p_classes = predicts[0, :, :, 0:cls_num]
C = predicts[0, :, :, cls_num:cls_num+bbx_per_cell] # two
bounding boxes in one cell.
coordinate = predicts[0, :, :, cls_num+bbx_per_cell:] # all
bounding boxes position.
p_classes = np.reshape(p_classes, (cell_size, cell_size, 1,

cls_num))

Post by marc nicole
C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))
P = C * p_classes # confidencefor all classes of all bounding
boxes (cell_size, cell_size, bounding_box_num, class_num) = (7, 7, 2,
1).
predicts_dict = {}
temp_data = np.zeros_like(P, np.float32)
temp_data[i, j, :, :] = P[i, j, :, :]
position = np.argmax(temp_data) # refer to the class
num (with maximum confidence) for every bounding box.
index = np.unravel_index(position, P.shape)
class_num = index[-1]
coordinate = np.reshape(coordinate, (cell_size,
cell_size, bbx_per_cell, 4)) # (cell_size, cell_size,
bbox_num_per_cell, coordinate)[xmin, ymin, xmax, ymax]
max_coordinate = coordinate[index[0], index[1],

index[2], :]

Post by marc nicole
xcenter = max_coordinate[0]
ycenter = max_coordinate[1]
w = max_coordinate[2]
h = max_coordinate[3]
xcenter = (index[1] + xcenter) * (1.0*img_size

/cell_size)

Post by marc nicole
ycenter = (index[0] + ycenter) * (1.0*img_size

/cell_size)

Post by marc nicole
w = w * img_size
h = h * img_size
xmin = 0 if (xcenter - w/2.0 < 0) else (xcenter -

w/2.0)

Post by marc nicole
ymin = 0 if (xcenter - w/2.0 < 0) else (ycenter -

h/2.0)

Post by marc nicole
xmax = resized_img.shape[0] if (xmin + w) >
resized_img.shape[0] else (xmin + w)
ymax = resized_img.shape[1] if (ymin + h) >
resized_img.shape[1] else (ymin + h)
class_name = self._classes_name[class_num]
predicts_dict.setdefault(class_name, [])
predicts_dict[class_name].append([int(xmin),
int(ymin), int(xmax), int(ymax), P[index]])
return predicts_dict
"""
implement non-maximum supression on predict bounding boxes.
predicts_dict: {"stick": [[x1, y1, x2, y2, scores1],

[...]]}.

Post by marc nicole
threshhold: iou threshold
predicts_dict processed by non-maximum suppression
"""
bbox_array = np.array(bbox, dtype=np.float)
x1, y1, x2, y2, scores = bbox_array[:,0], bbox_array[:,1],
bbox_array[:,2], bbox_array[:,3], bbox_array[:,4]
areas = (x2-x1+1) * (y2-y1+1)
order = scores.argsort()[::-1]
keep = []
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
inter = np.maximum(0.0, xx2-xx1+1) * np.maximum(0.0,

yy2-yy1+1)

Post by marc nicole
iou = inter/(areas[i]+areas[order[1:]]-inter)
indexs = np.where(iou<=threshold)[0]
order = order[indexs+1]
bbox = bbox_array[keep]
predicts_dict[object_name] = bbox.tolist()
predicts_dict = predicts_dict
return predicts_dict
class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor",
"small_ball"]
modelFile = ('models\\train\\model.ckpt-0')
track_object = "small_ball"print("object detection and tracking...")
multiObjectDetect = MultiObjectDetection(IP, class_names)
image = tf.placeholder(tf.float32, (1, 448, 448, 3))
object_predicts = multiObjectDetect.predict_object(image)
sess = tf.Session()
saver = tf.train.Saver(multiObjectDetect._net.trainable_collection)
saver.restore(sess, modelFile)
src_img = cv2.imread("./weirdobject.jpg")
resized_img = cv2.resize(src_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
np_img = np_img.astype(np.float32)
np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))
np_predict = sess.run(object_predicts, feed_dict={image: np_img})
predicts_dict = multiObjectDetect.process_predicts(resized_img,

np_predict)

Post by marc nicole
predicts_dict = multiObjectDetect.non_max_suppress(predicts_dict)
print ("predict dict = ", predicts_dict)
predict dict = {'sheep': [[233.0, 92.0, 448.0, -103.0,
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]], 'horse': [[374.0, 33.0,
282.0, 448.0, 5.277851581573486], [135.0, 688.0, -33.0, -14.0,
3.5144259929656982], [1.0, 117.0, 112.0, -138.0, 2.656987190246582]],
'bicycle': [[461.0, 781.0, 154.0, -381.0, 5.918102741241455], [70.0,
344.0, 391.0, -138.0, 3.031444787979126], [378.0, 497.0, 46.0, 149.0,
2.7629122734069824], [541.0, 583.0, 69.0, 307.0, 2.7170517444610596],
[323.0, 22.0, 336.0, 448.0, 1.608760952949524]], 'bottle': [[390.0,
218.0, -199.0, 448.0, 4.582971096038818], [0.0, 0.0, 448.0, -410.0,
0.9097045063972473]], 'sofa': [[346.0, 102.0, 323.0, -38.0,
2.371835947036743]], 'dog': [[319.0, 254.0, -282.0, 373.0,
4.022889137268066]], 'cat': [[63.0, -195.0, 365.0, -92.0,
3.5134828090667725]], 'person': [[22.0, -122.0, 154.0, 448.0,
3.927537441253662], [350.0, 155.0, -36.0, -445.0, 2.679833173751831],
[119.0, 416.0, -43.0, 292.0, 0.9529445171356201], [251.0, 445.0,
225.0, 188.0, 0.9001350402832031]], 'train': [[329.0, 485.0, -24.0,
-235.0, 2.7050414085388184], [483.0, 362.0, 237.0, -86.0,
2.555817127227783], [13.0, 365.0, 373.0, 448.0, 0.6229299902915955]],
'small_ball': [[217.0, 737.0, 448.0, -315.0, 1.739920973777771],
[117.0, 283.0, 153.0, 122.0, 1.5690066814422607]], 'boat': [[164.0,
805.0, 34.0, -169.0, 4.972668170928955], [0.0, 0.0, 397.0, 69.0,
2.353729486465454], [302.0, 605.0, 15.0, -22.0, 2.0259625911712646]],
'aeroplane': [[470.0, 616.0, -305.0, -37.0, 3.431873321533203], [0.0,
0.0, 448.0, -72.0, 2.836672306060791]], 'bus': [[0.0, 0.0, -101.0,
-280.0, 1.2078320980072021]], 'pottedplant': [[620.0, -268.0, -124.0,
418.0, 2.158564805984497], [0.0, 0.0, 448.0, -779.0,
1.6623022556304932]], 'tvmonitor': [[0.0, 0.0, 448.0, 85.0,
3.238999128341675], [240.0, 772.0, 200.0, 91.0, 1.7443398237228394],
[546.0, 155.0, 448.0, 448.0, 1.1334525346755981], [107.0, 441.0,
432.0, 219.0, 0.5971617698669434]], 'chair': [[470.0, -187.0, 106.0,
235.0, 3.8548083305358887], [524.0, 740.0, -103.0, 99.0,
3.636549234390259], [0.0, 0.0, 275.0, -325.0, 3.0997846126556396],
[[138.0, -310.0, 111.0, 448.0, 4.660728931427002], [317.0, -66.0,
313.0, 6.0, 4.535496234893799], [0.0, 0.0, -41.0, 175.0,
1.8571208715438843], [21.0, -92.0, 76.0, 172.0, 1.2035608291625977],
[0.0, 0.0, 448.0, -250.0, 1.00322687625885]], 'car': [[312.0, 232.0,
132.0, 309.0, 3.205225706100464], [514.0, -76.0, 218.0, 448.0,
1.4289973974227905], [0.0, 0.0, 448.0, 142.0, 0.7124998569488525]]}
WHile I expect only the dict to contain the small_ball key
How's that is possible? where's the prediction output?How to fix the

code?
Without trying to figure out all that code, why would you expect only
results for a single key? An ML system is going to compute
probabilities and parameters for all objects it knows about (presumably
subject to some threshold).
--
https://mail.python.org/mailman/listinfo/python-list

Thomas Passin

2024-07-30 21:45:20 UTC

Permalink

Post by marc nicole
OK, but how's the probability of small_ball greater than others? I can't
find it anyway, what's its value?

It's your code. I wouldn't know. I suppose it's represented somewhere in
all those parameters. You need to understand what those function calls
are returning. It's documented somewhere, right?

And you really do need to know the probabilities of the competing images
because otherwise you won't know how confident you can be that the
identification is a strong one.

Post by marc nicole
Le mar. 30 juil. 2024 à 21:37, Thomas Passin via Python-list

Post by marc nicole
Hello all,
I want to predict an object by given as input an image and want

to have my

Post by marc nicole
model be able to predict the label. I have trained a model using

tensorflow

Post by marc nicole
based on annotated database where the target object to predict

was added to

Post by marc nicole
the pretrained model. the code I am using is the following where

I set the

self._net_params,

Post by marc nicole
test=True)
predicts = self._net.inference(image)
return predicts
"""
process the predicts of object detection with one image

input.

Post by marc nicole
resized_img: resized source image.
predicts: output of the model.
thresh: thresh of bounding box confidence.
predicts_dict: {"stick": [[x1, y1, x2, y2, scores1],

[...]]}.

1, cls_num))

Post by marc nicole
C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))
P = C * p_classes # confidencefor all classes of all

bounding

Post by marc nicole
boxes (cell_size, cell_size, bounding_box_num, class_num) = (7, 7, 2,
1).
predicts_dict = {}
temp_data = np.zeros_like(P, np.float32)
temp_data[i, j, :, :] = P[i, j, :, :]
position = np.argmax(temp_data) # refer to the class
num (with maximum confidence) for every bounding box.
index = np.unravel_index(position, P.shape)
class_num = index[-1]
coordinate = np.reshape(coordinate, (cell_size,
cell_size, bbx_per_cell, 4)) # (cell_size, cell_size,
bbox_num_per_cell, coordinate)[xmin, ymin, xmax, ymax]
max_coordinate = coordinate[index[0],

index[1], index[2], :]

Post by marc nicole
xcenter = max_coordinate[0]
ycenter = max_coordinate[1]
w = max_coordinate[2]
h = max_coordinate[3]
xcenter = (index[1] + xcenter) *

(1.0*img_size /cell_size)

Post by marc nicole
ycenter = (index[0] + ycenter) *

(1.0*img_size /cell_size)

Post by marc nicole
w = w * img_size
h = h * img_size
xmin = 0 if (xcenter - w/2.0 < 0) else

(xcenter - w/2.0)

Post by marc nicole
ymin = 0 if (xcenter - w/2.0 < 0) else

(ycenter - h/2.0)

[...]]}.

Post by marc nicole
threshhold: iou threshold
predicts_dict processed by non-maximum suppression
"""
bbox_array = np.array(bbox, dtype=np.float)
x1, y1, x2, y2, scores = bbox_array[:,0],

bbox_array[:,1],

Post by marc nicole
bbox_array[:,2], bbox_array[:,3], bbox_array[:,4]
areas = (x2-x1+1) * (y2-y1+1)
order = scores.argsort()[::-1]
keep = []
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
inter = np.maximum(0.0, xx2-xx1+1) *

np.maximum(0.0, yy2-yy1+1)

np_img})

Post by marc nicole
predicts_dict =

multiObjectDetect.process_predicts(resized_img, np_predict)

Post by marc nicole
predicts_dict =

multiObjectDetect.non_max_suppress(predicts_dict)

Post by marc nicole
print ("predict dict = ", predicts_dict)
predict dict = {'sheep': [[233.0, 92.0, 448.0, -103.0,
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]], 'horse': [[374.0, 33.0,
282.0, 448.0, 5.277851581573486], [135.0, 688.0, -33.0, -14.0,
3.5144259929656982], [1.0, 117.0, 112.0, -138.0, 2.656987190246582]],
'bicycle': [[461.0, 781.0, 154.0, -381.0, 5.918102741241455], [70.0,
344.0, 391.0, -138.0, 3.031444787979126], [378.0, 497.0, 46.0, 149.0,
2.7629122734069824], [541.0, 583.0, 69.0, 307.0, 2.7170517444610596],
[323.0, 22.0, 336.0, 448.0, 1.608760952949524]], 'bottle': [[390.0,
218.0, -199.0, 448.0, 4.582971096038818], [0.0, 0.0, 448.0, -410.0,
0.9097045063972473]], 'sofa': [[346.0, 102.0, 323.0, -38.0,
2.371835947036743]], 'dog': [[319.0, 254.0, -282.0, 373.0,
4.022889137268066]], 'cat': [[63.0, -195.0, 365.0, -92.0,
3.5134828090667725]], 'person': [[22.0, -122.0, 154.0, 448.0,
3.927537441253662], [350.0, 155.0, -36.0, -445.0, 2.679833173751831],
[119.0, 416.0, -43.0, 292.0, 0.9529445171356201], [251.0, 445.0,
225.0, 188.0, 0.9001350402832031]], 'train': [[329.0, 485.0, -24.0,
-235.0, 2.7050414085388184], [483.0, 362.0, 237.0, -86.0,
2.555817127227783], [13.0, 365.0, 373.0, 448.0, 0.6229299902915955]],
'small_ball': [[217.0, 737.0, 448.0, -315.0, 1.739920973777771],
[117.0, 283.0, 153.0, 122.0, 1.5690066814422607]], 'boat': [[164.0,
805.0, 34.0, -169.0, 4.972668170928955], [0.0, 0.0, 397.0, 69.0,
2.353729486465454], [302.0, 605.0, 15.0, -22.0, 2.0259625911712646]],
'aeroplane': [[470.0, 616.0, -305.0, -37.0, 3.431873321533203], [0.0,
0.0, 448.0, -72.0, 2.836672306060791]], 'bus': [[0.0, 0.0, -101.0,
-280.0, 1.2078320980072021]], 'pottedplant': [[620.0, -268.0, -124.0,
418.0, 2.158564805984497], [0.0, 0.0, 448.0, -779.0,
1.6623022556304932]], 'tvmonitor': [[0.0, 0.0, 448.0, 85.0,
3.238999128341675], [240.0, 772.0, 200.0, 91.0, 1.7443398237228394],
[546.0, 155.0, 448.0, 448.0, 1.1334525346755981], [107.0, 441.0,
432.0, 219.0, 0.5971617698669434]], 'chair': [[470.0, -187.0, 106.0,
235.0, 3.8548083305358887], [524.0, 740.0, -103.0, 99.0,
3.636549234390259], [0.0, 0.0, 275.0, -325.0, 3.0997846126556396],
[[138.0, -310.0, 111.0, 448.0, 4.660728931427002], [317.0, -66.0,
313.0, 6.0, 4.535496234893799], [0.0, 0.0, -41.0, 175.0,
1.8571208715438843], [21.0, -92.0, 76.0, 172.0, 1.2035608291625977],
[0.0, 0.0, 448.0, -250.0, 1.00322687625885]], 'car': [[312.0, 232.0,
132.0, 309.0, 3.205225706100464], [514.0, -76.0, 218.0, 448.0,
1.4289973974227905], [0.0, 0.0, 448.0, 142.0, 0.7124998569488525]]}
WHile I expect only the dict to contain the small_ball key
How's that is possible? where's the prediction output?How to fix

the code?
Without trying to figure out all that code, why would you expect only
results for a single key? An ML system is going to compute
probabilities and parameters for all objects it knows about (presumably
subject to some threshold).
--
https://mail.python.org/mailman/listinfo/python-list
<https://mail.python.org/mailman/listinfo/python-list>

2024-07-30 22:16:29 UTC

Permalink

...

Post by marc nicole
WHile I expect only the dict to contain the small_ball key
How's that is possible? where's the prediction output?How to fix the code?

To save us lots of reading and study to be able to help you, please advise:

1 what are the meanings of all these numbers?

Post by marc nicole
'sheep': [[233.0, 92.0, 448.0, -103.0,

Post by marc nicole
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]]

2 (assuming it hasn't) why the dict has not been sorted into a
meaningful order

3 how can one tell that the image is more likely to be a sheep than a train?

--
Regards,
=dn

marc nicole

2024-07-31 10:27:07 UTC

Permalink

I suppose the meaning of those numbers comes from this line:
predicts_dict[class_name].append([int(xmin), int(ymin), int(xmax), int(ymax),
P[index]]), as well as from the YOLO inference call. But I was expecting zeros
for all classes except small_ball, because the image only shows that object,
so a train or a sheep should not have any target position or any probability
whatsoever in the image weirdobject.jpg.

Post by marc nicole

Post by marc nicole
Hello all,
I want to predict an object by given as input an image and want to have

Post by marc nicole
model be able to predict the label. I have trained a model using

tensorflow

Post by marc nicole
based on annotated database where the target object to predict was added

Post by marc nicole
the pretrained model. the code I am using is the following where I set

the
...

Post by marc nicole
WHile I expect only the dict to contain the small_ball key
How's that is possible? where's the prediction output?How to fix the

code?
1 what are the meanings of all these numbers?

Post by marc nicole
'sheep': [[233.0, 92.0, 448.0, -103.0,

Post by marc nicole
5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
[0.0, 0.0, 448.0, 431.0, 3.393721580505371]]

2 (assuming it hasn't) why the dict has not been sorted into a
meaningful order
3 how can one tell that the image is more likely to be a sheep than a
train?
--
Regards,
=dn
--
https://mail.python.org/mailman/listinfo/python-list

Grant Edwards

2024-07-31 13:58:12 UTC

Permalink

Post by marc nicole
I suppose the meaning of those numbers comes from this line
predicts_dict[class_name].append([int(xmin), int(ymin), int(xmax),
int(ymax), P[index]]) as well as the yolo inference call. But i was
expecting zeros for all classes except smallball.

That's not how machine learning and object recognition works.

Post by marc nicole
Because the image only shows that,

You know that. The machine doesn't.

Post by marc nicole
and that a train and a sheep wont have any target position or any
probability whatsoever in the image weirdobject.jpg

That depends on the training data and how the model works.

You should probably do some reading on neural networks, machine
learning, and pattern/object recognition. You appear to be trying to
use tools without understanding what they do or how they work.

--
Grant

marc nicole

2024-07-31 19:59:58 UTC

Permalink

Your invitation to read up on machine learning is not helping. If you want to
enlighten us on this specific case, please do; otherwise, please spare me such
comments, which I already know.

On Wed, 31 Jul 2024, 16:00 Grant Edwards via Python-list, <

Post by Grant Edwards

That's not how machine learning and object recognition works.

Post by marc nicole
Because the image only shows that,

You know that. The machine doesn't.

Post by marc nicole
and that a train and a sheep wont have any target position or any
probability whatsoever in the image weirdobject.jpg

That depends on the training data and how the model works.
You should probably do some reading on neural networks, machine
learning, and pattern/object recognition. You appear to be trying to
use tools without understanding what they do or how they work.
--
Grant
--
https://mail.python.org/mailman/listinfo/python-list