Issue with BoT-SORT during test

106 views
Skip to first unread message

Kate R

unread,
May 18, 2025, 5:12:05 AMMay 18
to VOT Challenge technical support
Good afternoon, as part of my research, I'm trying to evaluate yolo + BoT-SORT using vot-toolkit on some videos from the VOT-RTB2022 dataset, but I get this:
Знімок екрана з 2025-05-18 10-58-22.png. Plese, help me with the issue.
My code: 
import numpy as np

import vot
import sys
import cv2

from ultralytics import YOLO


def iou(box_a, box_b):
    # Compute intersection-over-union for two boxes [x1, y1, x2, y2]
    x_a = max(box_a[0], box_b[0])
    y_a = max(box_a[1], box_b[1])
    x_b = min(box_a[2], box_b[2])
    y_b = min(box_a[3], box_b[3])
    interArea = max(0, x_b - x_a) * max(0, y_b - y_a)
    boxAArea = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    boxBArea = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    IoU = interArea / float(boxAArea + boxBArea - interArea)
    return IoU

def get_best_match(selection_box, boxes):
    # selection_box: [x1, y1, x2, y2]
    # boxes: iterable of box objects with .xyxy attribute
    max_iou = 0
    best_id = None
    best_box = None
    for box in boxes:
        box_xyxy = box.xyxy.cpu().numpy().tolist()[0]
        iou_score = iou(selection_box, box_xyxy)
        if iou_score > max_iou:
            max_iou = iou_score
            best_id = int(box.id.item()) if box.id is not None else None
            best_box = box
    if max_iou == 0 and len(boxes) > 0:
        # Fallback: pick closest box by center
        sel_cx = (selection_box[0] + selection_box[2]) / 2
        sel_cy = (selection_box[1] + selection_box[3]) / 2
        min_dist = float('inf')
        for box in boxes:
            box_xyxy = box.xyxy.cpu().numpy().tolist()[0]
            bx = (box_xyxy[0] + box_xyxy[2]) / 2
            by = (box_xyxy[1] + box_xyxy[3]) / 2
            dist = (sel_cx - bx) ** 2 + (sel_cy - by) ** 2
            if dist < min_dist:
                min_dist = dist
                best_id = int(box.id.item()) if box.id is not None else None
                best_box = box
    return best_id, best_box



detector = YOLO('/home/kitykus/jupyter_projects/MyProjects/baka/yolo11n.pt')

handle = vot.VOT("rectangle")
selection = handle.region()
selection_box = [selection.x, selection.y, selection.x + selection.width, selection.y + selection.height]

max_iou = 0
target_id = None
score = 0
x1, y1, x2, y2 = 0, 0, 0, 0

results = detector.track(handle.frame(), tracker='botsort.yaml', stream=True, persist=True)

for result in results:
    boxes = result.boxes
    try:
        if target_id is None:
            for box in boxes:
                target_id, _ = get_best_match(selection_box, boxes)
            if target_id is None:
                print("Target ID not found -_-")
                handle.report(vot.Rectangle(0, 0, 0, 0), 0.0)

        found = False
        for box in boxes:
            curr_id = int(box.id.item()) if box.id is not None else None
            if curr_id == target_id:
                box_xyxy = box.xyxy.cpu().numpy().tolist()[0]
                x1, y1, x2, y2 = map(int, box_xyxy)
                score = float(box.conf.item())
                found = True
                break

        if found:
            handle.report(vot.Rectangle(x1, y1, x2 - x1, y2 - y1), score)
        else:
            handle.report(vot.Rectangle(0, 0, 0, 0), 0.0)

    except Exception as e:
        # Always respond, even if something goes wrong
        handle.report(vot.Rectangle(0, 0, 0, 0), 0.0)
        print(f"Error: {e}", file=sys.stderr)

alan.l...@gmail.com

unread,
Jun 9, 2025, 2:11:00 AMJun 9
to VOT Challenge technical support
Hi, 

I think that you have a problem with your tracker integration code. 
In principle the vot.frame() and vot.report() should be called within a loop - for every frame() call (except initialization) you should give a report() call. 
Here is a quick proof-of-concept Python example which demonstrates how this should be done:

tracker = create_tracker()  # Here you should create your tracker instance

# initialize vot environment and communication object (handle)
handle = vot.VOT("rectangle", multiobject=False)

# use handle to get init frame path
imagefile = handle.frame()
if not imagefile:
    sys.exit(0)
# load init frame (use PIL Image or opencv)
image = Image.open(imagefile)

# use handle to get init region
region = handle.region()
# convert vot region to bbox list
init_bbox = [region.x, region.y, region.width, region.height]

# initialize your tracker
tracker.initialize(image, init_bbox)

# main tracking loop
while True:
# obtain frame from handle
    imagefile = handle.frame()
    if not imagefile:
        break
    image = Image.open(imagefile)

# run your tracker on this image
    pred_bbox = tracker.track(image)
   
# report predicted bbox
# Note that predicted region should be reported before the next .frame() call
    handle.report(vot.Rectangle(pred_bbox[0], pred_bbox[1], pred_bbox[2], pred_bbox[3]))

nedelja, 18. maj 2025 ob 11:12:05 UTC+2 je oseba Kate R napisala:

Kate R

unread,
Jun 9, 2025, 11:36:31 AMJun 9
to VOT Challenge technical support
Hi, 

Thank you for the explanation and the example! I get the frame() and report() loop idea. I've adjusted my code to align with the frame-by-frame processing expected.
Now test goes from start to the end. 

Best Regards

понеділок, 9 червня 2025 р. о 08:11:00 UTC+2 alan.l...@gmail.com пише:
Reply all
Reply to author
Forward
0 new messages