This email is created by automation to help kernel developers deal with
a large volume of bug reports by decoding oopses into more actionable
information.
Decoded Backtrace
1. __debug_object_init -- crash site (lib/debugobjects.c:632)
The WARN fires inside debug_print_object (inlined into
__debug_object_init). The object at 0xffff888077a60f78 (a timer_list) is
in the ACTIVE state when debug_object_init is called on it a second time.
611 static void debug_print_object(struct debug_obj *obj, char *msg)
612 {
613 const struct debug_obj_descr *descr = obj->descr;
614 static int limit;
622 if (!debug_objects_enabled)
623 return;
625 if (limit < 5 && descr != descr_test) {
626 void *hint = descr->debug_hint ?
627 descr->debug_hint(obj->object) : NULL;
628 limit++;
-> 629 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
630 "object: %p object type: %s hint: %pS\n",
631 msg, obj_states[obj->state], obj->astate,
632 obj->object, descr->name, hint);
633 }
634 debug_objects_warnings++;
635 }
747 static void
748 __debug_object_init(void *addr, const struct debug_obj_descr *descr,
749 int onstack)
750 {
751 struct debug_obj *obj, o;
752 struct debug_bucket *db;
753 unsigned long flags;
755 debug_objects_fill_pool();
757 db = get_bucket((unsigned long) addr);
759 raw_spin_lock_irqsave(&db->lock, flags);
761 obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
762 if (unlikely(!obj)) {
763 raw_spin_unlock_irqrestore(&db->lock, flags);
764 debug_objects_oom();
765 return;
766 }
768 switch (obj->state) {
769 case ODEBUG_STATE_NONE:
770 case ODEBUG_STATE_INIT:
771 case ODEBUG_STATE_INACTIVE:
772 obj->state = ODEBUG_STATE_INIT;
773 raw_spin_unlock_irqrestore(&db->lock, flags);
774 return;
775 default:
776 break;
777 }
779 o = *obj;
780 raw_spin_unlock_irqrestore(&db->lock, flags);
-> 781 debug_print_object(&o, "init");
783 if (o.state == ODEBUG_STATE_ACTIVE)
784 debug_object_fixup(descr->fixup_init, addr, o.state);
785 }
2. timer_init_key -- kernel/time/timer.c:880
786 static inline void debug_timer_init(struct timer_list *timer)
787 {
-> 788 debug_object_init(timer, &timer_debug_descr);
789 }
834 static inline void debug_init(struct timer_list *timer)
835 {
-> 836 debug_timer_init(timer);
837 trace_timer_init(timer);
838 }
876 void timer_init_key(struct timer_list *timer,
877 void (*func)(struct timer_list *), unsigned int flags,
878 const char *name, struct lock_class_key *key)
879 {
-> 880 debug_init(timer);
881 do_init_timer(timer, func, flags, name, key);
882 }
3. lane_ioctl / lecd_attach / lec_arp_init (net/atm/lec.c:1037)
1264 static void lec_arp_init(struct lec_priv *priv)
1265 {
1266 unsigned short i;
1268 for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
1269 INIT_HLIST_HEAD(&priv->lec_arp_tables[i]);
1270 INIT_HLIST_HEAD(&priv->lec_arp_empty_ones);
1271 INIT_HLIST_HEAD(&priv->lec_no_forward);
1272 INIT_HLIST_HEAD(&priv->mcast_fwds);
1273 spin_lock_init(&priv->lec_arp_lock);
->1274 INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire);
1275 schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL);
1276 }
748 static int lecd_attach(struct atm_vcc *vcc, int arg)
749 {
750 int i;
751 struct lec_priv *priv;
753 lockdep_assert_held(&lec_mutex);
754 if (arg < 0)
755 arg = 0;
756 if (arg >= MAX_LEC_ITF)
757 return -EINVAL;
758 i = array_index_nospec(arg, MAX_LEC_ITF);
759 if (!dev_lec[i]) {
763 dev_lec[i] = alloc_etherdev(size);
775 priv = netdev_priv(dev_lec[i]);
776 } else {
777 priv = netdev_priv(dev_lec[i]);
778 if (rcu_access_pointer(priv->lecd))
779 return -EADDRINUSE;
780 }
->781 lec_arp_init(priv); // called unconditionally for both new and
// existing priv -- no work cancellation
1018 static int lane_ioctl(struct socket *sock, unsigned int cmd,
1019 unsigned long arg)
1020 {
1034 mutex_lock(&lec_mutex);
1035 switch (cmd) {
1036 case ATMLEC_CTRL:
->1037 err = lecd_attach(vcc, (int)arg);
1038 if (err >= 0)
1039 sock->state = SS_CONNECTED;
1040 break;
1049 mutex_unlock(&lec_mutex);
1050 return err;
1051 }
4. do_vcc_ioctl (net/atm/ioctl.c:159)
153 error = -ENOIOCTLCMD;
155 mutex_lock(&ioctl_mutex);
156 list_for_each(pos, &ioctl_list) {
157 struct atm_ioctl *ic = list_entry(pos, struct atm_ioctl, list);
158 if (try_module_get(ic->owner)) {
->159 error = ic->ioctl(sock, cmd, arg); // dispatches to lane_ioctl
160 module_put(ic->owner);
161 if (error != -ENOIOCTLCMD)
162 break;
163 }
164 }
165 mutex_unlock(&ioctl_mutex);
Tentative Analysis
The ODEBUG WARNING fires when INIT_DELAYED_WORK() is called on a
timer_list (lec_priv.lec_arp_work) that is already in the ACTIVE state.
lec_arp_init() always calls INIT_DELAYED_WORK(&priv->lec_arp_work, ...)
followed by schedule_delayed_work(). lecd_attach() calls lec_arp_init()
unconditionally -- both for a brand-new device and for an existing one
in the else-branch. The only guard for the existing-device path is that
priv->lecd is NULL (no daemon currently attached).
The race is opened by lec_atm_close(), the ATM VCC close handler:
Thread A (lec_atm_close): Thread B (lecd_attach via lane_ioctl):
rcu_assign_pointer(lecd, NULL)
synchronize_rcu() mutex_lock(&lec_mutex)
[window open] sees priv->lecd == NULL -- passes guard
lec_arp_init(priv)
INIT_DELAYED_WORK on active timer
--> ODEBUG WARN
lec_arp_destroy(priv) [too late: work already re-initialized]
lec_atm_close() clears priv->lecd to NULL *before* calling
lec_arp_destroy() (which contains cancel_delayed_work_sync). Because
lec_atm_close() does not hold lec_mutex, Thread B can observe
priv->lecd == NULL while lec_arp_work is still active, pass the guard
in lecd_attach(), and call lec_arp_init() on a live timer.
The lec_mutex protecting dev_lec[] was introduced by commit d13a3824bfd2
("net: atm: add lec_mutex"), which serialised lecd_attach() and friends
but did not update lec_atm_close() to also acquire the mutex. The
unconditional lec_arp_init() call for existing devices predates that
commit and has always been present.
Potential Solution
Add cancel_delayed_work_sync(&priv->lec_arp_work) in the else-branch of
lecd_attach(), immediately before the call to lec_arp_init(). This
ensures any in-flight work is drained before the timer is re-initialized,
regardless of whether lec_atm_close() has already cancelled it.
cancel_delayed_work_sync() is safe to call from a lec_mutex-held context
because lec_arp_check_expire() only acquires priv->lec_arp_lock (a
spinlock) and never tries to take lec_mutex.
} else {
priv = netdev_priv(dev_lec[i]);
if (rcu_access_pointer(priv->lecd))
return -EADDRINUSE;
+ cancel_delayed_work_sync(&priv->lec_arp_work);
}
lec_arp_init(priv);
More information
Oops-Analysis:
http://oops.fenrus.org/reports/lkml/69f16c26.170a022...@google.com/
Assisted-by: GitHub-Copilot linux-kernel-oops.