Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH] pci: pciehp update the slot bridge res to get big range for pcie devices

13 views
Skip to first unread message

Yinghai Lu

unread,
Oct 21, 2009, 3:20:21 AM10/21/09
to Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Kenji Kaneshige, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to call
assign resources several times, when there are several bridges under the slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the first bridge that doesn't get pre-allocated big enough res from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_is_enabled must be removed in pci_setup_bridge(), otherwise update res is not
updated to bridge BAR. and we are safe to do that, because only have one bus_size
and assigned resources are called.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++----
drivers/pci/setup-bus.c | 70 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 90 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;

dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,28 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
- pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
+ pci_bus_size_bridges(parent);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
pci_bus_add_devices(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
+ pci_configure_slot(dev);
+ pci_dev_put(dev);
+ }
+
return 0;
}

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"

+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
u32 l, bu, lu, io_upper16;
int pref_mem64;

- if (pci_is_enabled(bridge))
- return;
-
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);

@@ -550,6 +585,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Alex Chiang

unread,
Oct 21, 2009, 2:57:30 PM10/21/09
to Yinghai Lu, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Kenji Kaneshige, Ivan Kokshaysky, Bjorn Helgaas
* Yinghai Lu <yin...@kernel.org>:

> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
> u32 l, bu, lu, io_upper16;
> int pref_mem64;
>
> - if (pci_is_enabled(bridge))
> - return;
> -
> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> pci_domain_nr(bus), bus->number);

Have you tested this with the hotplug facilities I added a while
back?

Basically, what happens when you offline a bridge using

/sys/bus/pci/devices/.../remove

And then re-add it using:

/sys/bus/pci/rescan

Please try this both for bridges that are claimed by pciehp as
well as bridges that are not claimed.

Thanks.

/ac

Yinghai Lu

unread,
Oct 21, 2009, 8:30:09 PM10/21/09
to Alex Chiang, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Kenji Kaneshige, Ivan Kokshaysky, Bjorn Helgaas

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res


from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 30 ++++++++++++----
drivers/pci/setup-bus.c | 73 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 94 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;

dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {

@@ -96,12 +95,29 @@ int pciehp_configure_device(struct slot


(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
- pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
+ pci_bus_size_bridges(parent);

+ pci_clear_master(bridge);

@@ -137,14 +175,14 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge(struct pci_bus *bus, bool check_enabled)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;


u32 l, bu, lu, io_upper16;
int pref_mem64;

- if (pci_is_enabled(bridge))

+ if (check_enabled && pci_is_enabled(bridge))
return;



dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",

@@ -550,6 +588,35 @@ void __ref pci_bus_size_bridges(struct p


}
EXPORT_SYMBOL(pci_bus_size_bridges);

+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:

+ pci_setup_bridge(b, false);


+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;

@@ -566,7 +633,7 @@ void __ref pci_bus_assign_resources(cons

switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ pci_setup_bridge(b, true);
break;

case PCI_CLASS_BRIDGE_CARDBUS:


Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);

Kenji Kaneshige

unread,
Oct 26, 2009, 12:54:31 AM10/26/09
to Yinghai Lu, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu wrote:
> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
> and at last do them all together one time including slot bridge, to avoid to call
> assign resources several times, when there are several bridges under the slot bridge.
>
> need to introduce pci_bridge_assign_resources there.
>
> handle the case the first bridge that doesn't get pre-allocated big enough res from FW.
> for example pcie devices need 256M, but the bridge only get preallocated 2M...
>

Though I have not looked at the patch deeply yet (sorry), I have
some questions and concerns about this change. Please correct me
if my understanding is not correct.

- Your patch doesn't seems to have the code to free resources.
If we need to expand the resource range, don't we need to free
preallocated resource before allocating the new one?

- Your patch seems to update BARs for bridge itself. I think it
would break the bridge's driver (port service driver) that if
it controls the device's capability by using IO/Mem, though I
don't know if such driver or capabilities exists now.

And one comment about pci_configure_slot(). We need to program
hotplug parameters before the device start working. So we need
to call pci_configure_slot() before calling pci_bus_add_devices().

Thanks,
Kenji Kaneshige

> To unsubscribe from this list: send the line "unsubscribe linux-pci" in

Yinghai Lu

unread,
Oct 26, 2009, 1:50:42 AM10/26/09
to Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>> and at last do them all together one time including slot bridge, to
>> avoid to call
>> assign resources several times, when there are several bridges under
>> the slot bridge.
>>
>> need to introduce pci_bridge_assign_resources there.
>>
>> handle the case the first bridge that doesn't get pre-allocated big
>> enough res from FW.
>> for example pcie devices need 256M, but the bridge only get
>> preallocated 2M...
>>
>
> Though I have not looked at the patch deeply yet (sorry), I have
> some questions and concerns about this change. Please correct me
> if my understanding is not correct.
>
> - Your patch doesn't seems to have the code to free resources.
> If we need to expand the resource range, don't we need to free
> preallocated resource before allocating the new one?

that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem ==> find_free_bus_resource ==> release_resource.

>
> - Your patch seems to update BARs for bridge itself. I think it
> would break the bridge's driver (port service driver) that if
> it controls the device's capability by using IO/Mem, though I
> don't know if such driver or capabilities exists now.

port service driver will be AER and pciehotplug.
it seems those driver are not use those BAR...
those BAR are supposed for the devices under the pcie bridge.

>
> And one comment about pci_configure_slot(). We need to program
> hotplug parameters before the device start working. So we need
> to call pci_configure_slot() before calling pci_bus_add_devices().
>


Thanks, will correct that.

YH


YH

Kenji Kaneshige

unread,
Oct 26, 2009, 3:49:16 AM10/26/09
to Yinghai Lu, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>>> and at last do them all together one time including slot bridge, to
>>> avoid to call
>>> assign resources several times, when there are several bridges under
>>> the slot bridge.
>>>
>>> need to introduce pci_bridge_assign_resources there.
>>>
>>> handle the case the first bridge that doesn't get pre-allocated big
>>> enough res from FW.
>>> for example pcie devices need 256M, but the bridge only get
>>> preallocated 2M...
>>>
>> Though I have not looked at the patch deeply yet (sorry), I have
>> some questions and concerns about this change. Please correct me
>> if my understanding is not correct.
>>
>> - Your patch doesn't seems to have the code to free resources.
>> If we need to expand the resource range, don't we need to free
>> preallocated resource before allocating the new one?
>
> that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem ==> find_free_bus_resource ==> release_resource.
>

I didn't noticed that find_free_bus_resource() was changed to call
release_resource() recently...

By the way, does this (release_resource() by find_bus_resource())
change the resource assignment by BIOS also for bridges other than
the ports with hotplug slot (switch upstreamport, for example)?

>> - Your patch seems to update BARs for bridge itself. I think it
>> would break the bridge's driver (port service driver) that if
>> it controls the device's capability by using IO/Mem, though I
>> don't know if such driver or capabilities exists now.
>
> port service driver will be AER and pciehotplug.
> it seems those driver are not use those BAR...
> those BAR are supposed for the devices under the pcie bridge.
>

I understand that there are only two port service drivers (AER and
PCIe hotplug) and both doesn't use BAR. But I still have a concern
that changing bridge's BARs might cause problems in the future. In
my understanding, what you need is expanding IO/Mem base and limit
of root or switch downstream ports. If so, I think we should only
touch IO/Mem base/limit, and should not touch bridge's BARs.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Oct 26, 2009, 4:26:30 AM10/26/09
to Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

yes.

BIOS preallocate small range for the bridge, and leave the BAR for the device under that bridge uninitialized.

>
>>> - Your patch seems to update BARs for bridge itself. I think it
>>> would break the bridge's driver (port service driver) that if
>>> it controls the device's capability by using IO/Mem, though I
>>> don't know if such driver or capabilities exists now.
>>
>> port service driver will be AER and pciehotplug.
>> it seems those driver are not use those BAR...
>> those BAR are supposed for the devices under the pcie bridge.
>>
>
> I understand that there are only two port service drivers (AER and
> PCIe hotplug) and both doesn't use BAR. But I still have a concern
> that changing bridge's BARs might cause problems in the future. In
> my understanding, what you need is expanding IO/Mem base and limit
> of root or switch downstream ports. If so, I think we should only
> touch IO/Mem base/limit, and should not touch bridge's BARs.

those bridge BAR are for devices under that bridge. the port device is not supposed to use them.

if we don't touch the bridge's BAR, the hw will not forward the io for those devices under it.

YH

Yinghai Lu

unread,
Oct 26, 2009, 4:28:03 AM10/26/09
to Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res


from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise


update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.

-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 94 insertions(+), 9 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;

dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {

@@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot


(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}

+ pci_dev_put(dev);
+ }
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);


+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);

+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }

pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);

@@ -137,14 +175,14 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge(struct pci_bus *bus, bool check_enabled)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;

u32 l, bu, lu, io_upper16;
int pref_mem64;

- if (pci_is_enabled(bridge))

+ if (check_enabled && pci_is_enabled(bridge))
return;

dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",

@@ -550,6 +588,35 @@ void __ref pci_bus_size_bridges(struct p


}
EXPORT_SYMBOL(pci_bus_size_bridges);

+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:

+ pci_setup_bridge(b, false);


+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;

@@ -566,7 +633,7 @@ void __ref pci_bus_assign_resources(cons

switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ pci_setup_bridge(b, true);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);

Kenji Kaneshige

unread,
Oct 26, 2009, 6:27:43 AM10/26/09
to Yinghai Lu, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

Does this happen at the boot time regardless of hot-plug?


>>>> - Your patch seems to update BARs for bridge itself. I think it
>>>> would break the bridge's driver (port service driver) that if
>>>> it controls the device's capability by using IO/Mem, though I
>>>> don't know if such driver or capabilities exists now.
>>> port service driver will be AER and pciehotplug.
>>> it seems those driver are not use those BAR...
>>> those BAR are supposed for the devices under the pcie bridge.
>>>
>> I understand that there are only two port service drivers (AER and
>> PCIe hotplug) and both doesn't use BAR. But I still have a concern
>> that changing bridge's BARs might cause problems in the future. In
>> my understanding, what you need is expanding IO/Mem base and limit
>> of root or switch downstream ports. If so, I think we should only
>> touch IO/Mem base/limit, and should not touch bridge's BARs.
>
> those bridge BAR are for devices under that bridge. the port device is not supposed to use them.
>

Do you mean you touch only BARs of the devices under the bridge?

> if we don't touch the bridge's BAR, the hw will not forward the io for those devices under it.
>

I understand you need to touch I/O base/limit and Mem base/limit. But
I don't understand why you also need to update bridge's BARs. Could
you please explain a little more about it?

Just in case, my terminology "bridge's BARs" is Base Address Register
0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
(type 1) configuration space header of the bridge.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Oct 26, 2009, 2:00:49 PM10/26/09
to Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

yes

>
>
>>>>> - Your patch seems to update BARs for bridge itself. I think it
>>>>> would break the bridge's driver (port service driver) that if
>>>>> it controls the device's capability by using IO/Mem, though I
>>>>> don't know if such driver or capabilities exists now.
>>>> port service driver will be AER and pciehotplug.
>>>> it seems those driver are not use those BAR...
>>>> those BAR are supposed for the devices under the pcie bridge.
>>>>
>>> I understand that there are only two port service drivers (AER and
>>> PCIe hotplug) and both doesn't use BAR. But I still have a concern
>>> that changing bridge's BARs might cause problems in the future. In
>>> my understanding, what you need is expanding IO/Mem base and limit
>>> of root or switch downstream ports. If so, I think we should only
>>> touch IO/Mem base/limit, and should not touch bridge's BARs.
>>
>> those bridge BAR are for devices under that bridge. the port device is
>> not supposed to use them.
>>
>
> Do you mean you touch only BARs of the devices under the bridge?

no. the BAR of 0x1c, 0x20, and 0x28

>
>> if we don't touch the bridge's BAR, the hw will not forward the io for
>> those devices under it.
>>
>
> I understand you need to touch I/O base/limit and Mem base/limit. But
> I don't understand why you also need to update bridge's BARs. Could
> you please explain a little more about it?
>
> Just in case, my terminology "bridge's BARs" is Base Address Register
> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
> (type 1) configuration space header of the bridge.

i mean 0x1c, 0x20, 0x28

did not notice that bridge device's 0x10, 0x14 are used...
if port service need to use 0x10, 0x14, and the device is enabled, we should touch 0x10, and 0x14.


YH

Yinghai Lu

unread,
Oct 26, 2009, 2:52:50 PM10/26/09
to Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

after check the code, if

pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==> pdev_sort_resources

will not touch 0x10 and 0x14, if those resource is claimed by port service.

/* Sort resources by alignment */
void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
{
int i;

for (i = 0; i < PCI_NUM_RESOURCES; i++) {
struct resource *r;
struct resource_list *list, *tmp;
resource_size_t r_align;

r = &dev->resource[i];

if (r->flags & IORESOURCE_PCI_FIXED)
continue;

if (!(r->flags) || r->parent)
continue;

r->parent != NULL, will make it skip those two.

So -v3 should be safe.

Thanks

Yinghai Lu

Yinghai Lu

unread,
Oct 27, 2009, 4:10:57 AM10/27/09
to Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas, Eric W. Biederman, Ingo Molnar

after

| commit 308cf8e13f42f476dfd6552aeff58fdc0788e566
|
| PCI: get larger bridge ranges when space is available

found one of resource of peer root bus (0x00) get released from root
resource. later one hotplug device can not get big range anymore.
other peer root buses is ok.

it turns out it is from transparent path.

those resources will be used for pci bridge BAR updated.
so need to limit it to 3.

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
need to apply after:
[PATCH] pci: pciehp update the slot bridge res to get big range for pcie devices - v4
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
arch/x86/pci/i386.c | 6 +++
drivers/pci/hotplug/pciehp_pci.c | 1
drivers/pci/setup-bus.c | 69 ++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 2 +
4 files changed, 77 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -319,6 +319,37 @@ static void pci_bridge_check_ranges(stru
}
}

+void pci_bridge_release_not_used_res(struct pci_bus *bus)
+{
+ int i;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ for (i = 0; i < 3; i++) {
+ r = bus->resource[i];
+ if (r && (r->flags & type_mask)) {
+ if (!r->parent)
+ continue;
+ /*
+ * if there is no child under that, we should release
+ * and use it. don't need to reset it, pbus_size_* will
+ * set it again
+ */
+ if (!r->child && !release_resource(r)) {
+ r->flags = 0;
+ dev_printk(KERN_DEBUG, &bus->dev,
+ "resource %d %pRt released\n",
+ i, r);
+ }
+ }
+ }
+
+ pci_setup_bridge(bus);
+}
+EXPORT_SYMBOL(pci_bridge_release_not_used_res);
+
/* Helper function for sizing routines: find first available
bus resource of a given type. Note: we intentionally skip
the bus resources which have already been assigned (that is,
@@ -576,6 +607,42 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

+void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ pci_bus_release_bridges_not_used_res(b);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ pci_bridge_release_not_used_res(bus);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
+


void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)

{
struct pci_bus *b;
@@ -644,7 +711,7 @@ static void pci_bus_dump_res(struct pci_

for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);


Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c

@@ -171,6 +171,7 @@ int pciehp_unconfigure_device(struct slo
}
pci_dev_put(temp);
}
+ pci_bridge_release_not_used_res(parent);

return rc;


}
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -759,6 +759,8 @@ int pci_vpd_truncate(struct pci_dev *dev


void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);

+void pci_bus_release_bridges_not_used_res(struct pci_bus *bus);
+void pci_bridge_release_not_used_res(struct pci_bus *bus);


int pci_claim_resource(struct pci_dev *, int);

void pci_assign_unassigned_resources(void);
void pdev_enable_device(struct pci_dev *);
Index: linux-2.6/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/i386.c
+++ linux-2.6/arch/x86/pci/i386.c
@@ -194,6 +194,7 @@ static void __init pcibios_allocate_reso
static int __init pcibios_assign_resources(void)
{
struct pci_dev *dev = NULL;
+ struct pci_bus *bus;
struct resource *r;

if (!(pci_probe & PCI_ASSIGN_ROMS)) {
@@ -213,6 +214,11 @@ static int __init pcibios_assign_resourc
}
}

+ /* Try to release bridge resources, that there is not child under it */
+ list_for_each_entry(bus, &pci_root_buses, node) {
+ pci_bus_release_bridges_not_used_res(bus);
+ }
+
pci_assign_unassigned_resources();

return 0;

Yinghai Lu

unread,
Oct 27, 2009, 4:11:21 AM10/27/09
to Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas, Eric W. Biederman, Ingo Molnar

revert

| commit 28760489a3f1e136c5ae8581c0fa8f63511f2f4c
| Author: Eric W. Biederman <ebie...@aristanetworks.com>
| Date: Wed Sep 9 14:09:24 2009 -0700
|
| PCI: pcie: Ensure hotplug ports have a minimum number of resources

we don't need this trick anymore. the bridge BAR will be updated automatically

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/pci.c | 10 ----------
drivers/pci/probe.c | 18 ------------------
drivers/pci/setup-bus.c | 22 +++++-----------------
include/linux/pci.h | 4 ----
4 files changed, 5 insertions(+), 49 deletions(-)

Index: linux-2.6/drivers/pci/pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci.c
+++ linux-2.6/drivers/pci/pci.c
@@ -41,12 +41,6 @@ int pci_domains_supported = 1;
unsigned long pci_cardbus_io_size = DEFAULT_CARDBUS_IO_SIZE;
unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;

-#define DEFAULT_HOTPLUG_IO_SIZE (256)
-#define DEFAULT_HOTPLUG_MEM_SIZE (2*1024*1024)
-/* pci=hpmemsize=nnM,hpiosize=nn can override this */
-unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE;
-unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
-
/*
* The default CLS is used if arch didn't set CLS explicitly and not
* all pci devices agree on the same value. Arch can override either
@@ -2787,10 +2781,6 @@ static int __init pci_setup(char *str)
strlen(str + 19));
} else if (!strncmp(str, "ecrc=", 5)) {
pcie_ecrc_get_policy(str + 5);
- } else if (!strncmp(str, "hpiosize=", 9)) {
- pci_hotplug_io_size = memparse(str + 9, &str);
- } else if (!strncmp(str, "hpmemsize=", 10)) {
- pci_hotplug_mem_size = memparse(str + 10, &str);
} else {
printk(KERN_ERR "PCI: Unknown option `%s'\n",
str);
Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -689,23 +689,6 @@ static void set_pcie_port_type(struct pc
pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
}

-static void set_pcie_hotplug_bridge(struct pci_dev *pdev)
-{
- int pos;
- u16 reg16;
- u32 reg32;
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (!pos)
- return;
- pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
- if (!(reg16 & PCI_EXP_FLAGS_SLOT))
- return;
- pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &reg32);
- if (reg32 & PCI_EXP_SLTCAP_HPC)
- pdev->is_hotplug_bridge = 1;
-}
-
#define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)

/**
@@ -813,7 +796,6 @@ int pci_setup_device(struct pci_dev *dev
pci_read_irq(dev);
dev->transparent = ((dev->class & 0xff) == 1);
pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
- set_pcie_hotplug_bridge(dev);
break;

case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */


Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -375,7 +375,7 @@ static struct resource *find_free_bus_re
since these windows have 4K granularity and the IO ranges
of non-bridge PCI devices are limited to 256 bytes.
We must be careful with the ISA aliasing though. */
-static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size)
+static void pbus_size_io(struct pci_bus *bus)
{
struct pci_dev *dev;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
@@ -402,8 +402,6 @@ static void pbus_size_io(struct pci_bus
size1 += r_size;
}
}
- if (size < min_size)
- size = min_size;
/* To be fixed in 2.5: we should have sort of HAVE_ISA
flag in the struct pci_bus. */
#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
@@ -422,8 +420,7 @@ static void pbus_size_io(struct pci_bus

/* Calculate the size of the bus and minimal alignment which
guarantees that all child resources fit in this size. */
-static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
- unsigned long type, resource_size_t min_size)
+static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type)
{
struct pci_dev *dev;
resource_size_t min_align, align, size;
@@ -473,8 +470,6 @@ static int pbus_size_mem(struct pci_bus
mem64_mask &= r->flags & IORESOURCE_MEM_64;
}
}
- if (size < min_size)
- size = min_size;

align = 0;
min_align = 0;
@@ -554,7 +549,6 @@ void __ref pci_bus_size_bridges(struct p
{
struct pci_dev *dev;
unsigned long mask, prefmask;
- resource_size_t min_mem_size = 0, min_io_size = 0;

list_for_each_entry(dev, &bus->devices, bus_list) {


struct pci_bus *b = dev->subordinate;

@@ -584,12 +578,8 @@ void __ref pci_bus_size_bridges(struct p

case PCI_CLASS_BRIDGE_PCI:
pci_bridge_check_ranges(bus);
- if (bus->self->is_hotplug_bridge) {
- min_io_size = pci_hotplug_io_size;
- min_mem_size = pci_hotplug_mem_size;
- }
default:
- pbus_size_io(bus, min_io_size);
+ pbus_size_io(bus);
/* If the bridge supports prefetchable range, size it
separately. If it doesn't, or its prefetchable window
has already been allocated by arch code, try
@@ -597,11 +587,9 @@ void __ref pci_bus_size_bridges(struct p
resources. */
mask = IORESOURCE_MEM;
prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
- if (pbus_size_mem(bus, prefmask, prefmask, min_mem_size))
+ if (pbus_size_mem(bus, prefmask, prefmask))
mask = prefmask; /* Success, size non-prefetch only. */
- else
- min_mem_size += min_mem_size;
- pbus_size_mem(bus, mask, IORESOURCE_MEM, min_mem_size);
+ pbus_size_mem(bus, mask, IORESOURCE_MEM);
break;


}
}
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -279,7 +279,6 @@ struct pci_dev {
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
unsigned int reset_fn:1;
- unsigned int is_hotplug_bridge:1;
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */

@@ -1256,9 +1255,6 @@ extern unsigned long pci_cardbus_mem_siz
extern u8 pci_dfl_cache_line_size;
extern u8 pci_cache_line_size;

-extern unsigned long pci_hotplug_io_size;
-extern unsigned long pci_hotplug_mem_size;
-
int pcibios_add_platform_entries(struct pci_dev *dev);
void pcibios_disable_device(struct pci_dev *dev);
int pcibios_set_pcie_reset_state(struct pci_dev *dev,

Yinghai Lu

unread,
Oct 27, 2009, 4:12:29 AM10/27/09
to Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas, Eric W. Biederman, Ingo Molnar

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()

-v4: move out pci_is_enabled checkout of pci_setup_bridge()

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
include/linux/pci.h | 1
3 files changed, 93 insertions(+), 10 deletions(-)

@@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_

u32 l, bu, lu, io_upper16;
int pref_mem64;

- if (pci_is_enabled(bridge))

- return;
-


dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",

pci_domain_nr(bus), bus->number);

@@ -550,6 +585,35 @@ void __ref pci_bus_size_bridges(struct p


}
EXPORT_SYMBOL(pci_bus_size_bridges);

+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:

+ pci_setup_bridge(b);


+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;

@@ -566,7 +630,8 @@ void __ref pci_bus_assign_resources(cons



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);

+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);

Yinghai Lu

unread,
Oct 27, 2009, 4:12:44 AM10/27/09
to Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas, Eric W. Biederman, Ingo Molnar

after

| commit 308cf8e13f42f476dfd6552aeff58fdc0788e566
|
| PCI: get larger bridge ranges when space is available

found one of resource of peer root bus (0x00) get released from root
resource. later one hotplug device can not get big range anymore.
other peer root buses is ok.

it turns out it is from transparent path.

those resources will be used for pci bridge BAR updated.
so need to limit it to 3.

but revert the commit at first.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -334,17 +334,8 @@ static struct resource *find_free_bus_re


r = bus->resource[i];

if (r == &ioport_resource || r == &iomem_resource)
continue;
- if (r && (r->flags & type_mask) == type) {
- if (!r->parent)
- return r;
- /*
- * if there is no child under that, we should release
- * and use it. don't need to reset it, pbus_size_* will
- * set it again
- */
- if (!r->child && !release_resource(r))
- return r;
- }
+ if (r && (r->flags & type_mask) == type && !r->parent)
+ return r;
}
return NULL;

Eric W. Biederman

unread,
Oct 27, 2009, 5:21:10 AM10/27/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, Alex Chiang, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, Bjorn Helgaas, Ingo Molnar
Yinghai Lu <yin...@kernel.org> writes:

> revert
>
> | commit 28760489a3f1e136c5ae8581c0fa8f63511f2f4c
> | Author: Eric W. Biederman <ebie...@aristanetworks.com>
> | Date: Wed Sep 9 14:09:24 2009 -0700
> |
> | PCI: pcie: Ensure hotplug ports have a minimum number of resources
>
> we don't need this trick anymore. the bridge BAR will be updated automatically

How so? We can't reassign resources after they have been allocated as someone
might be using them. If the firmware doesn't reserve the resource for a hotplug
we need to do it ourselves.


Eric

Kenji Kaneshige

unread,
Oct 28, 2009, 4:32:23 AM10/28/09
to Yinghai Lu, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

Thank you for the clarification.

But I still don't understand the whole picture of your set of
changes. Let me ask some questions.

In my understanding of your set of changes, if there is a PCIe
switch with some hot-plug slots and all of those slots are empty,
I/O and Memory resources assigned by BIOS are all released at
the boot time. For example, suppose the following case.

bridge(A)
|
-----------------------
| |
bridge(B) bridge(C)
| |
slot(1) slot(2)
(empty) (empty)

bridge(A): P2P bridge for switch upstream port
bridge(B): P2P bridge for switch downstream port
bridge(C): P2P bridge for switch downstream port

In the above example, I/O and Mem resource assigned to bridge(A),
bridge(B) and bridge(C) are all released at the boot time. Correct?

Then, when a adapter card is hot-added to slot(1), I/O and Mem
resources enough for enabling the hot-added adapter card is assigned
to bridge(A), bridge(B) and the adapter card. Correct?

Then, when an another adpater card is hot-added to slot(2), we
need to assign enough resource to bridge(C) and the new card.
But bridge(A) doesn't have enough resource for bridge(C) and
the new card. In addition, all bridge(A) and bridge(B) and the
adapter card on slot(1) are already working. How do you assign
resource to bridge(C) and the card on slot(2)?

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Oct 28, 2009, 1:45:16 PM10/28/09
to Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas, Eric W. Biederman

thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.

YH

Bjorn Helgaas

unread,
Oct 28, 2009, 1:56:41 PM10/28/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Eric W. Biederman

When you do, can you please add printks showing the changes you're
making to bridge resources? I think these events are infrequent, and
knowing about the changes is vital to debugging problems.

Bjorn

Yinghai Lu

unread,
Oct 28, 2009, 2:38:21 PM10/28/09
to Bjorn Helgaas, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Eric W. Biederman

sure.

[ 91.762205] pci 0000:00:07.0: resource 15 [mem 0x91800000-0x91ffffff 64bit pref] released
[ 91.771715] pci 0000:00:07.0: PCI bridge, secondary bus 0000:0e
[ 91.783297] pci 0000:00:07.0: IO window: 0x2000-0x2fff
[ 91.791564] pci 0000:00:07.0: MEM window: 0x90000000-0x917fffff
[ 91.806376] pci 0000:00:07.0: PREFETCH window: disabled
[ 91.811537] pci 0000:40:07.0: resource 15 [mem 0xb1800000-0xb1ffffff 64bit pref] released
[ 91.826760] pci 0000:40:07.0: PCI bridge, secondary bus 0000:50
[ 91.841288] pci 0000:40:07.0: IO window: 0x7000-0x7fff
[ 91.847605] pci 0000:40:07.0: MEM window: 0xb0000000-0xb17fffff
[ 91.861572] pci 0000:40:07.0: PREFETCH window: disabled
[ 91.868426] pci 0000:80:07.0: resource 15 [mem 0xd1800000-0xd1ffffff 64bit pref] released
[ 91.884552] pci 0000:80:07.0: PCI bridge, secondary bus 0000:90
[ 91.892898] pci 0000:80:07.0: IO window: 0xa000-0xafff
[ 91.905952] pci 0000:80:07.0: MEM window: 0xd0000000-0xd17fffff
[ 91.912304] pci 0000:80:07.0: PREFETCH window: disabled
[ 91.925909] pci 0000:c0:07.0: resource 15 [mem 0xf1800000-0xf1ffffff 64bit pref] released
[ 91.942305] pci 0000:c0:07.0: PCI bridge, secondary bus 0000:d0
[ 91.950273] pci 0000:c0:07.0: IO window: 0xf000-0xffff
[ 91.961687] pci 0000:c0:07.0: MEM window: 0xf0000000-0xf17fffff
[ 91.969438] pci 0000:c0:07.0: PREFETCH window: disabled

YH

Eric W. Biederman

unread,
Oct 28, 2009, 3:01:10 PM10/28/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:

Tell me what is your expected behavior if I plug a bridge with hotplug
slots into a leaf hotplug slot? Will you assign me enough resources so
that I can plug in additional devices?

Today I make plugging in a hotplug bridge work by having the firmware
reserve at one level and having the kernel reserve at the next level.

Windows handles the issue in theory by performing some kind of
hot-unplugging of drivers that already have assigned resources and
then replugging them. Which allows a full renumbering of busses.
We don't have the infrastructure to do that safely today.

Eric

Yinghai Lu

unread,
Oct 28, 2009, 3:13:23 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

no.

you need to plug device in those slots and then insert it into a leaf hotplug slot.

>
> Today I make plugging in a hotplug bridge work by having the firmware
> reserve at one level and having the kernel reserve at the next level.
>
> Windows handles the issue in theory by performing some kind of
> hot-unplugging of drivers that already have assigned resources and
> then replugging them. Which allows a full renumbering of busses.
> We don't have the infrastructure to do that safely today.

that will take some drivers offline at first ?

YH

Yinghai Lu

unread,
Oct 28, 2009, 3:21:49 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
after

| commit 308cf8e13f42f476dfd6552aeff58fdc0788e566
|
| PCI: get larger bridge ranges when space is available

found one of resource of peer root bus (0x00) get released from root
resource. later one hotplug device can not get big range anymore.
other peer root buses is ok.

it turns out it is from transparent path.

those resources will be used for pci bridge BAR updated.
so need to limit it to 3.

v2: Jesse doesn't like it is in find_free_bus_resource...


try to move out of pci_bus_size_bridges loop.
need to apply after:
[PATCH] pci: pciehp update the slot bridge res to get big range for pcie devices - v4
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.

v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
arch/x86/pci/i386.c | 6 ++
drivers/pci/hotplug/pciehp_pci.c | 2
drivers/pci/setup-bus.c | 81 ++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 2
4 files changed, 90 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -319,6 +319,42 @@ static void pci_bridge_check_ranges(stru


}
}

+void pci_bridge_release_not_used_res(struct pci_bus *bus)
+{

+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;


+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */

+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if (r->flags & type_mask) {


+ if (!r->parent)
+ continue;
+ /*
+ * if there is no child under that, we should release
+ * and use it.

+ */
+ if (!r->child && !release_resource(r)) {
+ dev_info(&dev->dev,


+ "resource %d %pRt released\n",

+ idx, r);


+ r->flags = 0;

+ changed = true;
+ }
+ }
+ }
+
+ if (changed)


+ pci_setup_bridge(bus);
+}
+EXPORT_SYMBOL(pci_bridge_release_not_used_res);
+
/* Helper function for sizing routines: find first available
bus resource of a given type. Note: we intentionally skip
the bus resources which have already been assigned (that is,

@@ -576,6 +612,48 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

+
+/* only release those resources that is on leaf bridge */


+void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
+{
+ struct pci_dev *dev;

+ bool is_leaf_bridge = true;


+
+ list_for_each_entry(dev, &bus->devices, bus_list) {

+ struct pci_bus *b = dev->subordinate;
+ if (!b)


+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:

+ is_leaf_bridge = false;
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ is_leaf_bridge = false;


+ pci_bus_release_bridges_not_used_res(b);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:

+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ if (is_leaf_bridge)


+ pci_bridge_release_not_used_res(bus);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
+

void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)

{
struct pci_bus *b;
@@ -644,7 +722,8 @@ static void pci_bus_dump_res(struct pci_



for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c

@@ -98,6 +98,7 @@ int pciehp_configure_device(struct slot
pci_dev_put(dev);
}

+ pci_bridge_release_not_used_res(parent);
pci_bus_size_bridges(parent);
pci_clear_master(bridge);
pci_bridge_assign_resources(bridge);
@@ -171,6 +172,7 @@ int pciehp_unconfigure_device(struct slo


}
pci_dev_put(temp);
}
+ pci_bridge_release_not_used_res(parent);

return rc;
}

Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -759,6 +759,8 @@ int pci_vpd_truncate(struct pci_dev *dev

void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);

+void pci_bus_release_bridges_not_used_res(struct pci_bus *bus);
+void pci_bridge_release_not_used_res(struct pci_bus *bus);


int pci_claim_resource(struct pci_dev *, int);

void pci_assign_unassigned_resources(void);
void pdev_enable_device(struct pci_dev *);
Index: linux-2.6/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/i386.c
+++ linux-2.6/arch/x86/pci/i386.c
@@ -194,6 +194,7 @@ static void __init pcibios_allocate_reso
static int __init pcibios_assign_resources(void)
{
struct pci_dev *dev = NULL;
+ struct pci_bus *bus;
struct resource *r;

if (!(pci_probe & PCI_ASSIGN_ROMS)) {
@@ -213,6 +214,11 @@ static int __init pcibios_assign_resourc
}
}

+ /* Try to release bridge resources, that there is not child under it */
+ list_for_each_entry(bus, &pci_root_buses, node) {
+ pci_bus_release_bridges_not_used_res(bus);
+ }
+
pci_assign_unassigned_resources();

return 0;

--

Yinghai Lu

unread,
Oct 28, 2009, 3:21:57 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res


from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise


update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
-v4: move out pci_is_enabled checkout of pci_setup_bridge()

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
include/linux/pci.h | 1
3 files changed, 93 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;

dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {

@@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot


(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}

+ pci_dev_put(dev);
+ }
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);

+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);

+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }

pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);

@@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p

@@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);

--

Eric W. Biederman

unread,
Oct 28, 2009, 3:37:02 PM10/28/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:

Scenario.

I insert a bridge with pci hotplug slots into a leaf hotplug slot.
Which adds more leave hotplug slots.

Since the bridge itself is no longer a leaf slot it's resources will not
get reassigned.

Then I will have no resources to assign to the leaves?

>> Today I make plugging in a hotplug bridge work by having the firmware
>> reserve at one level and having the kernel reserve at the next level.
>>
>> Windows handles the issue in theory by performing some kind of
>> hot-unplugging of drivers that already have assigned resources and
>> then replugging them. Which allows a full renumbering of busses.
>> We don't have the infrastructure to do that safely today.
>
> that will take some drivers offline at first ?

I believe windows only does that for drivers that support being temporarily
disconnected from their hardware.

Eric

Yinghai Lu

unread,
Oct 28, 2009, 3:50:57 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

so we still have your min_size code there.

in your case:
you need plug all card in your slots on that daughter card at first, and then insert the daughter card to leaf slot in the MB.

my setup is :

system got 4 io chains. and will get slot:
00:03.0 00:05.0 00:07.0 00:09.0
40:03.0 40:05.0 40:07.0 40:09.0
80:03.0 80:05.0 80:07.0 80:09.0
c0:03.0 c0:05.0 c0:07.0 c0:09.0

those are hanged on peer root buses directly. but bios assign to them every one get 8M, if user plug one card need 256M, then it will not work.

with those two patches, could clear the resource assigned by BIOS, and get resource as needed. ( with mmio 64 bit )


YH

Eric W. Biederman

unread,
Oct 28, 2009, 5:30:25 PM10/28/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:

Operationally that is an impossibility. I would not have multiple
layers of hotplug if I only needed a single layer.

Which means your patch would cause a regression in my setup.

> my setup is :
>
> system got 4 io chains. and will get slot:
> 00:03.0 00:05.0 00:07.0 00:09.0
> 40:03.0 40:05.0 40:07.0 40:09.0
> 80:03.0 80:05.0 80:07.0 80:09.0
> c0:03.0 c0:05.0 c0:07.0 c0:09.0
>
> those are hanged on peer root buses directly. but bios assign to
> them every one get 8M, if user plug one card need 256M, then it will
> not work.
>
> with those two patches, could clear the resource assigned by BIOS,
> and get resource as needed. ( with mmio 64 bit )

Hmm.

Could you avoid reallocating resources until a pci device is plugged in
that has problems?

A lot of root bridges have important configuration registers that are
not in standard locations. Which means in general we can not reprogram
root bridges successfully from linux. At least not without code that
knows the root bridge magic.

You can almost solve your problem by simply saying: pci=hpmemsize=256M.
Which works except that allocating 4G of pci memory isn't very likely
to work.

One of the suggestions when I made my patch was to have a per port option
instead of a global minimum. That is an option for your case. But it
is not as elegant.

The truly elegant approach is to make certain the hibernate in the
drivers can handle bars being changed under them, hibernate everything
that needs renumbering and then bring them back.

Personally I think you should walk over to whomever did your firmware
and tell them they goofed.

Eric

Yinghai Lu

unread,
Oct 28, 2009, 5:40:17 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

ok, may need to compare new range size and old range size before clear it.

>
>> my setup is :
>>
>> system got 4 io chains. and will get slot:
>> 00:03.0 00:05.0 00:07.0 00:09.0
>> 40:03.0 40:05.0 40:07.0 40:09.0
>> 80:03.0 80:05.0 80:07.0 80:09.0
>> c0:03.0 c0:05.0 c0:07.0 c0:09.0
>>
>> those are hanged on peer root buses directly. but bios assign to
>> them every one get 8M, if user plug one card need 256M, then it will
>> not work.
>>
>> with those two patches, could clear the resource assigned by BIOS,
>> and get resource as needed. ( with mmio 64 bit )
>
> Hmm.
>
> Could you avoid reallocating resources until a pci device is plugged in
> that has problems?
>
> A lot of root bridges have important configuration registers that are
> not in standard locations. Which means in general we can not reprogram
> root bridges successfully from linux. At least not without code that
> knows the root bridge magic.

no one change that

>
> You can almost solve your problem by simply saying: pci=hpmemsize=256M.
> Which works except that allocating 4G of pci memory isn't very likely
> to work.
>
> One of the suggestions when I made my patch was to have a per port option
> instead of a global minimum. That is an option for your case. But it
> is not as elegant.
>
> The truly elegant approach is to make certain the hibernate in the
> drivers can handle bars being changed under them, hibernate everything
> that needs renumbering and then bring them back.
>
> Personally I think you should walk over to whomever did your firmware
> and tell them they goofed.

they said it IS Linux problem. because other os is ok.

YH

Yinghai Lu

unread,
Oct 28, 2009, 6:25:37 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

after closing look up the code, it looks it will not break your setup.

1. before the patches:
a. when master card is inserted, all bridge in that card will get assigned with min_size
b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.

2. after the patches: v5
a. booted up, all leaf bridge mmio get clearred.
b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.

can you check those two patches in your setup to verify it?
http://patchwork.kernel.org/patch/56344/
http://patchwork.kernel.org/patch/56343/

Thanks

Yinghai Lu

Yinghai Lu

unread,
Oct 28, 2009, 6:27:07 PM10/28/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu wrote:

>>>
>>> Which means your patch would cause a regression in my setup.
>> ok, may need to compare new range size and old range size before clear it.
>
> after closing look up the code, it looks it will not break your setup.
>
> 1. before the patches:
> a. when master card is inserted, all bridge in that card will get assigned with min_size
> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> 2. after the patches: v5
> a. booted up, all leaf bridge mmio get clearred.
> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> can you check those two patches in your setup to verify it?
> http://patchwork.kernel.org/patch/56344/
> http://patchwork.kernel.org/patch/56343/

on top Jesse today's PCI tree.

YH

Kenji Kaneshige

unread,
Oct 29, 2009, 2:35:02 AM10/29/09
to Yinghai Lu, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

If this function is only for normal PCI bridge, we need additional
check at the head of this function.

if ((dev->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
return;

if not, 3 should be 4 instead. And we can do as follows

for (i = PCI_BRIDGE_RESOURCE; i <= PCI_BRIDGE_RESOURCE_END; i++) {

> + r = &dev->resource[idx];
> + if (r->flags & type_mask) {

How about
if (!(r->flags & type_mask))
continue;

> + if (!r->parent)
> + continue;
> + /*
> + * if there is no child under that, we should release
> + * and use it.
> + */

I think this comment should be updated because whether "we should
release and use it" is decided by the caller of this function.

> + if (!r->child && !release_resource(r)) {
> + dev_info(&dev->dev,
> + "resource %d %pRt released\n",
> + idx, r);
> + r->flags = 0;
> + changed = true;
> + }

How about
if (r->child)
continue;

if (!release_resource(r)) {
...
}

> + }
> + }
> +
> + if (changed)
> + pci_setup_bridge(bus);

The pci_setup_bridge() is called even if some resources are used
by children. For example, mem resource is used by children, but
prefetchable mem resource is not used by children. In this case,
I think mem resource is released and pci_setup_bridge() is called
while prefetcable mem resource is being used. I'm worrying that it
might cause something wrong.

How about like below. This might need to be called with "pci_bus_sem" held.

void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
{
struct pci_bus *b;

/* If the bus is cardbus, do nothing */
if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
return;

/* We only release the resources under the leaf bridge */
if (list_empty(&bus->children)) {
if (!pci_is_root_bus(bus))
pci_bridge_release_not_used_res(bus);
return;
}

/* Scan child buses if the bus is not leaf */
list_for_each_entry(b, &bus->children, node)
pci_bus_release_bridges_not_used_res(b);
}

> +EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
> +
> void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
> {
> struct pci_bus *b;
> @@ -644,7 +722,8 @@ static void pci_bus_dump_res(struct pci_
>
> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
> struct resource *res = bus->resource[i];
> - if (!res || !res->end)
> +
> + if (!res || !res->end || !res->flags)
> continue;
>
> dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);
> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> @@ -98,6 +98,7 @@ int pciehp_configure_device(struct slot
> pci_dev_put(dev);
> }
>
> + pci_bridge_release_not_used_res(parent);

I guess this is for the slots that are empty at the boot time on non x86
systems. And it only works at the first hot-add. Correct? How about doing
this at the pciehp's slot initialization.


> pci_bus_size_bridges(parent);
> pci_clear_master(bridge);
> pci_bridge_assign_resources(bridge);
> @@ -171,6 +172,7 @@ int pciehp_unconfigure_device(struct slo
> }
> pci_dev_put(temp);
> }
> + pci_bridge_release_not_used_res(parent);

How about call pci_bus_release_bridges_not_used_res() instead and
make pci_bridge_release_not_used_res() static function.

If this is only for PCIe hotplug, I don't think we need it here (as
you're doing for non x86 platforms). If not, I think you should do
it in the another patch.

Thanks,
Kenji Kaneshige

Eric W. Biederman

unread,
Oct 29, 2009, 4:16:27 AM10/29/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:
>
> after closing look up the code, it looks it will not break your setup.
>
> 1. before the patches:
> a. when master card is inserted, all bridge in that card will get assigned with min_size
> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> 2. after the patches: v5
> a. booted up, all leaf bridge mmio get clearred.
> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> can you check those two patches in your setup to verify it?

I have a much simpler case I will break, as I tried something similar by accident.

AMD cpu MCP55 with one pcie port setup as hotplug.
The system only has 2GB of RAM. So plenty of space for pcie devices.

If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
Reads from the bar of the hotplugged device work
Writes to the bar of the hotplugged device, cause further writes to go to lala land.

So I had to have the firmware make the assignment, because only it knows the
details of the hidden AMD bar registers for each hypertransport chain etc.

I don't see how throwing away the work that the firmware has done in
preallocation is something that we can afford to do in general if what
the firmware has done works for us.

Eric

Kenji Kaneshige

unread,
Oct 29, 2009, 4:29:39 AM10/29/09
to Yinghai Lu, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

Does this patch work without [PATCH 2/2]? I don't understand who
releases resouces? Does find_free_bus_resource() still release
resources?

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Oct 29, 2009, 4:32:12 AM10/29/09
to Kenji Kaneshige, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

need to work with [2/2].

YH

Kenji Kaneshige

unread,
Oct 29, 2009, 4:56:24 AM10/29/09
to Yinghai Lu, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

Ok. Could you rearrange the set of patches with the right pieces and
with the right order? It's very difficult for me to understand and
review the current patches.

By the way, is release_resource() in find_free_bus_resource() already
removed?

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Oct 29, 2009, 4:58:26 AM10/29/09
to Kenji Kaneshige, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

ok

>
> By the way, is release_resource() in find_free_bus_resource() already
> removed?

YES. Jesse sent request to Linus to revert that.

YH

Yinghai Lu

unread,
Oct 29, 2009, 5:03:49 AM10/29/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Eric W. Biederman wrote:
> Yinghai Lu <yin...@kernel.org> writes:
>> after closing look up the code, it looks it will not break your setup.
>>
>> 1. before the patches:
>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>
>> 2. after the patches: v5
>> a. booted up, all leaf bridge mmio get clearred.
>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>
>> can you check those two patches in your setup to verify it?
>
> I have a much simpler case I will break, as I tried something similar by accident.
which kernel version?

>
> AMD cpu MCP55 with one pcie port setup as hotplug.
> The system only has 2GB of RAM. So plenty of space for pcie devices.

one or two ht chains?

do you still have lspci -tv with it?

>
> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> Reads from the bar of the hotplugged device work
> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>
> So I had to have the firmware make the assignment, because only it knows the
> details of the hidden AMD bar registers for each hypertransport chain etc.

that mean kernel doesn't get peer root bus res probed properly


YH

Yinghai Lu

unread,
Oct 29, 2009, 5:04:11 AM10/29/09
to Kenji Kaneshige, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

don't want to mess up with pci cardbus yet. can not access related stuff.

>
>> + r = &dev->resource[idx];
>> + if (r->flags & type_mask) {
>
> How about
> if (!(r->flags & type_mask))
> continue;
>

ok

>> + if (!r->parent)
>> + continue;
>> + /*
>> + * if there is no child under that, we should release
>> + * and use it.
>> + */
>
> I think this comment should be updated because whether "we should
> release and use it" is decided by the caller of this function.
>

ok

>> + if (!r->child && !release_resource(r)) {
>> + dev_info(&dev->dev,
>> + "resource %d %pRt released\n",
>> + idx, r);
>> + r->flags = 0;
>> + changed = true;
>> + }
>
> How about
> if (r->child)
> continue;
>
> if (!release_resource(r)) {
> ...
> }
>

ok

>> + }
>> + }
>> +
>> + if (changed)
>> + pci_setup_bridge(bus);

>
> The pci_setup_bridge() is called even if some resources are used
> by children. For example, mem resource is used by children, but
> prefetchable mem resource is not used by children. In this case,
> I think mem resource is released and pci_setup_bridge() is called
> while prefetcable mem resource is being used. I'm worrying that it
> might cause something wrong.


should be ok
case 1: before assigned unassigned resource. no driver etc are loaded yet
case 2: before config pcie slot bridge, no device under it yet.

ok

>> +EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
>> +
>> void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
>> {
>> struct pci_bus *b;
>> @@ -644,7 +722,8 @@ static void pci_bus_dump_res(struct pci_
>>
>> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
>> struct resource *res = bus->resource[i];
>> - if (!res || !res->end)
>> +
>> + if (!res || !res->end || !res->flags)
>> continue;
>>
>> dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);
>> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
>> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>> @@ -98,6 +98,7 @@ int pciehp_configure_device(struct slot
>> pci_dev_put(dev);
>> }
>>
>> + pci_bridge_release_not_used_res(parent);
>
> I guess this is for the slots that are empty at the boot time on non x86
> systems. And it only works at the first hot-add. Correct? How about doing
> this at the pciehp's slot initialization.

this one could be removed.

>
>
>> pci_bus_size_bridges(parent);
>> pci_clear_master(bridge);
>> pci_bridge_assign_resources(bridge);
>> @@ -171,6 +172,7 @@ int pciehp_unconfigure_device(struct slo
>> }
>> pci_dev_put(temp);
>> }
>> + pci_bridge_release_not_used_res(parent);
>
> How about call pci_bus_release_bridges_not_used_res() instead and
> make pci_bridge_release_not_used_res() static function.
>

ok

it is needed for the first boot, when pcie card is already plugged in at boot time.
need to move back to setup_bus.c

YH

Yinghai Lu

unread,
Oct 29, 2009, 5:53:12 AM10/29/09
to Kenji Kaneshige, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

only for index < 3

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
need to apply after:
[PATCH] pci: pciehp update the slot bridge res to get big range for pcie devices - v4
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.

v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 1
2 files changed, 65 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -573,13 +573,68 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void pci_bridge_release_not_used_res(struct pci_bus *bus)


+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {

+ r = &dev->resource[idx];

+ if (!(r->flags & type_mask))
+ continue;


+ if (!r->parent)
+ continue;

+ /* if there is no child under that, we should release it */
+ if (r->child)
+ continue;
+ if (!release_resource(r)) {
+ dev_info(&dev->dev, "resource %d %pRt released\n",


+ idx, r);
+ r->flags = 0;
+ changed = true;
+ }

+ }
+
+ if (changed)
+ pci_setup_bridge(bus);

+}
+


+void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)

+{
+ struct pci_bus *b;
+

+ /* If the bus is cardbus, do nothing */
+ if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ return;
+
+ /* We only release the resources under the leaf bridge */
+ if (list_empty(&bus->children)) {
+ if (!pci_is_root_bus(bus))
+ pci_bridge_release_not_used_res(bus);
+ return;
+ }
+
+ /* Scan child buses if the bus is not leaf */
+ list_for_each_entry(b, &bus->children, node)
+ pci_bus_release_bridges_not_used_res(b);
+}
+EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;



for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);

@@ -608,6 +663,14 @@ pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;

+ /*
+ * Try to release leaf bridge's resources that there is not child
+ * under it
+ */


+ list_for_each_entry(bus, &pci_root_buses, node) {
+ pci_bus_release_bridges_not_used_res(bus);
+ }
+

/* Depth first, calculate sizes and alignments of all
subordinate buses. */
list_for_each_entry(bus, &pci_root_buses, node) {


Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -758,6 +758,7 @@ int pci_vpd_truncate(struct pci_dev *dev


/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */

void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);

+void pci_bus_release_bridges_not_used_res(struct pci_bus *bus);


int pci_claim_resource(struct pci_dev *, int);

void pci_assign_unassigned_resources(void);
void pdev_enable_device(struct pci_dev *);

--

Yinghai Lu

unread,
Oct 29, 2009, 5:53:24 AM10/29/09
to Kenji Kaneshige, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
-v4: move out pci_is_enabled checkout of pci_setup_bridge()

-v5: change the applying sequence.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 31 +++++++++++++---


drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
include/linux/pci.h | 1

3 files changed, 95 insertions(+), 10 deletions(-)

@@ -155,5 +172,7 @@ int pciehp_unconfigure_device(struct slo
pci_dev_put(temp);
}

+ pci_bus_release_bridges_not_used_res(parent);
+
return rc;

void pci_bus_release_bridges_not_used_res(struct pci_bus *bus);

Bjorn Helgaas

unread,
Oct 29, 2009, 9:26:00 AM10/29/09
to Eric W. Biederman, Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky
On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
> Yinghai Lu <yin...@kernel.org> writes:
> >
> > after closing look up the code, it looks it will not break your setup.
> >
> > 1. before the patches:
> > a. when master card is inserted, all bridge in that card will get assigned with min_size
> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >
> > 2. after the patches: v5
> > a. booted up, all leaf bridge mmio get clearred.
> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >
> > can you check those two patches in your setup to verify it?
>
> I have a much simpler case I will break, as I tried something similar by accident.
>
> AMD cpu MCP55 with one pcie port setup as hotplug.
> The system only has 2GB of RAM. So plenty of space for pcie devices.
>
> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> Reads from the bar of the hotplugged device work
> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>
> So I had to have the firmware make the assignment, because only it knows the
> details of the hidden AMD bar registers for each hypertransport chain etc.

Do you mean you had to have firmware program a hot-added device, or just
that firmware had to program the apertures of the root port that was
present at boot, even though it had no devices below it?

Firmware normally supplies ACPI _CRS information that tells us how it
programmed the host bridge windows. On x86, Linux normally ignores that
and just assumes a range based on memory size. If we paid attention to
it (as with "pci=use_crs"), it's likely that we could do a better job of
doing this setup.

Or, of course, we could add a Linux driver that knows about "the hidden
AMD bar registers." But I think that should be a last resort, for when
firmware supplied incorrect _CRS information.

Eric W. Biederman

unread,
Oct 29, 2009, 11:13:34 AM10/29/09
to Bjorn Helgaas, Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky
Bjorn Helgaas <bjorn....@hp.com> writes:

> On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
>> Yinghai Lu <yin...@kernel.org> writes:
>> >
>> > after closing look up the code, it looks it will not break your setup.
>> >
>> > 1. before the patches:
>> > a. when master card is inserted, all bridge in that card will get assigned with min_size
>> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >
>> > 2. after the patches: v5
>> > a. booted up, all leaf bridge mmio get clearred.
>> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >
>> > can you check those two patches in your setup to verify it?
>>
>> I have a much simpler case I will break, as I tried something similar by accident.
>>
>> AMD cpu MCP55 with one pcie port setup as hotplug.
>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>>
>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>> Reads from the bar of the hotplugged device work
>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>
>> So I had to have the firmware make the assignment, because only it knows the
>> details of the hidden AMD bar registers for each hypertransport chain etc.
>
> Do you mean you had to have firmware program a hot-added device, or just
> that firmware had to program the apertures of the root port that was
> present at boot, even though it had no devices below it?

Firmware had to program the apertures of the root port that was present
at boot, even though it had no devices below it.

> Firmware normally supplies ACPI _CRS information that tells us how it
> programmed the host bridge windows. On x86, Linux normally ignores that
> and just assumes a range based on memory size. If we paid attention to
> it (as with "pci=use_crs"), it's likely that we could do a better job of
> doing this setup.
>
> Or, of course, we could add a Linux driver that knows about "the hidden
> AMD bar registers." But I think that should be a last resort, for when
> firmware supplied incorrect _CRS information.

In this case there was no ACPI, and even if there was correct _CRS information
it would have said only those addresses routed to bars/apertures on the
root bridge was routed to the MCP55. So while it looked like we had gobs
of unallocated space we could use. In practice we did not.

Eric W. Biederman

unread,
Oct 29, 2009, 11:43:55 AM10/29/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:

> Eric W. Biederman wrote:
>> Yinghai Lu <yin...@kernel.org> writes:
>>> after closing look up the code, it looks it will not break your setup.
>>>
>>> 1. before the patches:
>>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>
>>> 2. after the patches: v5
>>> a. booted up, all leaf bridge mmio get clearred.
>>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>
>>> can you check those two patches in your setup to verify it?
>>
>> I have a much simpler case I will break, as I tried something similar by accident.
> which kernel version?
>>
>> AMD cpu MCP55 with one pcie port setup as hotplug.
>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>
> one or two ht chains?

One chain.

> do you still have lspci -tv with it?
>
>>
>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>> Reads from the bar of the hotplugged device work
>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>
>> So I had to have the firmware make the assignment, because only it knows the
>> details of the hidden AMD bar registers for each hypertransport chain etc.
>
> that mean kernel doesn't get peer root bus res probed properly

How do you do that without having drivers for the peer root bus?

Eric

Bjorn Helgaas

unread,
Oct 29, 2009, 11:47:46 AM10/29/09
to Eric W. Biederman, Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky

I know this is a hypothetical case since you don't have ACPI, but I'm
curious about this.

I assume the magic AMD BARs only affect the host bridge, and that the
downstream root ports look like standard PCI-to-PCI bridges. If that's
the case, and if we have correct descriptions of the host bridge
apertures, Linux should theoretically be able to do as well as firmware.

But you seem to be suggesting that even with a correct host bridge
description, there's space that *looks* available but is not. I don't
understand how this can be.

Bjorn

Jesse Barnes

unread,
Oct 29, 2009, 12:32:06 PM10/29/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
On Thu, 29 Oct 2009 02:52:00 -0700
Yinghai Lu <yin...@kernel.org> wrote:

>
> only for index < 3
>
> v2: Jesse doesn't like it is in find_free_bus_resource...
> try to move out of pci_bus_size_bridges loop.
> need to apply after:
> [PATCH] pci: pciehp update the slot bridge res to get big
> range for pcie devices - v4 v3: add pci_setup_bridge calling after
> pci_bridge_release_not_used_res. only clear release those res for x86.
> v4: Bjorn want to release use dev instead of bus.
> v5: Kenji pointed out it will have problem with several level bridge.
> so let only handle leaf bridge.
> v6: address Kenji's request (new pci_bus_release...). and change
> applying order move back release to pci_assign_unassigned_resource
>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>

Starting to look better...

>
> ---
> drivers/pci/setup-bus.c | 65
> +++++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/pci.h | 1 2 files changed, 65 insertions(+), 1
> deletion(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -573,13 +573,68 @@ void __ref pci_bus_assign_resources(cons
> }
> EXPORT_SYMBOL(pci_bus_assign_resources);
>
> +static void pci_bridge_release_not_used_res(struct pci_bus *bus)

Can we call these functions something else? Maybe
pci_bridge_release_unused() with a kdoc comment describing exactly what
it does and why?

Likewise, maybe pci_bus_release_unused_bridge_res or something, with
comments again.

Also I wonder if we need to make this a command line option that isn't
run by default?

Really, as Eric says, we need a real dynamic allocation system with the
ability to move devices around at some point. Any volunteers? :)

--
Jesse Barnes, Intel Open Source Technology Center

Yinghai Lu

unread,
Oct 29, 2009, 1:00:45 PM10/29/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas

we have amd_bus.c to handle amd k8 system with two chains. but one chain is skipped.
(wonder if need to reenable that for one chain k8 system)

another intel_bus.c is on the way to 2.6.33.

when use_crs is used, those info from pci conf space is not used but just print out for check if _CRS is right or not.

YH

Yinghai Lu

unread,
Oct 29, 2009, 1:11:11 PM10/29/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

ok.

ok

thinking:
1. try not to touch bridge resource and do the allocation and record the fail device
2. release leaf bridge for those failed device
3. do second around bus_size and allocation.

>
> Really, as Eric says, we need a real dynamic allocation system with the
> ability to move devices around at some point. Any volunteers? :)

that will need suspend driver and rescan allocation and wakeup driver...

YH

Jesse Barnes

unread,
Oct 29, 2009, 1:52:11 PM10/29/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
On Thu, 29 Oct 2009 10:10:22 -0700
Yinghai Lu <yin...@kernel.org> wrote:
> > Also I wonder if we need to make this a command line option that
> > isn't run by default?
>
> thinking:
> 1. try not to touch bridge resource and do the allocation and record
> the fail device 2. release leaf bridge for those failed device
> 3. do second around bus_size and allocation.

Yeah, that would make it more automatic at least, which would be nice.

> > Really, as Eric says, we need a real dynamic allocation system with
> > the ability to move devices around at some point. Any
> > volunteers? :)
>
> that will need suspend driver and rescan allocation and wakeup
> driver...

Right, it's fairly invasive. But it's the right direction to go long
term.

--
Jesse Barnes, Intel Open Source Technology Center

Eric W. Biederman

unread,
Oct 29, 2009, 3:28:30 PM10/29/09
to Bjorn Helgaas, Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky
Bjorn Helgaas <bjorn....@hp.com> writes:

What I meant was simply that not all of the non-memory space was
routed down the hypertransport chain to the mcp55. If you have an
accurate description of that you should be fine.

Eric

Bjorn Helgaas

unread,
Oct 29, 2009, 3:40:43 PM10/29/09
to Eric W. Biederman, Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky

OK, yep, that makes perfect sense. That's a good example of why I
believe we should start paying attention to the root bridge _CRS,
because that's exactly what it would tell us.

Bjorn

Eric W. Biederman

unread,
Oct 29, 2009, 3:48:23 PM10/29/09
to Yinghai Lu, Kenji Kaneshige, Jesse Barnes, linux-...@vger.kernel.org, linu...@vger.kernel.org, Alex Chiang, Ivan Kokshaysky, Bjorn Helgaas
Yinghai Lu <yin...@kernel.org> writes:

I was running a 32bit kernel so this didn't kick in. That might have
helped. At least as far as recognizing the resources weren't properly
routed. If we don't setup the infrastructure so that we can reprogram
those resources I'm not certain how much good it will do in general.

> another intel_bus.c is on the way to 2.6.33.
>
> when use_crs is used, those info from pci conf space is not used but just print out for check if _CRS is right or not.

If enough space is routed and we get accurate information I am certain
that is fine. I am still worried about the change in policy though.

Only rerouting things when there is a need gives us the best chance of
working everywhere. Freeing unused resources on hotplug ports before
we plug in a device scares me, because we do something that should
but doesn't we reallocate them. If there is simply not enough room
we can do something different.

Eric

Yinghai Lu

unread,
Oct 30, 2009, 4:36:56 AM10/30/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

only for index < 3

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.

v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource

v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not big enough
need to do it in two steps, and first step recore the failed res, and don't
touch bridge res that programmed by firmware. second step will try to release
bridge resource that is too small at first.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 258 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 239 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -27,11 +27,54 @@
#include <linux/slab.h>
#include "pci.h"

-static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+
+static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
+ struct resource *res)
+{
+ struct resource_list *list = head;
+ struct resource_list *ln = list->next;
+ struct resource_list *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");
+ return;
+ }
+
+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list *head)
+{
+ struct resource_list *list, *tmp;
+ struct resource *res;
+ /*
+ * Try to release leaf bridge's resources that there is no child


+ * under it
+ */

+ for (list = head->next; list;) {


+ res = list->res;

+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_dev *dev;
struct resource *res;


struct resource_list head, *list, *tmp;

+ resource_size_t size;
int idx;

head.next = NULL;
@@ -57,10 +100,22 @@ static void pbus_assign_resources_sorted


for (list = head.next; list;) {

res = list->res;


idx = res - &list->dev->resource[0];

+ /* save the size at first */
+ size = resource_size(res);
if (pci_assign_resource(list->dev, idx)) {
- res->start = 0;
- res->end = 0;
- res->flags = 0;
+ if (fail_head && !list->dev->subordinate) {
+ /*
+ * device need to keep flags and size
+ * for second try
+ */


+ res->start = 0;

+ res->end = size - 1;
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {


+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
}

tmp = list;
list = list->next;
@@ -137,18 +192,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+
+static void pci_setup_bridge_io(struct pci_bus *bus)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
- int pref_mem64;
-


- if (pci_is_enabled(bridge))
- return;
-
- dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",

- pci_domain_nr(bus), bus->number);
+ u32 l, io_upper16;

/* Set up the top and bottom of the PCI I/O segment for this bus. */
pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
@@ -175,7 +224,12 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
-
+}
+static void pci_setup_bridge_mmio(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */
pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
@@ -191,6 +245,13 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " MEM window: disabled\n");
}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct pci_bus_region region;
+ u32 l, bu, lu;
+ int pref_mem64;

/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
@@ -226,10 +287,37 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
}
+}
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_dev *bridge = bus->self;
+
+ if (pci_is_enabled(bridge))
+ return;
+
+ dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+
+ if (type & IORESOURCE_PREFETCH)
+ pci_setup_bridge_mmio_pref(bus);

pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

+static void pci_setup_bridge(struct pci_bus *bus)
+{
+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */
@@ -541,19 +629,20 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;

- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);

list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;

- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

@@ -571,15 +660,105 @@ void __ref pci_bus_assign_resources(cons
}
}
}
+
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+{
+ __pci_bus_assign_resources(bus, NULL);
+}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void release_children_resource(struct resource *r)
+{
+ struct resource *p;
+ resource_size_t size;
+
+ p = r->child;
+ while (p) {
+ release_children_resource(p);
+ release_resource(p);
+ printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
+ /* need to restore size, and keep flags */
+ size = resource_size(p);
+ p->start = 0;
+ p->end = size - 1;
+ p = r->child;
+ }
+}
+
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)


+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];

+ if ((r->flags & type_mask) != type)


+ continue;
+ if (!r->parent)
+ continue;
+ /*

+ * if there are children under that, we should release them
+ * all
+ */
+ release_children_resource(r);
+ if (!release_resource(r)) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "resource %d %pRt released\n", idx, r);


+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed) {

+ if (type & IORESOURCE_PREFETCH) {
+ /* avoiding touch the one without PREF */
+ type = IORESOURCE_PREFETCH;
+ }
+ __pci_setup_bridge(bus, type);
+ }
+}
+
+/*
+ * try to release pci bridge resources that is from leaf bridge,
+ * so we can allocate big new one later
+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
+ unsigned long type)


+{
+ struct pci_bus *b;
+
+ /* If the bus is cardbus, do nothing */
+ if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ return;
+
+ /* We only release the resources under the leaf bridge */
+ if (list_empty(&bus->children)) {
+ if (!pci_is_root_bus(bus))

+ pci_bridge_release_unused_res(bus, type);


+ return;
+ }
+
+ /* Scan child buses if the bus is not leaf */
+ list_for_each_entry(b, &bus->children, node)

+ pci_bus_release_unused_bridge_res(b, type);
+}


+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;

for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);

@@ -607,6 +786,12 @@ void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;
+ bool second_tried = false;


+ struct resource_list head, *list, *tmp;

+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;

+again:
+ head.next = NULL;



/* Depth first, calculate sizes and alignments of all
subordinate buses. */

@@ -615,7 +800,42 @@ pci_assign_unassigned_resources(void)
}
/* Depth last, allocate resources and update the hardware. */
list_for_each_entry(bus, &pci_root_buses, node) {
- pci_bus_assign_resources(bus);
+ __pci_bus_assign_resources(bus, &head);
+ }
+
+ /* any device complain? */
+ if (!head.next)
+ goto enable_and_dump;
+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);
+ goto enable_and_dump;
+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */


+ for (list = head.next; list;) {

+ bus = list->dev->bus;
+ /* don't need to play with root bus */
+ if (!pci_is_root_bus(bus))
+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ goto again;
+
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node) {
pci_enable_bridges(bus);

Yinghai Lu

unread,
Oct 30, 2009, 4:37:51 AM10/30/09
to Eric W. Biederman, Kenji Kaneshige, Jesse Barnes, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.


v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()

v4: move out pci_is_enabled checkout of pci_setup_bridge()

v5: change the applying sequence.
v6: change the functions name according to Jesse


v8: address Eric's concern, only overwrite leaf bridge resource that is not big enough

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 23 ++++--
drivers/pci/setup-bus.c | 133 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 147 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c

@@ -53,17 +53,15 @@ static int __ref pciehp_add_bridge(struc


busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;

@@ -96,12 +94,25 @@ int pciehp_configure_device(struct slot


(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+

+ pci_assign_unassigned_bridge_resources(bridge);


+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -68,6 +68,56 @@ static void free_failed_list(struct reso
head->next = NULL;
}

+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+ struct resource_list *fail_head)
+{
+ struct resource *res;


+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;

+ resource_size_t size;
+


+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */

+ if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+ return;
+


+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))

+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+


+ for (list = head.next; list;) {

+ res = list->res;

+ idx = res - &list->dev->resource[0];


+ /* save the size at first */
+ size = resource_size(res);

+ if (pci_assign_resource(list->dev, idx)) {

+ if (fail_head && !list->dev->subordinate) {
+ /*
+ * device need to keep flags and size
+ * for second try
+ */
+ res->start = 0;
+ res->end = size - 1;
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }

+ }


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}

static void pbus_assign_resources_sorted(const struct pci_bus *bus,

struct resource_list *fail_head)
{
@@ -292,9 +342,6 @@ static void __pci_setup_bridge(struct pc


{
struct pci_dev *bridge = bus->self;

- if (pci_is_enabled(bridge))
- return;
-

dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",

pci_domain_nr(bus), bus->number);

@@ -646,7 +693,8 @@ static void __ref __pci_bus_assign_resou



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

@@ -667,6 +715,34 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
+ struct resource_list *fail_head)


+{
+ struct pci_bus *b;
+

+ pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);


+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+

+ __pci_bus_assign_resources(b, fail_head);


+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}

static void release_children_resource(struct resource *r)
{
struct resource *p;
@@ -844,3 +920,52 @@ enable_and_dump:
pci_bus_dump_resources(bus);
}
}
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+ struct pci_bus *bus;


+ struct pci_bus *parent = bridge->subordinate;

+ bool second_tried = false;
+ struct resource_list head, *list, *tmp;

+ int retval;


+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+

+again:
+ head.next = NULL;

+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);

+ __pci_bridge_assign_resources(bridge, &head);


+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);

+
+ /* any device complain? */
+ if (!head.next)

+ return;


+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);

+ return;


+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */
+ for (list = head.next; list;) {
+ bus = list->dev->bus;

+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+}

Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -760,6 +760,7 @@ void pci_bus_assign_resources(const stru
void pci_bus_size_bridges(struct pci_bus *bus);


int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);

+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
void pdev_enable_device(struct pci_dev *);
void pdev_sort_resources(struct pci_dev *, struct resource_list *);
int pci_enable_resources(struct pci_dev *, int mask);

Jesse Barnes

unread,
Nov 4, 2009, 12:30:57 PM11/4/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
On Thu, 29 Oct 2009 02:52:25 -0700
Yinghai Lu <yin...@kernel.org> wrote:

>
> move out bus_size_bridges and assign resources out of
> pciehp_add_bridge() and at last do them all together one time
> including slot bridge, to avoid to call assign resources several
> times, when there are several bridges under the slot bridge.
>
> need to introduce pci_bridge_assign_resources there.
>
> handle the case the slot bridge that doesn't get pre-allocated big
> enough res from FW.
> for example pcie devices need 256M, but the bridge only get
> preallocated 2M...
>
> pci_setup_bridge() will take extra check_enabled for the slot bridge,
> otherwise update res is not updated to bridge BAR. that is bridge is
> enabled already for port service.
>
> -v2: address Alex's concern about pci remove/rescan feature about
> pci_setup_bridge changes. -v3: Kenji pointed out that pci_config_slot
> need to be called before pci_bus_add_devices() -v4: move out
> pci_is_enabled checkout of pci_setup_bridge() -v5: change the
> applying sequence.
>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>

Where are we with this patchset? Given the nature of the changes I'll
definitely want to get a few Tested-bys in addition to acks from people
involved in this thread.

Thanks,


--
Jesse Barnes, Intel Open Source Technology Center

Yinghai Lu

unread,
Nov 4, 2009, 1:53:47 PM11/4/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Jesse Barnes wrote:
> On Thu, 29 Oct 2009 02:52:25 -0700
> Yinghai Lu <yin...@kernel.org> wrote:
>
>> move out bus_size_bridges and assign resources out of
>> pciehp_add_bridge() and at last do them all together one time
>> including slot bridge, to avoid to call assign resources several
>> times, when there are several bridges under the slot bridge.
>>
>> need to introduce pci_bridge_assign_resources there.
>>
>> handle the case the slot bridge that doesn't get pre-allocated big
>> enough res from FW.
>> for example pcie devices need 256M, but the bridge only get
>> preallocated 2M...
>>
>> pci_setup_bridge() will take extra check_enabled for the slot bridge,
>> otherwise update res is not updated to bridge BAR. that is bridge is
>> enabled already for port service.
>>
>> -v2: address Alex's concern about pci remove/rescan feature about
>> pci_setup_bridge changes. -v3: Kenji pointed out that pci_config_slot
>> need to be called before pci_bus_add_devices() -v4: move out
>> pci_is_enabled checkout of pci_setup_bridge() -v5: change the
>> applying sequence.
>>
>> Signed-off-by: Yinghai Lu <yin...@kernel.org>
>
> Where are we with this patchset? Given the nature of the changes I'll
> definitely want to get a few Tested-bys in addition to acks from people
> involved in this thread.

already have -v8.

but will refresh them after you push out your tree with several patches from Bjorn.

YH

Yinghai Lu

unread,
Nov 4, 2009, 8:41:08 PM11/4/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

only for index < 3

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource

v7: change functions name pci_bus_release_unused_bridge_res according to Jesse


v8: address Eric's concern, only overwrite leaf bridge resource that is not big

enough need to do it in two steps, and first step recore the failed res,


and don't touch bridge res that programmed by firmware. second step will
try to release bridge resource that is too small at first.

v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 253 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 234 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -27,7 +27,49 @@
#include <linux/slab.h>
#include "pci.h"


-static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+
+static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
+ struct resource *res)
+{
+ struct resource_list *list = head;
+ struct resource_list *ln = list->next;
+ struct resource_list *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");

+ return;
+ }
+


+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list *head)
+{
+ struct resource_list *list, *tmp;

+ struct resource *res;
+ /*


+ * Try to release leaf bridge's resources that there is no child
+ * under it

+ */
+ for (list = head->next; list;) {


+ res = list->res;

+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_dev *dev;
struct resource *res;

@@ -58,9 +100,18 @@ static void pbus_assign_resources_sorted
res = list->res;


idx = res - &list->dev->resource[0];

if (pci_assign_resource(list->dev, idx)) {
- res->start = 0;
- res->end = 0;

- res->flags = 0;
+ if (fail_head && !list->dev->subordinate &&
+ !pci_is_root_bus(list->dev->bus)) {


+ /*
+ * device need to keep flags and size
+ * for second try
+ */

+ add_to_failed_list(fail_head, list->dev, res);
+ } else {

+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
}

tmp = list;
list = list->next;

@@ -134,19 +185,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);


config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)

+static void pci_setup_bridge_io(struct pci_bus *bus)
{

struct pci_dev *bridge = bus->self;

struct resource *res;
struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
- int pref_mem64;
-


- if (pci_is_enabled(bridge))
- return;
-

- dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
- bus->secondary, bus->subordinate);


+ u32 l, io_upper16;

/* Set up the top and bottom of the PCI I/O segment for this bus. */

res = bus->resource[0];
@@ -172,7 +216,13 @@ static void pci_setup_bridge(struct pci_


pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
-
+}
+static void pci_setup_bridge_mmio(struct pci_bus *bus)
+{

+ struct pci_dev *bridge = bus->self;
+ struct resource *res;


+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */

res = bus->resource[1];
@@ -187,6 +237,14 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " bridge window [mem disabled]\n");


}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
+{

+ struct pci_dev *bridge = bus->self;
+ struct resource *res;


+ struct pci_bus_region region;
+ u32 l, bu, lu;
+ int pref_mem64;

/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily

@@ -219,10 +277,37 @@ static void pci_setup_bridge(struct pci_


pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
}
+}
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_dev *bridge = bus->self;
+
+ if (pci_is_enabled(bridge))
+ return;
+

+ dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
+ bus->secondary, bus->subordinate);


+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+
+ if (type & IORESOURCE_PREFETCH)
+ pci_setup_bridge_mmio_pref(bus);

pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

+static void pci_setup_bridge(struct pci_bus *bus)
+{

+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+


+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */

@@ -543,19 +628,20 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);


-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;

- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);

list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;

- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);

switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

@@ -573,15 +659,105 @@ void __ref pci_bus_assign_resources(cons


}
}
}
+
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+{
+ __pci_bus_assign_resources(bus, NULL);
+}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void release_children_resource(struct resource *r)
+{
+ struct resource *p;

+ resource_size_t size;
+


+ p = r->child;
+ while (p) {
+ release_children_resource(p);
+ release_resource(p);
+ printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
+ /* need to restore size, and keep flags */
+ size = resource_size(p);
+ p->start = 0;
+ p->end = size - 1;
+ p = r->child;
+ }
+}
+
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;

+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+

+{
+ struct pci_bus *b;
+

+ /* If the bus is cardbus, do nothing */
+ if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)

+ return;
+


+ /* We only release the resources under the leaf bridge */
+ if (list_empty(&bus->children)) {
+ if (!pci_is_root_bus(bus))
+ pci_bridge_release_unused_res(bus, type);

+ return;
+ }
+


+ /* Scan child buses if the bus is not leaf */
+ list_for_each_entry(b, &bus->children, node)
+ pci_bus_release_unused_bridge_res(b, type);
+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;

for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res);
@@ -609,6 +785,12 @@ void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;


+ bool second_tried = false;

+ struct resource_list head, *list, *tmp;

+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;

+again:
+ head.next = NULL;

/* Depth first, calculate sizes and alignments of all
subordinate buses. */

@@ -617,7 +799,40 @@ pci_assign_unassigned_resources(void)


}
/* Depth last, allocate resources and update the hardware. */
list_for_each_entry(bus, &pci_root_buses, node) {
- pci_bus_assign_resources(bus);
+ __pci_bus_assign_resources(bus, &head);
+ }

+
+ /* any device complain? */
+ if (!head.next)

+ goto enable_and_dump;


+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);

+ goto enable_and_dump;


+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */

+ for (list = head.next; list;) {

+ bus = list->dev->bus;
+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);

+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ goto again;
+
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node) {
pci_enable_bridges(bus);
}

Yinghai Lu

unread,
Nov 4, 2009, 8:42:05 PM11/4/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

v2: address Alex's concern about pci remove/rescan feature about
pci_setup_bridge changes.


v3: Kenji pointed out that pci_config_slot need to be called before
pci_bus_add_devices()

v4: move out pci_is_enabled checkout of pci_setup_bridge()

v5: change the applying sequence.

v6: change the functions name according to Jesse


v8: address Eric's concern, only overwrite leaf bridge resource that is not
big enough

v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 23 +++++-
drivers/pci/setup-bus.c | 129 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 143 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c

@@ -53,17 +53,15 @@ static int __ref pciehp_add_bridge(struc


busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}

int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;

@@ -96,12 +94,25 @@ int pciehp_configure_device(struct slot


(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+

+ pci_assign_unassigned_bridge_resources(bridge);


+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}

- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -68,6 +68,52 @@ static void free_failed_list(struct reso


head->next = NULL;
}

+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+ struct resource_list *fail_head)
+{

+ struct resource *res;


+ struct resource_list head, *list, *tmp;

+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */

+ if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+ return;
+


+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))

+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+


+ for (list = head.next; list;) {

+ res = list->res;

+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {

+ if (fail_head && !list->dev->subordinate &&
+ !pci_is_root_bus(list->dev->bus)) {
+ /*
+ * device need to keep flags and size
+ * for second try
+ */
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }

+ }


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}

static void pbus_assign_resources_sorted(const struct pci_bus *bus,

struct resource_list *fail_head)
{
@@ -282,9 +328,6 @@ static void __pci_setup_bridge(struct pc


{
struct pci_dev *bridge = bus->self;

- if (pci_is_enabled(bridge))
- return;
-

dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",

bus->secondary, bus->subordinate);

@@ -645,7 +688,8 @@ static void __ref __pci_bus_assign_resou



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

@@ -666,6 +710,34 @@ void __ref pci_bus_assign_resources(cons


}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
+ struct resource_list *fail_head)

+{
+ struct pci_bus *b;
+

+ pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);


+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+

+ __pci_bus_assign_resources(b, fail_head);


+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}

static void release_children_resource(struct resource *r)
{
struct resource *p;

@@ -841,3 +913,52 @@ enable_and_dump:
pci_bus_dump_resources(bus);
}
}
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+ struct pci_bus *bus;


+ struct pci_bus *parent = bridge->subordinate;

+ bool second_tried = false;
+ struct resource_list head, *list, *tmp;

+ int retval;


+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+

+again:
+ head.next = NULL;

+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);

+ __pci_bridge_assign_resources(bridge, &head);


+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);

+
+ /* any device complain? */
+ if (!head.next)

+ return;


+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);

+ return;


+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */
+ for (list = head.next; list;) {
+ bus = list->dev->bus;
+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+}

Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -767,6 +767,7 @@ void pci_bus_assign_resources(const stru


void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
void pdev_enable_device(struct pci_dev *);
void pdev_sort_resources(struct pci_dev *, struct resource_list *);
int pci_enable_resources(struct pci_dev *, int mask);

Alex Chiang

unread,
Nov 5, 2009, 3:48:09 PM11/5/09
to Yinghai Lu, Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
* Yinghai Lu <yin...@kernel.org>:

>
> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
> and at last do them all together one time including slot bridge, to avoid to
> call assign resources several times, when there are several bridges under the
> slot bridge.
>
> need to introduce pci_bridge_assign_resources there.
>
> handle the case the slot bridge that doesn't get pre-allocated big enough res
> from FW.
> for example pcie devices need 256M, but the bridge only get preallocated 2M...
>
> pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
> update res is not updated to bridge BAR. that is bridge is enabled already for
> port service.
>
> v2: address Alex's concern about pci remove/rescan feature about
> pci_setup_bridge changes.
> v3: Kenji pointed out that pci_config_slot need to be called before
> pci_bus_add_devices()
> v4: move out pci_is_enabled checkout of pci_setup_bridge()
> v5: change the applying sequence.
> v6: change the functions name according to Jesse
> v8: address Eric's concern, only overwrite leaf bridge resource that is not
> big enough
> v9: refresh to be applied after bjorn's patch, and remove trick about save
> size and restore resource second try.
>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>

How did you build and test this? I got this during building:

ERROR: "pci_assign_unassigned_bridge_resources" [drivers/pci/hotplug/pciehp.ko] undefined!
make[1]: *** [__modpost] Error 1
make: *** [modules] Error 2
make: *** Waiting for unfinished jobs....

This patch fixed it for me:
---
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 95fe3f4..2f15473 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -962,3 +962,4 @@ again:

goto again;
}
+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);

Yinghai Lu

unread,
Nov 5, 2009, 4:07:58 PM11/5/09
to Alex Chiang, Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

thanks.

i built that in

YH

Yinghai Lu

unread,
Nov 7, 2009, 12:42:50 AM11/7/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

v2: address Alex's concern about pci remove/rescan feature about
pci_setup_bridge changes.

v10: alex found need to have export for pci_assign_unassigned_bridge_resources

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 23 +++++-
drivers/pci/setup-bus.c | 130 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 144 insertions(+), 10 deletions(-)

@@ -841,3 +913,53 @@ enable_and_dump:

+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);


Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -768,6 +768,7 @@ void pci_bus_assign_resources(const stru

Yinghai Lu

unread,
Nov 7, 2009, 12:44:24 AM11/7/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

v2: address Alex's concern about pci remove/rescan feature about
pci_setup_bridge changes.
v3: Kenji pointed out that pci_config_slot need to be called before
pci_bus_add_devices()
v4: move out pci_is_enabled checkout of pci_setup_bridge()
v5: change the applying sequence.
v6: change the functions name according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not
big enough
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.

v10: alex found need to have export for pci_assign_unassigned_bridge_resources

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 23 +++++-
drivers/pci/setup-bus.c | 130 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 144 insertions(+), 10 deletions(-)

@@ -841,3 +913,53 @@ enable_and_dump:

+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);


Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h

@@ -768,6 +768,7 @@ void pci_bus_assign_resources(const stru

Kenji Kaneshige

unread,
Nov 10, 2009, 3:01:01 AM11/10/09
to Yinghai Lu, Eric W. Biederman, Jesse Barnes, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Hi Yinghai,

Sorry for the delayed response.
I'll try your patches on my machine soon (maybe in
the few days).

Thanks,
Kenji Kaneshige

> To unsubscribe from this list: send the line "unsubscribe linux-pci" in

Kenji Kaneshige

unread,
Nov 10, 2009, 3:08:26 AM11/10/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Can I ask which is the latest version?

I think -v10 is the latest. But I could not find -v10 for patch 1/2.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Nov 10, 2009, 4:49:49 AM11/10/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Kenji Kaneshige wrote:
> Can I ask which is the latest version?
>
> I think -v10 is the latest. But I could not find -v10 for patch 1/2.

please use

http://patchwork.kernel.org/patch/57814/ 1/2 -v9
http://patchwork.kernel.org/patch/58294/ 2/2 -v10

Thanks

Yinghai

Kenji Kaneshige

unread,
Nov 13, 2009, 1:09:07 AM11/13/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Can I ask which is the latest version?
>>
>> I think -v10 is the latest. But I could not find -v10 for patch 1/2.
>
> please use
>
> http://patchwork.kernel.org/patch/57814/ 1/2 -v9
> http://patchwork.kernel.org/patch/58294/ 2/2 -v10
>

Regardless of PCIe hotplug, some of PCIe devices (not on hotplug
slots) doesn't work with your patches by the following error.

Copyright (c) 2007-2009 Intel Corporation.
igb 0000:07:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
igb 0000:07:00.0: setting latency timer to 64
igb 0000:07:00.0: PCI INT A disabled
igb: probe of 0000:07:00.0 failed with error -5
igb 0000:07:00.1: PCI INT B -> GSI 17 (level, low) -> IRQ 17
igb 0000:07:00.1: setting latency timer to 64
igb 0000:07:00.1: PCI INT B disabled
igb: probe of 0000:07:00.1 failed with error -5
igb 0000:08:00.0: PCI INT A -> GSI 17 (level, low) -> IRQ 17
igb 0000:08:00.0: setting latency timer to 64
igb 0000:08:00.0: PCI INT A disabled
igb: probe of 0000:08:00.0 failed with error -5
igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
igb 0000:08:00.1: setting latency timer to 64
igb 0000:08:00.1: PCI INT B disabled
igb: probe of 0000:08:00.1 failed with error -5

I'm using Jesse's latest linux-next.
I'm attaching the /proc/iomem outputs. The iomem-default.txt is
/proc/iomem output without your patches. The iomem-yinghai.txt
is /proc/iomem output with your patches.

Thanks,
Kenji Kaneshige

iomem-default.txt
iomem-yinghai.txt

Yinghai Lu

unread,
Nov 13, 2009, 1:28:02 AM11/13/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

can you post whole bootlog with pci debug enabled?

Kenji Kaneshige

unread,
Nov 13, 2009, 3:34:07 AM11/13/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

I'll send it privately.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Nov 14, 2009, 3:51:59 AM11/14/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
v7: change functions name pci_bus_release_unused_bridge_res according to Jesse

v8: address Eric's concern, only overwrite leaf bridge resource that is not big
enough need to do it in two steps, and first step recore the failed res,

and don't touch bridge res that programmed by firmware. second step will
try to release bridge resource that is too small at first.
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.
v11:add pci=try=5, about more try to change more bridge

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/pci.c | 5
drivers/pci/pci.h | 2
drivers/pci/setup-bus.c | 304 +++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 292 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -27,7 +27,49 @@


#include <linux/slab.h>
#include "pci.h"

-static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+
+static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
+ struct resource *res)
+{
+ struct resource_list *list = head;
+ struct resource_list *ln = list->next;
+ struct resource_list *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");

+ return;
+ }
+


+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list *head)
+{
+ struct resource_list *list, *tmp;

+ struct resource *res;
+ /*


+ * Try to release leaf bridge's resources that there is no child
+ * under it

+ */
+ for (list = head->next; list;) {


+ res = list->res;

+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;

+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_dev *dev;
struct resource *res;

@@ -58,9 +100,17 @@ static void pbus_assign_resources_sorted
res = list->res;


idx = res - &list->dev->resource[0];

if (pci_assign_resource(list->dev, idx)) {
- res->start = 0;
- res->end = 0;

- res->flags = 0;
+ if (fail_head && !pci_is_root_bus(list->dev->bus)) {


+ /*
+ * device need to keep flags and size

+ * for next try


+ */
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
}

tmp = list;
list = list->next;

@@ -134,19 +184,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);


config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)

+static void pci_setup_bridge_io(struct pci_bus *bus)
{

struct pci_dev *bridge = bus->self;

struct resource *res;


struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
- int pref_mem64;
-

- if (pci_is_enabled(bridge))
- return;
-
- dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",

- bus->secondary, bus->subordinate);

+ u32 l, io_upper16;

/* Set up the top and bottom of the PCI I/O segment for this bus. */

res = bus->resource[0];

@@ -172,7 +215,13 @@ static void pci_setup_bridge(struct pci_


pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
-
+}
+static void pci_setup_bridge_mmio(struct pci_bus *bus)
+{

+ struct pci_dev *bridge = bus->self;
+ struct resource *res;


+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */

res = bus->resource[1];

@@ -187,6 +236,14 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " bridge window [mem disabled]\n");


}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
+{

+ struct pci_dev *bridge = bus->self;
+ struct resource *res;


+ struct pci_bus_region region;
+ u32 l, bu, lu;
+ int pref_mem64;

/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily

@@ -219,10 +276,37 @@ static void pci_setup_bridge(struct pci_


pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
}
+}
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_dev *bridge = bus->self;
+
+ if (pci_is_enabled(bridge))
+ return;
+

+ dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
+ bus->secondary, bus->subordinate);

+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+
+ if (type & IORESOURCE_PREFETCH)
+ pci_setup_bridge_mmio_pref(bus);

pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

+static void pci_setup_bridge(struct pci_bus *bus)
+{

+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+


+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */

@@ -543,19 +627,20 @@ void __ref pci_bus_size_bridges(struct p


}
EXPORT_SYMBOL(pci_bus_size_bridges);

-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;

- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);

list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;

- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);

switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

@@ -573,15 +658,130 @@ void __ref pci_bus_assign_resources(cons

+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+

+ * check:
+ * 0: only release the bridge and only the bridge is leaf
+ * 1: release all down side bridge for third shoot


+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,

+ unsigned long type,
+ int check_leaf)
+{
+ struct pci_dev *dev;
+ bool is_leaf_bridge = true;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ is_leaf_bridge = false;
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ is_leaf_bridge = false;
+ if (!check_leaf)
+ pci_bus_release_unused_bridge_res(b, type,
+ check_leaf);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ if ((check_leaf && is_leaf_bridge) || !check_leaf)
+ pci_bridge_release_unused_res(bus, type);
+ break;
+ }


+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;

for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res);
@@ -605,10 +805,25 @@ static void pci_bus_dump_resources(struc
}
}

+/*
+ * first try will not touch pci bridge res
+ * second try will clear small leaf bridge res
+ * third try will clear related bridge: some aggressive
+ */
+/* assume we only have 4 level bridges, so only try 5 times */
+int pci_try_num = 5;


void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;

+ int tried_times = 0;
+ int check_leaf = 1;


+ struct resource_list head, *list, *tmp;

+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;

+ unsigned long failed_type;


+again:
+ head.next = NULL;

/* Depth first, calculate sizes and alignments of all
subordinate buses. */

@@ -617,7 +832,58 @@ pci_assign_unassigned_resources(void)


}
/* Depth last, allocate resources and update the hardware. */
list_for_each_entry(bus, &pci_root_buses, node) {
- pci_bus_assign_resources(bus);
+ __pci_bus_assign_resources(bus, &head);
+ }

+ tried_times++;


+
+ /* any device complain? */
+ if (!head.next)

+ goto enable_and_dump;
+ failed_type = 0;


+ for (list = head.next; list;) {

+ unsigned long flags = list->res->flags;
+
+ failed_type |= flags;


+ list = list->next;
+ }

+ /*
+ * io port are tight, don't try extra
+ * or if reach the limit, don't want to try more
+ */
+ failed_type &= type_mask;
+ if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {


+ free_failed_list(&head);
+ goto enable_and_dump;
+ }
+

+ printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
+ tried_times + 1);


+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */

+ /* third times and later will not check if it is leaf */
+ if ((tried_times + 1) > 2)
+ check_leaf = 0;


+ for (list = head.next; list;) {

+ unsigned long flags = list->res->flags;
+


+ bus = list->dev->bus;

+ if (list->dev->subordinate)
+ list->res->flags = 0;
+ pci_bus_release_unused_bridge_res(bus, flags & type_mask,
+ check_leaf);


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+

+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node) {
pci_enable_bridges(bus);
}

Index: linux-2.6/drivers/pci/pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci.c
+++ linux-2.6/drivers/pci/pci.c
@@ -2779,6 +2779,11 @@ static int __init pci_setup(char *str)
pci_no_aer();
} else if (!strcmp(str, "nodomains")) {
pci_no_domains();
+ } else if (!strncmp(str, "try=", 4)) {
+ int try_num = memparse(str + 4, &str);
+
+ if (try_num > 0 && try_num < 10)
+ pci_try_num = try_num;
} else if (!strncmp(str, "cbiosize=", 9)) {
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
Index: linux-2.6/drivers/pci/pci.h
===================================================================
--- linux-2.6.orig/drivers/pci/pci.h
+++ linux-2.6/drivers/pci/pci.h
@@ -203,6 +203,8 @@ static inline int pci_ari_enabled(struct
return bus->self && bus->self->ari_enabled;
}

+extern int pci_try_num;
+
#ifdef CONFIG_PCI_QUIRKS
extern int pci_is_reassigndev(struct pci_dev *dev);
resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);

Yinghai Lu

unread,
Nov 14, 2009, 3:52:34 AM11/14/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.

need to introduce pci_bridge_assign_resources there.

handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...

pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.

v2: address Alex's concern about pci remove/rescan feature about
pci_setup_bridge changes.
v3: Kenji pointed out that pci_config_slot need to be called before
pci_bus_add_devices()
v4: move out pci_is_enabled checkout of pci_setup_bridge()
v5: change the applying sequence.

v6: change the functions name according to Jesse


v8: address Eric's concern, only overwrite leaf bridge resource that is not
big enough

v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.

v10:alex found need to have export for pci_assign_unassigned_bridge_resources

v11: pass check_leaf with pci_bus_release_unused_bridge_res

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/hotplug/pciehp_pci.c | 23 ++++--
drivers/pci/setup-bus.c | 133 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 147 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -68,6 +68,51 @@ static void free_failed_list(struct reso


head->next = NULL;
}

+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+ struct resource_list *fail_head)
+{

+ struct resource *res;


+ struct resource_list head, *list, *tmp;

+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))

+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+


+ for (list = head.next; list;) {

+ res = list->res;

+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {

+ if (fail_head && !pci_is_root_bus(list->dev->bus)) {
+ /*
+ * device need to keep flags and size

+ * for second try


+ */
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }

+ }


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}

static void pbus_assign_resources_sorted(const struct pci_bus *bus,

struct resource_list *fail_head)
{
@@ -281,9 +326,6 @@ static void __pci_setup_bridge(struct pc


{
struct pci_dev *bridge = bus->self;

- if (pci_is_enabled(bridge))
- return;
-

dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",

bus->secondary, bus->subordinate);

@@ -644,7 +686,8 @@ static void __ref __pci_bus_assign_resou



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

@@ -665,6 +708,34 @@ void __ref pci_bus_assign_resources(cons


}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
+ struct resource_list *fail_head)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);
+

+ b = bridge->subordinate;
+ if (!b)


+ return;
+
+ __pci_bus_assign_resources(b, fail_head);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);

+ break;
+


+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
static void release_children_resource(struct resource *r)
{
struct resource *p;

@@ -892,3 +963,57 @@ enable_and_dump:


pci_bus_dump_resources(bus);
}
}
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+ struct pci_bus *bus;
+ struct pci_bus *parent = bridge->subordinate;
+ bool second_tried = false;

+ struct resource_list head, *list, *tmp;

+ int retval;


+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+

+again:
+ head.next = NULL;

+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ __pci_bridge_assign_resources(bridge, &head);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);

+
+ /* any device complain? */
+ if (!head.next)

+ return;
+
+ if (second_tried) {

+ /* still fail, don't want to try more */
+ free_failed_list(&head);
+ return;
+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");


+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */

+ for (list = head.next; list;) {
+ unsigned long flags = list->res->flags;
+
+ bus = list->dev->bus;
+ if (list->dev->subordinate)
+ list->res->flags = 0;

+ pci_bus_release_unused_bridge_res(bus, flags & type_mask, 0);
+


+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+}

+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -768,6 +768,7 @@ void pci_bus_assign_resources(const stru
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
void pdev_enable_device(struct pci_dev *);
void pdev_sort_resources(struct pci_dev *, struct resource_list *);
int pci_enable_resources(struct pci_dev *, int mask);

Kenji Kaneshige

unread,
Nov 23, 2009, 8:09:42 PM11/23/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Hi,

I tried v11 patches. This version seems to fix the problem I
reported against previous version.

I have no objection against the idea of resource allocation
changes for PCI express hotplug slots.

But I still have concern about changing resource allocation for
other than PCI express hotplug slots. For example, some hotplug
controller other than PCI express can have multiple slots under
the same bus. If some hotplug slots are occupied and the others
are empty at the boot time, I think your code try to shrink the
bus resources for hotplug slots allocated by BIOS. It would break
the hot-add on the empty slots due to the resource allocation
failure.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Nov 23, 2009, 8:15:47 PM11/23/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Kenji Kaneshige wrote:
> Hi,
>
> I tried v11 patches. This version seems to fix the problem I
> reported against previous version.
>
> I have no objection against the idea of resource allocation
> changes for PCI express hotplug slots.

thanks

>
> But I still have concern about changing resource allocation for
> other than PCI express hotplug slots. For example, some hotplug
> controller other than PCI express can have multiple slots under
> the same bus. If some hotplug slots are occupied and the others
> are empty at the boot time, I think your code try to shrink the
> bus resources for hotplug slots allocated by BIOS. It would break
> the hot-add on the empty slots due to the resource allocation
> failure.

no,
it will not touch bridge resources that already assigned by BIOS except
some bridge resource is not big enough. and get big one for them.

YH

Kenji Kaneshige

unread,
Nov 23, 2009, 8:51:56 PM11/23/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Hi,
>>
>> I tried v11 patches. This version seems to fix the problem I
>> reported against previous version.
>>
>> I have no objection against the idea of resource allocation
>> changes for PCI express hotplug slots.
>
> thanks
>
>> But I still have concern about changing resource allocation for
>> other than PCI express hotplug slots. For example, some hotplug
>> controller other than PCI express can have multiple slots under
>> the same bus. If some hotplug slots are occupied and the others
>> are empty at the boot time, I think your code try to shrink the
>> bus resources for hotplug slots allocated by BIOS. It would break
>> the hot-add on the empty slots due to the resource allocation
>> failure.
>
> no,
> it will not touch bridge resources that already assigned by BIOS except
> some bridge resource is not big enough. and get big one for them.
>

Ok, I understood that if the BIOS assigns enough resources to the
bridge, it has no impact.

One question. I thought your patch shrinks the bridge resource to
allocate enough resource for sibling bridge. But it actually doesn't.
Right?

It would be appreciated if you update the patch description about
the problem and how to fix/improbe it.

Thanks,
Kenji Kaneshige

Kenji Kaneshige

unread,
Nov 25, 2009, 6:26:27 AM11/25/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Current pciehp driver configures bridge's child bus each time the
bridge is found on the hot-added PCIe adapter card. It is redundant
and strange because pciehp tryies to add devices on the child bus
before adding its parent bridge. It should be done at one time on the
slot's parent bus.

Signed-off-by: Kenji Kaneshige <kaneshi...@jp.fujitsu.com>

---
drivers/pci/hotplug/pciehp_pci.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)

Index: 20091125/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- 20091125.orig/drivers/pci/hotplug/pciehp_pci.c
+++ 20091125/drivers/pci/hotplug/pciehp_pci.c
@@ -34,30 +34,26 @@
#include "../pci.h"
#include "pciehp.h"

-static int __ref pciehp_add_bridge(struct pci_dev *dev)
+static void __ref pciehp_scan_bridge(struct pci_dev *dev)
{
struct pci_bus *parent = dev->bus;
- int pass, busnr, start = parent->secondary;
- int end = parent->subordinate;
+ int pass, busnr, start, end;

+ start = parent->secondary;
+ end = parent->subordinate;
for (busnr = start; busnr <= end; busnr++) {
if (!pci_find_bus(pci_domain_nr(parent), busnr))
break;
}
+
if (busnr-- > end) {
err("No bus number available for hot-added bridge %s\n",
- pci_name(dev));
- return -1;
+ pci_name(dev));
+ return;
}
+
for (pass = 0; pass < 2; pass++)


busnr = pci_scan_bridge(parent, dev, busnr, pass);

- if (!dev->subordinate)
- return -1;


- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);

- return 0;
}

int pciehp_configure_device(struct slot *p_slot)
@@ -93,14 +89,16 @@ int pciehp_configure_device(struct slot
continue;
}
if ((dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) ||
- (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
- pciehp_add_bridge(dev);
- }
+ (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS))
+ pciehp_scan_bridge(dev);
+
pci_configure_slot(dev);
pci_dev_put(dev);
}

+ pci_bus_size_bridges(parent);
pci_bus_assign_resources(parent);
+ pci_enable_bridges(parent);
pci_bus_add_devices(parent);
return 0;

Kenji Kaneshige

unread,
Nov 25, 2009, 6:27:49 AM11/25/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Some platform BIOS doesn't assign PCI bridge resources (IO base/limit,
Mem base/limit and Prefechable Mem base/limit) to root port or switch
downstream port with hotplug slots. On such platform, PCIe hotplug
doesn't work due to the resource allocation failure. This patch is to
improbe this situation.

With this patch, pciehp driver try to assign PCI bridge resources to
parent bridge (root port or switch downstream port) of the slot in
addition to assign resources to the hot-added device.

With this patch, pciehp driver try to release PCI bridge resources so
that we can reassign those resources to another root port on the same
PCI bus segment, or to another downstream port on the same swith.

This feature is enabled when 'pciehp_realloc' option is specified.

Signed-off-by: Kenji Kaneshige <kaneshi...@jp.fujitsu.com>

---
drivers/pci/hotplug/pciehp.h | 1
drivers/pci/hotplug/pciehp_core.c | 9 ++++
drivers/pci/hotplug/pciehp_pci.c | 9 +++-
drivers/pci/setup-bus.c | 84 +++++++++++++++++++++++++++++---------
include/linux/pci.h | 2
5 files changed, 85 insertions(+), 20 deletions(-)

Index: 20091125/drivers/pci/setup-bus.c
===================================================================
--- 20091125.orig/drivers/pci/setup-bus.c
+++ 20091125/drivers/pci/setup-bus.c
@@ -27,13 +27,31 @@


#include <linux/slab.h>
#include "pci.h"

-static void pbus_assign_resources_sorted(const struct pci_bus *bus)

+static void pbus_assign_resources_list(struct resource_list *head)
{
- struct pci_dev *dev;
struct resource *res;
- struct resource_list head, *list, *tmp;
+ struct resource_list *list, *tmp;
int idx;



+ for (list = head->next; list;) {
+ res = list->res;

+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {

+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+

+static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+ struct resource_list head;
+
head.next = NULL;
list_for_each_entry(dev, &bus->devices, bus_list) {


u16 class = dev->class >> 8;

@@ -54,18 +72,7 @@ static void pbus_assign_resources_sorted
pdev_sort_resources(dev, &head);
}

- for (list = head.next; list;) {
- res = list->res;
- idx = res - &list->dev->resource[0];
- if (pci_assign_resource(list->dev, idx)) {


- res->start = 0;
- res->end = 0;
- res->flags = 0;

- }
- tmp = list;
- list = list->next;
- kfree(tmp);
- }
+ pbus_assign_resources_list(&head);
}

void pci_setup_cardbus(struct pci_bus *bus)
@@ -142,9 +149,6 @@ static void pci_setup_bridge(struct pci_


u32 l, bu, lu, io_upper16;

int pref_mem64;



- if (pci_is_enabled(bridge))
- return;
-

dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",

bus->secondary, bus->subordinate);

@@ -559,7 +563,8 @@ void __ref pci_bus_assign_resources(cons



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;

case PCI_CLASS_BRIDGE_CARDBUS:

@@ -575,6 +580,47 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);

+void __ref pci_bridge_assign_resources(struct pci_bus *bus)


+{
+ struct pci_dev *bridge = bus->self;

+ struct resource_list head;
+
+ if (pci_is_root_bus(bus) ||
+ (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;


+
+ head.next = NULL;

+ pdev_sort_resources(bridge, &head);
+ pbus_assign_resources_list(&head);
+ pci_bus_assign_resources(bus);
+ pci_setup_bridge(bus);
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
+void __ref pci_bridge_release_window(struct pci_bus *bus)


+{
+ struct pci_dev *bridge = bus->self;

+ int i;
+
+ if (pci_is_root_bus(bus) ||
+ (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;
+
+ for (i = 0; i < 3; i++)
+ if (bus->resource[i]->child)
+ return;
+
+ for (i = 0; i < 3; i++)
+ bus->resource[i]->flags = 0;
+
+ pci_setup_bridge(bus);
+
+ for (i = 0; i < 3; i++)
+ if (bus->resource[i]->parent)
+ release_resource(bus->resource[i]);
+}
+EXPORT_SYMBOL(pci_bridge_release_window);


+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;

Index: 20091125/drivers/pci/hotplug/pciehp_core.c
===================================================================
--- 20091125.orig/drivers/pci/hotplug/pciehp_core.c
+++ 20091125/drivers/pci/hotplug/pciehp_core.c
@@ -41,6 +41,7 @@ int pciehp_debug;
int pciehp_poll_mode;
int pciehp_poll_time;
int pciehp_force;
+int pciehp_realloc;
struct workqueue_struct *pciehp_wq;

#define DRIVER_VERSION "0.4"
@@ -55,10 +56,13 @@ module_param(pciehp_debug, bool, 0644);
module_param(pciehp_poll_mode, bool, 0644);
module_param(pciehp_poll_time, int, 0644);
module_param(pciehp_force, bool, 0644);
+module_param(pciehp_realloc, bool, 0644);
+
MODULE_PARM_DESC(pciehp_debug, "Debugging mode enabled or not");
MODULE_PARM_DESC(pciehp_poll_mode, "Using polling mechanism for hot-plug events or not");
MODULE_PARM_DESC(pciehp_poll_time, "Polling mechanism frequency, in seconds");
MODULE_PARM_DESC(pciehp_force, "Force pciehp, even if _OSC and OSHP are missing");
+MODULE_PARM_DESC(pciehp_realloc, "Realloc resources for slot's parent bridge");

#define PCIE_MODULE_NAME "pciehp"

@@ -293,10 +297,15 @@ static int pciehp_probe(struct pcie_devi
pciehp_get_power_status(slot, &poweron);
if (occupied && pciehp_force)
pciehp_enable_slot(slot);
+
/* If empty slot's power status is on, turn power off */
if (!occupied && poweron && POWER_CTRL(ctrl))
pciehp_power_off_slot(slot);

+ /* If no device is running on the slot, release bridge's I/O window */
+ if (pciehp_realloc && !poweron)
+ pci_bridge_release_window(dev->port->subordinate);
+
return 0;

err_out_free_ctrl_slot:


Index: 20091125/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- 20091125.orig/drivers/pci/hotplug/pciehp_pci.c
+++ 20091125/drivers/pci/hotplug/pciehp_pci.c

@@ -97,7 +97,10 @@ int pciehp_configure_device(struct slot
}

pci_bus_size_bridges(parent);
- pci_bus_assign_resources(parent);
+ if (pciehp_realloc)
+ pci_bridge_assign_resources(parent);
+ else
+ pci_bus_assign_resources(parent);
pci_enable_bridges(parent);
pci_bus_add_devices(parent);
return 0;
@@ -153,5 +156,9 @@ int pciehp_unconfigure_device(struct slo
pci_dev_put(temp);
}

+ /* Release I/O window of the slots's parent bridge */
+ if (pciehp_realloc)
+ pci_bridge_release_window(parent);
+
return rc;
}
Index: 20091125/include/linux/pci.h
===================================================================
--- 20091125.orig/include/linux/pci.h
+++ 20091125/include/linux/pci.h
@@ -764,6 +764,8 @@ int pci_vpd_truncate(struct pci_dev *dev

/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
void pci_bus_assign_resources(const struct pci_bus *bus);
+void pci_bridge_assign_resources(struct pci_bus *bus);
+void pci_bridge_release_window(struct pci_bus *bus);


void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);

Index: 20091125/drivers/pci/hotplug/pciehp.h
===================================================================
--- 20091125.orig/drivers/pci/hotplug/pciehp.h
+++ 20091125/drivers/pci/hotplug/pciehp.h
@@ -43,6 +43,7 @@ extern int pciehp_poll_mode;
extern int pciehp_poll_time;
extern int pciehp_debug;
extern int pciehp_force;
+extern int pciehp_realloc;
extern struct workqueue_struct *pciehp_wq;

#define dbg(format, arg...) \

Yinghai Lu

unread,
Nov 25, 2009, 12:39:07 PM11/25/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

you comment on my patch2, that we should move pci_configure_slot later?


YH

Yinghai Lu

unread,
Nov 25, 2009, 12:45:49 PM11/25/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Kenji Kaneshige wrote:
> Hi Yinghai,
>
> I would like to reconfirm what is the problem you're trying to solve
> before reviewing and testing the additional patch. To be honest, your
> patch looks more and more complicated and it is becoming difficult
> for me to review and test it.

the real problem:
1. boot time:

BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to the bridge, but stop
assigning resource to the device under that bridge, because the device need big resource.

so patch1 is trying to
a. pci assign unassign and record the failed device resource.
b. clear the BIOS assigned resource of the parent bridge of fail device
c. go back and call pci assign unsigned
d. if it still fail, will go up more bridges. and clear and try again.

2. hotplug:
BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to every bridge. (8M)
but when insert one card that big resource, the card can not get resource. because kernel will not touch the
bridge resource.

so patch2 is trying to
a. assign resource to devices with that slot. and record fail devices
b. if there is some failed, will clear sepcifically io port of bridge, or mmio of bridge, or mmio pref of bridge.
c. try to assign the parent bridge of the slot.

so it will keep original assigned resource by BIOS if possible.

and you have tested patch1 and patch2 in V11, but said patch1 may have shrinking resource problem.
the patch3 is addressing the patch1 that could shrinking resource for non-pcie hotplug bridge...


>
> By the way, if your problem is that BIOS doesn't assign the resource
> to the parent bridge (root port or switch downstream port) of PCIe
> hotplug slots, I guess we can improve it with simpler way. I made a
> sample patches (no enough testing). Please take a look. Patches are
>
> - [PATCH 1/2] pciehp: remove redundancy in bridge resource allocation
> - [PATCH 2/2] pciehp: add support for bridge resource reallocation

like some earlier version of patch2 (release it them all at first) but have pciehp_realloc parameter.
could be useful in some case when current patch2 (try and increase) could use up all space.
i like to have that after patch2.

Yinghai Lu

unread,
Nov 25, 2009, 3:00:17 PM11/25/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
split the two patches in to 9 for easy review...

please check

1. boot time:

BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to the bridge, but stop
assigning resource to the device under that bridge, because the device need big resource.

so patches (first 6) are trying to

a. pci assign unassign and record the failed device resource.
b. clear the BIOS assigned resource of the parent bridge of fail device
c. go back and call pci assign unsigned
d. if it still fail, will go up more bridges. and clear and try again.

v2: Jesse doesn't like it is in find_free_bus_resource...


try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not big
enough need to do it in two steps, and first step recore the failed res,
and don't touch bridge res that programmed by firmware. second step will
try to release bridge resource that is too small at first.
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.
v11:add pci=try=5, about more try to change more bridge

v12:not shrink pci bridge resource

2. hotplug:
BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to every bridge. (8M)
but when insert one card that big resource, the card can not get resource. because kernel will not touch the
bridge resource.

so patches (last 3) are trying to


a. assign resource to devices with that slot. and record fail devices
b. if there is some failed, will clear sepcifically io port of bridge, or mmio of bridge, or mmio pref of bridge.
c. try to assign the parent bridge of the slot.

v2: address Alex's concern about pci remove/rescan feature about


pci_setup_bridge changes.
v3: Kenji pointed out that pci_config_slot need to be called before
pci_bus_add_devices()
v4: move out pci_is_enabled checkout of pci_setup_bridge()
v5: change the applying sequence.

v6: change the functions name according to Jesse


v8: address Eric's concern, only overwrite leaf bridge resource that is not
big enough

v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.

v10:alex found need to have export for pci_assign_unassigned_bridge_resources
v11: pass check_leaf with pci_bus_release_unused_bridge_res


Thanks

Yinghai

Yinghai

unread,
Nov 26, 2009, 2:30:59 AM11/26/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

On Nov 25, 2009, at 10:43 PM, Kenji Kaneshige <kaneshi...@jp.fujitsu.com
> wrote:

> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Hi Yinghai,
>>>
>>> I would like to reconfirm what is the problem you're trying to solve
>>> before reviewing and testing the additional patch. To be honest,
>>> your
>>> patch looks more and more complicated and it is becoming difficult
>>> for me to review and test it.
>> the real problem:

>> 1. boot time:
>> BIOS separate IO range between several IOHs, and on some slots,
>> BIOS assign the resource to the bridge, but stop
>> assigning resource to the device under that bridge, because the
>> device need big resource.

>> so patch1 is trying to a. pci assign unassign and record the failed

>> device resource.
>> b. clear the BIOS assigned resource of the parent bridge of fail
>> device
>> c. go back and call pci assign unsigned
>> d. if it still fail, will go up more bridges. and clear and try
>> again.

>> 2. hotplug:
>> BIOS separate IO range between several IOHs, and on some slots,
>> BIOS assign the resource to every bridge. (8M)
>> but when insert one card that big resource, the card can not get
>> resource. because kernel will not touch the bridge resource.

>> so patch2 is trying to


>> a. assign resource to devices with that slot. and record fail devices
>> b. if there is some failed, will clear sepcifically io port of
>> bridge, or mmio of bridge, or mmio pref of bridge.
>> c. try to assign the parent bridge of the slot.

>> so it will keep original assigned resource by BIOS if possible.
>> and you have tested patch1 and patch2 in V11, but said patch1 may
>> have shrinking resource problem.
>> the patch3 is addressing the patch1 that could shrinking resource
>> for non-pcie hotplug bridge...
>>
>

> Thank you for the explanation. The patch3 seems to solve my concern.
>
> Your patch only touch the leaf bridge at the 2nd try of resource
> assignment. IIRC, this behavior is to prevent from shrinking bridge
> resources. Am I correct? I'm not sure but I think we don't need this
> behavior because now that we have another mechanism to prevent
> from shrinking bridge resource.
>>>

Third patch will only try to increase the bridge res

Try num still default to 5 , it could help other case

Can you send me whole bootlog ?

Kenji Kaneshige

unread,
Nov 27, 2009, 2:12:55 AM11/27/09
to Yinghai, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Organization: Internet mailing list

Bad news...

My system doesn't boot (hangup) with your latest set of patches.
Fusion MPT SAS driver initialization failed on some devices.
Please see below

..
Fusion MPT SAS Host driver 3.04.12
mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff 64bit]
mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff 64bit]
mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
..


This problem disappear when I revert the patch 6/9, and my system
can boot. But I found igb driver initialization failed on some
devices even in this case. Please see below

..
igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]


igb 0000:08:00.0: PCI INT A disabled

igb: probe of 0000:08:00.0 failed with error -16


igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18

igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]


igb 0000:08:00.1: PCI INT B disabled

igb: probe of 0000:08:00.1 failed with error -16
..

I'll send the whole boot log in both cases privately later.

Thanks,
Kenji Kaneshige

Kenji Kaneshige

unread,
Nov 27, 2009, 3:27:39 AM11/27/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Yinghai Lu wrote:
>> Bad news...
>>
>> My system doesn't boot (hangup) with your latest set of patches.
>> Fusion MPT SAS driver initialization failed on some devices.
>> Please see below
>>
>> ...

>> Fusion MPT SAS Host driver 3.04.12
>> mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
>> mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff 64bit]
>> mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
>> mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
>> mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff 64bit]
>> mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
>> ...

>>
>>
>> This problem disappear when I revert the patch 6/9, and my system
>> can boot. But I found igb driver initialization failed on some
>> devices even in this case. Please see below
>>
>> ...

>> igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]
>> igb 0000:08:00.0: PCI INT A disabled
>> igb: probe of 0000:08:00.0 failed with error -16
>> igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
>> igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]
>> igb 0000:08:00.1: PCI INT B disabled
>> igb: probe of 0000:08:00.1 failed with error -16
>> ...
>
> that looks werid, without patch 6/9 should only have pci bridge res shrink problem with your last test?
>

I don't know why. I might have overlooked it at the previous
test.

> maybe we need keep that feature default to be disabled.

Will do it next week.

Thanks,
Kenji Kaneshige


>
> please try
>
> ---
> drivers/pci/setup-bus.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)


>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c

> @@ -918,8 +918,7 @@ static void pci_bus_dump_resources(struc


> * second try will clear small leaf bridge res

> * third try will clear related bridge: some aggressive

> */
> -/* assume we only have 4 level bridges, so only try 5 times */
> -int pci_try_num = 5;
> +int pci_try_num = 1;
> void __init
> pci_assign_unassigned_resources(void)
> {

Yinghai Lu

unread,
Nov 28, 2009, 2:37:01 AM11/28/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
split the two patches in to 9 for easy review...

please check

1. boot time:

BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to the bridge, but stop
assigning resource to the device under that bridge, because the device need big resource.

so patches (first 6) are trying to

a. pci assign unassign and record the failed device resource.
b. clear the BIOS assigned resource of the parent bridge of fail device
c. go back and call pci assign unsigned
d. if it still fail, will go up more bridges. and clear and try again.

v2: Jesse doesn't like it is in find_free_bus_resource...


try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not big
enough need to do it in two steps, and first step recore the failed res,
and don't touch bridge res that programmed by firmware. second step will
try to release bridge resource that is too small at first.
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.
v11:add pci=try=5, about more try to change more bridge
v12:not shrink pci bridge resource

2. hotplug:


BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to every bridge. (8M)
but when insert one card that big resource, the card can not get resource. because kernel will not touch the
bridge resource.

so patches (last 3) are trying to


a. assign resource to devices with that slot. and record fail devices
b. if there is some failed, will clear sepcifically io port of bridge, or mmio of bridge, or mmio pref of bridge.
c. try to assign the parent bridge of the slot.

v2: address Alex's concern about pci remove/rescan feature about


pci_setup_bridge changes.
v3: Kenji pointed out that pci_config_slot need to be called before
pci_bus_add_devices()
v4: move out pci_is_enabled checkout of pci_setup_bridge()
v5: change the applying sequence.
v6: change the functions name according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not
big enough
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.
v10:alex found need to have export for pci_assign_unassigned_bridge_resources
v11: pass check_leaf with pci_bus_release_unused_bridge_res

-v13: change resource_list to resource_list_x, to save size and flags aside, otherwise grandchild res will
get confused with son's res as could be used


Thanks

Yinghai

Yinghai Lu

unread,
Nov 30, 2009, 2:16:09 AM11/30/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Kenji Kaneshige wrote:
> My system doesn't boot (hangup) with the following error messages
> from Fusion MPT SAS driver. Those are different from the one
> captured with the previous version of your patches.

>
>
> Fusion MPT SAS Host driver 3.04.12
> mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
> mptbase: ioc0: Initiating bringup
> mptbase: ioc0: WARNING - Unexpected doorbell active!
> mptbase: ioc0: ERROR - Failed to come READY after reset! IocState=f0000000
> mptbase: ioc0: WARNING - ResetHistory bit failed to clear!
> mptbase: ioc0: ERROR - Diagnostic reset FAILED! (ffffffffh)
> mptbase: ioc0: WARNING - NOT READY WARNING!
> mptbase: ioc0: ERROR - didn't initialize properly! (-1)
> mptsas: probe of 0000:09:00.0 failed with error -1

> mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
> mptbase: ioc1: Initiating bringup
> mptbase: ioc1: WARNING - Unexpected doorbell active!
> mptbase: ioc1: ERROR - Failed to come READY after reset! IocState=f0000000
> mptbase: ioc1: WARNING - ResetHistory bit failed to clear!
> mptbase: ioc1: ERROR - Diagnostic reset FAILED! (ffffffffh)
> mptbase: ioc1: WARNING - NOT READY WARNING!
> mptbase: ioc1: ERROR - didn't initialize properly! (-1)
> mptsas: probe of 0000:0a:00.0 failed with error -1
>
>
> I'll send the whole boot log privately.

thanks, at the same time please try "debug pci=try=1"

Kenji Kaneshige

unread,
Nov 30, 2009, 2:26:38 AM11/30/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

I confirmed system boot up without errors by "pci=try=1".
I'll capture the bootlog with "debug" option and send it
to you soon.

Thanks,
Kenji Kaneshige

Yinghai Lu

unread,
Nov 30, 2009, 2:45:10 AM11/30/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

thanks.

please send one only have "debug" only.

Yinghai

Yinghai Lu

unread,
Nov 30, 2009, 3:21:28 AM11/30/09
to Kenji Kaneshige, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

Please also try "debug pci=try=7"
it should work too.

Thanks

Yinghai

Kenji Kaneshige

unread,
Nov 30, 2009, 3:45:01 AM11/30/09
to Yinghai Lu, Jesse Barnes, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky

I confirmed it works.
I will send the whole boot log soon.

Thanks,
Kenji Kaneshige

Jesse Barnes

unread,
Dec 16, 2009, 3:42:26 PM12/16/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
On Fri, 27 Nov 2009 23:34:55 -0800
Yinghai Lu <yin...@kernel.org> wrote:

>
> prepare to use those small functions according to resource type later


>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>
>
> ---

> drivers/pci/setup-bus.c | 54
> +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44
> insertions(+), 10 deletions(-)

Seems like a reasonable cleanup, anyone have objections?

--
Jesse Barnes, Intel Open Source Technology Center

Jesse Barnes

unread,
Dec 16, 2009, 3:49:53 PM12/16/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
On Fri, 27 Nov 2009 23:35:14 -0800
Yinghai Lu <yin...@kernel.org> wrote:

>
> so later we could use it to release small resource before pci assign
> unassign res


>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>
>
> ---

> drivers/pci/setup-bus.c | 114
> +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 113
> insertions(+), 1 deletion(-)


>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c

> @@ -609,6 +609,118 @@ void __ref pci_bus_assign_resources(cons
> }
> EXPORT_SYMBOL(pci_bus_assign_resources);
>

> +static void release_children_resource(struct resource *r)

release_child_resources?

> +{
> + struct resource *p;
> + resource_size_t size;
> +
> + p = r->child;
> + while (p) {
> + release_children_resource(p);
> + release_resource(p);
> + printk(KERN_DEBUG "PCI: release child resource
> %pRt\n", p);
> + /* need to restore size, and keep flags */
> + size = resource_size(p);
> + p->start = 0;
> + p->end = size - 1;
> + p = r->child;
> + }
> +}

Also seems like it should go into resource.c instead?

> +
> +static void pci_bridge_release_unused_res(struct pci_bus *bus,
> + unsigned long type)
> +{
> + int idx;
> + bool changed = false;

> + struct pci_dev *dev;


> + struct resource *r;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + /* for pci bridges res only */
> + dev = bus->self;
> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES
> + 3;
> + idx++) {
> + r = &dev->resource[idx];
> + if ((r->flags & type_mask) != type)
> + continue;
> + if (!r->parent)
> + continue;
> + /*
> + * if there are children under that, we should
> release them
> + * all
> + */
> + release_children_resource(r);
> + if (!release_resource(r)) {
> + dev_printk(KERN_DEBUG, &dev->dev,
> + "resource %d %pRt released\n", idx,
> r);

> + /* keep the old size */
> + r->end = resource_size(r) - 1;
> + r->start = 0;

> +{
> + struct pci_dev *dev;

> +
> static void pci_bus_dump_res(struct pci_bus *bus)
> {

The idea here seems good, of allowing reallocation of bridge windows
when the space is insufficient. Would be good if Linus could check out
this series though.

Jesse Barnes

unread,
Dec 16, 2009, 3:50:39 PM12/16/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
On Fri, 27 Nov 2009 23:35:28 -0800
Yinghai Lu <yin...@kernel.org> wrote:

>
>
> mean it is not used, so skip it.


>
> Signed-off-by: Yinghai Lu <yin...@kernel.org>
> ---

> drivers/pci/setup-bus.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)


>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c

> @@ -727,7 +727,8 @@ static void pci_bus_dump_res(struct pci_


>
> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
> struct resource *res = bus->resource[i];
> - if (!res || !res->end)
> +
> + if (!res || !res->end || !res->flags)
> continue;
>
> dev_printk(KERN_DEBUG, &bus->dev, "resource %d
> %pR\n", i, res);

Seems fine apart from whitespace (looks like the if (...) isn't tabbed
in properly?).

Jesse Barnes

unread,
Dec 16, 2009, 3:55:34 PM12/16/09
to Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org

Patches 4-6 make me the most nervous, since they have the potential of
really changing our resource allocation (hopefully for the better) on
many platforms.

Linus, can you check them out and see if you're ok with the direction?

Patches 7-9 seem like they've recieved some review from Alex and
Kenji-san, but I don't see acks or reviewed-bys on them.

Alex and Kenji-san, are you ok with them assuming the previous patches
or something like them go upstream?

Thanks,


--
Jesse Barnes, Intel Open Source Technology Center

Alex Chiang

unread,
Dec 16, 2009, 4:12:14 PM12/16/09
to Jesse Barnes, Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
Hi Yinghai,

* Jesse Barnes <jba...@virtuousgeek.org>:


> >
> > -v13: change resource_list to resource_list_x, to save size and flags
> > aside, otherwise grandchild res will get confused with son's res as
> > could be used

One difficulty that I have in reviewing is the multiple patch
revisions buried deep within a thread.

Blame my small brain for not being able to keep track of all
the revision-as-mail-responses in one coherent body of work.

> Patches 4-6 make me the most nervous, since they have the potential of
> really changing our resource allocation (hopefully for the better) on
> many platforms.
>
> Linus, can you check them out and see if you're ok with the direction?
>
> Patches 7-9 seem like they've recieved some review from Alex and
> Kenji-san, but I don't see acks or reviewed-bys on them.
>
> Alex and Kenji-san, are you ok with them assuming the previous patches
> or something like them go upstream?

Can you please repost your next revision (after taking Jesse's
review comments) in a new thread?

Thank you.
/ac

Yinghai Lu

unread,
Dec 16, 2009, 5:21:39 PM12/16/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
Jesse Barnes wrote:
> On Fri, 27 Nov 2009 23:35:14 -0800
> Yinghai Lu <yin...@kernel.org> wrote:
>
>> so later we could use it to release small resource before pci assign
>> unassign res
>>
>> Signed-off-by: Yinghai Lu <yin...@kernel.org>
>>
>> ---
>> drivers/pci/setup-bus.c | 114
>> +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 113
>> insertions(+), 1 deletion(-)
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -609,6 +609,118 @@ void __ref pci_bus_assign_resources(cons
>> }
>> EXPORT_SYMBOL(pci_bus_assign_resources);
>>
>> +static void release_children_resource(struct resource *r)
>
> release_child_resources?

ok

>
>> +{
>> + struct resource *p;
>> + resource_size_t size;
>> +
>> + p = r->child;
>> + while (p) {
>> + release_children_resource(p);
>> + release_resource(p);
>> + printk(KERN_DEBUG "PCI: release child resource
>> %pRt\n", p);
>> + /* need to restore size, and keep flags */
>> + size = resource_size(p);
>> + p->start = 0;
>> + p->end = size - 1;
>> + p = r->child;
>> + }
>> +}
>
> Also seems like it should go into resource.c instead?

only one user, may move that later...

YH

Yinghai Lu

unread,
Dec 16, 2009, 5:22:15 PM12/16/09
to Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Jesse Barnes wrote:
> On Fri, 27 Nov 2009 23:35:28 -0800
> Yinghai Lu <yin...@kernel.org> wrote:
>
>>
>> mean it is not used, so skip it.
>>
>> Signed-off-by: Yinghai Lu <yin...@kernel.org>
>> ---
>> drivers/pci/setup-bus.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -727,7 +727,8 @@ static void pci_bus_dump_res(struct pci_
>>
>> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
>> struct resource *res = bus->resource[i];
>> - if (!res || !res->end)
>> +
>> + if (!res || !res->end || !res->flags)
>> continue;
>>
>> dev_printk(KERN_DEBUG, &bus->dev, "resource %d
>> %pR\n", i, res);
>
> Seems fine apart from whitespace (looks like the if (...) isn't tabbed
> in properly?).
>

old one have space instead of tab

Yinghai Lu

unread,
Dec 16, 2009, 5:22:57 PM12/16/09
to Alex Chiang, Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
Alex Chiang wrote:
> Hi Yinghai,
>
> * Jesse Barnes <jba...@virtuousgeek.org>:
>>> -v13: change resource_list to resource_list_x, to save size and flags
>>> aside, otherwise grandchild res will get confused with son's res as
>>> could be used
>
> One difficulty that I have in reviewing is the multiple patch
> revisions buried deep within a thread.
>
> Blame my small brain for not being able to keep track of all
> the revision-as-mail-responses in one coherent body of work.
>
>> Patches 4-6 make me the most nervous, since they have the potential of
>> really changing our resource allocation (hopefully for the better) on
>> many platforms.
>>
>> Linus, can you check them out and see if you're ok with the direction?
>>
>> Patches 7-9 seem like they've recieved some review from Alex and
>> Kenji-san, but I don't see acks or reviewed-bys on them.
>>
>> Alex and Kenji-san, are you ok with them assuming the previous patches
>> or something like them go upstream?
>
> Can you please repost your next revision (after taking Jesse's
> review comments) in a new thread?

ok.

YH

Yinghai Lu

unread,
Dec 16, 2009, 5:29:19 PM12/16/09
to Alex Chiang, Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
Alex Chiang wrote:
> Hi Yinghai,
>
> * Jesse Barnes <jba...@virtuousgeek.org>:
>>> -v13: change resource_list to resource_list_x, to save size and flags
>>> aside, otherwise grandchild res will get confused with son's res as
>>> could be used
>
> One difficulty that I have in reviewing is the multiple patch
> revisions buried deep within a thread.
>
> Blame my small brain for not being able to keep track of all
> the revision-as-mail-responses in one coherent body of work.
>
>> Patches 4-6 make me the most nervous, since they have the potential of
>> really changing our resource allocation (hopefully for the better) on
>> many platforms.
>>
>> Linus, can you check them out and see if you're ok with the direction?
>>
>> Patches 7-9 seem like they've recieved some review from Alex and
>> Kenji-san, but I don't see acks or reviewed-bys on them.
>>
>> Alex and Kenji-san, are you ok with them assuming the previous patches
>> or something like them go upstream?
>
> Can you please repost your next revision (after taking Jesse's
> review comments) in a new thread?
>

can you check


git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git >> master

http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-2.6-yinghai.git;a=shortlog;h=refs/heads/master

i rebased them to linus tree 12-12-2009.

Thanks

Yinghai

Alex Chiang

unread,
Dec 16, 2009, 5:44:59 PM12/16/09
to Yinghai Lu, Jesse Barnes, Kenji Kaneshige, Eric W. Biederman, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky, torv...@linux-foundation.org
* Yinghai Lu <yin...@kernel.org>:
> Alex Chiang wrote:
> > * Jesse Barnes <jba...@virtuousgeek.org>:

> >>
> >> Patches 7-9 seem like they've recieved some review from Alex
> >> and Kenji-san, but I don't see acks or reviewed-bys on them.
> >>
> >> Alex and Kenji-san, are you ok with them assuming the
> >> previous patches or something like them go upstream?
> >
> > Can you please repost your next revision (after taking
> > Jesse's review comments) in a new thread?
> >
>
> can you check
>
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-2.6-yinghai.git
> >> master
>
> http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-2.6-yinghai.git;a=shortlog;h=refs/heads/master
>
> i rebased them to linus tree 12-12-2009.

Well, yes, I can read those, but now I have to try and guess
which patches are 7-9 that Jesse asked me to review.

One reason that it's hard to review your patches is because there
are many revisions, buried deep within threads. It's not easy for
reviewers to a) keep track of all them or b) keep track of how
they inter-relate.

I might be following one subthread, see N revisions for patch x/y
as replies in there, keep track of the changes mentally, and then
keep all that state in my mind as I navigate around in the thread
to try and review patch x+1/y.

Then, trying to figure out how x, x+1, x+2 are all related into
one coherent change is difficult.

In the future, for PCI patch series, when you have to make
revisions, please repost as a new thread every time. This will
make life much easier for reviewers.

Thanks,
/ac

Rolf Eike Beer

unread,
Dec 17, 2009, 6:04:05 AM12/17/09
to Jesse Barnes, Yinghai Lu, Kenji Kaneshige, Eric W. Biederman, Alex Chiang, Bjorn Helgaas, Ingo Molnar, linux-...@vger.kernel.org, linu...@vger.kernel.org, Ivan Kokshaysky
Jesse Barnes wrote:
> On Fri, 27 Nov 2009 23:34:55 -0800
>
> Yinghai Lu <yin...@kernel.org> wrote:
> > prepare to use those small functions according to resource type later
> >
> > Signed-off-by: Yinghai Lu <yin...@kernel.org>
> >
> > ---
> > drivers/pci/setup-bus.c | 54
> > +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44
> > insertions(+), 10 deletions(-)
>
> Seems like a reasonable cleanup, anyone have objections?

There are some newlines missing between some functions, e.g. before
pci_setup_bridge_mmio().

Eike

signature.asc

Yinghai Lu

unread,
Dec 18, 2009, 3:56:01 PM12/18/09
to Jesse Barnes, Ingo Molnar, Linus Torvalds, Ivan Kokshaysky, Kenji Kaneshige, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org

prepare to use those small functions according to resource type later

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 54 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 46 insertions(+), 8 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -134,18 +134,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge_io(struct pci_bus *bus)
{


struct pci_dev *bridge = bus->self;

struct resource *res;
struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
-


- if (pci_is_enabled(bridge))
- return;
-
- dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",

- bus->secondary, bus->subordinate);
+ u32 l, io_upper16;

/* Set up the top and bottom of the PCI I/O segment for this bus. */
res = bus->resource[0];
@@ -171,7 +165,14 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
+}

+static void pci_setup_bridge_mmio(struct pci_bus *bus)


+{
+ struct pci_dev *bridge = bus->self;

+ struct resource *res;
+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */
res = bus->resource[1];
@@ -186,6 +187,14 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " bridge window [mem disabled]\n");
}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)


+{
+ struct pci_dev *bridge = bus->self;

+ struct resource *res;
+ struct pci_bus_region region;
+ u32 l, bu, lu;

/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
@@ -200,6 +209,7 @@ static void pci_setup_bridge(struct pci_
l = (region.start >> 16) & 0xfff0;
l |= region.end & 0xfff00000;
if (res->flags & IORESOURCE_MEM_64) {
+ pref_mem64 = 1;
bu = upper_32_bits(region.start);
lu = upper_32_bits(region.end);
}
@@ -214,10 +224,38 @@ static void pci_setup_bridge(struct pci_
/* Set the upper 32 bits of PREF base & limit. */
pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+}
+
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{


+ struct pci_dev *bridge = bus->self;
+

+ if (pci_is_enabled(bridge))
+ return;
+
+ dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
+ bus->secondary, bus->subordinate);
+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+


+ if (type & IORESOURCE_PREFETCH)

+ pci_setup_bridge_mmio_pref(bus);

pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

+static void pci_setup_bridge(struct pci_bus *bus)
+{
+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */

Yinghai Lu

unread,
Dec 18, 2009, 3:56:34 PM12/18/09
to Jesse Barnes, Ingo Molnar, Linus Torvalds, Ivan Kokshaysky, Kenji Kaneshige, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org

so later we can do sth with those failed one

-v2: store start, end, flags aside. so could keep res cleared when assign
failed. and make following assignment of its children do not go wild

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 66 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 62 insertions(+), 4 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -27,7 +27,52 @@


#include <linux/slab.h>
#include "pci.h"

-static void pbus_assign_resources_sorted(const struct pci_bus *bus)

+struct resource_list_x {
+ struct resource_list_x *next;
+ struct resource *res;
+ struct pci_dev *dev;
+ resource_size_t start;
+ resource_size_t end;
+ unsigned long flags;
+};
+
+static void add_to_failed_list(struct resource_list_x *head,
+ struct pci_dev *dev, struct resource *res)
+{
+ struct resource_list_x *list = head;
+ struct resource_list_x *ln = list->next;
+ struct resource_list_x *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");
+ return;
+ }
+
+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ tmp->start = res->start;
+ tmp->end = res->end;
+ tmp->flags = res->flags;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list_x *head)
+{
+ struct resource_list_x *list, *tmp;
+


+ for (list = head->next; list;) {

+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+

+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list_x *fail_head)
{


struct pci_dev *dev;
struct resource *res;

@@ -58,6 +103,13 @@ static void pbus_assign_resources_sorted
res = list->res;


idx = res - &list->dev->resource[0];

if (pci_assign_resource(list->dev, idx)) {
+ if (fail_head && !pci_is_root_bus(list->dev->bus)) {
+ /*
+ * device need to keep flags and size
+ * for next try
+ */
+ add_to_failed_list(fail_head, list->dev, res);


+ }
res->start = 0;

res->end = 0;
res->flags = 0;
@@ -575,19 +627,20 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list_x *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;

- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);

list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;

- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);



switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:

@@ -605,6 +658,11 @@ void __ref pci_bus_assign_resources(cons
}
}
}
+
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+{
+ __pci_bus_assign_resources(bus, NULL);
+}
EXPORT_SYMBOL(pci_bus_assign_resources);

static void release_child_resources(struct resource *r)

Yinghai Lu

unread,
Dec 18, 2009, 3:56:49 PM12/18/09
to Jesse Barnes, Ingo Molnar, Linus Torvalds, Ivan Kokshaysky, Kenji Kaneshige, Alex Chiang, Bjorn Helgaas, linux-...@vger.kernel.org, linu...@vger.kernel.org

when we are clearing leaf bridge resource and try to get big one, we could shrink the bridge if
there is no resource under it.

let check with old resource size and make sure we are trying to get big one.

Signed-off-by: Yinghai Lu <yin...@kernel.org>

---
drivers/pci/setup-bus.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c

@@ -390,7 +390,7 @@ static void pbus_size_io(struct pci_bus
{
struct pci_dev *dev;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
- unsigned long size = 0, size1 = 0;
+ unsigned long size = 0, size1 = 0, old_size;

if (!b_res)
return;
@@ -415,17 +415,18 @@ static void pbus_size_io(struct pci_bus
}
if (size < min_size)
size = min_size;
+ old_size = resource_size(b_res);
+ if (old_size == 1)
+ old_size = 0;
/* To be fixed in 2.5: we should have sort of HAVE_ISA
flag in the struct pci_bus. */
#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
size = (size & 0xff) + ((size & ~0xffUL) << 2);
#endif
size = ALIGN(size + size1, 4096);
+ if (size < old_size)
+ size = old_size;
if (!size) {
- if (b_res->start || b_res->end)
- dev_info(&bus->self->dev, "disabling bridge window "
- "%pR to [bus %02x-%02x] (unused)\n", b_res,


- bus->secondary, bus->subordinate);

b_res->flags = 0;
return;
}
@@ -441,7 +442,7 @@ static int pbus_size_mem(struct pci_bus
unsigned long type, resource_size_t min_size)
{
struct pci_dev *dev;
- resource_size_t min_align, align, size;
+ resource_size_t min_align, align, size, old_size;
resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */
int order, max_order;
struct resource *b_res = find_free_bus_resource(bus, type);
@@ -491,6 +492,11 @@ static int pbus_size_mem(struct pci_bus
}
if (size < min_size)
size = min_size;
+ old_size = resource_size(b_res);
+ if (old_size == 1)
+ old_size = 0;
+ if (size < old_size)
+ size = old_size;

align = 0;
min_align = 0;
@@ -507,10 +513,6 @@ static int pbus_size_mem(struct pci_bus
}
size = ALIGN(size, min_align);
if (!size) {
- if (b_res->start || b_res->end)
- dev_info(&bus->self->dev, "disabling bridge window "
- "%pR to [bus %02x-%02x] (unused)\n", b_res,


- bus->secondary, bus->subordinate);

b_res->flags = 0;
return 1;

It is loading more messages.
0 new messages