# Sample input: one <li> of an article listing, holding a headline link,
# a byline, a teaser paragraph and an "actions" span with two more links.
# NOTE(review): a few Danish characters look like they were lost in this
# sample (e.g. "mrkede", "rinde", "Ls mere"); left untouched because the
# literal is runtime data.
tekst = '<li ><div class="views-field-field-webrubrik-value"><h3><a href="/307046">Claus Hjort spiller med mrkede kort</a></h3> </div><div class="views-field-field-skribent-uid"><div class="byline">Af: <span class="authors">Dennis Kristensen</span></div> </div> <div class="views-field-field-webteaser-value"> <div class="webteaser">Claus Hjort Frederiksens argumenter for at afvise trepartsforhandlinger har ikke hold i virkeligheden. Hans rinde er nok snarere at forberede det ideologiske grundlag for en Løkke Rasmussens genkomst som statsminister</div> </div><span class="views-field-view-node"> <span class="actions"><a href="/307046">Ls mere</a> | <a href="/307046/#comments">Kommentarer (4)</a></span> </span></li>'
# The term to look for in the document text; it is used as a regular
# expression pattern below.
to_find = "Rasmussen"
# Parse the fragment. No parser is named, so the library picks its default.
# `BeautifulSoup` and `re` are not imported in this part of the file --
# presumably imported above; verify.
soup = BeautifulSoup(tekst)
# Collect every text node whose content matches the pattern.
contexts = soup.find_all(text=re.compile(to_find))
def find_nearest(element, url, direction="both"):
    """Return the URL of the link nearest to ``element``, or ``None``.

    The search starts at ``element`` (typically a text node returned by
    ``soup.find_all(text=...)``) and proceeds outwards one ancestor at a
    time. At every level the current node qualifies if it is itself an
    ``<a href=...>``; otherwise, when searching downwards is allowed, the
    first ``<a href=...>`` among its descendants is taken. The first hit is
    therefore the link reachable with the fewest steps up the tree.

    Args:
        element: The node to start from. Only ``name``, ``parent``,
            ``get()`` and ``find()`` are used, so any BeautifulSoup text
            node or tag works.
        url: Base URL that is prepended to relative links.
        direction: ``"up"`` looks only at ``element`` and its ancestors,
            ``"down"`` looks only at ``element`` and its descendants,
            ``"both"`` (default) climbs the tree and also looks inside
            every ancestor.

    Returns:
        The link target as a string. Targets that do not start with
        ``http`` or ``www`` are prefixed with ``url``. ``None`` is returned
        when no link with an ``href`` can be found.

    Raises:
        ValueError: If ``direction`` is not ``"both"``, ``"up"`` or
            ``"down"``.
    """
    if direction not in ("both", "up", "down"):
        raise ValueError(
            "direction must be 'both', 'up' or 'down', not %r" % (direction,)
        )

    climb = direction != "down"
    look_inside = direction != "up"

    node = element
    # Iterate instead of recursing: the loop ends cleanly at the document
    # root (whose parent is None) and the result is actually returned.
    while node is not None:
        anchor = None
        # Text nodes have no tag name; only real tags can be, or contain,
        # a link. Calling find() on a text node would hit str.find instead.
        tag_name = getattr(node, "name", None)
        if tag_name == "a" and node.get("href") is not None:
            anchor = node
        elif look_inside and tag_name is not None:
            # find_parents() only sees ancestors; a link that is a sibling
            # or cousin of the text lives *inside* a shared ancestor.
            anchor = node.find("a", href=True)

        if anchor is not None:
            link = anchor.get("href")
            # Relative targets are made absolute with the supplied base URL.
            if not link.startswith(("http", "www")):
                link = url + link
            return link

        if not climb:
            break
        node = getattr(node, "parent", None)

    return None
if contexts:
for a in contexts: