Recursing into directories

0 views
Skip to first unread message

Ron

unread,
Dec 10, 2001, 12:22:06 PM12/10/01
to
Hi,

Is there a built-in class (in dircache, os.path????) that
recurses thru a directory tree?

I've written one using dircache.listdir, but if there is some-
thing pre-written, I'd rather use that...

Thanks,
Ron
--
+-----------------------------------------------------------------+
| Ron Johnson, Jr. Home: ron.l....@home.com |
| Jefferson, LA USA http://ronandheather.dhs.org:81 |
+-----------------------------------------------------------------+

John J. Lee

unread,
Dec 10, 2001, 12:50:56 PM12/10/01
to
On Mon, 10 Dec 2001, Ron wrote:

> Hi,
>
> Is there a built-in class (in dircache, os.path????) that
> recurses thru a directory tree?

os.path.walk?


John

Gerhard Häring

unread,
Dec 10, 2001, 1:00:28 PM12/10/01
to
Le 10/12/01 à 17:22, Ron écrivit:

> Hi,
>
> Is there a built-in class (in dircache, os.path????) that
> recurses thru a directory tree?
>
> I've written one using dircache.listdir, but if there is some-
> thing pre-written, I'd rather use that...

Only a function: os.path.walk

And because I usually want a class, too, I didn't find it especially
useful.

Gerhard
--
mail: gerhard <at> bigfoot <dot> de registered Linux user #64239
web: http://www.cs.fhm.edu/~ifw00065/ OpenPGP public key id 86AB43C0
public key fingerprint: DEC1 1D02 5743 1159 CD20 A4B6 7B22 6575 86AB 43C0
reduce(lambda x,y:x+y,map(lambda x:chr(ord(x)^42),tuple('zS^BED\nX_FOY\x0b')))

Michael Chermside

unread,
Dec 10, 2001, 3:58:33 PM12/10/01
to
In addition to os.path.walk, I'm also somewhat partial to
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52664 but both
are functions, not classes as you requested. Of course, it's simple
enough to build a class that uses the function....

-- Michael Chermside

> Hi,
>
> Is there a built-in class (in dircache, os.path????) that
> recurses thru a directory tree?
>
> I've written one using dircache.listdir, but if there is some-
> thing pre-written, I'd rather use that...
>

Steven Majewski

unread,
Dec 11, 2001, 12:01:08 PM12/11/01
to

On Mon, 10 Dec 2001, Gerhard [iso-8859-1] Häring wrote:

> Le 10/12/01 à 17:22, Ron écrivit:


> > Hi,
> >
> > Is there a built-in class (in dircache, os.path????) that
> > recurses thru a directory tree?
> >
> > I've written one using dircache.listdir, but if there is some-
> > thing pre-written, I'd rather use that...
>

> Only a function: os.path.walk
>
> And because I usually want a class, too, I didn't find it especially
> useful.
>

A function like os.path.walk is awkward because it's inverted -- you
need to do all of the work inside with a callback. Generators are
much neater -- you can invert it back again so that your processing
is on the outside where it belongs, and the directory walking is
hidden inside the generator which just produces the next pathname
on each call. Something like:

# recursive file iterator as a generator:
from __future__ import generators
from os import listdir, path, curdir

def Files( *paths ):
    """Recursively generate the pathname of every regular file under *paths*.

    With no arguments the walk starts at os.curdir.  Directory entries
    that are neither a regular file nor a directory are skipped.
    """
    roots = paths or ( curdir, )
    for root in roots:
        for entry in listdir( root ):
            full = path.join( root, entry )
            if path.isfile( full ):
                yield full
                continue
            if path.isdir( full ):
                # descend and re-yield everything found below this directory
                for nested in Files( full ):
                    yield nested

Then you can do: 'for file in Files( ... )' ,
Or better yet, wrap the generator in a class.
The example below (which I've posted before) allows you to do something like:

for x in Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) |range(20):

to loop over the first 20 Gif files larger than 512 bytes owned by 'sdm7g'.
(or if you don't like the overloading of "&" and "|", use a test in a
list comprehension -- both are clearer than the functional notation. )

-- Steve Majewski

------

from __future__ import generators,nested_scopes

## You need:

## a generator: (you can also start with a list)

def Ints():
    """Generate the integers 0, 1, 2, ... without end."""
    current = 0
    while True:
        yield current
        current = current + 1

def Always( arg ):
    """Generate the same object, *arg*, over and over without end."""
    while True:
        yield arg


## one or more filters:

def Test( gen, test ):
    """Filter stage: generate only the items of *gen* for which test(item) is true."""
    for item in gen:
        if not test( item ):
            continue
        yield item

## and a terminator:

## by condition:
def Quit( gen, test ):
    """Terminator stage: generate items of *gen* until test(item) is true.

    The item that satisfies *test* is not yielded, and nothing further is
    taken from *gen* after it.
    """
    for x in gen:
        if test( x ):
            # A plain return ends the generator.  Raising StopIteration
            # inside a generator body is turned into RuntimeError by
            # PEP 479, so it must not be used to stop.
            return
        yield x

# or by count:
def Count( gen, n ):
    """Terminator stage: generate at most *n* items from *gen*.

    A limit of zero or less yields nothing.  No item beyond the n-th is
    pulled from *gen*.
    """
    if n <= 0:
        # Without this guard the countdown below would step past zero and
        # never stop, draining the whole (possibly infinite) source.
        return
    for x in gen:
        yield x
        n -= 1
        if n <= 0:
            break


## shorthand names so the lines don't get too long...

def odd( x ):
    """Filter stream *x* down to the items with a non-zero remainder mod 2."""
    return Test( x, lambda value: value % 2 )

def enough( x ):
    """Cut stream *x* off at the first item greater than 100."""
    return Quit( x, lambda value: value > 100 )

def notdiv3( x ):
    """Filter stream *x* down to the items not divisible by 3."""
    return Test( x, lambda value: value % 3 )


# Demo: wrap the filter and terminator stages around the endless Ints() stream.
print( "\n odd ints that are not divisible by 3 under 100:" )
for i in notdiv3( enough( odd( Ints() ))):
    print( i )

print( "\n first 20 odd ints not divisible by 3:" )
for i in Count( notdiv3(odd(Ints())), 20 ):
    print( i )


# recursive file iterator as a generator:

from os import listdir, path, curdir

def Files( *paths ):
    """Generate the pathnames of all regular files below the given directories.

    Each argument is listed with os.listdir and walked recursively; when
    called without arguments the walk starts at os.curdir.  Entries that
    are neither regular files nor directories produce nothing.
    """
    start_dirs = paths
    if not start_dirs:
        start_dirs = ( curdir, )
    for top in start_dirs:
        for name in listdir( top ):
            candidate = path.join( top, name )
            if path.isfile( candidate ):
                yield candidate
                continue
            if path.isdir( candidate ):
                # recurse, passing every result up to the caller
                for deeper in Files( candidate ):
                    yield deeper


import os, stat

def fileLargerThan( n ):
    """Return a predicate that is true for paths whose size exceeds *n*.

    The returned callable takes a pathname, stats it on every call and
    compares the ST_SIZE field against the threshold.
    """
    # The threshold is bound as the default argument `size`; compare against
    # that bound value so the parameter is not silently ignored.
    return lambda s, size=n: os.stat( s )[stat.ST_SIZE] > size

def fileExt(ext):
    """Return a predicate matching pathnames whose extension equals *ext*.

    The extension is the last element returned by os.path.splitext (so it
    includes the leading dot, e.g. '.gif'); both sides are lower-cased
    before comparing.
    """
    def has_ext( s ):
        suffix = os.path.splitext( s )[-1]
        return suffix.lower() == ext.lower()
    return has_ext

isGif = fileExt( '.gif' )

## This is MUCH nicer than using os.path.walk() with a callback!

## find the first 20 gifs in your cwd...
for f in Count( Test( Files(), isGif ), 20 ):
    print( f )

def fileSize( test ):
    """Return a predicate that applies *test* to a file's size.

    The returned callable takes a pathname, stats it, and returns
    test(size), where size is the ST_SIZE field of the stat result.
    """
    def check( name ):
        size = os.stat( name )[stat.ST_SIZE]
        return test( size )
    return check

import pwd
def fileOwner( uname ):
    """Return a predicate that is true for files owned by user *uname*.

    The user name is resolved to a numeric uid once, at the time fileOwner
    is called; the returned callable stats a pathname and compares its
    ST_UID field with that uid.
    """
    entry = pwd.getpwnam( uname )
    uid = entry[2]      # element 2 of the passwd entry is compared with ST_UID
    def owned( fname ):
        return os.stat( fname )[stat.ST_UID] == uid
    return owned


# Extra module-level names bound to the Test and Count generator functions.
# NOTE(review): presumably kept so the plain functions stay reachable under
# names distinct from the Genpipe.Test / Genpipe.Count methods -- nothing in
# the visible code refers to these aliases; confirm before removing.
_test = Test
_count = Count

class Gen:
    """Minimal iterable wrapper around a generator or any other iterable."""
    def __init__( self, generator ):
        # the wrapped source; subclasses replace it as stages are added
        self.generator = generator
    def __iter__( self ):
        # __iter__ has to return an iterator.  iter() hands an existing
        # iterator or generator back unchanged and builds one for lists,
        # tuples and other plain iterables, which would otherwise make
        # "for x in Gen([...])" fail with TypeError.
        return iter( self.generator )

class Genpipe(Gen):
    """Gen with chainable stages.

    Every method swaps self.generator for a wrapped generator and returns
    self, so calls can be strung together.
    """
    def Test( self, pred ):
        # Inside the method body the bare name Test resolves to the
        # module-level generator function, not to this method.
        filtered = Test( self.generator, pred )
        self.generator = filtered
        return self
    def Count( self, n ):
        limited = Count( self.generator, n )
        self.generator = limited
        return self
    def __or__( self, other ):
        # "pipe | pred" adds a filter stage; a non-callable right operand
        # leaves the pipeline unchanged.
        if not callable(other):
            return self
        self.generator = Test( self.generator, other )
        return self

print( 'Genpipe test...' )
for file in Genpipe( Files('.') ).Test( isGif ).Count(10):
    print( file )

def Append( *generators ):
    """Chain the given iterables: every item of the first, then the second, ..."""
    for source in generators:
        for item in source:
            yield item


def Alternate( *generators ):
    """Interleave the given iterables round-robin.

    One item is taken from each iterable in turn; generation stops as soon
    as any one of them is exhausted.  Called with no iterables it yields
    nothing.
    """
    # iter() returns an existing iterator unchanged and builds one for
    # lists, tuples and other plain iterables.
    gs = [ iter( g ) for g in generators ]
    # "while gs" rather than "while 1": with nothing to draw from, the
    # round-robin loop would otherwise spin forever without yielding.
    while gs:
        for g in gs:
            try:
                # the next() builtin works where the .next() method does not
                item = next( g )
            except StopIteration:
                # end explicitly; a StopIteration escaping a generator body
                # is turned into RuntimeError by PEP 479
                return
            yield item

def Combine( *generators ):
    """Generate tuples holding one item from each of the given iterables.

    The i-th tuple contains the i-th item of every iterable, in argument
    order; generation stops as soon as any iterable is exhausted.
    """
    # iter() returns an existing iterator unchanged and builds one for
    # lists, tuples and other plain iterables.
    gs = [ iter( g ) for g in generators ]
    while True:
        lis = []
        for g in gs:
            try:
                # the next() builtin works where the .next() method does not
                lis.append( next( g ) )
            except StopIteration:
                # end explicitly; a StopIteration escaping a generator body
                # is turned into RuntimeError by PEP 479
                return
        yield tuple(lis)

class Genops(Genpipe):
    """Genpipe with operator shorthands.

    pipe & pred   -- keep only the items for which pred(item) is true
    pipe | other  -- interleave with *other* through Alternate
    pipe + other  -- follow with the items of *other* through Append

    Every operator replaces self.generator and returns self.
    """
    def __and__( self, other ):
        # a non-callable right operand leaves the pipeline unchanged
        if not callable(other):
            return self
        self.generator = Test( self.generator, other )
        return self
    def __or__( self, other ):
        interleaved = Alternate( self.generator, other )
        self.generator = interleaved
        return self
    def __add__( self, other ):
        chained = Append( self.generator, other )
        self.generator = chained
        return self


_files = Files
class Files(Genops):
    """Genops pipeline whose source is the recursive file generator.

    This class takes over the name Files; the generator function it wraps
    is reached through the module-level name _files.
    """
    def __init__( self, *args ):
        # Argument unpacking replaces apply(), which no longer exists in
        # Python 3; the call made is the same.
        self.generator = _files( *args )

print( 'Last test...' )
# "&" binds tighter than "|", so the three filters are applied first and the
# filtered pipeline is then combined with range(20) through Genops.__or__,
# i.e. Alternate, which interleaves the file names with the integers.
for x in Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) |range(20):
    print( x )

Reply all
Reply to author
Forward
0 new messages