Pages

July 10, 2009

Python - Zip Directories Recursively

This helped me out today with some backup scripts. Posting here so I can remember it. Idea and snippet adapted from: http://mail.python.org/pipermail/python-list/2007-February/596539.html

        
#!/usr/bin/env python

import os
import zipfile


def main():
    """Zip the sample directory into the sample archive path."""
    source_dir = 'c:/test'
    archive_path = 'c:/temp/test.zip'
    zipper(source_dir, archive_path)


def zipper(dir, zip_file):
    """Recursively zip the contents of a directory.

    Every file below ``dir`` is stored under its path relative to ``dir``,
    so the directory layout is preserved inside the archive. The name of
    each file is printed as it is added.

    Args:
        dir: Directory whose contents are archived.
        zip_file: Path of the zip archive to create (overwritten if present).

    Returns:
        ``zip_file``, unchanged.
    """
    base = os.path.abspath(dir)
    # The archive may live inside the tree being walked; remember where it is
    # so it is not added to itself while it is still being written.
    target = os.path.normcase(os.path.abspath(zip_file))
    archive = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
    try:
        for root, dirs, files in os.walk(base):
            for f in files:
                fullpath = os.path.join(root, f)
                if os.path.normcase(fullpath) == target:
                    continue
                # Path relative to the zipped directory, without a leading
                # separator.
                archive_name = os.path.relpath(fullpath, base)
                print(f)
                archive.write(fullpath, archive_name)
    finally:
        # Close even when a file cannot be read so the handle is not leaked.
        archive.close()
    return zip_file


if '__main__' == __name__:
    # Run the demo only when executed as a script, not when imported.
    main()

* Code updated. There was a bug in the original I posted. (cmg - 07/13/09)

6 comments:

  1. hi corey, thanks for sharing this code. i extended zipper to have commandline options so it can be used for general recursive zipping. hope it helps. it's up on www.casualengineer.blogspot.com with credit given to you for inspiration

    cheers


    #!/usr/bin/env python
    import os, zipfile

    def zipper(dir, zip_file):
        """Recursively zip the contents of ``dir`` into ``zip_file``.

        Files are stored under their path relative to ``dir`` so that
        sub-directories keep their layout and same-named files in different
        folders do not collide. The path of each file is printed as it is
        added.

        Args:
            dir: Directory whose contents are archived.
            zip_file: Path of the zip archive to create.

        Returns:
            ``zip_file``, unchanged.
        """
        # Remember where the archive is so it is never added to itself when
        # it is written somewhere below ``dir``.
        target = os.path.normcase(os.path.abspath(zip_file))
        z = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
        try:
            # os.walk replaces the callback-based os.path.walk, which no
            # longer exists in Python 3.
            for root, dirs, files in os.walk(dir):
                for name in files:
                    f = os.path.join(root, name)
                    if os.path.normcase(os.path.abspath(f)) == target:
                        continue
                    archive_name = os.path.relpath(f, dir)
                    z.write(f, archive_name)
                    print(f)
        finally:
            # Close the archive even if a file could not be added.
            z.close()
        return zip_file



    if '__main__' == __name__:
        # Late import, in case this project becomes a library, never to be run as main again
        import optparse

        # Populate our options, -h/--help is already there for you
        usage = "usage: %prog [options]"
        version = "%prog 1.0"
        parser = optparse.OptionParser(usage=usage, version=version)
        parser.add_option("-d", "--dir", dest="inputDir", default="c:/test", action="store", help="sets the input directory to something other than the default (c:/test)")
        parser.add_option("-f", "--file", dest="outputFile", default="c:/temp/test.zip", action="store", help="sets the output zip file to something other than the default (c:/temp/test.zip)")

        # Parse the arguments (defaults to parsing sys.argv)
        (options, args) = parser.parse_args()

        # Positional arguments are not supported. The earlier form of this
        # check also compared the option strings with the integer 1, which is
        # always unequal, so it reduced to exactly this test.
        if args:
            parser.error("Additional arguments are not supported\nYou can only change the inputDir or outputFile using the -d and -f options.\nType zippy.py -h for help.\n")

        # Do the actual work
        zipper(options.inputDir, options.outputFile)

    ReplyDelete
  2. Hi,

    Maybe I was doing something wrong - but your code didn't work for me.

    It wouldn't handle sub-directories below the path.

    Here is some code that seems to work for me (well I just created it now :) ).
    Two other things I changed...
    a) Used the more modern os.walk rather than os.path.walk
    b) removed the os.path.basename(f) call - this always gave you back what you had before the os.path.join on the previous line.




    import os
    import zipfile


    def main():
        """Zip the ``tsearch`` directory into ``c:/_temp/testzip.zip``."""
        zipper('tsearch', 'c:/_temp/testzip.zip')


    def zipper(dir, zip_file):
        """Zip every file below ``dir`` into ``zip_file``, keeping sub-directories.

        Args:
            dir: Directory whose contents are archived.
            zip_file: Path of the zip archive to create.

        Returns:
            ``zip_file``, unchanged.
        """
        # absolute form of the root dir, used to build archive-relative names
        base = os.path.abspath(dir)
        # location of the archive itself, so it is skipped if it sits below dir
        target = os.path.normcase(os.path.abspath(zip_file))

        archive = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
        try:
            for root, dirs, files in os.walk(base):
                for f in files:
                    # full path to the file
                    fullpath = os.path.join(root, f)
                    if os.path.normcase(fullpath) == target:
                        continue
                    # archive path to the file, relative to the root dir
                    archive_name = os.path.relpath(fullpath, base)
                    print(f)
                    archive.write(fullpath, archive_name)
        finally:
            # always release the archive handle, even after a failed write
            archive.close()
        return zip_file


    if __name__ == '__main__':
        # Run the example only when executed as a script.
        main()


    (Strange that you can't make Blogger treat that as monospaced!)

    ReplyDelete
  3. Mark, I just updated my code with the fix!

    thanks,

    -Corey

    ReplyDelete
  4. Rayjan,

    very cool :) thanks for the additions. see my updated post with some fixed code.

    ReplyDelete
  5. Hi Corey,

    Thanks for sharing the code. It is now the backbone of a small auto backup script.

    ReplyDelete
  6. Zip file is being created but is empty for me. code below


    #!/usr/bin/env python
    import os, zipfile

    def zipper(dir, zip_file):
    # NOTE(review): the indentation of this snippet was lost when it was
    # posted, so the intended nesting cannot be recovered from the text alone.
    z = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
    # walker is handed to os.path.walk below as its callback, yet it also runs
    # its own os.walk loop -- two traversal styles are mixed here.
    def walker(zip, dir, files, root=dir):
    root_len = len(os.path.abspath(dir))
    for root, dirs, files in os.walk(dir):
    # archive_root is computed but never used afterwards.
    archive_root = os.path.abspath(root)[root_len:]
    for f in files:
    # Joins with `dir`, not with the `root` currently being walked.
    f = os.path.join(dir, f)
    # basename drops every directory component from the archive name.
    archive_name = os.path.basename(f)
    zip.write( f, archive_name, zipfile.ZIP_DEFLATED)
    print f
    # NOTE(review): if this call was indented inside walker in the poster's
    # file, walker would never run and the archive would stay empty -- this
    # would match the report but is unconfirmed.
    os.path.walk(dir, walker, z)

    z.close()
    return zip_file



    if '__main__' == __name__:
    # NOTE(review): indentation was lost when this snippet was posted; the
    # statements below presumably all belong to this `if` body.
    # Late import, in case this project becomes a library, never to be run as main again
    import optparse

    # Populate our options, -h/--help is already there for you
    usage = "usage: %prog [options]"
    version="%prog 1.0"
    parser = optparse.OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--dir", dest="inputDir", default="c:/test", action="store", help="sets the input directory to something other than the default (c:/test)")
    parser.add_option("-f", "--file", dest="outputFile", default="c:/temp/test.zip", action="store", help="sets the output zip file to something other than the default (c:/temp/test.zip)")
    # Called without arguments, so no additional defaults are set here.
    parser.set_defaults()

    # Parse the arguments (defaults to parsing sys.argv)
    (options, args) = parser.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call parser.error("Useful message") to exit if all is not well
    # NOTE(review): both options hold strings, so the comparisons with the
    # integer 1 look always-true; the test then amounts to `len(args) > 0`.
    if len(args) > 0 and (1 != options.inputDir or 1 != options.outputFile):
    parser.error("Additional arguments are not supported\nYou can only change the inputDir or outputFile using the -d and -f options.\nType zippy.py -h for help.\n")


    # Do the actual work
    zipper(options.inputDir, options.outputFile)

    ReplyDelete