From 62dc691279a0d0ee0a46a7b0bea6f65257e81f03 Mon Sep 17 00:00:00 2001 From: Michael Krayer Date: Fri, 22 May 2020 09:47:41 +0200 Subject: [PATCH] forked from git://archivemail.git.sourceforge.net/gitroot/archivemail/archivemail --- CHANGELOG | 333 +++++++ COPYING | 341 +++++++ FAQ | 15 + MANIFEST.in | 11 + Makefile | 43 + NEWS | 44 + README | 63 ++ TODO | 103 ++ archivemail | 1951 ++++++++++++++++++++++++++++++++++++++ archivemail.xml | 794 ++++++++++++++++ db2html.xsl | 10 + db2man.xsl | 29 + examples/archivemail_all | 31 + index.html | 192 ++++ manpage.css | 15 + setup.py | 33 + style.css | 49 + test_archivemail | 1771 ++++++++++++++++++++++++++++++++++ 18 files changed, 5828 insertions(+) create mode 100644 CHANGELOG create mode 100644 COPYING create mode 100644 FAQ create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 NEWS create mode 100644 README create mode 100644 TODO create mode 100755 archivemail create mode 100644 archivemail.xml create mode 100644 db2html.xsl create mode 100644 db2man.xsl create mode 100644 examples/archivemail_all create mode 100644 index.html create mode 100644 manpage.css create mode 100755 setup.py create mode 100644 style.css create mode 100755 test_archivemail diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..d0d4424 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,333 @@ +version 0.9.0 - 9 July 2011 + + * Fixed manpage installation path to be FHS compliant + * Speed up IMAP archiving with the --quiet option + * Ported the manpage from SGML to XML + * Fix test suite failures with Python 2.7. Closes: #3314293. + * IMAP: support international mailbox names containing non-ASCII characters. + * IMAP: handle broken servers sending no untagged SEARCH response. + Closes: #879716, #3213272. + * IMAP: support servers listening on non-standard ports. Closes: #3168416. + +version 0.8.2 - 16 October 2010 + + * IMAP: don't prepend NAMESPACE prefix to INBOX and its children. + Closes: #3083236. + +version 0.8.1 - 30 September 2010 + + * IMAP: fixed handling of LIST replies by the server where the mailbox name + is not a quoted string. (Thanks Karsten Müller) + +Version 0.8.0 - 9 August 2010 + + * Fixed date header parsing to be precise with timezone information. Also, + when writing From_ line timestamps, don't use UTC but local time, without + timezone information. + * To determine the delivery date of a message, archivemail now looks for the + timestamp of the latest 'Received' header before resorting to + 'Resent-Date' or 'Date'. This should give much better results when there + is no 'Delivery-date' header, which is still checked first. + (Thanks Andrew Ferrier & Christian Brabandt) + Closes: #1481316, #1764855, Debian bug #272666. + * If present, the 'Resent-date' header now takes precedence over 'Date'. + * IMAP: recognize when a server advertises LOGINDISABLED. + * New option --debug-imap; this just sets imaplib.Debug, thereby dumping + archivemail's chat with the server to stdout. + * Fixed crash with Python 2.5 when archiving an empty maildir. + (Thanks "Aidant") Closes: #1878940. + * New option --all to archive all messages in a mailbox. Closes: #1764846. + * Fixed a crash when archiving maildirs with --days=0. (Thanks John Goerzen) + * IMAP: automatically add NAMESPACE prefix to a mailbox path if necessary. + * Removed the feature to setuid to the mailbox owners when run as root. + This was a bad idea; it's really hard to do safely, if at all possible. + Obsoletes: patch #2783134. 
+ * Replaced some simple-minded file operation security checks with more
+   decent ones. This means we can safely operate in /tmp, for example. The
+   price is that we no longer accept symlinked files. Obsoletes: patch
+   #1874868.
+ * Don't use rename() to update mbox files and the archive, but write the
+   files directly. This is more fragile, but required for correct mbox
+   locking, and also for mboxes in mail spool directories where we don't have
+   permission to create files. It also means that if selinux is enabled,
+   archivemail now preserves the selinux security context of an mbox.
+   Closes: #2210732.
+ * Fixed the test suite to deal with nanosecond file timestamps. These are
+   provided by ext4 and XFS, for example. Closes: #2043900.
+ * Cleaned up the test suite, replacing a lot of duplicated code and avoiding
+   a lot of redundant testing. This speeds up the test suite by a factor of
+   15 or so.
+ * mbox locking got completely rewritten. Switched from flock to lockf
+   locking, which is NFS-safe and portable, and we now lock with lockf first,
+   then with a dotlock, instead of the other way around. (This makes
+   archivemail compatible with Debian systems. ;)
+ * We now omit the dotlock if we don't have sufficient permissions to create
+   it in the mbox directory. (The file is still locked with lockf.)
+   Since we also no longer use rename() to commit changes to an mbox (see
+   above), this means archivemail can now operate on mbox files in the system
+   mail spool. Closes: #855269.
+ * Refactoring of the mbox classes; much of the code got rewritten.
+ * The archive now also gets locked while archivemail updates it.
+ * Various Python language fixes (for example don't use "0" and "1" as
+   boolean constants).
+ * Added a lot of test cases for maildir archiving to the test suite.
+   Maildir testing should now be roughly on par with mbox testing.
+ * IMAP servers (Dovecot and UW-IMAP at least) may store mailbox meta data
+   for mboxes in a pseudo message. Such messages are now detected and never
+   archived. Obsoletes: patch #2210707. (Thanks, "tlhackque")
+ * New option --prefix, or short -p, to specify an archive name prefix. Like
+   a suffix specified with the --suffix option, the prefix is expanded with
+   strftime(). Specifying this option disables the default archive name
+   suffix. Obsoletes: feature request #604281. (Thanks Serafeim Zanikolas
+   for an initial patch)
+ * When archiving a mailbox with a leading dot in the name and with no archive
+   name prefix specified, archivemail no longer creates hidden archives, but
+   strips the dot off the archive name. In particular, this makes working
+   with Maildir++ subfolders more convenient. Closes: feature request
+   #604281.
+ * New option --archive-name, or short -a, to hard-code an archive filename.
+   Like the --suffix and --prefix options, it is expanded with strftime().
+   This option conflicts with archiving multiple mailboxes. Closes: feature
+   request #1306538.
+ * archivemail now expands wildcards in IMAP mailbox names. For example, the
+   url imaps://user@server/foo/* will expand to all subfolders of foo.
+   Closes: feature request #1978540. Obsoletes: patch #1918937.
+
+Version 0.7.2 - 9 November 2007
+
+ * IMAP: fixed crash by working around python bug #1277098, which is still pending
+   in python << 2.5.
+
+Version 0.7.1 - 7 November 2007
+
+ * Fixed incompatibility with Python 2.5 which broke Maildir handling.
+ Closes: #1670422 + * Username and password in IMAP URLs can now be double-quoted, so it should be + no longer a problem if they contain delimiters like the '@' character. + Closes: #1640878 + * Invalid messages/files in Maildirs caused archivemail to silently stop + processing mails and claim it's all done. Now skip these and go ahead. + (Thanks Elan Ruusamäe for tracking this down.) Closes: #1783369. + (The Debian package has a different fix for this problem since 0.6.1-4, + closing Debian bugs #255944 and #305902.) + * Fixed IMAP message flag conversion which was completely broken. (Thanks + Christian Brabandt) Closes: Debian bug #434807 + * New option --copy: archive mail, but don't delete it from the mailbox. + This is mainly useful for testing purposes, and complements --delete. + Closes: #981865, #988803, #1764851, Debian bug #434798 + * If running as root, only switch the effective uid and gid back if we have + actually switched them before. Closes: #1762907 + * The automatic seteuid feature of archivemail is insecure and thus + deprecated; it will be removed from later versions. + * Expand tilde in argument of long option --pwfile. (Thanks Christian + Brabandt) Closes: Debian bug #434813 + * archivemail now accepts --days=0 + * Fixed crash if --warn-duplicate is used with IMAP (Thanks Christian + Brabandt) Closes: Debian bug #434786 (the Debian package already has a fix) + * When converting from other formats to mbox, archivemail used to preserve + existing 'Status' and 'X-Status' mbox headers; these are now silently + overwritten to ensure they have correct values. + * IMAP: if selecting the mailbox fails, archivemail detects the server's + mailbox hierarchy delimiter, replaces slashes in the mailbox name with the + delimiter and tries again. Closes: #1826757, Debian bug #368112 + +Version 0.7.0 - 2 November 2006 + * Fixed long options --filter-append and --pwfile to accept their arguments. + Closes: #1555935 + * Fixed From_ line generation to actually look for the 'Return-path' and + 'From' headers. Closes: #1555797 + * Fixed IMAP authentication/URL parsing, which wasn't working at all in + v0.6.2. Require username encoded in URL, but be flexible with the password: + handle both --pwfile and URL-encoded password, and fallback to querying the + user if neither is present. Closes: #1555951 + * Convert on-the-wire CRLF to native EOL when saving messages from an IMAP + folder. Closes: #1555803 + * Updated man page. This also addresses #1555951 + * Fixed unittest failure by updating --suffix testcase. Based on analysis by + Peter Poeml. Thanks, Peter. + * Fixed invalid IMAP filter string for large messages (--size option). + (Thanks to the anonymous bug reporter) Closes: #863813 + * Fixed IMAP --dry-run so it doesn't download all messages that would be + archived. + * Fixed IMAP --delete which didn't work at all. (Thanks Anand) + Closes: Debian bug #203282 + * Terminate each message in newly written mbox with an empty line if the + message source is not an mbox-format folder. (Thanks Chung-chieh Shan) + Closes: Debian bug #250410 + * Mangle From_ in message body if the message source is not an mbox-format + folder. (Thanks Chung-chieh Shan) Closes: Debian bug #250402 + * Added new option --dont-mangle to turn off From_ mangling. + * Bumped Python dependency to version 2.3. + * Fixed unittest TestMboxExclusiveLock which failed on Solaris. (Thanks Paul + Rodger) Closes: #904652 + * Fixed unsafe creation of temporary files in the test suite. 
+ This addresses Debian bug #385253, and reading the BTS log, it seems this + issue was assigned CVE-2006-4245, although I cannot find any further + reference to that CVE. Note that the bug was initially reported to affect + archivemail itself, too. This is not correct. (Thanks Joey Hess) + Closes: Debian bug #385253 + * Fixed cleanup of temporary files after test suite failures. + * Fixed dotlocking race condition. + * Stats are now working with IMAP. + * Stats now report the total size of a mailbox and of the archived messages + therefrom. + * Always barf if the archive destination directory is world-writable. + * Distributing man page with the tarball again and fixed distutils setup. + Closes: #1574720 (Thanks Grant Goodyear) + * Improved IMAP error reporting + +Version 0.6.2 - 27 June 2006 + * add -F/--filter-append option to append an arbitrary string to the IMAP + filter string + * don't delete more than a certain number of messages at a time. The max + command len is limited. Fixes bug 942403 (Archiving large IMAP folders fails) + * IMAP: try CRAM-MD5 login first, if that fails fallback to plain login + * add SSL support per imaps URL (after patch by Tobias Gruetzmacher) + * add -P/--pwfile option to supply the IMAP password, so it does not end up in + the shell history + * Fix SyntaxWarning: assignment to None (bug #843890) + * Use the archive cut date rather than the actual time with the --suffix + option. (Thanks Manuel Estrada Sainz) + +Version 0.6.1 - 31 October 2002 + * Removed a test rule that we could archive messages older than the + Unix epoch. Newer versions of python now give an overflow error calling + mktime() on dates < 1970 instead of returning a negative number. + +Version 0.6.0 - 3 October 2002 + * Added IMAP mailbox support. (Thanks Mark Roach) + +Version 0.5.1 - 18 September 2002 + * Fixed a bug where when running archivemail as root on a non-root mailbox, + the temporary container directory would be created as root before the + seteuid() took place. (Thanks Jay Hesselberth) + +Version 0.5.0 - 15 September 2002 + * Fixed a bug where mailbox locking would fail under Solaris. (Thanks Mark + Sheller) + * Fixed a bug where archiving maildir mailboxes without a 'Received Date' or + 'From' header would fail. (Thanks Hugo van der Merwe) + * Removed yet another bug where messages dated on the Unix epoch would fail. + +Version 0.4.9 - 18 August 2002 + * Fixed a bug where an exception was thrown if a message was dated exactly + on the Unix epoch. + * Fixed a bug where trailing slashes on the end of maildir/MH mailbox + arguments were being used in the archive name. + +Version 0.4.8 - 20 May 2002 + * Call mkdir() to create a container directory in which we can place any + created tempfiles + +Version 0.4.7 - 9 May 2002 + * Fixed a bug where archivemail would abort if it received a date header + with a completely blank value. + +Version 0.4.6 - 6 May 2002 + * Fixed a bug where the os.rename() calls could fail if we were moving + temporary files across different filesystems/partitions. + +Version 0.4.5 - 29 April 2002 + * Fixed a bug where if you used the '--delete' option to completely clean + an mbox mailbox you would get a python error. + * Added a lot more testing to test_archivemail.py (test_archivemail.py is + now 37% bigger than archivemail -- scary) + * Added a new '--size' option which allows you to only archive messages + over a certain size. 
+ +Version 0.4.4 - 27 April 2002 + * Fixed a bug where the long --suffix option was not working (although the + short option, '-s' was). + * Added time-based format directives to the --suffix option, so that you + can do things like specify --suffix='%B%Y' to create archives named + after the current month and year + * Added some more tests to test_archivemail.py + +Version 0.4.3 - 26 April 2002 + * Fixed a couple of bugs where I was using python version 2.2 syntax that + was giving errors in python v2.0 and v2.1. + * Changed the python requirements for the test script from python version + 2.0 to version 2.1. This is because the unittest module is only available + in version 2.1 and above. + +Version 0.4.2 - 24 April 2002 + * Added the ability to archive messages older than a given absolute date + with the new option '--date'. + * Fixed a bug where archivemail would complain about messages older than + 1970. Yes, someone had a 'Date' header with 1967 :) + * Complain if the mailbox to be read does not look like a valid mbox-format + mailbox. + * Added a few more tests to test_archivemail.py + +Version 0.4.1 - 21 April 2002 + * Don't archive messages that are flagged important unless we are given the + --include-flagged option. + * Fixed a bug where when archiving messages from maildir mailboxes, we were + not preserving the status information contained in the filename suffix to + Status and X-Status mbox headers. This means we forgot if we had read or + replied to the message. + * We now complain if an mbox-format mailbox that is being read changes in + size -- this should not happen, since we have locked these files, but it + is a good sanity check. + * Changed from using the mailbox.PortableUnixMailbox class to read mbox + mailboxes to the mailbox.UnixMailbox class. This fixes bugs where unquoted + 'From ' lines in the body of messages were confusing archivemail. + +Version 0.4.0 - 17 April 2002 + * Added an option --no-compress to make archives but not compress them with + gzip. + * Added an option --preserve-unread to not archive unread messages. + * Added a few more unittests. + +Version 0.3.2 - 13 April 2002 + * Added a lot more information to the manpage, including examples and + notes. + * Fixed up the README file and archivemail usage message. + * Added an example archivemail shell script that I run from crontab. + +Version 0.3.1 - 12 April 2002 + * Stopped calling 'gzip' externally and started using the gzip library + so that we can append to a copy of the gzip archive directly. + * Removed 'bzip2' and 'compress' options since they are increasing + complexity without adding much, and needed to be called externally. + Maybe when python gets a bzip2 library I will add back an option to + compress archives using bzip2. + * Included a man page & sgml docbook source. + +Version 0.3.0 - 11 April 2002 + * We now preserve the last-accessed and last-modified timestamps correctly + * We now preserve the correct permissions on the original mailbox instead + of always mode 600 + * Fixed a bug where lockfiles were being created that were not + world-readable + * Made archivemail work better when used as a python module so it can + integrate better with unittest. (... although I still distribute it + without the .py extension - dodgy?) + * Bundled a unit-testing script for archivemail + * Started using a distutils 'setup.py' script for installation. 
+ +Version 0.2.1 - 4 April 2002 + * Since we might not have a parse-able 'Date-Received' or 'Date' field, + use 5 different ways to guess the date of a message. + * Removed the '--use-mtime' option since it is obsolete -- we will always + use the file modification time for the message if other date-parsing + methods fail. + * Check to see if we are running as root -- if so, change our + effective userid and groupid to that of the original mailbox. This will + make sure any archives or tempfiles we write have the same ownership and + will allow the root user to do "archivemail /var/spool/mail/*" + * Fixed a bug where if you ran 'archivemail.py foo/mbox' it would create + the archive file in the current directory instead of the directory 'foo'. + +Version 0.2.0 - 3 April 2002 + * Added support for reading from MH mailboxes + * Refuse to proceed if we would be making tempfiles in world-writable + directories + * Clamped down with lots of assert()s checking function parameters + * Complain earlier if we do not have permission to write to the output + directory + * Use the 'Date' field of a message when constructing the 'From_' line + from a maildir/MH mailbox if there is no 'Delivery-Date' field. + +Version 0.1.0 - 31 March 2002 + * Initial release diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..86fd703 --- /dev/null +++ b/COPYING @@ -0,0 +1,341 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
diff --git a/FAQ b/FAQ
new file mode 100644
index 0000000..9fa2f29
--- /dev/null
+++ b/FAQ
@@ -0,0 +1,15 @@
+
+1. Why doesn't archivemail support bzip2 compression in addition to gzip?
+-------------------------------------------------------------------------
+
+The bzip2 module in Python 2.x is not fully compatible with the gzip module,
+and cannot be used with the current implementation of compressed mailbox
+support in archivemail. See Python feature request #5863 for details.
+
+2. Can you add a switch to archive mailboxes greater than a certain size?
+-------------------------------------------------------------------------
+
+If you are using mbox format mailboxes, use the find(1) command instead; it is more flexible:
+
+    find $HOME/Mail -type f !
-name '*archive*' + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..886a863 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include CHANGELOG +include COPYING +include FAQ +include MANIFEST +include TODO +include NEWS +include archivemail.1 +include archivemail.xml +include db2man.xsl +graft examples +include test_archivemail diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..57bad9c --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +VERSION=$(shell python setup.py --version) +VERSION_TAG=v$(subst .,_,$(VERSION)) +TARFILE=archivemail-$(VERSION).tar.gz +HTDOCS=htdocs-$(VERSION) + +default: + @echo "no default target" + +clean: + rm -rf $(HTDOCS) + +test: + python test_archivemail + +clobber: clean + rm -rf build dist + rm -f $(HTDOCS).tgz + + +sdist: clobber doc + python setup.py sdist + +tag: + git tag -a $(VERSION_TAG) + +doc: archivemail.1 archivemail.html + +htdocs: $(HTDOCS).tgz +$(HTDOCS).tgz: index.html archivemail.html RELNOTES style.css manpage.css + install -d -m 775 $(HTDOCS) + install -m 664 $^ $(HTDOCS) + cd $(HTDOCS) && mv archivemail.html manpage.html + tar czf $(HTDOCS).tgz $(HTDOCS) + +archivemail.1: archivemail.xml db2man.xsl + xsltproc db2man.xsl archivemail.xml + +archivemail.html: archivemail.xml db2html.xsl + xsltproc --output archivemail.html \ + db2html.xsl archivemail.xml + tidy -modify -indent -f /dev/null archivemail.html || true + +.PHONY: default clean test clobber sdist tag doc htdocs diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..8c84fdd --- /dev/null +++ b/NEWS @@ -0,0 +1,44 @@ +Notable changes in archivemail 0.9.0: + + * IMAP: support for international mailbox names containing non-ASCII + characters. + +Notable changes in archivemail 0.8.0: + + * Removed the feature to setuid to the mailbox owners when run as root. + This was a bad idea; it's really hard to do safely, if at all possible. + Obsoletes: patch #2783134. + * New option --archive-name, or short -a, to hard-code an archive filename. + Like the --suffix and --prefix options, it is expanded with strftime(). + This option conflicts with archiving multiple mailboxes. Closes: feature + request #1306538. + * New option --prefix, or short -p, to specify an archive name prefix. Like + a suffix specified with the --suffix option, the prefix is expanded with + strftime(). Specifying this option disables the default archive name + suffix. Obsoletes: feature request #604281. (Thanks Serafeim Zanikolas + for an initial patch) + * New option --all to archive all messages in a mailbox. Closes: #1764846. + * archivemail now expands wildcards in IMAP mailbox names. For example, the + url imaps://user@server/foo/* will expand to all subfolders of foo. + Closes: feature request #1978540. Obsoletes: patch #1918937. + * To determine the delivery date of a message, archivemail now looks for the + timestamp of the latest 'Received' header before resorting to + 'Resent-Date' or 'Date'. This should give much better results when there + is no 'Delivery-date' header, which is still checked first. + (Thanks Andrew Ferrier & Christian Brabandt) + Closes: #1481316, #1764855, Debian bug #272666. + * We now omit the dotlock if we don't have sufficient permissions to create + it in the mbox directory. (The file is still locked with lockf.) + Together with more changes under the hood (see below) this means + archivemail can now operate on mbox files in the system mail spool. + Closes: #855269. 
+ * Replaced some simple-minded file operation security checks with more
+   decent ones. This means we can safely operate in /tmp, for example. The
+   price is that we no longer accept symlinked files. Obsoletes: patch
+   #1874868.
+ * The archive now also gets locked while archivemail updates it.
+ * mbox locking got completely rewritten. Switched from flock to lockf
+   locking, which is NFS-safe and portable, and we now lock with lockf first,
+   then with a dotlock, instead of the other way around. (This makes
+   archivemail compatible with Debian systems. ;)
+
diff --git a/README b/README
new file mode 100644
index 0000000..cbb3fb4
--- /dev/null
+++ b/README
@@ -0,0 +1,63 @@
+
+-----------------------------------------------------------
+archivemail - archive and compress old mail in your mailbox
+-----------------------------------------------------------
+
+OVERVIEW:
+
+archivemail is a tool written in python(1) for archiving and compressing old
+email in mailboxes.
+
+It can move messages older than the specified number of days to a separate
+'archive' mbox-format mailbox that is compressed with 'gzip'.
+
+For example, have you been subscribing to the 'linux-kernel' mailing list
+for the last 6 years and ended up with a 160-meg mailbox that 'mutt' is
+taking a long time to load? 'archivemail' can move all messages that are
+older than 6 months to a separate compressed mailbox, and leave you with
+just the most recent messages.
+
+'archivemail' can save a lot of disk space and will significantly reduce
+overhead on your mail reader. The number of days before mail is considered
+'old' is up to you, but the default is 180 days. You can also archive messages
+by an absolute date or only archive unread messages.
+
+
+REQUIREMENTS:
+
+archivemail requires python version 2.3. It also uses some optional python
+modules, but these should be pretty much standard; if you get an ImportError
+nonetheless, please report it, thanks. (For contact addresses see below.)
+
+Python is available from http://www.python.org/
+
+
+INSTALLATION:
+
+If you want to test archivemail:
+    python test_archivemail
+
+To install archivemail, run:
+    python setup.py install
+
+
+USE:
+
+For more detailed information and examples, look at the archivemail man page.
+
+The best way to run archivemail is from cron, giving the '-q' option to
+archivemail to make it quiet, only printing messages if something went wrong.
+Check out the 'examples' directory for an example shell script to be run from
+cron.
+
+The archivemail website is at: http://archivemail.sourceforge.net/
+
+If you have any feedback or bug reports about archivemail, you are very
+welcome to email the maintainers; as of November 2006, these are:
+
+Nikolaus Schulz
+Peter Poeml
+
+
+-- Paul Rodger, archivemail author
+   Updated by: Nikolaus Schulz, maintainer
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..e6950ed
--- /dev/null
+++ b/TODO
@@ -0,0 +1,103 @@
+Integrate --debug-imap option into yet-to-be-implemented -vv switch?
+I had the idea to provide separate debugging info levels anyway, see --debug
+below.
+
+Gracefully close IMAP connection upon unexpected error (currently archivemail
+just terminates).
+
+LOCKING & Co:
+* Block signals while writing changed mailbox back.
+* Double-check the entire locking code.
+
+Seems like existing archives are not read or validated in any way. New archive
+data is blindly appended... Probably okay, but should be documented.
+
+IMAP SEARCH BEFORE disregards time and timezone information. This should
+at least be documented. E.g. I've found that '-d 0' didn't match all messages
+in an IMAP mailbox. This is because the SEARCH key is (BEFORE 14-Nov-2007) on
+14 November, not matching messages that arrived today. (This problem is
+probably fixed for most use cases by the --all option. See also the sketch at
+the end of this file.)
+
+Document mbox format issues: link to
+http://homepages.tesco.net/~J.deBoynePollard/FGA/mail-mbox-formats.html,
+qmail mbox manpage, Debian manpage, RFC 4155. Document what mbox format we can
+read, and what we write.
+FIXME: we cannot yet parse rfc 2822 addr-spec stuff like quoted local-parts in
+return-path addresses.
+
+Minor annoyance: when a From_ line is generated, guess_delivery_time() reports
+the used date header a second time.
+
+Check sf.net and Debian BTS for new bugs. Again.
+
+IMAP: ensure mailbox archives are properly named. Currently imap folder names
+are mapped like this:
+
+    IMAP URL    | resulting mbox_archive
+    ------------+------------------------
+    test.box    | test.box_archive.gz
+    test/box    | box_archive.gz
+
+
+Implement --include-draft. But think about it again first. (This is feature
+request #1569305.)
+
+Implement a fallback if an IMAP server doesn't support SEARCH. (Ouch!)
+
+Add IMAP tests to the testsuite (upload test messages with IMAP "APPEND
+date-string"). This should be done without any real network I/O.
+
+Try to port archivemail to email.message and the new mailboxes in Python 2.5.
+Are these flexible enough for our needs?
+
+Lay out what we want with respect to multiple selection criteria.
+Some make sense, but this easily gets too complex, and if nothing else it's a
+hassle to add all the options. Hm.
+
+Reject patch #1036022 "Added option to inverse date compare" after cooling down
+because the patch is both stupid (copy+paste code) and broken. Don't see why
+anyone should want this/we should support it.
+If this is reasonable *at all*, I think we'd better go for all the complexity
+to honour _two_ cut-off dates (see Debian bug "#184124: archivemail: -D and -d
+should not be incompatible", which is a comparably half-baked thought).
+
+Add --debug or -vv switch, and move the printing of diagnostic info for each
+message to --debug.
+
+Perhaps add some more nice stuff like printing of subject, sender...
+See tracker #868714 "added stats option to archivemail", which has a point.
+Message-Ids are useful for diagnosis, but not very nice to read for humans.
+
+Be a nicer citizen with respect to mailbox locking.
+
+Perhaps prune/shorten IMAP mailbox URLs in messages?
+They may be quite long and may contain the sensitive password.
+Also shows up in the process list...
+Perhaps find a clean, lean replacement for all that clutter in the IMAP urls.
+
+Require --output-dir for IMAP archiving? Otherwise we just drop the archive
+in the current working directory.
+
+Check all items below, which are from the original author. :-)
+
+.archivemailrc support
+
+When you get a file-not-found in the 6th mailbox of 10, it aborts the whole
+run. Better to fail gracefully and keep going.
+
+Add more tests (see top of test_archivemail)
+
+We need some better checking to see if we are really looking at a valid
+mbox-format mailbox.
+
+Add an option to not cut threads.
+
+Add MMDF mailbox support
+
+Add Babyl mailbox support
+
+Add option to archive depending on mailbox size threshold
+- is this a good idea?
+
+Add option to archive depending on number of messages
+- is this a good idea?
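+
+A minimal sketch of the BEFORE cut-off issue described above (purely for
+documentation; imap_before_criterion is a hypothetical helper, not code taken
+from archivemail):
+
+    import time
+
+    def imap_before_criterion(days_old):
+        """Build an IMAP SEARCH key such as '(BEFORE 14-Nov-2007)'."""
+        cutoff = time.time() - days_old * 24 * 60 * 60
+        # IMAP date-text has only day granularity (DD-Mon-YYYY); note that
+        # %b is locale-dependent, so real code must force English month names.
+        return '(BEFORE %s)' % time.strftime('%d-%b-%Y', time.localtime(cutoff))
+
+    # With days_old=0 the key names today's date; since BEFORE is exclusive
+    # and ignores the time of day, messages that arrived earlier today do not
+    # match -- which is exactly the '-d 0' surprise noted above.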
diff --git a/archivemail b/archivemail
new file mode 100755
index 0000000..26b9aca
--- /dev/null
+++ b/archivemail
@@ -0,0 +1,1951 @@
+#! /usr/bin/env python
+############################################################################
+# Copyright (C) 2002 Paul Rodger ,
+# (C) 2006 Peter Poeml ,
+# (C) 2006-2010 Nikolaus Schulz
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+############################################################################
+"""
+Archive and compress old mail in mbox, MH or maildir-format mailboxes.
+Website: http://archivemail.sourceforge.net/
+"""
+
+# global administrivia
+__version__ = "archivemail v0.9.0"
+__copyright__ = """\
+Copyright (C) 2002 Paul Rodger
+          (C) 2006 Peter Poeml ,
+          (C) 2006-2011 Nikolaus Schulz
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
+
+import sys
+
+def check_python_version():
+    """Abort if we are running on python < v2.3"""
+    too_old_error = "This program requires python v2.3 or greater. " + \
+        "Your version of python is:\n%s""" % sys.version
+    try:
+        version = sys.version_info # we might not even have this function! :)
+        if (version[0] < 2) or (version[0] == 2 and version[1] < 3):
+            print too_old_error
+            sys.exit(1)
+    except AttributeError:
+        print too_old_error
+        sys.exit(1)
+
+# define & run this early
+# (IMAP over SSL requires Python >= 2.3)
+check_python_version()
+
+import fcntl
+import getopt
+import gzip
+import mailbox
+import os
+import pwd
+import re
+import rfc822
+import shutil
+import signal
+import stat
+import string
+import tempfile
+import time
+import urlparse
+import errno
+import socket
+import locale
+
+# From_ mangling regex.
+from_re = re.compile(r'^From ', re.MULTILINE)
+imapsize_re = re.compile(r'^(?P<msn>[0-9]+) \(RFC822\.SIZE (?P<size>[0-9]+)\)')
+
+userencoding = locale.getpreferredencoding()
+
+############## class definitions ###############
+
+class ArchivemailException(Exception):
+    pass
+class UserError(ArchivemailException):
+    pass
+class UnexpectedError(ArchivemailException):
+    pass
+class LockUnavailable(ArchivemailException):
+    pass
+
+class Stats:
+    """Class to collect and print statistics about mailbox archival"""
+    __archived = 0
+    __archived_size = 0
+    __mailbox_name = None
+    __archive_name = None
+    __start_time = 0
+    __total = 0
+    __total_size = 0
+
+    def __init__(self, mailbox_name, final_archive_name):
+        """Constructor for a new set of statistics.
+ + Arguments: + mailbox_name -- filename/dirname of the original mailbox + final_archive_name -- filename for the final 'mbox' archive, without + compression extension (eg .gz) + + """ + assert mailbox_name + assert final_archive_name + self.__start_time = time.time() + self.__mailbox_name = mailbox_name + self.__archive_name = final_archive_name + ".gz" + + def another_message(self, size): + """Add one to the internal count of total messages processed + and record message size.""" + self.__total = self.__total + 1 + self.__total_size = self.__total_size + size + + def another_archived(self, size): + """Add one to the internal count of messages archived + and record message size.""" + self.__archived = self.__archived + 1 + self.__archived_size = self.__archived_size + size + + def display(self): + """Print statistics about how many messages were archived""" + end_time = time.time() + time_seconds = end_time - self.__start_time + action = "archived" + if options.delete_old_mail: + action = "deleted" + if options.dry_run: + action = "I would have " + action + print "%s:\n %s %d of %d message(s) (%s of %s) in %.1f seconds" % \ + (self.__mailbox_name, action, self.__archived, self.__total, + nice_size_str(self.__archived_size), + nice_size_str(self.__total_size), time_seconds) + + +class StaleFiles: + """Class to keep track of files to be deleted on abnormal exit""" + dotlock_files = [] # dotlock files for source mbox and final archive + temp_mboxes = [] # temporary retain and archive mboxes + temp_dir = None # our tempfile directory container + + def clean(self): + """Delete any temporary files or lockfiles that exist""" + while self.dotlock_files: + dotlock = self.dotlock_files.pop() + vprint("removing stale dotlock file '%s'" % dotlock) + try: + os.remove(dotlock) + except (IOError, OSError): pass + while self.temp_mboxes: + mbox = self.temp_mboxes.pop() + vprint("removing stale temporary mbox '%s'" % mbox) + try: + os.remove(mbox) + except (IOError, OSError): pass + if self.temp_dir: + vprint("removing stale tempfile directory '%s'" % self.temp_dir) + try: + os.rmdir(self.temp_dir) + except OSError, e: + if e.errno == errno.ENOTEMPTY: # Probably a bug + user_warning("cannot remove temporary directory '%s', " + "directory not empty" % self.temp_dir) + except IOError: pass + else: self.temp_dir = None + + + +class Options: + """Class to store runtime options, including defaults""" + archive_prefix = None + archive_suffix = None + archive_default_suffix = "_archive" + archive_name = None + days_old_max = 180 + date_old_max = None + delete_old_mail = False + dry_run = False + filter_append = None + include_flagged = False + locking_attempts = 5 + lockfile_extension = ".lock" + lock_sleep = True + no_compress = False + only_archive_read = False + output_dir = None + pwfile = None + preserve_unread = False + mangle_from = True + quiet = False + read_buffer_size = 8192 + script_name = os.path.basename(sys.argv[0]) + min_size = None + verbose = False + debug_imap = 0 + warn_duplicates = False + copy_old_mail = False + archive_all = False + + def parse_args(self, args, usage): + """Set our runtime options from the command-line arguments. + + Arguments: + args -- this is sys.argv[1:] + usage -- a usage message to display on '--help' or bad arguments + + Returns the remaining command-line arguments that have not yet been + parsed as a string. 
+ + """ + try: + opts, args = getopt.getopt(args, '?D:S:Vd:hno:F:P:qs:p:a:uv', + ["date=", "days=", "delete", "dry-run", "help", + "include-flagged", "no-compress", "output-dir=", + "filter-append=", "pwfile=", "dont-mangle", + "preserve-unread", "quiet", "size=", "suffix=", + "prefix=", "archive-name=", "verbose", + "debug-imap=", "version", "warn-duplicate", + "copy", "all"]) + except getopt.error, msg: + user_error(msg) + + archive_by = None + + for o, a in opts: + if o == '--delete': + if self.copy_old_mail: + user_error("found conflicting options --copy and --delete") + self.delete_old_mail = True + if o == '--include-flagged': + self.include_flagged = True + if o == '--no-compress': + self.no_compress = True + if o == '--warn-duplicate': + self.warn_duplicates = True + if o in ('-D', '--date'): + if archive_by: + user_error("you cannot specify both -d and -D options") + archive_by = "date" + self.date_old_max = self.date_argument(a) + if o in ('-d', '--days'): + if archive_by: + user_error("you cannot specify both -d and -D options") + archive_by = "days" + self.days_old_max = string.atoi(a) + if o in ('-o', '--output-dir'): + self.output_dir = os.path.expanduser(a) + if o in ('-P', '--pwfile'): + self.pwfile = os.path.expanduser(a) + if o in ('-F', '--filter-append'): + self.filter_append = a + if o in ('-h', '-?', '--help'): + print usage + sys.exit(0) + if o in ('-n', '--dry-run'): + self.dry_run = True + if o in ('-q', '--quiet'): + self.quiet = True + if o in ('-s', '--suffix'): + self.archive_suffix = a + if o in ('-p', '--prefix'): + self.archive_prefix = a + if o in ('-a', '--archive-name'): + self.archive_name = os.path.expanduser(a) + if o in ('-S', '--size'): + self.min_size = string.atoi(a) + if o in ('-u', '--preserve-unread'): + self.preserve_unread = True + if o == '--dont-mangle': + self.mangle_from = False + if o in ('-v', '--verbose'): + self.verbose = True + if o == '--debug-imap': + self.debug_imap = int(a) + if o == '--copy': + if self.delete_old_mail: + user_error("found conflicting options --copy and --delete") + self.copy_old_mail = True + if o == '--all': + self.archive_all = True + if o in ('-V', '--version'): + print __version__ + "\n\n" + __copyright__ + sys.exit(0) + return args + + def sanity_check(self, args): + """Complain bitterly about our options now rather than later""" + if self.output_dir: + check_sane_destdir(self.output_dir) + if self.days_old_max < 0: + user_error("--days argument must be positive") + if self.days_old_max >= 10000: + user_error("--days argument must be less than 10000") + if self.min_size is not None and self.min_size < 1: + user_error("--size argument must be greater than zero") + if self.quiet and self.verbose: + user_error("you cannot use both the --quiet and --verbose options") + if self.pwfile: + if not os.path.isfile(self.pwfile): + user_error("pwfile %s does not exist" % self.pwfile) + if self.archive_name and len(args) > 1: + user_error("the --archive-name cannot be used with multiple " \ + "mailboxes") + + def date_argument(self, string): + """Converts a date argument string into seconds since the epoch""" + date_formats = ( + "%Y-%m-%d", # ISO format + "%d %b %Y" , # Internet format + "%d %B %Y" , # Internet format with full month names + ) + time.accept2dyear = False # I'm not going to support 2-digit years + for format in date_formats: + try: + date = time.strptime(string, format) + seconds = time.mktime(date) + return seconds + except (ValueError, OverflowError): + pass + user_error("cannot parse the date argument 
'%s'\n" + "The date should be in ISO format (eg '2002-04-23'),\n" + "Internet format (eg '23 Apr 2002') or\n" + "Internet format with full month names (eg '23 April 2002')" % + string) + + +class LockableMboxMixin: + """Locking methods for mbox files.""" + + def __init__(self, mbox_file, mbox_file_name): + self.mbox_file = mbox_file + self.mbox_file_name = mbox_file_name + self._locked = False + self._use_dotlock = True + + def lock(self): + """Lock this mbox with both a dotlock and a posix lock.""" + assert not self._locked + attempt = 1 + while True: + try: + self._posix_lock() + self._dotlock_lock() + break + except LockUnavailable, e: + self._posix_unlock() + attempt += 1 + if (attempt > options.locking_attempts): + unexpected_error(str(e)) + vprint("%s - sleeping..." % e) + time.sleep(options.lock_sleep) + except: + self._posix_unlock() + raise + self._locked = True + + def unlock(self): + """Unlock this mbox.""" + assert self._locked + self._dotlock_unlock() + self._posix_unlock() + self._locked = False + + def _posix_lock(self): + """Set an exclusive posix lock on the 'mbox' mailbox""" + vprint("trying to acquire posix lock on file '%s'" % self.mbox_file_name) + try: + fcntl.lockf(self.mbox_file, fcntl.LOCK_EX|fcntl.LOCK_NB) + except IOError, e: + if e.errno in (errno.EAGAIN, errno.EACCES): + raise LockUnavailable("posix lock for '%s' unavailable" % \ + self.mbox_file_name) + else: + raise + vprint("acquired posix lock on file '%s'" % self.mbox_file_name) + + def _posix_unlock(self): + """Unset any posix lock on the 'mbox' mailbox""" + vprint("dropping posix lock on file '%s'" % self.mbox_file_name) + fcntl.lockf(self.mbox_file, fcntl.LOCK_UN) + + def _dotlock_lock(self): + """Create a dotlock file for the 'mbox' mailbox""" + hostname = socket.gethostname() + pid = os.getpid() + box_dir, prelock_prefix = os.path.split(self.mbox_file_name) + prelock_suffix = ".%s.%s%s" % (hostname, pid, options.lockfile_extension) + lock_name = self.mbox_file_name + options.lockfile_extension + vprint("trying to create dotlock file '%s'" % lock_name) + try: + plfd, prelock_name = tempfile.mkstemp(prelock_suffix, prelock_prefix, + dir=box_dir) + except OSError, e: + if e.errno == errno.EACCES: + if not options.quiet: + user_warning("no write permissions: omitting dotlock for '%s'" % \ + self.mbox_file_name) + self._use_dotlock = False + return + raise + try: + try: + os.link(prelock_name, lock_name) + # We've got the lock. + except OSError, e: + if os.fstat(plfd)[stat.ST_NLINK] == 2: + # The Linux man page for open(2) claims that in this + # case we have actually succeeded to create the link, + # and this assumption seems to be folklore. + # So we've got the lock. 
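# A simplified, standalone sketch of the hard-link dotlock trick used above:
# create a unique pre-lock file, try to link() it to the lock name, and treat
# a resulting link count of 2 as success even when link() raised (the
# open(2)/NFS folklore referred to in the comment). The helper name and the
# missing retry/cleanup policy are illustrative, not archivemail's exact code.
import os, errno, tempfile

def try_dotlock(lock_name):
    """Return True if lock_name was obtained, False if it is already held."""
    box_dir = os.path.dirname(lock_name) or "."
    fd, pre_name = tempfile.mkstemp(prefix=".prelock-", dir=box_dir)
    try:
        try:
            os.link(pre_name, lock_name)
            return True
        except OSError, e:
            if os.fstat(fd).st_nlink == 2:
                return True         # link() "failed" but the link exists
            if e.errno == errno.EEXIST:
                return False        # somebody else holds the dotlock
            raise
    finally:
        os.close(fd)
        os.unlink(pre_name)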
+ pass + elif e.errno == errno.EEXIST: + raise LockUnavailable("Dotlock for '%s' unavailable" % self.mbox_file_name) + else: + raise + _stale.dotlock_files.append(lock_name) + finally: + os.close(plfd) + os.unlink(prelock_name) + vprint("acquired lockfile '%s'" % lock_name) + + def _dotlock_unlock(self): + """Delete the dotlock file for the 'mbox' mailbox.""" + if not self._use_dotlock: + return + lock_name = self.mbox_file_name + options.lockfile_extension + vprint("removing lockfile '%s'" % lock_name) + os.remove(lock_name) + _stale.dotlock_files.remove(lock_name) + + def commit(self): + """Sync the mbox file to disk.""" + self.mbox_file.flush() + os.fsync(self.mbox_file.fileno()) + + def close(self): + """Close the mbox file""" + vprint("closing file '%s'" % self.mbox_file_name) + assert not self._locked + self.mbox_file.close() + + +class Mbox(mailbox.UnixMailbox, LockableMboxMixin): + """A mostly-read-only mbox with locking. The mbox content can only be + modified by overwriting the entire underlying file.""" + + def __init__(self, path): + """Constructor for opening an existing 'mbox' mailbox. + Extends constructor for mailbox.UnixMailbox() + + Named Arguments: + path -- file name of the 'mbox' file to be opened + """ + assert path + fd = safe_open_existing(path) + st = os.fstat(fd) + self.original_atime = st.st_atime + self.original_mtime = st.st_mtime + self.starting_size = st.st_size + self.mbox_file = os.fdopen(fd, "r+") + self.mbox_file_name = path + LockableMboxMixin.__init__(self, self.mbox_file, path) + mailbox.UnixMailbox.__init__(self, self.mbox_file) + + def reset_timestamps(self): + """Set the file timestamps to the original values""" + assert self.original_atime + assert self.original_mtime + assert self.mbox_file_name + os.utime(self.mbox_file_name, (self.original_atime, \ + self.original_mtime)) + + def get_size(self): + """Return the current size of the mbox file on disk""" + return os.path.getsize(self.mbox_file_name) + + def overwrite_with(self, mbox_filename): + """Overwrite the mbox content with the content of the given mbox file.""" + fin = open(mbox_filename, "r") + self.mbox_file.seek(0) + shutil.copyfileobj(fin, self.mbox_file) + self.mbox_file.truncate() + + +class ArchiveMbox(LockableMboxMixin): + """Simple append-only access to the archive mbox. Entirely content-agnostic.""" + + def __init__(self, path): + fd = safe_open(path) + self.mbox_file = os.fdopen(fd, "a") + LockableMboxMixin.__init__(self, self.mbox_file, path) + + def append(self, filename): + """Append the content of the given file to the mbox.""" + assert self._locked + fin = open(filename, "r") + oldsize = os.fstat(self.mbox_file.fileno()).st_size + try: + shutil.copyfileobj(fin, self.mbox_file) + except: + # We can safely abort here without data loss, because + # we have not yet changed the original mailbox + self.mbox_file.truncate(oldsize) + raise + fin.close() + + +class TempMbox: + """A write-only temporary mbox. No locking methods.""" + + def __init__(self, prefix=tempfile.template): + """Creates a temporary mbox file.""" + fd, filename = tempfile.mkstemp(prefix=prefix) + self.mbox_file_name = filename + _stale.temp_mboxes.append(filename) + self.mbox_file = os.fdopen(fd, "w") + # an empty gzip file is not really empty (it contains the gzip header + # and trailer), so we need to track manually if this mbox is empty + self.empty = True + + def write(self, msg): + """Write a rfc822 message object to the 'mbox' mailbox. 
+ If the rfc822 has no Unix 'From_' line, then one is constructed + from other headers in the message. + + Arguments: + msg -- rfc822 message object to be written + + """ + assert msg + assert self.mbox_file + + self.empty = False + vprint("saving message to file '%s'" % self.mbox_file_name) + unix_from = msg.unixfrom + if unix_from: + msg_has_mbox_format = True + else: + msg_has_mbox_format = False + unix_from = make_mbox_from(msg) + self.mbox_file.write(unix_from) + assert msg.headers + self.mbox_file.writelines(msg.headers) + self.mbox_file.write(os.linesep) + + # The following while loop is about twice as fast in + # practice to 'self.mbox_file.writelines(msg.fp.readlines())' + assert options.read_buffer_size > 0 + linebuf = "" + while True: + body = msg.fp.read(options.read_buffer_size) + if (not msg_has_mbox_format) and options.mangle_from: + # Be careful not to break pattern matching + splitindex = body.rfind(os.linesep) + nicebody = linebuf + body[:splitindex] + linebuf = body[splitindex:] + body = from_re.sub('>From ', nicebody) + if not body: + break + self.mbox_file.write(body) + if not msg_has_mbox_format: + self.mbox_file.write(os.linesep) + + def commit(self): + """Sync the mbox file to disk.""" + self.mbox_file.flush() + os.fsync(self.mbox_file.fileno()) + + def close(self): + """Close the mbox file""" + vprint("closing file '%s'" % self.mbox_file_name) + self.mbox_file.close() + + def saveas(self, filename): + """Rename this temporary mbox file to the given name, making it + permanent. Emergency use only.""" + os.rename(self.mbox_file_name, filename) + _stale.temp_mboxes.remove(self.mbox_file_name) + + def remove(self): + """Delete the temporary mbox file.""" + os.remove(self.mbox_file_name) + _stale.temp_mboxes.remove(self.mbox_file_name) + + +class CompressedTempMbox(TempMbox): + """A compressed version of a TempMbox.""" + + def __init__(self, prefix=tempfile.template): + TempMbox.__init__(self, prefix) + self.raw_file = self.mbox_file + self.mbox_file = gzip.GzipFile(mode="a", fileobj=self.mbox_file) + # Workaround that GzipFile.close() isn't idempotent in Python < 2.6 + # (python issue #2959). There is no GzipFile.closed, so we need a + # replacement. + self.gzipfile_closed = False + + def commit(self): + """Finish gzip file and sync it to disk.""" + # This method is currently not used + self.mbox_file.close() # close GzipFile, writing gzip trailer + self.gzipfile_closed = True + self.raw_file.flush() + os.fsync(self.raw_file.fileno()) + + def close(self): + """Close the gzip file.""" + if not self.gzipfile_closed: + self.mbox_file.close() + self.raw_file.close() + + +class IdentityCache: + """Class used to remember Message-IDs and warn if they are seen twice""" + seen_ids = {} + mailbox_name = None + + def __init__(self, mailbox_name): + """Constructor: takes the mailbox name as an argument""" + assert mailbox_name + self.mailbox_name = mailbox_name + + def warn_if_dupe(self, msg): + """Print a warning message if the message has already appeared""" + assert msg + message_id = msg.get('Message-ID') + assert message_id + if self.seen_ids.has_key(message_id): + user_warning("duplicate message id: '%s' in mailbox '%s'" % + (message_id, self.mailbox_name)) + self.seen_ids[message_id] = True + + +# global class instances +options = Options() # the run-time options object +_stale = StaleFiles() # remember what we have to delete on abnormal exit + + +def main(args = sys.argv[1:]): + global _stale + + # this usage message is longer than 24 lines -- bad idea? 
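# A rough illustration of the From_ mangling that TempMbox.write() above
# performs on non-mbox input when --dont-mangle is not given. It assumes a
# pattern equivalent to the module-level from_re defined near the top of the
# script; the regex here is only an illustrative stand-in.
import re

from_re_demo = re.compile(r'^From ', re.MULTILINE)
body = "Hello,\nFrom here on things get tricky.\nBye.\n"
print from_re_demo.sub('>From ', body)
# A body line beginning with "From " comes out as ">From ", so a mail reader
# will not mistake it for an mbox message separator later.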
+ usage = """Usage: %s [options] mailbox [mailbox...] +Moves old mail in IMAP, mbox, MH or maildir-format mailboxes to an mbox-format +mailbox compressed with gzip. + +Options are as follows: + -d, --days=NUM archive messages older than NUM days (default: %d) + -D, --date=DATE archive messages older than DATE + -o, --output-dir=DIR directory to store archives (default: same as original) + -P, --pwfile=FILE file to read imap password from (default: None) + -F, --filter-append=STRING append arbitrary string to the IMAP filter string + -p, --prefix=NAME prefix for archive filename (default: none) + -s, --suffix=NAME suffix for archive filename (default: '%s') + -a, --archive-name=NAME specify complete archive filename + -S, --size=NUM only archive messages NUM bytes or larger + -n, --dry-run don't write to anything - just show what would be done + -u, --preserve-unread never archive unread messages + --dont-mangle do not mangle From_ in message bodies + --delete delete rather than archive old mail (use with caution!) + --copy copy rather than archive old mail + --include-flagged messages flagged important can also be archived + --all archive all messages + --no-compress do not compress archives with gzip + --warn-duplicate warn about duplicate Message-IDs in the same mailbox + -v, --verbose report lots of extra debugging information + --debug-imap=NUM set IMAP debugging output level (0 is none) + -q, --quiet quiet mode - print no statistics (suitable for crontab) + -V, --version display version information + -h, --help display this message + +Example: %s linux-kernel + This will move all messages older than %s days to a 'mbox' mailbox called + 'linux-kernel_archive.gz', deleting them from the original 'linux-kernel' + mailbox. If the 'linux-kernel_archive.gz' mailbox already exists, the + newly archived messages are appended. + +To archive IMAP mailboxes, format your mailbox argument like this: + imap://username:password@server/mailbox + (substitute 'imap' with 'imaps' for an SSL connection) + +Website: http://archivemail.sourceforge.net/ """ % \ + (options.script_name, options.days_old_max, options.archive_suffix, + options.script_name, options.days_old_max) + + args = options.parse_args(args, usage) + if len(args) == 0: + print usage + sys.exit(1) + + options.sanity_check(args) + + for mailbox_path in args: + archive(mailbox_path) + + +######## errors and debug ########## + +def vprint(string): + """Print the string argument if we are in verbose mode""" + if options.verbose: + print string + + +def unexpected_error(string): + """Print the string argument, a 'shutting down' message and abort. Raise + UnexpectedErrors if archivemail is run as a module. This function never + returns.""" + if not __name__ == '__main__': + raise UnexpectedError(string) + sys.stderr.write("%s: %s\n" % (options.script_name, string)) + sys.stderr.write("%s: unexpected error encountered - shutting down\n" % + options.script_name) + sys.exit(1) + + +def user_error(string): + """Print the string argument and abort. Raise UserError if archivemail is + run as a module. 
This function never returns.""" + if not __name__ == '__main__': + raise UserError(string) + sys.stderr.write("%s: %s\n" % (options.script_name, string)) + sys.exit(1) + + +def user_warning(string): + """Print the string argument""" + sys.stderr.write("%s: Warning - %s\n" % (options.script_name, string)) + +########### operations on a message ############ + +def make_mbox_from(message): + """Return a string suitable for use as a 'From_' mbox header for the + message. + + Arguments: + message -- the rfc822 message object + + """ + assert message + address = guess_return_path(message) + time_message = guess_delivery_time(message) + date = time.localtime(time_message) + assert date + date_string = time.asctime(date) + mbox_from = "From %s %s\n" % (address, date_string) + return mbox_from + + +def guess_return_path(message): + """Return a guess at the Return Path address of an rfc822 message""" + assert message + + for header in ('Return-path', 'From'): + address_header = message.get(header) + if address_header: + (name, address) = rfc822.parseaddr(address_header) + if address: + return address + # argh, we can't find any valid 'Return-path' guesses - just + # just use the current unix username like mutt does + login = pwd.getpwuid(os.getuid())[0] + assert login + return login + + +def guess_delivery_time(message): + """Return a guess at the delivery date of an rfc822 message""" + assert message + # try to guess the delivery date from various headers + # get more desparate as we go through the array + for header in 'Delivery-date', 'Received', 'Resent-Date', 'Date': + try: + if header == 'Received': + # This should be good enough for almost all headers in the wild; + # if we're guessing wrong, parsedate_tz() will fail graciously. + token = message.getrawheader(header).rsplit(';', 1)[-1] + else: + token = message.get(header) + date = rfc822.parsedate_tz(token) + if date: + time_message = rfc822.mktime_tz(date) + vprint("using valid time found from '%s' header" % header) + return time_message + except (AttributeError, IndexError, ValueError, OverflowError): pass + # as a second-last resort, try the date from the 'From_' line (ugly) + # this will only work from a mbox-format mailbox + if (message.unixfrom): + # Hmm. This will break with full-blown RFC 2822 addr-spec's. + header = message.unixfrom.split(None, 2)[-1] + # Interpret no timezone as localtime + date = rfc822.parsedate_tz(header) + if date: + try: + time_message = rfc822.mktime_tz(date) + vprint("using valid time found from unix 'From_' header") + return time_message + except (ValueError, OverflowError): pass + # the headers have no valid dates -- last resort, try the file timestamp + # this will not work for mbox mailboxes + try: + file_name = get_filename(message) + except AttributeError: + # we are looking at a 'mbox' mailbox - argh! + # Just return the current time - this will never get archived :( + vprint("no valid times found at all -- using current time!") + return time.time() + if not os.path.isfile(file_name): + unexpected_error("mailbox file name '%s' has gone missing" % \ + file_name) + time_message = os.path.getmtime(file_name) + vprint("using valid time found from '%s' last-modification time" % \ + file_name) + return time_message + + +def add_status_headers(message): + """ + Add Status and X-Status headers to a message from a maildir mailbox. + + Maildir messages store their information about being read/replied/etc in + the suffix of the filename rather than in Status and X-Status headers in + the message. 
In order to archive maildir messages into mbox format, it is + nice to preserve this information by putting it into the status headers. + + """ + status = "" + x_status = "" + file_name = get_filename(message) + match = re.search(":2,(.+)$", file_name) + if match: + flags = match.group(1) + for flag in flags: + if flag == "D": # (draft): the user considers this message a draft + pass # does this make any sense in mbox? + elif flag == "F": # (flagged): user-defined 'important' flag + x_status = x_status + "F" + elif flag == "R": # (replied): the user has replied to this message + x_status = x_status + "A" + elif flag == "S": # (seen): the user has viewed this message + status = status + "R" + elif flag == "T": # (trashed): user has moved this message to trash + pass # is this Status: D ? + else: + pass # no whingeing here, although it could be a good experiment + + # files in the maildir 'cur' directory are no longer new, + # they are the same as messages with 'Status: O' headers in mbox + last_dir = os.path.basename(os.path.dirname(file_name)) + if last_dir == "cur": + status = status + "O" + + # Overwrite existing 'Status' and 'X-Status' headers. They add no value in + # maildirs, and we better don't listen to them. + if status: + vprint("converting maildir status into Status header '%s'" % status) + message['Status'] = status + else: + del message['Status'] + if x_status: + vprint("converting maildir status into X-Status header '%s'" % x_status) + message['X-Status'] = x_status + else: + del message['X-Status'] + +def add_status_headers_imap(message, flags): + """Add Status and X-Status headers to a message from an imap mailbox.""" + status = "" + x_status = "" + for flag in flags: + if flag == "\\Draft": # (draft): the user considers this message a draft + pass # does this make any sense in mbox? + elif flag == "\\Flagged": # (flagged): user-defined 'important' flag + x_status = x_status + "F" + elif flag == "\\Answered": # (replied): the user has replied to this message + x_status = x_status + "A" + elif flag == "\\Seen": # (seen): the user has viewed this message + status = status + "R" + elif flag == "\\Deleted": # (trashed): user has moved this message to trash + pass # is this Status: D ? + else: + pass # no whingeing here, although it could be a good experiment + if not "\\Recent" in flags: + status = status + "O" + + # As with maildir folders, overwrite Status and X-Status headers + # if they exist. + vprint("converting imap status (%s)..." 
% " ".join(flags)) + if status: + vprint("generating Status header '%s'" % status) + message['Status'] = status + else: + vprint("not generating Status header") + del message['Status'] + if x_status: + vprint("generating X-Status header '%s'" % x_status) + message['X-Status'] = x_status + else: + vprint("not generating X-Status header") + del message['X-Status'] + +def is_flagged(message): + """return true if the message is flagged important, false otherwise""" + # MH and mbox mailboxes use the 'X-Status' header to indicate importance + x_status = message.get('X-Status') + if x_status and re.search('F', x_status): + vprint("message is important (X-Status header='%s')" % x_status) + return True + file_name = None + try: + file_name = get_filename(message) + except AttributeError: + pass + # maildir mailboxes use the filename suffix to indicate flagged status + if file_name and re.search(":2,.*F.*$", file_name): + vprint("message is important (filename info has 'F')") + return True + vprint("message is not flagged important") + return False + + +def is_unread(message): + """return true if the message is unread, false otherwise""" + # MH and mbox mailboxes use the 'Status' header to indicate read status + status = message.get('Status') + if status and re.search('R', status): + vprint("message has been read (status header='%s')" % status) + return False + file_name = None + try: + file_name = get_filename(message) + except AttributeError: + pass + # maildir mailboxes use the filename suffix to indicate read status + if file_name and re.search(":2,.*S.*$", file_name): + vprint("message has been read (filename info has 'S')") + return False + vprint("message is unread") + return True + + +def sizeof_message(message): + """Return size of message in bytes (octets).""" + assert message + file_name = None + message_size = None + try: + file_name = get_filename(message) + except AttributeError: + pass + if file_name: + # with maildir and MH mailboxes, we can just use the file size + message_size = os.path.getsize(file_name) + else: + # with mbox mailboxes, not so easy + message_size = 0 + if message.unixfrom: + message_size = message_size + len(message.unixfrom) + for header in message.headers: + message_size = message_size + len(header) + message_size = message_size + 1 # the blank line after the headers + start_offset = message.fp.tell() + message.fp.seek(0, 2) # seek to the end of the message + end_offset = message.fp.tell() + message.rewindbody() + message_size = message_size + (end_offset - start_offset) + return message_size + +def is_smaller(message, size): + """Return true if the message is smaller than size bytes, false otherwise""" + assert message + assert size > 0 + message_size = sizeof_message(message) + if message_size < size: + vprint("message is too small (%d bytes), minimum bytes : %d" % \ + (message_size, size)) + return True + else: + vprint("message is not too small (%d bytes), minimum bytes: %d" % \ + (message_size, size)) + return False + + +def should_archive(message): + """Return true if we should archive the message, false otherwise""" + if options.archive_all: + return True + old = False + time_message = guess_delivery_time(message) + if options.date_old_max == None: + old = is_older_than_days(time_message, options.days_old_max) + else: + old = is_older_than_time(time_message, options.date_old_max) + + # I could probably do this in one if statement, but then I wouldn't + # understand it. 
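# The chain of checks that follows, folded into the single boolean expression
# the comment muses about -- written as a standalone helper with explicit
# flags purely to make the logic easy to eyeball (parameter names are
# illustrative; the --all short-circuit is handled before any of this):
def archive_decision(old, flagged, too_small, unread,
                     include_flagged, preserve_unread):
    return (old
            and (include_flagged or not flagged)
            and not too_small
            and (not preserve_unread or not unread))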
+ if not old: + return False + if not options.include_flagged and is_flagged(message): + return False + if options.min_size and is_smaller(message, options.min_size): + return False + if options.preserve_unread and is_unread(message): + return False + return True + + +def is_older_than_time(time_message, max_time): + """Return true if a message is older than the specified time, + false otherwise. + + Arguments: + time_message -- the delivery date of the message measured in seconds + since the epoch + max_time -- maximum time allowed for message + + """ + days_old = (max_time - time_message) / 24 / 60 / 60 + if time_message < max_time: + vprint("message is %.2f days older than the specified date" % days_old) + return True + vprint("message is %.2f days younger than the specified date" % \ + abs(days_old)) + return False + + +def is_older_than_days(time_message, max_days): + """Return true if a message is older than the specified number of days, + false otherwise. + + Arguments: + time_message -- the delivery date of the message measured in seconds + since the epoch + max_days -- maximum number of days before message is considered old + """ + time_now = time.time() + if time_message > time_now: + vprint("warning: message has date in the future") + return False + secs_old_max = (max_days * 24 * 60 * 60) + days_old = (time_now - time_message) / 24 / 60 / 60 + vprint("message is %.2f days old" % days_old) + if ((time_message + secs_old_max) < time_now): + return True + return False + +def build_imap_filter(): + """Return an imap filter string""" + + imap_filter = [] + if options.date_old_max == None: + time_now = time.time() + secs_old_max = (options.days_old_max * 24 * 60 * 60) + time_old = time.gmtime(time_now - secs_old_max) + else: + time_old = time.gmtime(options.date_old_max) + time_str = time.strftime('%d-%b-%Y', time_old) + imap_filter.append("BEFORE %s" % time_str) + + if not options.include_flagged: + imap_filter.append("UNFLAGGED") + if options.min_size: + imap_filter.append("LARGER %d" % options.min_size) + if options.preserve_unread: + imap_filter.append("SEEN") + if options.filter_append: + imap_filter.append(options.filter_append) + + return '(' + string.join(imap_filter, ' ') + ')' + +############### mailbox operations ############### + +def archive(mailbox_name): + """Archives a mailbox. + + Arguments: + mailbox_name -- the filename/dirname/url of the mailbox to be archived + """ + assert mailbox_name + + # strip any trailing slash (we could be archiving a maildir or MH format + # mailbox and somebody was pressing in bash) - we don't want to use + # the trailing slash in the archive name + mailbox_name = mailbox_name.rstrip("/") + assert mailbox_name + + set_signal_handlers() + os.umask(077) # saves setting permissions on mailboxes/tempfiles + + vprint("processing '%s'" % mailbox_name) + is_imap = urlparse.urlparse(mailbox_name)[0] in ('imap', 'imaps') + if not is_imap: + # Check if the mailbox exists, and refuse to mess with other people's + # stuff + try: + fuid = os.stat(mailbox_name).st_uid + except OSError, e: + user_error(str(e)) + else: + if fuid != os.getuid(): + user_error("'%s' is owned by someone else!" 
% mailbox_name) + + old_temp_dir = tempfile.tempdir + try: + # create a temporary directory for us to work in securely + tempfile.tempdir = None + new_temp_dir = tempfile.mkdtemp('archivemail') + assert new_temp_dir + _stale.temp_dir = new_temp_dir + tempfile.tempdir = new_temp_dir + vprint("set tempfile directory to '%s'" % new_temp_dir) + + if is_imap: + vprint("guessing mailbox is of type: imap(s)") + _archive_imap(mailbox_name) + elif os.path.isfile(mailbox_name): + vprint("guessing mailbox is of type: mbox") + _archive_mbox(mailbox_name) + elif os.path.isdir(mailbox_name): + cur_path = os.path.join(mailbox_name, "cur") + new_path = os.path.join(mailbox_name, "new") + if os.path.isdir(cur_path) and os.path.isdir(new_path): + vprint("guessing mailbox is of type: maildir") + _archive_dir(mailbox_name, "maildir") + else: + vprint("guessing mailbox is of type: MH") + _archive_dir(mailbox_name, "mh") + else: + user_error("'%s' is not a normal file or directory" % mailbox_name) + + # remove our special temp directory - hopefully empty + os.rmdir(new_temp_dir) + _stale.temp_dir = None + + finally: + tempfile.tempdir = old_temp_dir + clean_up() + +def _archive_mbox(mailbox_name): + """Archive a 'mbox' style mailbox - used by archive_mailbox()""" + assert mailbox_name + final_archive_name = make_archive_name(mailbox_name) + vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) + check_archive(final_archive_name) + stats = Stats(mailbox_name, final_archive_name) + cache = IdentityCache(mailbox_name) + original = Mbox(path=mailbox_name) + if options.dry_run or options.copy_old_mail: + retain = None + else: + retain = TempMbox(prefix="retain") + archive = prepare_temp_archive() + + original.lock() + msg = original.next() + if not msg and (original.starting_size > 0): + user_error("'%s' is not a valid mbox-format mailbox" % mailbox_name) + if msg and 'X-IMAP' in msg: + # Dovecot and UW-IMAP pseudo message for mailbox meta data + vprint("detected IMAP pseudo message") + if retain: + retain.write(msg) + msg = original.next() + while (msg): + msg_size = sizeof_message(msg) + stats.another_message(msg_size) + vprint("processing message '%s'" % msg.get('Message-ID')) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + if should_archive(msg): + stats.another_archived(msg_size) + if options.delete_old_mail: + vprint("decision: delete message") + else: + vprint("decision: archive message") + if archive: + archive.write(msg) + else: + vprint("decision: retain message") + if retain: + retain.write(msg) + msg = original.next() + vprint("finished reading messages") + if original.starting_size != original.get_size(): + unexpected_error("the mailbox '%s' changed size during reading!" % \ + mailbox_name) + # Write the new archive before modifying the mailbox, to prevent + # losing data if something goes wrong + commit_archive(archive, final_archive_name) + if retain: + pending_changes = original.mbox_file.tell() != retain.mbox_file.tell() + if pending_changes: + retain.commit() + retain.close() + vprint("writing back changed mailbox '%s'..." % \ + original.mbox_file_name) + # Prepare for recovery on error. + # FIXME: tempfile.tempdir is our nested dir. 
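# The mailbox type detection used in archive() above, condensed into a
# standalone helper; the returned labels are illustrative, not values the
# script itself uses:
import os, urlparse

def guess_mailbox_type(name):
    if urlparse.urlparse(name)[0] in ('imap', 'imaps'):
        return "imap"
    if os.path.isfile(name):
        return "mbox"
    if os.path.isdir(name):
        if os.path.isdir(os.path.join(name, "cur")) and \
           os.path.isdir(os.path.join(name, "new")):
            return "maildir"
        return "mh"
    return None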
+ saved_name = "%s/%s.%s.%s-%s-%s" % \ + (tempfile.tempdir, options.script_name, + os.path.basename(original.mbox_file_name), + socket.gethostname(), os.getuid(), + os.getpid()) + try: + original.overwrite_with(retain.mbox_file_name) + original.commit() + except: + retain.saveas(saved_name) + print "Error writing back changed mailbox; saved good copy to " \ + "%s" % saved_name + raise + else: + retain.close() + vprint("no changes to mbox '%s'" % original.mbox_file_name) + retain.remove() + original.unlock() + original.close() + original.reset_timestamps() # Minor race here; mutt has this too. + if not options.quiet: + stats.display() + + +def _archive_dir(mailbox_name, type): + """Archive a 'maildir' or 'MH' style mailbox - used by archive_mailbox()""" + assert mailbox_name + assert type + final_archive_name = make_archive_name(mailbox_name) + vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) + check_archive(final_archive_name) + stats = Stats(mailbox_name, final_archive_name) + delete_queue = [] + + if type == "maildir": + original = mailbox.Maildir(mailbox_name) + elif type == "mh": + original = mailbox.MHMailbox(mailbox_name) + else: + unexpected_error("unknown type: %s" % type) + cache = IdentityCache(mailbox_name) + archive = prepare_temp_archive() + + for msg in original: + if not msg: + vprint("ignoring invalid message '%s'" % get_filename(msg)) + continue + msg_size = sizeof_message(msg) + stats.another_message(msg_size) + vprint("processing message '%s'" % msg.get('Message-ID')) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + if should_archive(msg): + stats.another_archived(msg_size) + if options.delete_old_mail: + vprint("decision: delete message") + else: + vprint("decision: archive message") + if archive: + if type == "maildir": + add_status_headers(msg) + archive.write(msg) + if not options.dry_run and not options.copy_old_mail: + delete_queue.append(get_filename(msg)) + else: + vprint("decision: retain message") + vprint("finished reading messages") + # Write the new archive before modifying the mailbox, to prevent + # losing data if something goes wrong + commit_archive(archive, final_archive_name) + for file_name in delete_queue: + vprint("removing original message: '%s'" % file_name) + try: os.remove(file_name) + except OSError, e: + if e.errno != errno.ENOENT: raise + if not options.quiet: + stats.display() + +def _archive_imap(mailbox_name): + """Archive an imap mailbox - used by archive_mailbox()""" + assert mailbox_name + import imaplib + import cStringIO + import getpass + + vprint("Setting imaplib.Debug = %d" % options.debug_imap) + imaplib.Debug = options.debug_imap + archive = None + imap_username, imap_password, \ + imap_server, imap_server_port, \ + imap_folder_pattern = parse_imap_url(mailbox_name) + if not imap_password: + if options.pwfile: + imap_password = open(options.pwfile).read().rstrip() + else: + if (not os.isatty(sys.stdin.fileno())) or options.quiet: + unexpected_error("No imap password specified") + imap_password = getpass.getpass('IMAP password: ') + + is_ssl = mailbox_name[:5].lower() == 'imaps' + if is_ssl: + vprint("establishing secure connection to server %s, port %s" % + (imap_server, imap_server_port)) + imap_srv = imaplib.IMAP4_SSL(imap_server, imap_server_port) + else: + vprint("establishing connection to server %s, port %s" % + (imap_server, imap_server_port)) + imap_srv = imaplib.IMAP4(imap_server, imap_server_port) + if "AUTH=CRAM-MD5" in imap_srv.capabilities: + vprint("authenticating (cram-md5) to 
server as %s" % imap_username) + result, response = imap_srv.login_cram_md5(imap_username, imap_password) + elif not "LOGINDISABLED" in imap_srv.capabilities: + vprint("logging in to server as %s" % imap_username) + result, response = imap_srv.login(imap_username, imap_password) + else: + user_error("imap server %s has login disabled (hint: " + "try ssl/imaps)" % imap_server) + + mailboxes = imap_find_mailboxes(imap_srv, imap_folder_pattern) + for imap_folder in mailboxes: + final_archive_name = make_archive_name(imap_folder) + vprint("archiving mailbox '%s' on IMAP server '%s' to '%s' ..." % + (imap_folder, imap_server, final_archive_name)) + check_archive(final_archive_name) + cur_mailbox = mailbox_name[:-len(imap_folder_pattern)] + imap_folder + stats = Stats(cur_mailbox, final_archive_name) + cache = IdentityCache(cur_mailbox) + + imap_smart_select(imap_srv, imap_folder) + total_msg_count = int(imap_srv.response("EXISTS")[1][0]) + vprint("folder has %d message(s)" % total_msg_count) + + # IIUIC the message sequence numbers are stable for the whole session, since + # we just send SEARCH, FETCH and STORE commands, which should prevent the + # server from sending untagged EXPUNGE responses -- see RFC 3501 (IMAP4rev1) + # 7.4.1 and RFC 2180 (Multi-Accessed Mailbox Practice). + # Worst thing should be that we bail out FETCHing a message that has been + # deleted. + + if options.archive_all: + message_list = [str(n) for n in range(1, total_msg_count+1)] + else: + imap_filter = build_imap_filter() + vprint("imap filter: '%s'" % imap_filter) + vprint("searching messages matching criteria") + result, response = imap_srv.search(None, imap_filter) + if result != 'OK': unexpected_error("imap search failed; server says '%s'" % + response[0]) + if response[0] is not None: + # response is a list with a single item, listing message + # sequence numbers like ['1 2 3 1016'] + message_list = response[0].split() + else: + # Broken server has sent no untagged response; assume empty result set. + message_list = [] + vprint("%d messages are matching filter" % len(message_list)) + + # First, gather data for the statistics. + if total_msg_count > 0 and not options.quiet: + vprint("fetching size of messages...") + result, response = imap_srv.fetch('1:*', '(RFC822.SIZE)') + if result != 'OK': unexpected_error("Failed to fetch message sizes; " + "server says '%s'" % response[0]) + # response is a list with entries like '1016 (RFC822.SIZE 3118)', + # where the first number is the message sequence number, the second is + # the size. + for x in response: + m = imapsize_re.match(x) + msn, msg_size = m.group('msn'), int(m.group('size')) + stats.another_message(msg_size) + if msn in message_list: + stats.another_archived(msg_size) + + if not options.dry_run: + if not options.delete_old_mail: + archive = prepare_temp_archive() + vprint("fetching messages...") + for msn in message_list: + # Fetching message flags and body together always finds \Seen + # set. To check \Seen, we must fetch the flags first. 
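# How a single '(RFC822.SIZE)' FETCH response line is picked apart in the
# statistics loop above. imapsize_re lives near the top of the script, so the
# pattern here is only an illustrative stand-in for it:
import re

size_re_demo = re.compile(r'^(?P<msn>\d+) \(RFC822\.SIZE (?P<size>\d+)\)')
m = size_re_demo.match('1016 (RFC822.SIZE 3118)')
print m.group('msn'), int(m.group('size'))    # -> 1016 3118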
+ result, response = imap_srv.fetch(msn, '(FLAGS)') + if result != 'OK': unexpected_error("Failed to fetch message " + "flags; server says '%s'" % response[0]) + msg_flags = imaplib.ParseFlags(response[0]) + result, response = imap_srv.fetch(msn, '(RFC822)') + if result != 'OK': unexpected_error("Failed to fetch message; " + "server says '%s'" % response[0]) + msg_str = response[0][1].replace("\r\n", os.linesep) + msg = rfc822.Message(cStringIO.StringIO(msg_str)) + vprint("processing message '%s'" % msg.get('Message-ID')) + add_status_headers_imap(msg, msg_flags) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + archive.write(msg) + commit_archive(archive, final_archive_name) + if not options.copy_old_mail: + vprint("Deleting %s messages" % len(message_list)) + # do not delete more than a certain number of messages at a time, + # because the command length is limited. This avoids that servers + # terminate the connection with EOF or TCP RST. + max_delete = 100 + for i in range(0, len(message_list), max_delete): + result, response = imap_srv.store( \ + string.join(message_list[i:i+max_delete], ','), + '+FLAGS.SILENT', '\\Deleted') + if result != 'OK': unexpected_error("Error while deleting " + "messages; server says '%s'" % response[0]) + vprint("Closing mailbox.") + imap_srv.close() + if not options.quiet: + stats.display() + vprint("Terminating connection.") + imap_srv.logout() + + +############### IMAP functions ############### + + +# First, some IMAP modified UTF-7 support functions. + +# The modified BASE64 alphabet. 64 characters, each one encodes 6 Bit. +mb64alpha = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+,' + +def isprint_ascii(char): + """Test for an ASCII printable character.""" + return 0x20 <= ord(char) and ord(char) <= 0x7e + +def mod_utf7_encode(ustr): + """Encode unicode string object in modified UTF-7.""" + + def mb64_encode(tomb64): + """Encode unicode string object as a modified UTF-7 shifted sequence + in modified BASE64.""" + u16be = tomb64.encode('utf_16_be') + mb64 = "" + # Process 24-bit blocks, encoding them in 6-bit steps. + for block in [u16be[i:i+3] for i in range(0, len(u16be), 3)]: + idx = 0 + shift = 2 + for octet in block: + mb64 += mb64alpha[idx | (ord(octet) >> shift)] + idx = (ord(octet) << (6-shift)) & 0x3f + shift += 2 + mb64 += mb64alpha[idx] + return mb64 + + mu7 = "" + tomb64 = u"" + for c in ustr: + if not isprint_ascii(c): + tomb64 += c + continue + if tomb64: + mu7 += '&' + mb64_encode(tomb64) + '-' + tomb64 = u"" + if c == '&': + mu7 += '&-' + else: + mu7 += str(c) + if tomb64: + mu7 += '&' + mb64_encode(tomb64) + '-' + return mu7 + +def mod_utf7_decode(mu7): + """Decode a modified UTF-7 encoded string to an unicode string object.""" + + def mb64_decode(mb64): + """Decode a modified UTF-7 shifted sequence from modified BASE64 to an + unicode string object.""" + if not mb64: + # A null shift '&-' decodes to '&'. + return u"&" + u16be = "" + # Process blocks of 4 BASE64 characters, decoding each char to 6 bits. 
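# What mod_utf7_encode() above produces for a mailbox name containing a
# non-ASCII character, using a German "Drafts" folder name as the example:
print mod_utf7_encode(u"Entw\xfcrfe")    # -> Entw&APw-rfe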
+ for block in [mb64[i:i+4] for i in range(0, len(mb64), 4)]: + carrybits = mb64alpha.index(block[0]) << 2 + shift = 4 + for char in block[1:]: + bits = mb64alpha.index(char) + u16be += chr(carrybits | (bits >> shift)) + carrybits = (bits << (8-shift)) & 0xff + shift -= 2 + if carrybits: + raise ValueError("Ill-formed modified UTF-7 string: " + "trailing bits in shifted sequence") + return u16be.decode('utf_16_be') + + ustr = u"" + mb64 = "" + inmb64 = False + for octet in mu7: + if not isprint_ascii(octet): + raise ValueError("Ill-formed modified UTF-7 string: " + "contains non-printable ASCII" % ord(octet)) + if not inmb64: + if octet == '&': + inmb64 = True + else: + ustr += octet + continue + + if octet in mb64alpha: + mb64 += octet + continue + + if octet == '-': + inmb64 = False + ustr += mb64_decode(mb64) + mb64 = "" + else: + break # This triggers the exception below. + + if inmb64: + raise ValueError("Ill-formed modified UTF-7 string: " + "unterminated BASE64 sequence") + return ustr + + +def imap_quote(astring): + """Quote an IMAP `astring' string (see RFC 3501, section "Formal Syntax").""" + if astring.startswith('"') and astring.endswith('"'): + quoted = astring + else: + quoted = '"' + astring.replace('\\', '\\\\').replace('"', '\\"') + '"' + return quoted + +def imap_unquote(quoted): + """Un-quote a `quoted' IMAP string (see RFC 3501, section "Formal Syntax").""" + if not (quoted.startswith('"') and quoted.endswith('"')): + unquoted = quoted + else: + unquoted = re.sub(r'\\(\\|")', r'\1', quoted[1:-1]) + return unquoted + +def parse_imap_url(url): + """Parse IMAP URL and return username, password (if appliciable), servername + and foldername.""" + + def split_qstr(string, delim): + """Split string once at delim, keeping quoted substring intact. + Strip and unescape quotes where necessary.""" + rm = re.match(r'"(.+?(?"."|NIL)', response[0]) + if not m: + unexpected_error("imap_getdelim(): cannot parse '%s'" % response[0]) + delim = m.group('delim').strip('"') + vprint("Found mailbox hierarchy delimiter: '%s'" % delim) + if delim == "NIL": + return None + return delim + + +def imap_get_namespace(srv): + """Return the IMAP namespace prefixes and hierarchy delimiters.""" + assert 'NAMESPACE' in srv.capabilities + result, response = srv.namespace() + if result != 'OK': + unexpected_error("Cannot retrieve IMAP namespace; server says: '%s'" + % response[0]) + vprint("NAMESPACE response: %s" % repr(response[0])) + # Typical response is e.g. + # ['(("INBOX." ".")) NIL (("#shared." ".")("shared." "."))'] or + # ['(("" ".")) NIL NIL'], see RFC 2342. + # Make a reasonable guess parsing this beast. + try: + m = re.match(r'\(\("([^"]*)" (?:"(.)"|NIL)', response[0]) + nsprefix, hdelim = m.groups() + except: + print "Cannot parse IMAP NAMESPACE response %s" % repr(response) + raise + return nsprefix, hdelim + + +def imap_smart_select(srv, mailbox): + """Select the given mailbox on the IMAP server.""" + roflag = options.dry_run or options.copy_old_mail + # Work around python bug #1277098 (still pending in python << 2.5) + if not roflag: + roflag = None + if roflag: + vprint("examining imap folder '%s' read-only" % mailbox) + else: + vprint("selecting imap folder '%s'" % mailbox) + imap_mailbox = mod_utf7_encode(mailbox.decode(userencoding)) + result, response = srv.select(imap_quote(imap_mailbox), roflag) + if result != 'OK': + unexpected_error("selecting '%s' failed; server says: '%s'." 
\ + % (mailbox, response[0])) + if not roflag: + # Sanity check that we don't silently fail to delete messages. + # As to the following indices: IMAP4.response(key) returns + # a tuple (key, ['']) if the key is found, (key, [None]) + # otherwise. Imaplib just *loves* to nest trivial lists! + permflags = srv.response("PERMANENTFLAGS")[1][0] + if permflags: + permflags = permflags.strip('()').lower().split() + if not '\\deleted' in permflags: + unexpected_error("Server doesn't allow deleting messages in " \ + "'%s'." % mailbox) + elif "IMAP4REV1" in srv.capabilities: + vprint("Suspect IMAP4rev1 server, doesn't send PERMANENTFLAGS " \ + "upon SELECT") + + +def imap_find_mailboxes(srv, mailbox): + """Find matching mailboxes on the IMAP server, correcting an invalid + mailbox path if possible.""" + for curbox in imap_guess_mailboxnames(srv, mailbox): + if '%' in curbox or '*' in curbox: + vprint("Looking for mailboxes matching '%s'..." % curbox) + else: + vprint("Looking for mailbox '%s'..." % curbox) + curbox = mod_utf7_encode(curbox.decode(userencoding)) + result, response = srv.list(pattern=imap_quote(curbox)) + if result != 'OK': + unexpected_error("LIST command failed; " \ + "server says: '%s'" % response[0]) + # Say we queried for the mailbox "foo". + # Upon success, response is e.g. ['(\\HasChildren) "." foo']. + # Upon failure, response is [None]. Funky imaplib! + if response[0] != None: + break + else: + user_error("Cannot find mailbox '%s' on server." % mailbox) + mailboxes = [] + for mailbox_data in response: + if not mailbox_data: # imaplib sometimes returns an empty string + continue + try: + m = re.match(r'\((.*?)\) (?:"."|NIL) (.+)', mailbox_data) + except TypeError: + # May be a literal. For literals, imaplib returns a tuple like + # ('(\\HasNoChildren) "." {12}', 'with "quote"'). 
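# The LIST-reply pattern tried above, applied to a typical non-literal
# response line for a mailbox named 'foo':
import re

m = re.match(r'\((.*?)\) (?:"."|NIL) (.+)', r'(\HasChildren) "." foo')
print m.groups()    # -> ('\\HasChildren', 'foo')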
+ m = re.match(r'\((.*?)\) (?:"."|NIL) \{\d+\}$', mailbox_data[0]) + if m is None: + unexpected_error("cannot parse LIST reply %s" % + (mailbox_data,)) + attrs = m.group(1) + name = mailbox_data[1] + else: + attrs, name = m.groups() + name = imap_unquote(name) + try: + name = mod_utf7_decode(name) + except ValueError: + vprint("Mailbox name '%s' returned by server doesn't look like " + "modified UTF-7" % name) + name = name.decode('utf-8') + name = name.encode(userencoding) + if '\\noselect' in attrs.lower().split(): + vprint("skipping not selectable mailbox '%s'" % name) + continue + vprint("Found mailbox '%s'" % name) + mailboxes.append(name) + if not mailboxes: + user_error("No matching folder is selectable") + return mailboxes + + +def imap_guess_mailboxnames(srv, mailbox): + """Return a list of possible real IMAP mailbox names in descending order + of preference, compiled by prepending an IMAP namespace prefix if necessary, + and by translating hierarchy delimiters.""" + if 'NAMESPACE' in srv.capabilities: + nsprefix, hdelim = imap_get_namespace(srv) + else: + vprint("Server doesn't support NAMESPACE command.") + nsprefix = "" + hdelim = imap_getdelim(srv) + vprint("IMAP namespace prefix: '%s', hierarchy delimiter: '%s'" % \ + (nsprefix, hdelim)) + if mailbox.upper() == "INBOX" or \ + (hdelim is not None and mailbox.upper().startswith("INBOX" + hdelim)): + # INBOX is not a real mailbox name, so namespace prefixes do not apply + # to INBOX and its children + boxnames = [mailbox] + elif mailbox.startswith(nsprefix): + boxnames = [mailbox] + else: + boxnames = [nsprefix + mailbox] + if os.path.sep in mailbox and hdelim is not None: + mailbox = mailbox.replace(os.path.sep, hdelim) + if mailbox.upper().startswith("INBOX" + hdelim): + boxnames.append(mailbox) + else: + if mailbox.startswith(nsprefix): + boxnames.append(mailbox) + if nsprefix: + boxnames.append(nsprefix + mailbox) + return boxnames + + +############### misc functions ############### + + +def set_signal_handlers(): + """set signal handlers to clean up temporary files on unexpected exit""" + # Make sure we clean up nicely - we don't want to leave stale dotlock + # files about if something bad happens to us. This is quite + # important, even though procmail will delete stale files after a while. + signal.signal(signal.SIGHUP, clean_up_signal) # signal 1 + # SIGINT (signal 2) is handled as a python exception + signal.signal(signal.SIGQUIT, clean_up_signal) # signal 3 + signal.signal(signal.SIGTERM, clean_up_signal) # signal 15 + + +def clean_up(): + """Delete stale files""" + vprint("cleaning up ...") + _stale.clean() + + +def clean_up_signal(signal_number, stack_frame): + """Delete stale files -- to be registered as a signal handler. 
+ + Arguments: + signal_number -- signal number of the terminating signal + stack_frame -- the current stack frame + + """ + # this will run the above clean_up(), since unexpected_error() + # will abort with sys.exit() and clean_up will be registered + # at this stage + unexpected_error("received signal %s" % signal_number) + +def prepare_temp_archive(): + """Create temporary archive mbox.""" + if options.dry_run or options.delete_old_mail: + return None + if options.no_compress: + return TempMbox() + else: + return CompressedTempMbox() + +def commit_archive(archive, final_archive_name): + """Finalize temporary archive and write it to its final destination.""" + if not options.no_compress: + final_archive_name = final_archive_name + '.gz' + if archive: + archive.close() + if not archive.empty: + final_archive = ArchiveMbox(final_archive_name) + final_archive.lock() + try: + final_archive.append(archive.mbox_file_name) + final_archive.commit() + finally: + final_archive.unlock() + final_archive.close() + archive.remove() + +def make_archive_name(mailbox_name): + """Derive archive name and (relative) path from the mailbox name.""" + # allow the user to embed time formats such as '%B' in the archive name + if options.date_old_max == None: + tm = time.localtime(time.time() - options.days_old_max*24*60*60) + else: + tm = time.localtime(options.date_old_max) + prefix = suffix = "" + if options.archive_name: + archive_head = "" + archive_tail = time.strftime(options.archive_name, tm) + else: + if options.archive_prefix is None and options.archive_suffix is None: + suffix = options.archive_default_suffix + else: + if options.archive_prefix: + prefix = time.strftime(options.archive_prefix, tm) + if options.archive_suffix: + suffix = time.strftime(options.archive_suffix, tm) + archive_head, archive_tail = os.path.split(mailbox_name) + if not prefix: + # Don't create hidden archives, e.g. when processing Maildir++ + # subfolders + archive_tail = archive_tail.lstrip('.') + if options.output_dir: + archive_head = options.output_dir + archive_name = os.path.join(archive_head, prefix + archive_tail + suffix) + return archive_name + +def check_sane_destdir(dir): + """Do a very primitive check if the given directory looks like a reasonable + destination directory and bail out if it doesn't.""" + assert dir + if not os.path.isdir(dir): + user_error("output directory does not exist: '%s'" % dir) + if not os.access(dir, os.W_OK): + user_error("no write permission on output directory: '%s'" % dir) + +def check_archive(archive_name): + """Check if existing archive files are (not) compressed as expected and + check if we can work with the destination directory.""" + compressed_archive = archive_name + ".gz" + if options.no_compress: + if os.path.isfile(compressed_archive): + user_error("There is already a file named '%s'!\n" + "Have you been previously compressing this archive?\n" + "You probably should uncompress it manually, and try running me " + "again." % compressed_archive) + elif os.path.isfile(archive_name): + user_error("There is already a file named '%s'!\n" + "Have you been reading this archive?\n" + "You probably should re-compress it manually, and try running me " + "again." 
% archive_name) + dest_dir = os.path.dirname(archive_name) + if not dest_dir: + dest_dir = os.getcwd() + check_sane_destdir(dest_dir) + +def nice_size_str(size): + """Return given size in bytes as '12kB', '1.2MB'""" + kb = size / 1024.0 + mb = kb / 1024.0 + if mb >= 1.0: return str(round(mb, 1)) + 'MB' + if kb >= 1.0: return str(round(kb)) + 'kB' + return str(size) + 'B' + + +def get_filename(msg): + """If the given rfc822.Message can be identified with a file (no mbox), + return the filename, otherwise raise AttributeError.""" + try: + return msg.fp.name + except AttributeError: + # Ugh, that's ugly. msg.fp is not a plain file, it may be an + # instance of + # a. mailbox._Subfile + # (msg from mailbox.UnixMailbox, Python <= 2.4) + # File object is msg.fp.fp, we don't want that + # b. mailbox._PartialFile, subclass of mailbox._ProxyFile + # (msg from mailbox.UnixMailbox, Python >= 2.5) + # File object is msg.fp._file, we don't want that + # c. mailbox._ProxyFile + # (msg from mailbox.Maildir, Python >= 2.5) + # File object is msg.fp._file, we do want that. + if msg.fp.__class__ == mailbox._ProxyFile: + assert hasattr(mailbox, "_PartialFile") + return msg.fp._file.name + raise + +def safe_open_create(filename): + """Create and open a file in a NFSv2-safe way, and return a r/w file descriptor. + The new file is created with mode 600.""" + # This is essentially a simplified version of the dotlocking function. + vprint("Creating file '%s'" % filename) + dir, basename = os.path.split(filename) + # We rely on tempfile.mkstemp to create files safely and with 600 mode. + fd, pre_name = tempfile.mkstemp(prefix=basename+".pre-", dir=dir) + try: + try: + os.link(pre_name, filename) + except OSError, e: + if os.fstat(fd).st_nlink == 2: + pass + else: + raise + finally: + os.unlink(pre_name) + return fd + +def safe_open_existing(filename): + """Safely open an existing file, and return a r/w file descriptor.""" + lst = os.lstat(filename) + if stat.S_ISLNK(lst.st_mode): + unexpected_error("file '%s' is a symlink." % filename) + fd = os.open(filename, os.O_RDWR) + fst = os.fstat(fd) + if fst.st_nlink != 1: + unexpected_error("file '%s' has %d hard links." % \ + (filename, fst.st_nlink)) + if stat.S_ISDIR(fst.st_mode): + unexpected_error("file '%s' is a directory." % filename) + for i in stat.ST_DEV, stat.ST_INO, stat.ST_UID, stat.ST_GID, stat.ST_MODE, stat.ST_NLINK: + if fst[i] != lst[i]: + unexpected_error("file status changed unexpectedly") + return fd + +def safe_open(filename): + """Safely open a file, creating it if it doesn't exist, and return a + r/w file descriptor.""" + # This borrows from postfix code. + vprint("Opening archive...") + try: + fd = safe_open_existing(filename) + except OSError, e: + if e.errno != errno.ENOENT: raise + fd = safe_open_create(filename) + return fd + +# this is where it all happens, folks +if __name__ == '__main__': + main() diff --git a/archivemail.xml b/archivemail.xml new file mode 100644 index 0000000..9bc5642 --- /dev/null +++ b/archivemail.xml @@ -0,0 +1,794 @@ + + +lockf +2'> + + +gzip +1'> + + +procmail +1'> + + +python +1'> + + +crontab +5'> + + +mbox +5'> +]> + + + +5 July 2011 + + +archivemail +1 +archivemail user manual +archivemail 0.9.0 + + + +archivemail +archive and compress your old email + + + + + +archivemail + +MAILBOX + + + + + +Description + + +archivemail is a tool for archiving and compressing old +email in mailboxes. 
+By default it will read the mailbox MAILBOX, moving +messages that are older than the specified number of days (180 by default) to +a &mbox;-format mailbox in the same directory that is compressed with &gzip;. +It can also just delete old email rather than archive it. + + + +By default, archivemail derives the archive filename from +the mailbox name by appending an _archive suffix to the +mailbox name. For example, if you run archivemail on a +mailbox called exsouthrock, the archive will be created +with the filename exsouthrock_archive.gz. +This default behavior can be overridden with command line options, choosing +a custom suffix, a prefix, or a completely custom name for the archive. + + + +archivemail supports reading IMAP, +Maildir, MH and +mbox-format mailboxes, but always writes +mbox-format archives. + + + +Messages that are flagged important are not archived or deleted unless +explicitly requested with the option. +Also, archivemail can be configured not to archive unread +mail, or to only archive messages larger than a specified size. + + + +To archive an IMAP-format mailbox, use the format +imap://username:password@server/mailbox to specify +the mailbox. +archivemail will expand wildcards in +IMAP mailbox names according to +RFC 3501, which says: The +character "*" is a wildcard, and matches zero or more characters at this +position. The character "%" is similar to "*", but it does not match a +hierarchy delimiter. +You can omit the password from the URL; use the + option to make archivemail read +the password from a file, or alternatively just enter it upon request. +If the option is set, archivemail +does not look for a password in the URL, and the colon is +not considered a delimiter. +Substitute imap with +imaps, and archivemail will +establish a secure SSL connection. +See below for more IMAP peculiarities. + + + + +Options + + + + + + +Archive messages older than NUM +days. The default is 180. This option is incompatible with the + option below. + + + + + + +Archive messages older than DATE. +DATE can be a date string in ISO format (eg +2002-04-23), Internet format (eg 23 Apr +2002) or Internet format with full month names (eg +23 April 2002). Two-digit years are not supported. +This option is incompatible with the option above. + + + + + + +Use the directory name PATH to +store the mailbox archives. The default is the same directory as the mailbox +to be read. + + + + + + +Read IMAP password from file +FILE instead of from the command line. Note +that this will probably not work if you are archiving folders from +more than one IMAP account. + + + + + + +Append STRING to the +IMAP filter string. +For IMAP wizards. + + + + + + +Prefix NAME to the archive name. +NAME is expanded by the &python; function +time.strftime(), which means that you can specify special +directives in NAME to make an archive named after +the archive cut-off date. +See the discussion of the option for a list of valid +strftime() directives. +The default is not to add a prefix. + + + + + + + +Use the suffix NAME to create the filename used for +archives. The default is _archive, unless a prefix is +specified. + + +Like a prefix, the suffix NAME is expanded by the +&python; function time.strftime() with the archive +cut-off date. time.strftime() understands the following +directives: + + %a + + Locale's abbreviated weekday name. + + + %A + + Locale's full weekday name. + + + %b + + Locale's abbreviated month name. + + + %B + + Locale's full month name. 
+ + + %c + + Locale's appropriate date and time representation. + + + %d + + Day of the month as a decimal number [01,31]. + + + %H + + Hour (24-hour clock) as a decimal number [00,23]. + + + %I + + Hour (12-hour clock) as a decimal number [01,12]. + + + %j + + Day of the year as a decimal number [001,366]. + + + %m + + Month as a decimal number [01,12]. + + + %M + + Minute as a decimal number [00,59]. + + + %p + + Locale's equivalent of either AM or PM. + + + %S + + Second as a decimal number [00,61]. (1) + + + %U + + Week number of the year (Sunday as the first day of the week) + as a decimal number [00,53]. All days in a new year preceding + the first Sunday are considered to be in week 0. + + + %w + + Weekday as a decimal number [0(Sunday),6]. + + + %W + + Week number of the year (Monday as the first day of the week) + as a decimal number [00,53]. All days in a new year preceding + the first Sunday are considered to be in week 0. + + + %x + + Locale's appropriate date representation. + + + %X + + Locale's appropriate time representation. + + + %y + + Year without century as a decimal number [00,99]. + + + %Y + + Year with century as a decimal number. + + + %Z + + Time zone name (or by no characters if no time zone exists). + + + %% + + A literal % character. + + + + + + + + + +Use NAME as the archive name, +ignoring the name of the mailbox that is archived. +Like prefixes and suffixes, NAME is expanded by +time.strftime() with the archive cut-off date. +Because it hard-codes the archive name, this option cannot be used when +archiving multiple mailboxes. + + + + + + +Only archive messages that are NUM +bytes or greater. + + + + + + + +Don't write to any files -- just show what would have been done. This is +useful for testing to see how many messages would have been archived. + + + + + + + +Do not archive any messages that have not yet been read. +archivemail determines if a message in a +mbox-format or MH-format mailbox has +been read by looking at the Status header (if it exists). +If the status header is equal to RO or +OR then archivemail +assumes the message has been read. +archivemail determines if a maildir +message has been read by looking at the filename. +If the filename contains an S after +:2, then it assumes the message has been read. + + + + + + + + +Do not mangle lines in message bodies beginning with +From . +When archiving a message from a mailbox not in mbox +format, by default archivemail mangles such lines by +prepending a > to them, since mail user +agents might otherwise interpret these lines as message separators. +Messages from mbox folders are never mangled. See &mbox; +for more information. + + + + + + + + +Delete rather than archive old mail. Use this option with caution! + + + + + + + + +Copy rather than archive old mail. +Creates an archive, but the archived messages are not deleted from the +originating mailbox, which is left unchanged. +This is a complement to the option, and mainly +useful for testing purposes. +Note that multiple passes will create duplicates, since messages are blindly +appended to an existing archive. + + + + + + + + +Archive all messages, without distinction. + + + + + + + + +Normally messages that are flagged important are not archived or deleted. If +you specify this option, these messages can be archived or deleted just like +any other message. + + + + + + + + +Do not compress any archives. + + + + + + + + +Warn about duplicate Message-IDs that appear in the input +mailbox. 
+ + + + + + +Reports lots of extra debugging information about what is going on. + + + + + + + + +Set IMAP debugging level. This makes +archivemail dump its conversation with the +IMAP server and some internal IMAP +processing to stdout. Higher values for +NUM give more elaborate output. Set +NUM to 4 to see all exchanged +IMAP commands. (Actually, NUM +is just passed literally to imaplib.Debug.) + + + + + + + +Turns on quiet mode. Do not print any statistics about how many messages were +archived. This should be used if you are running +archivemail from cron. + + + + + + + +Display the version of archivemail and exit. + + + + + + + +Display brief summary information about how to run +archivemail. + + + + + + + +Notes + + +archivemail requires &python; version 2.3 or later. +When reading an mbox-format mailbox, +archivemail will create a lockfile with the extension +.lock so that &procmail; will not +deliver to the mailbox while it is being processed. It will also create an +advisory lock on the mailbox using &lockf;. The archive is locked in the same +way when it is updated. +archivemail will also complain and abort if a 3rd-party +modifies the mailbox while it is being read. + + + +archivemail will always attempt to preserve the last-access +and last-modify times of the input mailbox. Archive mailboxes are always +created with a mode of 0600. +If archivemail finds a pre-existing archive mailbox it will +append rather than overwrite that archive. +archivemail will refuse to operate on mailboxes that are +symbolic links. + + + +archivemail attempts to find the delivery date of a message +by looking for valid dates in the following headers, in order of precedence: +Delivery-date, +Received, +Resent-Date and +Date. +If it cannot find any valid date in these headers, it will use the +last-modified file timestamp on MH and +Maildir format mailboxes, or the date on the +From_ line on mbox-format mailboxes. + + + +When archiving mailboxes with leading dots in the name, +archivemail will strip the dots off the archive name, so +that the resulting archive file is not hidden. +This is not done if the or + option is used. +Should there really be mailboxes distinguished only by leading dots in the +name, they will thus be archived to the same archive file by default. + + + +A conversion from other formats to &mbox; will silently overwrite existing +Status and X-Status message headers. + + + +<acronym>IMAP</acronym> + +When archivemail processes an IMAP +folder, all messages in that folder will have their \Recent +flag unset, and they will probably not show up as new in your +user agent later on. +There is no way around this, it's just how IMAP works. +This does not apply, however, if you run archivemail with +the options or . + + +archivemail relies on server-side searches to determine the +messages that should be archived. +When matching message dates, IMAP servers refer to server +internal message dates, and these may differ from both delivery time of a +message and its Date header. +Also, there exist broken servers which do not implement server side searches. + +<acronym>IMAP</acronym> <acronym>URL</acronym>s + +archivemail's IMAP +URL parser was written with the RFC 2882 +(Internet Message Format) rules for the +local-part of email addresses in mind. +So, rather than enforcing an URL-style encoding of +non-ascii and reserved characters, it allows you to +double-quote the username and password. 
+If your username or password contains the delimiter characters +@ or :, just quote it like this: +imap://"username@bogus.com":"password"@imap.bogus.com/mailbox. +You can use a backslash to escape double-quotes that are part of a quoted +username or password. +Note that quoting only a substring will not work, and be aware that your shell +will probably remove unprotected quotes or backslashes. + + +Similarly, there is no need to percent-encode non-ascii +characters in IMAP mailbox names. +As long as your locale is configured properly, archivemail +should handle these without problems. +Note, however, that due to limitations of the IMAP +protocol, non-ascii characters do not mix well with +wildcards in mailbox names. + + +archivemail tries to be smart when handling mailbox paths. +In particular, it will automatically add an IMAP +NAMESPACE prefix to the mailbox path if necessary; and if +you are archiving a subfolder, you can use the slash as a path separator +instead of the IMAP server's internal representation. + + + + + + +Examples + + + +To archive all messages in the mailbox debian-user that +are older than 180 days to a compressed mailbox called +debian-user_archive.gz in the current directory: + +bash$ archivemail debian-user + + + + + + +To archive all messages in the mailbox debian-user that +are older than 180 days to a compressed mailbox called +debian-user_October_2001.gz (where the current month and +year is April, 2002) in the current directory: + +bash$ archivemail --suffix '_%B_%Y' debian-user + + + + + + +To archive all messages in the mailbox cm-melb that +are older than the first of January 2002 to a compressed mailbox called +cm-melb_archive.gz in the current directory: + +bash$ archivemail --date='1 Jan 2002' cm-melb + + + + + + +Exactly the same as the above example, using an ISO date +format instead: + +bash$ archivemail --date=2002-01-01 cm-melb + + + + + + +To delete all messages in the mailbox spam that +are older than 30 days: + +bash$ archivemail --delete --days=30 spam + + + + + + +To archive all read messages in the mailbox incoming that +are older than 180 days to a compressed mailbox called +incoming_archive.gz in the current directory: + +bash$ archivemail --preserve-unread incoming + + + + + + +To archive all messages in the mailbox received that +are older than 180 days to an uncompressed mailbox called +received_archive in the current directory: + +bash$ archivemail --no-compress received + + + + + + +To archive all mailboxes in the directory $HOME/Mail +that are older than 90 days to compressed mailboxes in the +$HOME/Mail/Archive directory: + +bash$ archivemail -d90 -o $HOME/Mail/Archive $HOME/Mail/* + + + + + + +To archive all mails older than 180 days from the given +IMAP INBOX to a compressed mailbox +INBOX_archive.gz in the +$HOME/Mail/Archive directory, quoting the password and +reading it from the environment variable PASSWORD: + + + +bash$ archivemail -o $HOME/Mail/Archive imaps://user:'"'$PASSWORD'"'@example.org/INBOX + + +Note the protected quotes. + + + + + +To archive all mails older than 180 days in subfolders of foo on the given IMAP +server to corresponding archives in the current working directory, reading the +password from the file ~/imap-pass.txt: + + +bash$ archivemail --pwfile=~/imap-pass.txt imaps://user@example.org/foo/* + + + + + +Tips + +Probably the best way to run archivemail is from your +&crontab; file, using the option. +Don't forget to try the and perhaps the + option for non-destructive testing. 
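+
+The strftime() patterns accepted by the suffix, prefix and archive-name
+options are expanded relative to the archive cut-off date, not the current
+date. The following minimal &python; sketch mirrors the expansion that the
+archivemail test suite uses to compute expected archive names; it is an
+illustration only, not archivemail's exact internals:
+
+import time
+
+days_old_max = 180
+cutoff = time.time() - days_old_max * 24 * 60 * 60
+suffix = time.strftime('_%B_%Y', time.localtime(cutoff))
+# Run in April 2002, this prints debian-user_October_2001,
+# matching the --suffix example above.
+print 'debian-user' + suffix
+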
+ + + + +Exit Status +Normally the exit status is 0. Nonzero indicates an unexpected error. + + + + +Bugs + +If an IMAP mailbox path contains slashes, the archive +filename will be derived from the basename of the mailbox. +If the server's folder separator differs from the Unix slash and is used in +the IMAP URL, however, the whole path +will be considered the basename of the mailbox. +E.g. the two URLs +imap://user@example.com/folder/subfolder and +imap://user@example.com/folder.subfolder will be +archived in subfolder_archive.gz and +folder.subfolder_archive.gz, respectively, although they +might refer to the same IMAP mailbox. + + +archivemail does not support reading +MMDF or Babyl-format mailboxes. In fact, +it will probably think it is reading an mbox-format mailbox +and cause all sorts of problems. + + + +archivemail is still too slow, but if you are running from +&crontab; you won't care. Archiving maildir-format +mailboxes should be a lot quicker than mbox-format +mailboxes since it is less painful for the original mailbox to be +reconstructed after selective message removal. + + + + +See Also + + &mbox; + &crontab; + &python; + &procmail; + + + + +<acronym>Url</acronym> +The archivemail home page is currently hosted at +sourceforge + + + + +Author + This manual page was written by Paul Rodger <paul at paulrodger +dot com>. Updated and supplemented by Nikolaus Schulz +microschulz@web.de + + + diff --git a/db2html.xsl b/db2html.xsl new file mode 100644 index 0000000..d3be5f6 --- /dev/null +++ b/db2html.xsl @@ -0,0 +1,10 @@ + + + + manpage.css + +

archivemail

+
+
+
diff --git a/db2man.xsl b/db2man.xsl new file mode 100644 index 0000000..bcc3f61 --- /dev/null +++ b/db2man.xsl @@ -0,0 +1,29 @@ + + + + + + + + + .TP + + + + + + + + + + + + + + + + + + + diff --git a/examples/archivemail_all b/examples/archivemail_all new file mode 100644 index 0000000..30cb05e --- /dev/null +++ b/examples/archivemail_all @@ -0,0 +1,31 @@ +#!/bin/sh +# +# This is an example shell script I use from my crontab(5) file to selectively +# archive some of my mailboxes. Most of these mailboxes come directly from +# procmail and are in maildir-format -- not that it should really matter. +# +# I probably could have done them all as: +# $ARCMAIL $HOME/Mail/* +# ...if I had enough disk space to keep mail uncompressed for 180 days :) +# +set -e + +ARCMAIL="/usr/local/bin/archivemail --quiet --output-dir=$HOME/Mail/Archive/ " + +$ARCMAIL --days 14 $HOME/Mail/debian-user \ + $HOME/Mail/linux-kernel \ + $HOME/Mail/python-list \ + $HOME/Mail/spam-l + +$ARCMAIL --days 30 --delete $HOME/Mail/duplicates + +$ARCMAIL --days 90 $HOME/Mail/bugtraq \ + $HOME/Mail/debian-devel \ + $HOME/Mail/debian-mentors \ + $HOME/Mail/spam + +$ARCMAIL $HOME/Mail/cm-melb \ + $HOME/Mail/exsouthrock \ + $HOME/Mail/received \ + $HOME/Mail/sent \ + $HOME/Mail/vim diff --git a/index.html b/index.html new file mode 100644 index 0000000..90bef89 --- /dev/null +++ b/index.html @@ -0,0 +1,192 @@ + + + + + archivemail – a tool for archiving and compressing old email + + + + + +

archivemail

+ +
+
+ Latest version: 0.9.0
+ Released on 2011-07-09 +
+

What is it?

+

+archivemail is a tool for archiving and compressing +old email in mailboxes. +It moves messages older than the specified number of +days to a separate mbox format mailbox that is +compressed with gzip. +It can also just delete old email rather than archive it. + +
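+
+For illustration only, here is a rough sketch of that basic loop in Python.
+It is not archivemail's actual code: the function name is made up, it uses
+the newer mailbox.mbox and email.utils APIs (Python 2.5 and later), it only
+copies old messages (similar in spirit to the --copy option), and it leaves
+out everything archivemail does around locking, From_ mangling and status
+flags:
+
+import gzip
+import mailbox
+import time
+from email.utils import parsedate_tz, mktime_tz
+
+def copy_old_messages(mbox_path, archive_path, days=180):
+    """Append messages older than `days' to a gzipped mbox archive."""
+    cutoff = time.time() - days * 24 * 60 * 60
+    archive = gzip.open(archive_path, "ab")
+    for msg in mailbox.mbox(mbox_path):
+        date = parsedate_tz(msg.get("Date", ""))
+        if date is None:
+            continue            # no usable date; the real tool digs deeper
+        if mktime_tz(date) < cutoff:
+            archive.write(msg.as_string(unixfrom=True))
+            archive.write("\n")
+    archive.close()
+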

What can it do for me?

+

+Maybe some of your mailboxes are quite large (eg, over 10,000 messages) and +they are taking a while to load in your mail reader. Perhaps they are taking +up too much space on your disk. Archiving old messages to a separate, +compressed mailbox will mean: +

    +
  1. Your mail reader will get a huge performance boost loading and reading + your mail. +
  2. You will be taking up less disk space, since old mail will be compressed. + (Mail usually compresses quite nicely.) +
  3. You won't be confronted with semi-obsolete mail all the time. +
+

+You can also use archivemail as a simple backup +tool. + +

Features overview

+
    +
  • Supports archiving + IMAP, + mbox, + MH and + Maildir format + mailboxes. +
  • Old mail can be either archived or just deleted. +
  • The age in days that is considered old is configurable – it defaults + to 180 days. + You can also set an absolute limit date. +
  • Messages that are flagged important are not archived or deleted unless
+   explicitly requested.
  • Can be configured to preserve unread mail. +
  • Can be configured to only archive messages over a given byte size. +
  • Stores the compressed archive mailboxes in a directory of + your choice, with an extension of your choice. +
  • Easy read-only testing, not touching your valuable data. +
  • Supports IMAPS/SSL. +
  • When archiving IMAP mailboxes, the message selection can be refined by
+   extending the underlying IMAP SEARCH command with arbitrary
+   search keys (you will have to cope with the raw IMAP protocol, though);
+   see the sketch after this list.
+ +
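+
+To give an idea of the raw IMAP protocol that last item refers to, here is a
+minimal, illustrative Python snippet performing the kind of server-side
+SEARCH archivemail relies on. It is not archivemail's own code; the host,
+the credentials, the cut-off date and the extra UNANSWERED search key are
+placeholders:
+
+import imaplib
+
+server = imaplib.IMAP4_SSL("imap.example.org")   # placeholder host
+server.login("user", "password")                 # placeholder credentials
+server.select("INBOX")
+# Ask the server for messages older than a given date; an additional search
+# key is simply appended to criteria like these.
+typ, data = server.search(None, "BEFORE", "01-Jan-2002", "UNANSWERED")
+print typ, data                                  # e.g. OK ['1 2 5']
+server.logout()
+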

Documentation

+

+The archivemail manpage is the primary documentation
+for archivemail.
+To see what has changed in the latest version, check the release notes.
+If you want to have a closer look at the current development status, here are the
+
+CHANGELOG and the
+TODO list, fresh from the repository.

Where can I get it?

+

+You can grab the latest version of archivemail +directly from the archivemail +download area at Sourceforge. +There should be up-to-date binary RPM packages at the +OpenSUSE build +service for SUSE Linux and Fedora Core. +Also, many Linux distributions provide packages; e.g. there is a +Debian +package. + +

+archivemail is written in Python, and hacking it is +easy and fun; you can check out the source from the git repository with +the following command: +

+git clone git://archivemail.git.sourceforge.net/gitroot/archivemail/archivemail +
+

+See also the short +introduction to git access at sourceforge. + + + +

Getting involved

+ + +

Requirements

+

+archivemail requires Python 2.3 or newer.
+It also uses some optional Python modules, but these should be pretty much
+standard; if you get an ImportError nonetheless, please
+report it, thanks.
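+
+A quick way to check your interpreter up front is something like the
+following; the module list is only an example drawn from what archivemail's
+documentation and test suite mention, not an authoritative list of
+dependencies:
+
+import sys
+assert sys.version_info >= (2, 3), "archivemail needs Python 2.3 or newer"
+for name in ("mailbox", "rfc822", "gzip", "fcntl", "imaplib"):
+    try:
+        __import__(name)
+    except ImportError, e:
+        print "missing module:", name, "-", e
+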

License

+

+This software is licensed under the terms of the +GNU GPL, either +version 2 of the license, or any later version. + +

Credits

+

+archivemail was written by Paul Rodger +<paul at paulrodger dot + com> +and is currently maintained by Peter Poeml <poeml + at suse dot de>, Nikolaus Schulz <microschulz@web.de> +and Brandon Knitter. + +


+

+ + + + SourceForge.net Logo + + + + + diff --git a/manpage.css b/manpage.css new file mode 100644 index 0000000..915cea2 --- /dev/null +++ b/manpage.css @@ -0,0 +1,15 @@ +@import "style.css"; +h2 { + font-variant: small-caps; + font-size: 170%; +} +.informalexample { + margin-bottom: 1.2em; +} +div.informalexample .screen { + margin-left: 2ex; +} + +a#strftime + dl dt { float: left; margin: 0.3ex 0; width: 1.5em; } +a#strftime + dl dd { float: left; margin: 0.3ex 0; margin-left: 1.2em; width: 90%; } +dt { clear: left; } diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..e4521c9 --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python + +import sys + +def check_python_version(): + """Abort if we are running on python < v2.0""" + too_old_error = """This program requires python v2.0 or greater. +Your version of python is: %s""" % sys.version + try: + version = sys.version_info # we might not even have this function! :) + if (version[0] < 2): + print too_old_error + sys.exit(1) + except AttributeError: + print too_old_error + sys.exit(1) + +# define & run this early - 'distutils.core' requires Python >= 2.0 +check_python_version() +from distutils.core import setup + +setup(name="archivemail", + version="0.9.0", + description="archive and compress old email", + license="GNU GPL", + url="http://archivemail.sourceforge.net/", + author="Paul Rodger", + author_email="paul@paulrodger.com", + maintainer="Nikolaus Schulz, Peter Poeml", + maintainer_email="nikosch@users.sourceforge.net, poeml@users.sourceforge.net", + scripts=["archivemail"], + data_files=[("share/man/man1", ["archivemail.1"])], + ) diff --git a/style.css b/style.css new file mode 100644 index 0000000..7893786 --- /dev/null +++ b/style.css @@ -0,0 +1,49 @@ +body { + padding: 2%; + line-height: 130%; + margin: 0; + /*color: #036;*/ +} +h1 { + font-size: 220%; + font-weight: bold; + padding: 0 0 0.4em 0.1em; + /*margin: 0 0 0.5em 0; */ + margin: 0; + /*border-bottom: 2px solid black;*/ +} +hr { + border: 1px #b8b8b8 solid; +} +h1 + hr { + margin-top: 0; + margin-bottom: 1.7em; +} +h2 { + margin: 1em 0 0.8em 0; + padding: 0; + font-size: 150%; +} +img {border: none;} +a { + text-decoration: underline; +} +a:link { + /*color: #0073c7;*/ + color: blue; + background-color: inherit; +} + +a:visited { + /*color: #5A88B5;*/ + color: #844084; + background-color: inherit; +} + +/* +a:hover, +a:active { + color: #0073c7; + background-color: #f0f0f0; +} +*/ diff --git a/test_archivemail b/test_archivemail new file mode 100755 index 0000000..ed033e2 --- /dev/null +++ b/test_archivemail @@ -0,0 +1,1771 @@ +#! /usr/bin/env python +############################################################################ +# Copyright (C) 2002 Paul Rodger +# (C) 2006-2011 Nikolaus Schulz +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +############################################################################ +""" +Unit-test archivemail using 'PyUnit'. + +TODO: add tests for: + * dotlock locks already existing + * archiving MH-format mailboxes + * a 3rd party process changing the mbox file being read + +""" + +import sys + +def check_python_version(): + """Abort if we are running on python < v2.4""" + too_old_error = "This test script requires python version 2.4 or later. " + \ + "Your version of python is:\n%s" % sys.version + try: + version = sys.version_info # we might not even have this function! :) + if (version[0] < 2) or (version[0] == 2 and version[1] < 4): + print too_old_error + sys.exit(1) + except AttributeError: + print too_old_error + sys.exit(1) + +# define & run this early because 'unittest' requires Python >= 2.1 +check_python_version() + +import copy +import fcntl +import filecmp +import os +import re +import shutil +import stat +import tempfile +import time +import unittest +import gzip +import cStringIO +import rfc822 +import mailbox + +from types import ModuleType +archivemail = ModuleType("archivemail") +try: + module_fp = open("archivemail", "r") +except IOError: + print "The archivemail script should be in the current directory in order" + print "to be imported and tested. Sorry." + sys.exit(1) +exec module_fp in archivemail.__dict__ + +# We want to iterate over messages in a compressed archive mbox and verify +# them. This involves seeking in the mbox. The gzip.Gzipfile.seek() in +# Python 2.5 doesn't understand whence; this is Python bug #1355023, triggered +# by mailbox._PartialFile.seek(). The bug is still pending as of Python +# 2.5.2. To work around it, we subclass gzip.GzipFile. +# +# It should be noted that seeking backwards in a GzipFile is emulated by +# re-reading the entire file from the beginning, which is extremely +# inefficient and won't work with large files; but our test archives are all +# small, so it's okay. + +class FixedGzipFile(gzip.GzipFile): + """GzipFile with seek method accepting whence parameter.""" + def seek(self, offset, whence=0): + try: + # Try calling gzip.GzipFile.seek with the whence parameter. + # For Python >= 2.7, it returns the new offset; pass that on. + return gzip.GzipFile.seek(self, offset, whence) + except TypeError: + if whence: + if whence == 1: + offset = self.offset + offset + else: + raise ValueError('Seek from end not supported') + return gzip.GzipFile.seek(self, offset) + +# precision of os.utime() when restoring mbox timestamps +utimes_precision = 5 + +class MessageIdFactory: + """Factory to create `uniqe' message-ids.""" + def __init__(self): + self.seq = 0 + def __call__(self): + self.seq += 1 + return "" % self.seq + +make_msgid = MessageIdFactory() + +class IndexedMailboxDir: + """An indexed mailbox directory, providing random message access by + message-id. 
Intended as a base class for a maildir and an mh subclass.""" + + def __init__(self, mdir_name): + assert tempfile.tempdir + self.root = tempfile.mkdtemp(prefix=mdir_name) + self.msg_id_dict = {} + self.deliveries = 0 + + def _add_to_index(self, msg_text, fpath): + """Add the given message to the index, for later random access.""" + # Extract the message-id as index key + msg_id = None + fp = cStringIO.StringIO(msg_text) + while True: + line = fp.readline() + # line empty means we didn't find a message-id + assert line + if line.lower().startswith("message-id:"): + msg_id = line.split(":", 1)[-1].strip() + assert msg_id + break + assert not self.msg_id_dict.has_key(msg_id) + self.msg_id_dict[msg_id] = fpath + + def get_all_filenames(self): + """Return all relative pathnames of files in this mailbox.""" + return self.msg_id_dict.values() + +class SimpleMaildir(IndexedMailboxDir): + """Primitive Maildir class, just good enough for generating short-lived + test maildirs.""" + + def __init__(self, mdir_name='maildir'): + IndexedMailboxDir.__init__(self, mdir_name) + for d in "cur", "tmp", "new": + os.mkdir(os.path.join(self.root, d)) + + def write(self, msg_str, new=True, flags=[]): + """Store a message with the given flags.""" + assert not (new and flags) + if new: + subdir = "new" + else: + subdir = "cur" + fname = self._mkname(new, flags) + relpath = os.path.join(subdir, fname) + path = os.path.join(self.root, relpath) + assert not os.path.exists(path) + f = open(path, "w") + f.write(msg_str) + f.close() + self._add_to_index(msg_str, relpath) + + def _mkname(self, new, flags): + """Generate a unique filename for a new message.""" + validflags = 'DFPRST' + for f in flags: + assert f in validflags + # This 'unique' name should be good enough, since nobody else + # will ever write messages to this maildir folder. + uniq = str(self.deliveries) + self.deliveries += 1 + if new: + return uniq + if not flags: + return uniq + ':2,' + finfo = "".join(sorted(flags)) + return uniq + ':2,' + finfo + + def get_message_and_mbox_status(self, msgid): + """For the Message-Id msgid, return the matching message in text + format and its status, expressed as a set of mbox flags.""" + fpath = self.msg_id_dict[msgid] # Barfs if not found + mdir_flags = fpath.rsplit('2,', 1)[-1] + flagmap = { + 'F': 'F', + 'R': 'A', + 'S': 'R' + } + mbox_flags = set([flagmap[x] for x in mdir_flags]) + if fpath.startswith("cur/"): + mbox_flags.add('O') + fp = open(os.path.join(self.root, fpath), "r") + msg = fp.read() + fp.close() + return msg, mbox_flags + + +class TestCaseInTempdir(unittest.TestCase): + """Base class for testcases that need to create temporary files. + All testcases that create temporary files should be derived from this + class, not directly from unittest.TestCase. + TestCaseInTempdir provides these methods: + + setUp() Creates a safe temporary directory and sets tempfile.tempdir. + + tearDown() Recursively removes the temporary directory and unsets + tempfile.tempdir. 
+ + Overriding methods should call the ones above.""" + temproot = None + + def setUp(self): + if not self.temproot: + assert not tempfile.tempdir + self.temproot = tempfile.tempdir = \ + tempfile.mkdtemp(prefix="test-archivemail") + + def tearDown(self): + assert tempfile.tempdir == self.temproot + if self.temproot: + shutil.rmtree(self.temproot) + tempfile.tempdir = self.temproot = None + + +############ Mbox Class testing ############## + +class TestMboxDotlock(TestCaseInTempdir): + def setUp(self): + super(TestMboxDotlock, self).setUp() + self.mbox_name = make_mbox() + self.mbox_mode = os.stat(self.mbox_name)[stat.ST_MODE] + self.mbox = archivemail.Mbox(self.mbox_name) + + def testDotlock(self): + """dotlock_lock/unlock should create/delete a lockfile""" + lock = self.mbox_name + ".lock" + self.mbox._dotlock_lock() + assert os.path.isfile(lock) + self.mbox._dotlock_unlock() + assert not os.path.isfile(lock) + + def testDotlockingSucceedsUponEACCES(self): + """A dotlock should silently be omitted upon EACCES.""" + archivemail.options.quiet = True + mbox_dir = os.path.dirname(self.mbox_name) + os.chmod(mbox_dir, 0500) + try: + self.mbox._dotlock_lock() + self.mbox._dotlock_unlock() + finally: + os.chmod(mbox_dir, 0700) + archivemail.options.quiet = False + +class TestMboxPosixLock(TestCaseInTempdir): + def setUp(self): + super(TestMboxPosixLock, self).setUp() + self.mbox_name = make_mbox() + self.mbox = archivemail.Mbox(self.mbox_name) + + def testPosixLock(self): + """posix_lock/unlock should create/delete an advisory lock""" + + # The following code snippet heavily lends from the Python 2.5 mailbox + # unittest. + # BEGIN robbery: + + # Fork off a subprocess that will lock the file for 2 seconds, + # unlock it, and then exit. + pid = os.fork() + if pid == 0: + # In the child, lock the mailbox. + self.mbox._posix_lock() + time.sleep(2) + self.mbox._posix_unlock() + os._exit(0) + + # In the parent, sleep a bit to give the child time to acquire + # the lock. + time.sleep(0.5) + # The parent's file self.mbox.mbox_file shares fcntl locks with the + # duplicated FD in the child; reopen it so we get a different file + # table entry. + file = open(self.mbox_name, "r+") + lock_nb = fcntl.LOCK_EX | fcntl.LOCK_NB + fd = file.fileno() + try: + self.assertRaises(IOError, fcntl.lockf, fd, lock_nb) + + finally: + # Wait for child to exit. Locking should now succeed. 
+ exited_pid, status = os.waitpid(pid, 0) + + fcntl.lockf(fd, lock_nb) + fcntl.lockf(fd, fcntl.LOCK_UN) + # END robbery + + +class TestMboxNext(TestCaseInTempdir): + def setUp(self): + super(TestMboxNext, self).setUp() + self.not_empty_name = make_mbox(messages=18) + self.empty_name = make_mbox(messages=0) + + def testNextEmpty(self): + """mbox.next() should return None on an empty mailbox""" + mbox = archivemail.Mbox(self.empty_name) + msg = mbox.next() + self.assertEqual(msg, None) + + def testNextNotEmpty(self): + """mbox.next() should a message on a populated mailbox""" + mbox = archivemail.Mbox(self.not_empty_name) + for count in range(18): + msg = mbox.next() + assert msg + msg = mbox.next() + self.assertEqual(msg, None) + + +############ TempMbox Class testing ############## + +class TestTempMboxWrite(TestCaseInTempdir): + def setUp(self): + super(TestTempMboxWrite, self).setUp() + + def testWrite(self): + """mbox.write() should append messages to a mbox mailbox""" + read_file = make_mbox(messages=3) + mbox_read = archivemail.Mbox(read_file) + mbox_write = archivemail.TempMbox() + write_file = mbox_write.mbox_file_name + for count in range(3): + msg = mbox_read.next() + mbox_write.write(msg) + mbox_read.close() + mbox_write.close() + assert filecmp.cmp(read_file, write_file, shallow=0) + + def testWriteNone(self): + """calling mbox.write() with no message should raise AssertionError""" + write = archivemail.TempMbox() + self.assertRaises(AssertionError, write.write, None) + +class TestTempMboxRemove(TestCaseInTempdir): + def setUp(self): + super(TestTempMboxRemove, self).setUp() + self.mbox = archivemail.TempMbox() + self.mbox_name = self.mbox.mbox_file_name + + def testMboxRemove(self): + """remove() should delete a mbox mailbox""" + assert os.path.exists(self.mbox_name) + self.mbox.remove() + assert not os.path.exists(self.mbox_name) + + + +########## options class testing ################# + +class TestOptionDefaults(unittest.TestCase): + def testVerbose(self): + """verbose should be off by default""" + self.assertEqual(archivemail.options.verbose, False) + + def testDaysOldMax(self): + """default archival time should be 180 days""" + self.assertEqual(archivemail.options.days_old_max, 180) + + def testQuiet(self): + """quiet should be off by default""" + self.assertEqual(archivemail.options.quiet, False) + + def testDeleteOldMail(self): + """we should not delete old mail by default""" + self.assertEqual(archivemail.options.delete_old_mail, False) + + def testNoCompress(self): + """no-compression should be off by default""" + self.assertEqual(archivemail.options.no_compress, False) + + def testIncludeFlagged(self): + """we should not archive flagged messages by default""" + self.assertEqual(archivemail.options.include_flagged, False) + + def testPreserveUnread(self): + """we should not preserve unread messages by default""" + self.assertEqual(archivemail.options.preserve_unread, False) + +class TestOptionParser(unittest.TestCase): + def setUp(self): + self.oldopts = copy.copy(archivemail.options) + + def testOptionDate(self): + """--date and -D options are parsed correctly""" + date_formats = ( + "%Y-%m-%d", # ISO format + "%d %b %Y" , # Internet format + "%d %B %Y" , # Internet format with full month names + ) + date = time.strptime("2000-07-29", "%Y-%m-%d") + unixdate = time.mktime(date) + for df in date_formats: + d = time.strftime(df, date) + for opt in '-D', '--date=': + archivemail.options.date_old_max = None + archivemail.options.parse_args([opt+d], "") + 
self.assertEqual(unixdate, archivemail.options.date_old_max) + + def testOptionPreserveUnread(self): + """--preserve-unread option is parsed correctly""" + archivemail.options.parse_args(["--preserve-unread"], "") + assert archivemail.options.preserve_unread + archivemail.options.preserve_unread = False + archivemail.options.parse_args(["-u"], "") + assert archivemail.options.preserve_unread + + def testOptionSuffix(self): + """--suffix and -s options are parsed correctly""" + for suffix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + archivemail.options.parse_args(["--suffix="+suffix], "") + self.assertEqual(archivemail.options.archive_suffix, suffix) + archivemail.options.archive_suffix = None + archivemail.options.parse_args(["-s", suffix], "") + self.assertEqual(archivemail.options.archive_suffix, suffix) + + def testOptionPrefix(self): + """--prefix and -p options are parsed correctly""" + for prefix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + archivemail.options.parse_args(["--prefix="+prefix], "") + self.assertEqual(archivemail.options.archive_prefix, prefix) + archivemail.options.archive_prefix = None + archivemail.options.parse_args(["-p", prefix], "") + self.assertEqual(archivemail.options.archive_prefix, prefix) + + def testOptionArchivename(self): + """--archive-name and -a options are parsed correctly""" + for name in ("custom", ".withdot", "custom_%Y", "%Y/joe"): + archivemail.options.parse_args(["--archive-name="+name], "") + self.assertEqual(archivemail.options.archive_name, name) + archivemail.options.archive_name = None + archivemail.options.parse_args(["-a", name], "") + self.assertEqual(archivemail.options.archive_name, name) + + def testOptionDryrun(self): + """--dry-run option is parsed correctly""" + archivemail.options.parse_args(["--dry-run"], "") + assert archivemail.options.dry_run + archivemail.options.preserve_unread = False + archivemail.options.parse_args(["-n"], "") + assert archivemail.options.dry_run + + def testOptionDays(self): + """--days and -d options are parsed correctly""" + archivemail.options.parse_args(["--days=11"], "") + self.assertEqual(archivemail.options.days_old_max, 11) + archivemail.options.days_old_max = None + archivemail.options.parse_args(["-d11"], "") + self.assertEqual(archivemail.options.days_old_max, 11) + + def testOptionDelete(self): + """--delete option is parsed correctly""" + archivemail.options.parse_args(["--delete"], "") + assert archivemail.options.delete_old_mail + + def testOptionCopy(self): + """--copy option is parsed correctly""" + archivemail.options.parse_args(["--copy"], "") + assert archivemail.options.copy_old_mail + + def testOptionOutputdir(self): + """--output-dir and -o options are parsed correctly""" + for path in "/just/some/path", "relative/path": + archivemail.options.parse_args(["--output-dir=%s" % path], "") + self.assertEqual(archivemail.options.output_dir, path) + archivemail.options.output_dir = None + archivemail.options.parse_args(["-o%s" % path], "") + self.assertEqual(archivemail.options.output_dir, path) + + def testOptionNocompress(self): + """--no-compress option is parsed correctly""" + archivemail.options.parse_args(["--no-compress"], "") + assert archivemail.options.no_compress + + def testOptionSize(self): + """--size and -S options are parsed correctly""" + size = "666" + archivemail.options.parse_args(["--size=%s" % size ], "") + self.assertEqual(archivemail.options.min_size, int(size)) + archivemail.options.parse_args(["-S%s" % size ], "") + self.assertEqual(archivemail.options.min_size, 
int(size)) + + def tearDown(self): + archivemail.options = self.oldopts + +########## archivemail.is_older_than_days() unit testing ################# + +class TestIsTooOld(unittest.TestCase): + def testVeryOld(self): + """with max_days=360, should be true for these dates > 1 year""" + for years in range(1, 10): + time_msg = time.time() - (years * 365 * 24 * 60 * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=360) + + def testOld(self): + """with max_days=14, should be true for these dates > 14 days""" + for days in range(14, 360): + time_msg = time.time() - (days * 24 * 60 * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=14) + + def testJustOld(self): + """with max_days=1, should be true for these dates >= 1 day""" + for minutes in range(0, 61): + time_msg = time.time() - (25 * 60 * 60) + (minutes * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + + def testNotOld(self): + """with max_days=9, should be false for these dates < 9 days""" + for days in range(0, 9): + time_msg = time.time() - (days * 24 * 60 * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=9) + + def testJustNotOld(self): + """with max_days=1, should be false for these hours <= 1 day""" + for minutes in range(0, 60): + time_msg = time.time() - (23 * 60 * 60) - (minutes * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + + def testFuture(self): + """with max_days=1, should be false for times in the future""" + for minutes in range(0, 60): + time_msg = time.time() + (minutes * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + +########## archivemail.parse_imap_url() unit testing ################# + +class TestParseIMAPUrl(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + archivemail.options.verbose = False + archivemail.options.pwfile = None + + urls_withoutpass = [ + ('imap://user@example.org@imap.example.org/upperbox/lowerbox', + ('user', None, 'example.org@imap.example.org', 143, + 'upperbox/lowerbox')), + ('imap://"user@example.org"@imap.example.org/upperbox/lowerbox', + ('user@example.org', None, 'imap.example.org', 143, + 'upperbox/lowerbox')), + ('imap://user@example.org"@imap.example.org/upperbox/lowerbox', + ('user', None, 'example.org"@imap.example.org', 143, + 'upperbox/lowerbox')), + ('imaps://"user@example.org@imap.example.org/upperbox/lowerbox', + ('"user', None, 'example.org@imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://"us\\"er@example.org"@imap.example.org/upperbox/lowerbox', + ('us"er@example.org', None, 'imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://user\\@example.org@imap.example.org/upperbox/lowerbox', + ('user\\', None, 'example.org@imap.example.org', 993, + 'upperbox/lowerbox')) + ] + urls_withpass = [ + ('imap://user@example.org:passwd@imap.example.org/upperbox/lowerbox', + ('user@example.org', 'passwd', 'imap.example.org', 143, + 'upperbox/lowerbox')), + ('imaps://"user@example.org:passwd@imap.example.org/upperbox/lowerbox', + ('"user@example.org', "passwd", 'imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://u\\ser\\@example.org:"p@sswd"@imap.example.org/upperbox/lowerbox', + ('u\\ser\\@example.org', 'p@sswd', 'imap.example.org', 993, + 'upperbox/lowerbox')) + ] + # These are invalid when the password's not stripped. 
+ urls_onlywithpass = [ + ('imaps://"user@example.org":passwd@imap.example.org/upperbox/lowerbox', + ('user@example.org', "passwd", 'imap.example.org', + 'upperbox/lowerbox')) + ] + def testUrlsWithoutPwfile(self): + """Parse test urls with --pwfile option unset. This parses a password in + the URL, if present.""" + archivemail.options.pwfile = None + for mbstr in self.urls_withpass + self.urls_withoutpass: + url = mbstr[0] + result = archivemail.parse_imap_url(url) + self.assertEqual(result, mbstr[1]) + + def testUrlsWithPwfile(self): + """Parse test urls with --pwfile set. In this case the ':' character + loses its meaning as a delimiter.""" + archivemail.options.pwfile = "whocares.txt" + for mbstr in self.urls_onlywithpass: + url = mbstr[0] + self.assertRaises(archivemail.UnexpectedError, + archivemail.parse_imap_url, url) + + def testUrlsDefaultPorts(self): + """If an IMAP URL does not specify a server port, the standard ports + are used.""" + archivemail.options.pwfile = "doesnotexist.txt" + self.assertEqual(143, archivemail.parse_imap_url("imap://user@host/box")[3]) + self.assertEqual(993, archivemail.parse_imap_url("imaps://user@host/box")[3]) + + def testUrlsWithPassAndPortnumber(self): + """IMAP URLs with an embedded password and a server port number are + correctly parsed.""" + self.assertEqual(1234, archivemail.parse_imap_url("imap://user:pass@host:1234/box")[3]) + self.assertEqual(1234, archivemail.parse_imap_url("imap://user:pass@host:1234/box")[3]) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.verbose = False + archivemail.options.pwfile = None + +########## quoting and un-quoting of IMAP strings ########## + +class TestIMAPQuoting(unittest.TestCase): + stringlist = ( + ('{braces} and space', '"{braces} and space"'), + ('\\backslash', '"\\\\backslash"'), + ('with "quotes" inbetween', '"with \\"quotes\\" inbetween"'), + ('ending with "quotes"', '"ending with \\"quotes\\""'), + ('\\"backslash before quote', '"\\\\\\"backslash before quote"') + ) + + def testQuote(self): + for unquoted, quoted in self.stringlist: + self.assertEqual(archivemail.imap_quote(unquoted), quoted) + + def testUnquote(self): + for unquoted, quoted in self.stringlist: + self.assertEqual(unquoted, archivemail.imap_unquote(quoted)) + + +########## Modified UTF-7 support functions ########## + +class TestModUTF7(unittest.TestCase): + goodpairs = ( + (u"A\N{NOT IDENTICAL TO}A.", "A&ImI-A."), + (u"Hi Mom -\N{WHITE SMILING FACE}-!", "Hi Mom -&Jjo--!"), + (u"~peter/mail/\u53f0\u5317/\u65e5\u672c\u8a9e", + "~peter/mail/&U,BTFw-/&ZeVnLIqe-") + ) + + def testEncode(self): + """Ensure that encoding text in modified UTF-7 works properly.""" + for text, code in self.goodpairs: + self.assertEqual(archivemail.mod_utf7_encode(text), code) + + def testDecode(self): + """Ensure that decoding modified UTF-7 to text works properly.""" + for text, code in self.goodpairs: + self.assertEqual(archivemail.mod_utf7_decode(code), text) + + +########## acceptance testing ########### + +class TestArchive(TestCaseInTempdir): + """Base class defining helper functions for doing test archiving runs.""" + mbox = None # mbox file that will be processed by archivemail + good_archive = None # Uncompressed reference archive file to verify the + # archive after processing + good_mbox = None # Reference mbox file to verify the mbox after processing + + def verify(self): + assert os.path.exists(self.mbox) + if self.good_mbox is not None: + assertEqualContent(self.mbox, self.good_mbox) + else: + 
self.assertEqual(os.path.getsize(self.mbox), 0) + archive_name = self.mbox + "_archive" + if not archivemail.options.no_compress: + archive_name += ".gz" + iszipped = True + else: + assert not os.path.exists(archive_name + ".gz") + iszipped = False + if self.good_archive is not None: + assertEqualContent(archive_name, self.good_archive, iszipped) + else: + assert not os.path.exists(archive_name) + + def make_old_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with an old mbox by making an old mbox, + optionally an existing archive, and a reference archive to verify the + archive after archivemail has run.""" + self.mbox = make_mbox(body, headers, 181*24, messages) + archive_does_change = not (archivemail.options.dry_run or + archivemail.options.delete_old_mail) + mbox_does_not_change = archivemail.options.dry_run or \ + archivemail.options.copy_old_mail + if make_old_archive: + archive = archivemail.make_archive_name(self.mbox) + self.good_archive = make_archive_and_plain_copy(archive) + if archive_does_change: + append_file(self.mbox, self.good_archive) + elif archive_does_change: + self.good_archive = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_archive) + if mbox_does_not_change: + if archive_does_change and not make_old_archive: + self.good_mbox = self.good_archive + else: + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + + def make_mixed_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with a mixed mbox by making a mixed mbox, + optionally an existing archive, a reference archive to verify the + archive after archivemail has run, and likewise a reference mbox to + verify the mbox.""" + self.make_old_mbox(body, headers, messages=messages, make_old_archive=make_old_archive) + new_mbox_name = make_mbox(body, headers, 179*24, messages) + append_file(new_mbox_name, self.mbox) + if self.good_mbox is None: + self.good_mbox = new_mbox_name + else: + if self.good_mbox == self.good_archive: + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + else: + append_file(new_mbox_name, self.good_mbox) + + def make_new_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with a new mbox by making a new mbox, + optionally an exiting archive, and a reference mbox to verify the mbox + after archivemail has run.""" + self.mbox = make_mbox(body, headers, 179*24, messages) + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + if make_old_archive: + archive = archivemail.make_archive_name(self.mbox) + self.good_archive = make_archive_and_plain_copy(archive) + + +class TestArchiveMbox(TestArchive): + """archiving should work based on the date of messages given""" + + def setUp(self): + self.oldopts = copy.copy(archivemail.options) + archivemail.options.quiet = True + super(TestArchiveMbox, self).setUp() + + def testOld(self): + """archiving an old mailbox""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOldFromInBody(self): + """archiving an old mailbox with 'From ' in the body""" + body = """This is a message with ^From at the start of a line +From is on this line +This is after the ^From line""" + self.make_old_mbox(messages=3, body=body) + archivemail.archive(self.mbox) + self.verify() + + def testDateSystem(self): + """test that the --date option works as expected""" + test_headers = ( + { 
+ 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : None, + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + ) + for headers in test_headers: + msg = make_message(default_headers=headers, wantobj=True) + date = time.strptime("2000-07-29", "%Y-%m-%d") + archivemail.options.date_old_max = time.mktime(date) + assert archivemail.should_archive(msg) + date = time.strptime("2000-07-27", "%Y-%m-%d") + archivemail.options.date_old_max = time.mktime(date) + assert not archivemail.should_archive(msg) + + def testMixed(self): + """archiving a mixed mailbox""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testNew(self): + """archiving a new mailbox""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOldExisting(self): + """archiving an old mailbox with an existing archive""" + self.make_old_mbox(messages=3, make_old_archive=True) + archivemail.archive(self.mbox) + self.verify() + + def testOldWeirdHeaders(self): + """archiving old mailboxes with weird headers""" + weird_headers = ( + { # we should archive because of the date on the 'From_' line + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : 'Friskhdfkjkh, 28 Jul 2002 1line noise6:11:36 +1000', + }, + { # we should archive because of the date on the 'From_' line + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : None, + }, + { # we should archive because of the date in 'Delivery-date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Frcorruptioni, 28 Jul 20line noise00 16:6 +1000', + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Delivery-date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Resent-Date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Frcorruptioni, 28 Jul 20line noise00 16:6 +1000', + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Resent-Date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # completely blank dates were crashing < version 0.4.7 + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : '', + }, + { # completely blank dates were crashing < version 0.4.7 + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : '', + 'Resent-Date' : '', + }, + ) + fd, self.mbox = tempfile.mkstemp() + fp = os.fdopen(fd, "w") + for headers in weird_headers: + msg_text = make_message(default_headers=headers) + fp.write(msg_text*2) + fp.close() + self.good_archive = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_archive) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options = self.oldopts + super(TestArchiveMbox, self).tearDown() + + +class TestArchiveMboxTimestamp(TestCaseInTempdir): + """original mbox timestamps should always be preserved""" + def setUp(self): + 
super(TestArchiveMboxTimestamp, self).setUp() + archivemail.options.quiet = True + self.mbox_name = make_mbox(messages=3, hours_old=(24 * 180)) + self.mtime = os.path.getmtime(self.mbox_name) - 66 + self.atime = os.path.getatime(self.mbox_name) - 88 + os.utime(self.mbox_name, (self.atime, self.mtime)) + + def testNew(self): + """mbox timestamps should not change after no archival""" + archivemail.options.days_old_max = 181 + archivemail.archive(self.mbox_name) + self.verify() + + def testOld(self): + """mbox timestamps should not change after archival""" + archivemail.options.days_old_max = 179 + archivemail.archive(self.mbox_name) + self.verify() + + def verify(self): + assert os.path.exists(self.mbox_name) + new_atime = os.path.getatime(self.mbox_name) + new_mtime = os.path.getmtime(self.mbox_name) + self.assertAlmostEqual(self.mtime, new_mtime, utimes_precision) + self.assertAlmostEqual(self.atime, new_atime, utimes_precision) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.days_old_max = 180 + os.remove(self.mbox_name) + super(TestArchiveMboxTimestamp, self).tearDown() + + +class TestArchiveMboxAll(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + archivemail.options.archive_all = True + + def testNew(self): + """new messages should be archived with --all""" + self.msg = make_message(hours_old=24*179, wantobj=True) + assert archivemail.should_archive(self.msg) + + def testOld(self): + """old messages should be archived with --all""" + self.msg = make_message(hours_old=24*181, wantobj=True) + assert archivemail.should_archive(self.msg) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_all = False + +class TestArchiveMboxPreserveUnread(unittest.TestCase): + """make sure the 'preserve_unread' option works""" + def setUp(self): + archivemail.options.quiet = True + archivemail.options.preserve_unread = True + self.msg = make_message(hours_old=24*181, wantobj=True) + + def testOldRead(self): + """old read messages should be archived with --preserve-unread""" + self.msg["Status"] = "RO" + assert archivemail.should_archive(self.msg) + + def testOldUnread(self): + """old unread messages should not be archived with --preserve-unread""" + self.msg["Status"] = "O" + assert not archivemail.should_archive(self.msg) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.preserve_unread = False + + +class TestArchiveMboxSuffix(unittest.TestCase): + """make sure the 'suffix' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testSuffix(self): + """archiving with specified --suffix arguments""" + for suffix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + mbox_name = "foobar" + archivemail.options.archive_suffix = suffix + days_old_max = 180 + parsed_suffix_time = time.time() - days_old_max*24*60*60 + parsed_suffix = time.strftime(suffix, + time.localtime(parsed_suffix_time)) + archive_name = mbox_name + parsed_suffix + self.assertEqual(archive_name, + archivemail.make_archive_name(mbox_name)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_suffix = None + +class TestArchiveMboxPrefix(unittest.TestCase): + """make sure the 'prefix' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testPrefix(self): + """archiving with specified --prefix arguments""" + for archive_prefix in ("_static_", "_%B_%Y", "-%Y-%m-%d", "%Y/%m/"): + archivemail.options.archive_prefix = archive_prefix + for mbox_name in 
"foobar", "/tmp/foobar", "schnorchz/foobar": + archive_dir, archive_base = os.path.split(mbox_name) + days = archivemail.options.days_old_max + tm = time.localtime(time.time() - days*24*60*60) + prefix = time.strftime(archive_prefix, tm) + archive_name = os.path.join(archive_dir, prefix + archive_base) + self.assertEqual(archive_name, + archivemail.make_archive_name(mbox_name)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_prefix = None + +class TestArchiveName(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + + def testArchiveName(self): + """test the --archive-name option""" + archive_names = ("custom", ".withdot", "custom_%Y", "%Y/joe") + mbox = "foobar" + for name in archive_names: + archivemail.options.archive_name = name + days = archivemail.options.days_old_max + tm = time.localtime(time.time() - days*24*60*60) + name = time.strftime(name, tm) + self.assertEqual(archivemail.make_archive_name(mbox), name) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_name = None + +class TestArchiveAffixes(unittest.TestCase): + def setUp(self): + self.mbox = "harbsch" + self.archive_prefix = "wurbl+" + self.archive_suffix = "+schronk&borsz" + archivemail.options.quiet = True + + def testDefaultPrefix(self): + """if no archive name affix is specified, the default archive suffix is appended""" + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.mbox + archivemail.options.archive_default_suffix) + + def testPrefixKillsDefaultSuffix(self): + """if an archive name prefix is specified, the default archive suffix is not appended""" + archivemail.options.archive_prefix = self.archive_prefix + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.archive_prefix + self.mbox) + + def testPrefixAndSuffix(self): + """specifying both an archive name prefix and suffix works""" + archivemail.options.archive_prefix = self.archive_prefix + archivemail.options.archive_suffix = self.archive_suffix + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.archive_prefix + self.mbox + self.archive_suffix) + + def tearDown(self): + archivemail.options.archive_prefix = None + archivemail.options.archive_suffix = None + archivemail.options.quiet = False + +class TestArchiveHiddenMbox(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + self.mbox = ".upper.lower" + + def testHiddenMbox(self): + """leading dots are stripped from the archive name when no prefix is added""" + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.mbox.lstrip('.') + + archivemail.options.archive_default_suffix) + + def testHiddenMboxPrefixedArchive(self): + """no dots are stripped from the archive name when a prefix is added""" + prefix = ".hidden_" + archivemail.options.archive_prefix = prefix + self.assertEqual(archivemail.make_archive_name(self.mbox), + prefix + self.mbox) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_prefix = None + +class TestArchiveDryRun(TestArchive): + """make sure the 'dry-run' option works""" + def setUp(self): + super(TestArchiveDryRun, self).setUp() + archivemail.options.quiet = True + archivemail.options.dry_run = True + + def testOld(self): + """archiving an old mailbox with the 'dry-run' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.dry_run = False + archivemail.options.quiet = False + 
super(TestArchiveDryRun, self).tearDown() + + +class TestArchiveDelete(TestArchive): + """make sure the 'delete' option works""" + def setUp(self): + super(TestArchiveDelete, self).setUp() + archivemail.options.quiet = True + archivemail.options.delete_old_mail = True + + def testNew(self): + """archiving a new mailbox with the 'delete' option""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testMixed(self): + """archiving a mixed mailbox with the 'delete' option""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOld(self): + """archiving an old mailbox with the 'delete' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.delete_old_mail = False + archivemail.options.quiet = False + super(TestArchiveDelete, self).tearDown() + + +class TestArchiveCopy(TestArchive): + """make sure the 'copy' option works""" + def setUp(self): + super(TestArchiveCopy, self).setUp() + archivemail.options.quiet = True + archivemail.options.copy_old_mail = True + + def testNew(self): + """archiving a new mailbox with the 'copy' option""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testMixed(self): + """archiving a mixed mailbox with the 'copy' option""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOld(self): + """archiving an old mailbox with the 'copy' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.copy_old_mail = False + archivemail.options.quiet = False + super(TestArchiveCopy, self).tearDown() + + +class TestArchiveMboxFlagged(unittest.TestCase): + """make sure the 'include_flagged' option works""" + def setUp(self): + archivemail.options.include_flagged = False + archivemail.options.quiet = True + + def testOld(self): + """by default, old flagged messages should not be archived""" + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*181, wantobj=True) + assert not archivemail.should_archive(msg) + + def testIncludeFlaggedNew(self): + """new flagged messages should not be archived with include_flagged""" + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*179, wantobj=True) + assert not archivemail.should_archive(msg) + + def testIncludeFlaggedOld(self): + """old flagged messages should be archived with include_flagged""" + archivemail.options.include_flagged = True + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*181, wantobj=True) + assert archivemail.should_archive(msg) + + def tearDown(self): + archivemail.options.include_flagged = False + archivemail.options.quiet = False + + +class TestArchiveMboxOutputDir(unittest.TestCase): + """make sure that the 'output-dir' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testOld(self): + """archiving an old mailbox with a sepecified output dir""" + for dir in "/just/a/path", "relative/path": + archivemail.options.output_dir = dir + archive_dir = archivemail.make_archive_name("/tmp/mbox") + self.assertEqual(dir, os.path.dirname(archive_dir)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.output_dir = None + + +class TestArchiveMboxUncompressed(TestArchive): + """make sure that the 'no_compress' option works""" + mbox_name = None + new_mbox = None + old_mbox = None + copy_name = None + 
+    def setUp(self):
+        archivemail.options.quiet = True
+        archivemail.options.no_compress = True
+        super(TestArchiveMboxUncompressed, self).setUp()
+
+    def testOld(self):
+        """archiving an old mailbox uncompressed"""
+        self.make_old_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new mailbox uncompressed"""
+        self.make_new_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testMixed(self):
+        """archiving a mixed mailbox uncompressed"""
+        self.make_mixed_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testOldExists(self):
+        """archiving an old mailbox uncompressed with an existing archive"""
+        self.make_old_mbox(messages=3, make_old_archive=True)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.no_compress = False
+        super(TestArchiveMboxUncompressed, self).tearDown()
+
+
+class TestArchiveSize(unittest.TestCase):
+    """check that the 'size' argument works"""
+    def setUp(self):
+        archivemail.options.quiet = True
+        msg_text = make_message(hours_old=24*181)
+        self.msg_size = len(msg_text)
+        fp = cStringIO.StringIO(msg_text)
+        self.msg = rfc822.Message(fp)
+
+    def testSmaller(self):
+        """giving a size argument smaller than the message"""
+        archivemail.options.min_size = self.msg_size - 1
+        assert archivemail.should_archive(self.msg)
+
+    def testBigger(self):
+        """giving a size argument bigger than the message"""
+        archivemail.options.min_size = self.msg_size + 1
+        assert not archivemail.should_archive(self.msg)
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.min_size = None
+
+
+class TestXIMAPMessage(TestArchive):
+    """Test if IMAP pseudo messages in mboxes are properly handled."""
+    def setUp(self):
+        super(TestXIMAPMessage, self).setUp()
+        archivemail.options.quiet = True
+
+    def testXIMAPMbox(self):
+        """IMAP pseudo messages in an mbox are always preserved."""
+        self.good_mbox = make_mbox(hours_old=181*24, headers={'X-IMAP': 'dummytext'},
+                                   messages=1)
+        self.good_archive = make_mbox(hours_old=181*24, messages=3)
+        self.mbox = tempfile.mkstemp()[-1]
+        shutil.copyfile(self.good_mbox, self.mbox)
+        append_file(self.good_archive, self.mbox)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def tearDown(self):
+        super(TestXIMAPMessage, self).tearDown()
+        archivemail.options.quiet = False
+
+
+############# Test archiving maildirs ###############
+
+class TestArchiveMailboxdir(TestCaseInTempdir):
+    """Base class defining helper functions for doing test archive runs with
+    maildirs."""
+    maildir = None          # Maildir that will be processed by archivemail
+    orig_maildir_obj = None # A backup copy of the maildir, a SimpleMaildir object
+    remaining_msg = set()   # Filenames of maildir messages that should be preserved
+    number_archived = 0     # Number of messages that get archived
+    orig_archive = None     # An uncompressed copy of a pre-existing archive,
+                            # if one exists
+
+    def setUp(self):
+        super(TestArchiveMailboxdir, self).setUp()
+        self.orig_maildir_obj = SimpleMaildir()
+
+    def verify(self):
+        self._verify_remaining()
+        self._verify_archive()
+
+    def _verify_remaining(self):
+        """Verify that the preserved messages weren't altered."""
+        assert self.maildir
+        # Compare maildir with backup object.
+        dcmp = filecmp.dircmp(self.maildir, self.orig_maildir_obj.root)
+        # Top-level has only directories cur, new, tmp and must be unchanged.
+ self.assertEqual(dcmp.left_list, dcmp.right_list) + found = set() + for d in dcmp.common_dirs: + dcmp2 = dcmp.subdirs[d] + # We need to verify three things. + # 1. directory is a subset of the original... + assert not dcmp2.left_only + # 2. all common files are identical... + self.assertEqual(dcmp2.common_files, dcmp2.same_files) + found = found.union([os.path.join(d, x) for x in dcmp2.common_files]) + # 3. exactly the `new' messages (recorded in self.remaining_msg) + # were preserved. + self.assertEqual(found, self.remaining_msg) + + def _verify_archive(self): + """Verify the archive correctness.""" + # TODO: currently make_archive_name does not include the .gz suffix. + # Is this something that should be fixed? + archive = archivemail.make_archive_name(self.maildir) + if archivemail.options.no_compress: + iszipped = False + else: + archive += '.gz' + iszipped = True + if self.number_archived == 0: + if self.orig_archive: + assertEqualContent(archive, self.orig_archive, iszipped) + else: + assert not os.path.exists(archive) + return + fp_new = fp_archive = tmp_archive_name = None + try: + if self.orig_archive: + new_size = os.path.getsize(archive) + # Brute force: split archive in old and new part and verify the + # parts separately. (Of course this destroys the archive.) + fp_archive = open(archive, "r+") + fp_archive.seek(self.orig_archive_size) + fd, tmp_archive_name = tempfile.mkstemp() + fp_new = os.fdopen(fd, "w") + shutil.copyfileobj(fp_archive, fp_new) + fp_new.close() + fp_archive.truncate(self.orig_archive_size) + fp_archive.close() + assertEqualContent(archive, self.orig_archive, iszipped) + new_archive = tmp_archive_name + else: + new_archive = archive + if archivemail.options.no_compress: + fp_archive = open(new_archive, "r") + else: + fp_archive = FixedGzipFile(new_archive, "r") + mb = mailbox.UnixMailbox(fp_archive) + found = 0 + for msg in mb: + self.verify_maildir_has_msg(self.orig_maildir_obj, msg) + found += 1 + self.assertEqual(found, self.number_archived) + finally: + if tmp_archive_name: + os.remove(tmp_archive_name) + if fp_new is not None: + fp_new.close() + if fp_archive is not None: + fp_archive.close() + + def verify_maildir_has_msg(self, maildir, msg): + """Assert that the given maildir has a copy of the rfc822 message.""" + mid = msg['Message-Id'] # Complains if there is no message-id + mdir_msg_str, mdir_flags = \ + maildir.get_message_and_mbox_status(mid) + mbox_flags = set(msg.get('status', '') + msg.get('x-status', '')) + self.assertEqual(mdir_flags, mbox_flags) + + headers = filter(lambda h: msg.isheader(h) not in ('status', 'x-status'), + msg.headers) + headers = "".join(headers) + msg.rewindbody() + # Discard last mbox LF which is not part of the message. 
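+        # (Presumably this extra LF is the blank-line separator that
+        # archivemail's mbox writer appends after each message; the maildir
+        # copy of the message does not contain it.)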
+ body = msg.fp.read()[:-1] + msg_str = headers + os.linesep + body + self.assertEqual(mdir_msg_str, msg_str) + + def add_messages(self, body=None, headers=None, hours_old=0, messages=1): + for count in range(messages): + msg = make_message(body, default_headers=headers, mkfrom=False, + hours_old=hours_old) + self.orig_maildir_obj.write(msg, new=False) + + def make_maildir(self, mkold, mknew, body=None, headers=None, messages=1, + make_old_archive=False): + mailbox_does_change = not (archivemail.options.dry_run or + archivemail.options.copy_old_mail) + archive_does_change = not (archivemail.options.dry_run or + archivemail.options.delete_old_mail) + if mknew: + self.add_messages(body, headers, 179*24, messages) + if archive_does_change and archivemail.options.archive_all: + self.number_archived += messages + if mailbox_does_change: + self.remaining_msg = set(self.orig_maildir_obj.get_all_filenames()) + if mkold: + self.add_messages(body, headers, 181*24, messages) + if archive_does_change: + self.number_archived += messages + if not mailbox_does_change: + self.remaining_msg = set(self.orig_maildir_obj.get_all_filenames()) + self.maildir = copy_maildir(self.orig_maildir_obj.root) + if make_old_archive: + archive = archivemail.make_archive_name(self.maildir) + self.orig_archive = make_archive_and_plain_copy(archive) + # FIXME: .gz extension handling is a mess II + if not archivemail.options.no_compress: + archive += '.gz' + self.orig_archive_size = os.path.getsize(archive) + +class TestEmptyMaildir(TestCaseInTempdir): + def setUp(self): + super(TestEmptyMaildir, self).setUp() + archivemail.options.quiet = True + + def testEmpty(self): + """Archiving an empty maildir should not result in an archive.""" + self.mdir = SimpleMaildir() + archivemail.archive(self.mdir.root) + assert not os.path.exists(self.mdir.root + '_archive.gz') + + def tearDown(self): + super(TestEmptyMaildir, self).tearDown() + archivemail.options.quiet = False + +class TestMaildir(TestArchiveMailboxdir): + def setUp(self): + super(TestMaildir, self).setUp() + archivemail.options.quiet = True + + def testOld(self): + self.make_maildir(True, False, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testNew(self): + self.make_maildir(False, True, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testMixed(self): + self.make_maildir(True, True, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testMixedExisting(self): + self.make_maildir(True, True, messages=3, make_old_archive=True) + archivemail.archive(self.maildir) + self.verify() + + def tearDown(self): + archivemail.options.quiet = False + super(TestMaildir, self).tearDown() + + +class TestMaildirPreserveUnread(TestCaseInTempdir): + """Test if the preserve_unread option works with maildirs.""" + def setUp(self): + super(TestMaildirPreserveUnread, self).setUp() + archivemail.options.quiet = True + archivemail.options.preserve_unread = True + + def testOldRead(self): + """--preserve-unread archives old read messages in a maildir.""" + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*181) + smd.write(msg, new=False, flags='S') + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert archivemail.should_archive(msg_obj) + + def testOldUnread(self): + """--preserve-unread preserves old unread messages in a maildir.""" + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*181) + smd.write(msg, new=False) + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert not 
archivemail.should_archive(msg_obj)
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.preserve_unread = False
+        super(TestMaildirPreserveUnread, self).tearDown()
+
+class TestMaildirAll(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirAll, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.archive_all = True
+
+    def testNew(self):
+        """New maildir messages should be archived with --all"""
+        self.add_messages(hours_old=24*179)
+        md = mailbox.Maildir(self.orig_maildir_obj.root)
+        msg_obj = md.next()
+        assert archivemail.should_archive(msg_obj)
+
+    def testOld(self):
+        """Old maildir messages should be archived with --all"""
+        self.add_messages(hours_old=24*181)
+        md = mailbox.Maildir(self.orig_maildir_obj.root)
+        msg_obj = md.next()
+        assert archivemail.should_archive(msg_obj)
+
+    def tearDown(self):
+        super(TestMaildirAll, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.archive_all = False
+
+class TestMaildirDryRun(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirDryRun, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.dry_run = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'dry-run' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirDryRun, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.dry_run = False
+
+class TestMaildirDelete(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirDelete, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.delete_old_mail = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'delete' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new maildir mailbox with the 'delete' option"""
+        self.make_maildir(False, True)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirDelete, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.delete_old_mail = False
+
+class TestMaildirCopy(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirCopy, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.copy_old_mail = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'copy' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new maildir mailbox with the 'copy' option"""
+        self.make_maildir(False, True)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirCopy, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.copy_old_mail = False
+
+class TestArchiveMaildirFlagged(TestCaseInTempdir):
+    """make sure the 'include_flagged' option works with maildir messages"""
+    def setUp(self):
+        super(TestArchiveMaildirFlagged, self).setUp()
+        archivemail.options.include_flagged = False
+        archivemail.options.quiet = True
+
+    def testOld(self):
+        """by default, old flagged maildir messages should not be archived"""
+        smd = SimpleMaildir("orig")
+        msg = make_message(hours_old=24*181)
+        smd.write(msg, new=False, flags='F')
+        md = mailbox.Maildir(smd.root)
+        msg_obj = md.next()
+        assert not archivemail.should_archive(msg_obj)
+
+    def testIncludeFlaggedNew(self):
+        """new flagged maildir messages should not be archived with 
include_flagged""" + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*179) + smd.write(msg, new=False, flags='F') + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert not archivemail.should_archive(msg_obj) + + def testIncludeFlaggedOld(self): + """old flagged maildir messages should be archived with include_flagged""" + archivemail.options.include_flagged = True + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*181) + smd.write(msg, new=False, flags='F') + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert archivemail.should_archive(msg_obj) + + def tearDown(self): + super(TestArchiveMaildirFlagged, self).tearDown() + archivemail.options.include_flagged = False + archivemail.options.quiet = False + +class TestArchiveMaildirSize(TestCaseInTempdir): + """check that the 'size' argument works with maildir messages""" + def setUp(self): + super(TestArchiveMaildirSize, self).setUp() + archivemail.options.quiet = True + msg = make_message(hours_old=24*181) + self.msg_size = len(msg) + smd = SimpleMaildir("orig") + smd.write(msg, new=False) + md = mailbox.Maildir(smd.root) + self.msg_obj = md.next() + + def testSmaller(self): + """giving a size argument smaller than the maildir message""" + archivemail.options.min_size = self.msg_size - 1 + assert archivemail.should_archive(self.msg_obj) + + def testBigger(self): + """giving a size argument bigger than the maildir message""" + archivemail.options.min_size = self.msg_size + 1 + assert not archivemail.should_archive(self.msg_obj) + + def tearDown(self): + super(TestArchiveMaildirSize, self).tearDown() + archivemail.options.quiet = False + archivemail.options.min_size = None + +########## helper routines ############ + +def make_message(body=None, default_headers={}, hours_old=None, mkfrom=False, wantobj=False): + headers = copy.copy(default_headers) + if not headers: + headers = {} + headers['Message-Id'] = make_msgid() + if not headers.has_key('Date'): + time_message = time.time() - (60 * 60 * hours_old) + headers['Date'] = time.asctime(time.localtime(time_message)) + if not headers.has_key('From'): + headers['From'] = "sender@dummy.domain" + if not headers.has_key('To'): + headers['To'] = "receipient@dummy.domain" + if not headers.has_key('Subject'): + headers['Subject'] = "This is the subject" + if mkfrom and not headers.has_key('From_'): + headers['From_'] = "%s %s" % (headers['From'], headers['Date']) + if not body: + body = "This is the message body" + + msg = "" + if headers.has_key('From_'): + msg = msg + ("From %s\n" % headers['From_']) + del headers['From_'] + for key in headers.keys(): + if headers[key] is not None: + msg = msg + ("%s: %s\n" % (key, headers[key])) + msg = msg + "\n\n" + body + "\n\n" + if not wantobj: + return msg + fp = cStringIO.StringIO(msg) + return rfc822.Message(fp) + +def append_file(source, dest): + """appends the file named 'source' to the file named 'dest'""" + assert os.path.isfile(source) + assert os.path.isfile(dest) + read = open(source, "r") + write = open(dest, "a+") + shutil.copyfileobj(read,write) + read.close() + write.close() + + +def make_mbox(body=None, headers=None, hours_old=0, messages=1): + assert tempfile.tempdir + fd, name = tempfile.mkstemp() + file = os.fdopen(fd, "w") + for count in range(messages): + msg = make_message(body=body, default_headers=headers, + mkfrom=True, hours_old=hours_old) + file.write(msg) + file.close() + return name + +def make_archive_and_plain_copy(archive_name): + """Make an mbox archive of the given name like 
archivemail may have
+    created it. Also make an uncompressed copy of this archive and return its
+    name."""
+    copy_fd, copy_name = tempfile.mkstemp()
+    copy_fp = os.fdopen(copy_fd, "w")
+    if archivemail.options.no_compress:
+        fd = os.open(archive_name, os.O_WRONLY|os.O_EXCL|os.O_CREAT)
+        fp = os.fdopen(fd, "w")
+    else:
+        archive_name += ".gz"
+        fd = os.open(archive_name, os.O_WRONLY|os.O_EXCL|os.O_CREAT)
+        rawfp = os.fdopen(fd, "w")
+        fp = gzip.GzipFile(fileobj=rawfp)
+    for count in range(3):
+        msg = make_message(hours_old=24*360)
+        fp.write(msg)
+        copy_fp.write(msg)
+    fp.close()
+    copy_fp.close()
+    if not archivemail.options.no_compress:
+        rawfp.close()
+    return copy_name
+
+def copy_maildir(maildir, prefix="tmp"):
+    """Create a copy of the given maildir and return the absolute path of the
+    new directory."""
+    newdir = tempfile.mkdtemp(prefix=prefix)
+    for d in "cur", "new", "tmp":
+        shutil.copytree(os.path.join(maildir, d), os.path.join(newdir, d))
+    return newdir
+
+def assertEqualContent(firstfile, secondfile, zippedfirst=False):
+    """Verify that the two files exist and have identical content. If zippedfirst
+    is True, assume that firstfile is gzip-compressed."""
+    assert os.path.exists(firstfile)
+    assert os.path.exists(secondfile)
+    if zippedfirst:
+        try:
+            fp1 = gzip.GzipFile(firstfile, "r")
+            fp2 = open(secondfile, "r")
+            assert cmp_fileobj(fp1, fp2)
+        finally:
+            fp1.close()
+            fp2.close()
+    else:
+        assert filecmp.cmp(firstfile, secondfile, shallow=0)
+
+def cmp_fileobj(fp1, fp2):
+    """Return whether reading the file objects yields identical content."""
+    bufsize = 8192
+    while True:
+        b1 = fp1.read(bufsize)
+        b2 = fp2.read(bufsize)
+        if b1 != b2:
+            return False
+        if not b1:
+            return True
+
+if __name__ == "__main__":
+    unittest.main()
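
A note on the age values used throughout this suite: messages are created with
hours_old=24*179 or 24*181 so that they land just inside or just outside
archivemail's default cutoff of 180 days, which should_archive() applies unless
overridden (for example by --all or --date). The sketch below is illustrative
only and is not code taken from archivemail; it merely restates that boundary so
the 179/181 pairing above is easy to follow:

    import time

    SECONDS_PER_DAY = 24 * 60 * 60

    def is_past_cutoff(delivery_time, days=180):
        """Illustration: True if a Unix timestamp is more than 'days' days old."""
        return time.time() - delivery_time > days * SECONDS_PER_DAY

    # Mirrors the convention above: 181-day-old mail is archived, 179-day-old is kept.
    assert is_past_cutoff(time.time() - 181 * SECONDS_PER_DAY)
    assert not is_past_cutoff(time.time() - 179 * SECONDS_PER_DAY)

Individual test classes can be run by passing their names on the command line,
e.g. "python test_archivemail TestMaildirCopy", since unittest.main() picks up
test names from sys.argv.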