commit 62dc691279a0d0ee0a46a7b0bea6f65257e81f03
Author: Michael Krayer
Date:   Fri May 22 09:47:41 2020 +0200

    forked from git://archivemail.git.sourceforge.net/gitroot/archivemail/archivemail

diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 0000000..d0d4424
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,333 @@
+version 0.9.0 - 9 July 2011
+
+  * Fixed manpage installation path to be FHS compliant
+  * Speed up IMAP archiving with the --quiet option
+  * Ported the manpage from SGML to XML
+  * Fix test suite failures with Python 2.7. Closes: #3314293.
+  * IMAP: support international mailbox names containing non-ASCII characters.
+  * IMAP: handle broken servers sending no untagged SEARCH response.
+    Closes: #879716, #3213272.
+  * IMAP: support servers listening on non-standard ports. Closes: #3168416.
+
+version 0.8.2 - 16 October 2010
+
+  * IMAP: don't prepend NAMESPACE prefix to INBOX and its children.
+    Closes: #3083236.
+
+version 0.8.1 - 30 September 2010
+
+  * IMAP: fixed handling of LIST replies by the server where the mailbox name
+    is not a quoted string. (Thanks Karsten Müller)
+
+Version 0.8.0 - 9 August 2010
+
+  * Fixed date header parsing to be precise with timezone information. Also,
+    when writing From_ line timestamps, don't use UTC but local time, without
+    timezone information.
+  * To determine the delivery date of a message, archivemail now looks for the
+    timestamp of the latest 'Received' header before resorting to
+    'Resent-Date' or 'Date'. This should give much better results when there
+    is no 'Delivery-date' header, which is still checked first.
+    (Thanks Andrew Ferrier & Christian Brabandt)
+    Closes: #1481316, #1764855, Debian bug #272666.
+  * If present, the 'Resent-date' header now takes precedence over 'Date'.
+  * IMAP: recognize when a server advertises LOGINDISABLED.
+  * New option --debug-imap; this just sets imaplib.Debug, thereby dumping
+    archivemail's chat with the server to stdout.
+  * Fixed crash with Python 2.5 when archiving an empty maildir.
+    (Thanks "Aidant") Closes: #1878940.
+  * New option --all to archive all messages in a mailbox. Closes: #1764846.
+  * Fixed a crash when archiving maildirs with --days=0. (Thanks John Goerzen)
+  * IMAP: automatically add NAMESPACE prefix to a mailbox path if necessary.
+  * Removed the feature to setuid to the mailbox owners when run as root.
+    This was a bad idea; it's really hard to do safely, if at all possible.
+    Obsoletes: patch #2783134.
+  * Replaced some simple-minded file operation security checks with more
+    decent ones. This means we can safely operate in /tmp, for example. The
+    price is that we no longer accept symlinked files. Obsoletes: patch
+    #1874868.
+  * Don't use rename() to update mbox files and the archive, but write the
+    files directly. This is more fragile, but required for correct mbox
+    locking, and also for mboxes in mail spool directories where we don't have
+    permission to create files. It also means that if SELinux is enabled,
+    archivemail now preserves the SELinux security context of an mbox.
+    Closes: #2210732.
+  * Fixed the test suite to deal with nanosecond file timestamps. These are
+    provided by ext4 and XFS, for example. Closes: #2043900.
+  * Cleaned up the test suite, replacing a lot of duplicated code and avoiding
+    a lot of redundant testing. This speeds up the test suite by a factor of
+    15 or so.
+  * mbox locking got completely rewritten.
+    Switched from flock to lockf locking, which is NFS-safe and portable,
+    and we now lock with lockf first, then with a dotlock, instead of the
+    other way around. (This makes archivemail compatible with Debian
+    systems. ;)
+  * We now omit the dotlock if we don't have sufficient permissions to create
+    it in the mbox directory. (The file is still locked with lockf.)
+    Since we also no longer use rename() to commit changes to an mbox, (see
+    above) this means archivemail can now operate on mbox files in the system
+    mail spool. Closes: #855269.
+  * Refactoring of the mbox classes; much of the code got rewritten.
+  * The archive now also gets locked while archivemail updates it.
+  * Various Python language fixes (for example don't use "0" and "1" as
+    boolean constants).
+  * Added a lot of test cases for maildir archiving to the test suite.
+    Maildir testing should now be roughly on par with mbox testing.
+  * IMAP servers (Dovecot and UW-IMAP at least) may store mailbox metadata
+    for mboxes in a pseudo message. Such messages are now detected and never
+    archived. Obsoletes: patch #2210707. (Thanks, "tlhackque")
+  * New option --prefix, or short -p, to specify an archive name prefix. Like
+    a suffix specified with the --suffix option, the prefix is expanded with
+    strftime(). Specifying this option disables the default archive name
+    suffix. Obsoletes: feature request #604281. (Thanks Serafeim Zanikolas
+    for an initial patch)
+  * When archiving a mailbox with a leading dot in the name and with no archive
+    name prefix specified, archivemail no longer creates hidden archives, but
+    strips the dot off the archive name. In particular, this makes working
+    with Maildir++ subfolders more convenient. Closes: feature request
+    #604281.
+  * New option --archive-name, or short -a, to hard-code an archive filename.
+    Like the --suffix and --prefix options, it is expanded with strftime().
+    This option conflicts with archiving multiple mailboxes. Closes: feature
+    request #1306538.
+  * archivemail now expands wildcards in IMAP mailbox names. For example, the
+    URL imaps://user@server/foo/* will expand to all subfolders of foo.
+    Closes: feature request #1978540. Obsoletes: patch #1918937.
+
+Version 0.7.2 - 9 November 2007
+
+  * IMAP: fixed crash by working around python bug #1277098, which is still
+    unfixed in Python versions before 2.5.
+
+Version 0.7.1 - 7 November 2007
+
+  * Fixed incompatibility with Python 2.5 which broke Maildir handling.
+    Closes: #1670422
+  * Username and password in IMAP URLs can now be double-quoted, so it should
+    no longer be a problem if they contain delimiters like the '@' character.
+    Closes: #1640878
+  * Invalid messages/files in Maildirs caused archivemail to silently stop
+    processing mails and claim it's all done. Now skip these and go ahead.
+    (Thanks Elan Ruusamäe for tracking this down.) Closes: #1783369.
+    (The Debian package has a different fix for this problem since 0.6.1-4,
+    closing Debian bugs #255944 and #305902.)
+  * Fixed IMAP message flag conversion which was completely broken. (Thanks
+    Christian Brabandt) Closes: Debian bug #434807
+  * New option --copy: archive mail, but don't delete it from the mailbox.
+    This is mainly useful for testing purposes, and complements --delete.
+    Closes: #981865, #988803, #1764851, Debian bug #434798
+  * If running as root, only switch the effective uid and gid back if we have
+    actually switched them before.
+    Closes: #1762907
+  * The automatic seteuid feature of archivemail is insecure and thus
+    deprecated; it will be removed from later versions.
+  * Expand tilde in argument of long option --pwfile. (Thanks Christian
+    Brabandt) Closes: Debian bug #434813
+  * archivemail now accepts --days=0
+  * Fixed crash if --warn-duplicate is used with IMAP (Thanks Christian
+    Brabandt) Closes: Debian bug #434786 (the Debian package already has a fix)
+  * When converting from other formats to mbox, archivemail used to preserve
+    existing 'Status' and 'X-Status' mbox headers; these are now silently
+    overwritten to ensure they have correct values.
+  * IMAP: if selecting the mailbox fails, archivemail detects the server's
+    mailbox hierarchy delimiter, replaces slashes in the mailbox name with the
+    delimiter and tries again. Closes: #1826757, Debian bug #368112
+
+Version 0.7.0 - 2 November 2006
+  * Fixed long options --filter-append and --pwfile to accept their arguments.
+    Closes: #1555935
+  * Fixed From_ line generation to actually look for the 'Return-path' and
+    'From' headers. Closes: #1555797
+  * Fixed IMAP authentication/URL parsing, which wasn't working at all in
+    v0.6.2. Require username encoded in URL, but be flexible with the password:
+    handle both --pwfile and URL-encoded password, and fall back to querying
+    the user if neither is present. Closes: #1555951
+  * Convert on-the-wire CRLF to native EOL when saving messages from an IMAP
+    folder. Closes: #1555803
+  * Updated man page. This also addresses #1555951
+  * Fixed unittest failure by updating --suffix testcase. Based on analysis by
+    Peter Poeml. Thanks, Peter.
+  * Fixed invalid IMAP filter string for large messages (--size option).
+    (Thanks to the anonymous bug reporter) Closes: #863813
+  * Fixed IMAP --dry-run so it doesn't download all messages that would be
+    archived.
+  * Fixed IMAP --delete which didn't work at all. (Thanks Anand)
+    Closes: Debian bug #203282
+  * Terminate each message in newly written mbox with an empty line if the
+    message source is not an mbox-format folder. (Thanks Chung-chieh Shan)
+    Closes: Debian bug #250410
+  * Mangle From_ in message body if the message source is not an mbox-format
+    folder. (Thanks Chung-chieh Shan) Closes: Debian bug #250402
+  * Added new option --dont-mangle to turn off From_ mangling (the sketch
+    below illustrates what gets mangled).
+  * Bumped Python dependency to version 2.3.
+  * Fixed unittest TestMboxExclusiveLock which failed on Solaris. (Thanks Paul
+    Rodger) Closes: #904652
+  * Fixed unsafe creation of temporary files in the test suite.
+    This addresses Debian bug #385253, and reading the BTS log, it seems this
+    issue was assigned CVE-2006-4245, although I cannot find any further
+    reference to that CVE. Note that the bug was initially reported to affect
+    archivemail itself, too. This is not correct. (Thanks Joey Hess)
+    Closes: Debian bug #385253
+  * Fixed cleanup of temporary files after test suite failures.
+  * Fixed dotlocking race condition.
+  * Stats are now working with IMAP.
+  * Stats now report the total size of a mailbox and of the archived messages
+    therefrom.
+  * Always barf if the archive destination directory is world-writable.
+  * Distributing man page with the tarball again and fixed distutils setup.
+    Closes: #1574720 (Thanks Grant Goodyear)
+  * Improved IMAP error reporting
+
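A note on the From_ mangling mentioned in the 0.7.0 entries above: in mbox
folders a line beginning with "From " marks the start of the next message, so
when a message from a non-mbox source is written out, such lines in the body
have to be quoted. The sketch below only illustrates the idea -- the helper
name is made up -- but the regular expression is the one the archivemail
script itself compiles near its top, and --dont-mangle disables the behaviour.

    import re

    # Same pattern the archivemail script defines near its top:
    # a body line that begins with "From ".
    from_re = re.compile(r'^From ', re.MULTILINE)

    def mangle_from(body):
        # Quote the line so it cannot be mistaken for an mbox message
        # separator when the message is written to an mbox archive.
        return from_re.sub('>From ', body)

    print(mangle_from("Hi,\nFrom here on it gets interesting.\n"))
    # Hi,
    # >From here on it gets interesting.
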
+Version 0.6.2 - 27 June 2006
+  * add -F/--filter-append option to append an arbitrary string to the IMAP
+    filter string
+  * don't delete more than a certain number of messages at a time. The
+    maximum command length is limited. Fixes bug 942403 (Archiving large
+    IMAP folders fails)
+  * IMAP: try CRAM-MD5 login first, if that fails fall back to plain login
+  * add SSL support per imaps URL (after patch by Tobias Gruetzmacher)
+  * add -P/--pwfile option to supply the IMAP password, so it does not end up
+    in the shell history
+  * Fix SyntaxWarning: assignment to None (bug #843890)
+  * Use the archive cut date rather than the actual time with the --suffix
+    option. (Thanks Manuel Estrada Sainz)
+
+Version 0.6.1 - 31 October 2002
+  * Removed a test rule asserting that we could archive messages older than
+    the Unix epoch. Newer versions of python now give an overflow error
+    calling mktime() on dates < 1970 instead of returning a negative number.
+
+Version 0.6.0 - 3 October 2002
+  * Added IMAP mailbox support. (Thanks Mark Roach)
+
+Version 0.5.1 - 18 September 2002
+  * Fixed a bug where, when running archivemail as root on a non-root mailbox,
+    the temporary container directory would be created as root before the
+    seteuid() took place. (Thanks Jay Hesselberth)
+
+Version 0.5.0 - 15 September 2002
+  * Fixed a bug where mailbox locking would fail under Solaris. (Thanks Mark
+    Sheller)
+  * Fixed a bug where archiving maildir mailboxes without a 'Received Date' or
+    'From' header would fail. (Thanks Hugo van der Merwe)
+  * Fixed yet another bug where messages dated on the Unix epoch would fail.
+
+Version 0.4.9 - 18 August 2002
+  * Fixed a bug where an exception was thrown if a message was dated exactly
+    on the Unix epoch.
+  * Fixed a bug where trailing slashes on the end of maildir/MH mailbox
+    arguments were being used in the archive name.
+
+Version 0.4.8 - 20 May 2002
+  * Call mkdir() to create a container directory in which we can place any
+    created tempfiles
+
+Version 0.4.7 - 9 May 2002
+  * Fixed a bug where archivemail would abort if it received a date header
+    with a completely blank value.
+
+Version 0.4.6 - 6 May 2002
+  * Fixed a bug where the os.rename() calls could fail if we were moving
+    temporary files across different filesystems/partitions.
+
+Version 0.4.5 - 29 April 2002
+  * Fixed a bug where if you used the '--delete' option to completely clean
+    an mbox mailbox you would get a python error.
+  * Added a lot more testing to test_archivemail.py (test_archivemail.py is
+    now 37% bigger than archivemail -- scary)
+  * Added a new '--size' option which allows you to only archive messages
+    over a certain size.
+
+Version 0.4.4 - 27 April 2002
+  * Fixed a bug where the long --suffix option was not working (although the
+    short option, '-s' was).
+  * Added time-based format directives to the --suffix option, so that you
+    can do things like specify --suffix='%B%Y' to create archives named
+    after the current month and year (see the sketch below)
+  * Added some more tests to test_archivemail.py
+
+Version 0.4.3 - 26 April 2002
+  * Fixed a couple of bugs where I was using python version 2.2 syntax that
+    was giving errors in python v2.0 and v2.1.
+  * Changed the python requirements for the test script from python version
+    2.0 to version 2.1. This is because the unittest module is only available
+    in version 2.1 and above.
+
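The --suffix handling described in the 0.4.4 entry above (and refined in
0.6.2, which bases the expansion on the archive cut date) boils down to a
single strftime() pass over the user-supplied string. A rough sketch, with a
made-up helper name and the default 180-day cut-off; the real naming logic
also involves the default '_archive' suffix and the later --prefix and
--archive-name options.

    import time

    def expand_suffix(suffix, cut_time):
        # The suffix is passed through strftime(), so '%B%Y' expands to
        # something like 'April2002'.  Since 0.6.2 the expansion uses the
        # archive cut date rather than the time archivemail happens to run.
        return time.strftime(suffix, time.localtime(cut_time))

    cut = time.time() - 180 * 24 * 60 * 60   # default cut-off: 180 days ago
    print("linux-kernel" + expand_suffix("_%B%Y", cut))
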
+Version 0.4.2 - 24 April 2002
+  * Added the ability to archive messages older than a given absolute date
+    with the new option '--date'.
+  * Fixed a bug where archivemail would complain about messages older than
+    1970. Yes, someone had a 'Date' header with 1967 :)
+  * Complain if the mailbox to be read does not look like a valid mbox-format
+    mailbox.
+  * Added a few more tests to test_archivemail.py
+
+Version 0.4.1 - 21 April 2002
+  * Don't archive messages that are flagged important unless we are given the
+    --include-flagged option.
+  * Fixed a bug where, when archiving messages from maildir mailboxes, we were
+    not preserving the status information contained in the filename suffix as
+    Status and X-Status mbox headers. This means we forgot whether we had
+    read or replied to the message.
+  * We now complain if an mbox-format mailbox that is being read changes in
+    size -- this should not happen, since we have locked these files, but it
+    is a good sanity check.
+  * Changed from the mailbox.PortableUnixMailbox class to the
+    mailbox.UnixMailbox class for reading mbox mailboxes. This fixes bugs
+    where unquoted 'From ' lines in the body of messages were confusing
+    archivemail.
+
+Version 0.4.0 - 17 April 2002
+  * Added an option --no-compress to make archives but not compress them with
+    gzip.
+  * Added an option --preserve-unread to not archive unread messages.
+  * Added a few more unittests.
+
+Version 0.3.2 - 13 April 2002
+  * Added a lot more information to the manpage, including examples and
+    notes.
+  * Fixed up the README file and archivemail usage message.
+  * Added an example archivemail shell script that I run from crontab.
+
+Version 0.3.1 - 12 April 2002
+  * Stopped calling 'gzip' externally and started using the gzip library
+    so that we can append to a copy of the gzip archive directly
+    (illustrated in a sketch below).
+  * Removed 'bzip2' and 'compress' options since they are increasing
+    complexity without adding much, and needed to be called externally.
+    Maybe when python gets a bzip2 library I will add back an option to
+    compress archives using bzip2.
+  * Included a man page & sgml docbook source.
+
+Version 0.3.0 - 11 April 2002
+  * We now preserve the last-accessed and last-modified timestamps correctly
+  * We now preserve the correct permissions on the original mailbox instead
+    of always mode 600
+  * Fixed a bug where lockfiles were being created that were not
+    world-readable
+  * Made archivemail work better when used as a python module so it can
+    integrate better with unittest. (... although I still distribute it
+    without the .py extension - dodgy?)
+  * Bundled a unit-testing script for archivemail
+  * Started using a distutils 'setup.py' script for installation.
+
+Version 0.2.1 - 4 April 2002
+  * Since we might not have a parse-able 'Date-Received' or 'Date' field,
+    use 5 different ways to guess the date of a message (a simplified sketch
+    follows this entry).
+  * Removed the '--use-mtime' option since it is obsolete -- we will always
+    use the file modification time for the message if other date-parsing
+    methods fail.
+  * Check to see if we are running as root -- if so, change our
+    effective userid and groupid to that of the original mailbox. This will
+    make sure any archives or tempfiles we write have the same ownership and
+    will allow the root user to do "archivemail /var/spool/mail/*"
+  * Fixed a bug where if you ran 'archivemail.py foo/mbox' it would create
+    the archive file in the current directory instead of the directory 'foo'.
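The date guessing from the 0.2.1 entry above (refined again in 0.8.0, which
prefers the timestamp of the latest 'Received' header) is essentially a chain
of fallbacks over message headers, ending at the mailbox's own modification
time. The sketch below is a simplified stand-in for the script's
guess_delivery_time() -- it skips the 'Received' parsing -- and uses the
Python 2 rfc822 module that the script itself imports.

    import os
    import rfc822   # Python 2 module, also imported by the archivemail script

    def guess_msg_time(msg, mailbox_path):
        # Simplified preference order; the real code also extracts the
        # timestamp of the latest 'Received' header before falling back
        # to 'Resent-Date' and 'Date'.
        for hdr in ('Delivery-date', 'Resent-Date', 'Date'):
            value = msg.getheader(hdr)
            if value:
                parsed = rfc822.parsedate_tz(value)
                if parsed:
                    return rfc822.mktime_tz(parsed)
        # Last resort: the modification time of the mailbox file itself.
        return os.path.getmtime(mailbox_path)
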
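The gzip handling mentioned in the 0.3.1 entry above (and in the README's
overview later in this patch) relies on the fact that gzip members can simply
be concatenated: opening the archive in append mode and writing another
compressed chunk still yields a file that gunzip and zcat read as a single
stream. The helper name, file name and address below are made up for the
example, and Python 2 string semantics are assumed; the real program works
through temporary mboxes and locking, as the archivemail script itself shows.

    import gzip

    def append_to_archive(archive_path, mbox_text):
        # 'ab' appends a further gzip member to the existing archive.
        archive = gzip.open(archive_path, 'ab')
        try:
            archive.write(mbox_text)
        finally:
            archive.close()

    append_to_archive('linux-kernel_archive.gz',
                      'From alice@example.com Sat Apr 13 12:00:00 2002\n'
                      'Subject: an old message\n'
                      '\n'
                      'body text\n'
                      '\n')
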
+ +Version 0.2.0 - 3 April 2002 + * Added support for reading from MH mailboxes + * Refuse to proceed if we would be making tempfiles in world-writable + directories + * Clamped down with lots of assert()s checking function parameters + * Complain earlier if we do not have permission to write to the output + directory + * Use the 'Date' field of a message when constructing the 'From_' line + from a maildir/MH mailbox if there is no 'Delivery-Date' field. + +Version 0.1.0 - 31 March 2002 + * Initial release diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..86fd703 --- /dev/null +++ b/COPYING @@ -0,0 +1,341 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. 
These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. 
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
diff --git a/FAQ b/FAQ
new file mode 100644
index 0000000..9fa2f29
--- /dev/null
+++ b/FAQ
@@ -0,0 +1,15 @@
+
+1. Why doesn't archivemail support bzip2 compression in addition to gzip?
+-------------------------------------------------------------------------
+
+The bzip2 module in Python 2.x is not fully compatible with the gzip module,
+and cannot be used with the current implementation of compressed mailbox
+support in archivemail. See Python feature request #5863 for details.
+
+2. Can you add a switch to archive mailboxes greater than a certain size?
+-------------------------------------------------------------------------
+
+If you are using mbox format mailboxes, use the find(1) command instead; it
+is more flexible:
+
+    find $HOME/Mail -type f !
-name '*archive*' + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..886a863 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include CHANGELOG +include COPYING +include FAQ +include MANIFEST +include TODO +include NEWS +include archivemail.1 +include archivemail.xml +include db2man.xsl +graft examples +include test_archivemail diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..57bad9c --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +VERSION=$(shell python setup.py --version) +VERSION_TAG=v$(subst .,_,$(VERSION)) +TARFILE=archivemail-$(VERSION).tar.gz +HTDOCS=htdocs-$(VERSION) + +default: + @echo "no default target" + +clean: + rm -rf $(HTDOCS) + +test: + python test_archivemail + +clobber: clean + rm -rf build dist + rm -f $(HTDOCS).tgz + + +sdist: clobber doc + python setup.py sdist + +tag: + git tag -a $(VERSION_TAG) + +doc: archivemail.1 archivemail.html + +htdocs: $(HTDOCS).tgz +$(HTDOCS).tgz: index.html archivemail.html RELNOTES style.css manpage.css + install -d -m 775 $(HTDOCS) + install -m 664 $^ $(HTDOCS) + cd $(HTDOCS) && mv archivemail.html manpage.html + tar czf $(HTDOCS).tgz $(HTDOCS) + +archivemail.1: archivemail.xml db2man.xsl + xsltproc db2man.xsl archivemail.xml + +archivemail.html: archivemail.xml db2html.xsl + xsltproc --output archivemail.html \ + db2html.xsl archivemail.xml + tidy -modify -indent -f /dev/null archivemail.html || true + +.PHONY: default clean test clobber sdist tag doc htdocs diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..8c84fdd --- /dev/null +++ b/NEWS @@ -0,0 +1,44 @@ +Notable changes in archivemail 0.9.0: + + * IMAP: support for international mailbox names containing non-ASCII + characters. + +Notable changes in archivemail 0.8.0: + + * Removed the feature to setuid to the mailbox owners when run as root. + This was a bad idea; it's really hard to do safely, if at all possible. + Obsoletes: patch #2783134. + * New option --archive-name, or short -a, to hard-code an archive filename. + Like the --suffix and --prefix options, it is expanded with strftime(). + This option conflicts with archiving multiple mailboxes. Closes: feature + request #1306538. + * New option --prefix, or short -p, to specify an archive name prefix. Like + a suffix specified with the --suffix option, the prefix is expanded with + strftime(). Specifying this option disables the default archive name + suffix. Obsoletes: feature request #604281. (Thanks Serafeim Zanikolas + for an initial patch) + * New option --all to archive all messages in a mailbox. Closes: #1764846. + * archivemail now expands wildcards in IMAP mailbox names. For example, the + url imaps://user@server/foo/* will expand to all subfolders of foo. + Closes: feature request #1978540. Obsoletes: patch #1918937. + * To determine the delivery date of a message, archivemail now looks for the + timestamp of the latest 'Received' header before resorting to + 'Resent-Date' or 'Date'. This should give much better results when there + is no 'Delivery-date' header, which is still checked first. + (Thanks Andrew Ferrier & Christian Brabandt) + Closes: #1481316, #1764855, Debian bug #272666. + * We now omit the dotlock if we don't have sufficient permissions to create + it in the mbox directory. (The file is still locked with lockf.) + Together with more changes under the hood (see below) this means + archivemail can now operate on mbox files in the system mail spool. + Closes: #855269. 
+  * Replaced some simple-minded file operation security checks with more
+    decent ones. This means we can safely operate in /tmp, for example. The
+    price is that we no longer accept symlinked files. Obsoletes: patch
+    #1874868.
+  * The archive now also gets locked while archivemail updates it.
+  * mbox locking got completely rewritten. Switched from flock to lockf
+    locking, which is NFS-safe and portable, and we now lock with lockf first,
+    then with a dotlock, instead of the other way around. (This makes
+    archivemail compatible with Debian systems. ;)
+
diff --git a/README b/README
new file mode 100644
index 0000000..cbb3fb4
--- /dev/null
+++ b/README
@@ -0,0 +1,63 @@
+
+-----------------------------------------------------------
+archivemail - archive and compress old mail in your mailbox
+-----------------------------------------------------------
+
+OVERVIEW:
+
+archivemail is a tool written in python(1) for archiving and compressing old
+email in mailboxes.
+
+It can move messages older than the specified number of days to a separate
+'archive' mbox-format mailbox that is compressed with 'gzip'.
+
+For example, have you been subscribing to the 'linux-kernel' mailing list
+for the last 6 years and ended up with a 160-meg mailbox that 'mutt' is
+taking a long time to load? 'archivemail' can move all messages that are
+older than 6 months to a separate compressed mailbox, and leave you with
+just the most recent messages.
+
+'archivemail' can save a lot of disk space and will significantly reduce
+overhead on your mail reader. The number of days before mail is considered
+'old' is up to you, but the default is 180 days. You can also archive messages
+by an absolute date or only archive unread messages.
+
+
+REQUIREMENTS:
+
+archivemail requires python version 2.3 or greater. It also uses some
+optional python modules, but these should be pretty much standard; if you get
+an ImportError nonetheless, please report it, thanks. (For contact addresses
+see below.)
+
+Python is available from http://www.python.org/
+
+
+INSTALLATION:
+
+If you want to test archivemail:
+    python test_archivemail
+
+To install archivemail, run:
+    python setup.py install
+
+
+USE:
+
+For more detailed information and examples, look at the archivemail man page.
+
+The best way to run archivemail is from cron, giving the '-q' option to
+archivemail to make it quiet, only printing messages if something went wrong.
+Check out the 'examples' directory for an example shell script to be run from
+cron.
+
+The archivemail website is at: http://archivemail.sourceforge.net/
+
+If you have any feedback or bug reports about archivemail, you are very
+welcome to email the maintainers; as of November 2006, these are:
+
+Nikolaus Schulz
+Peter Poeml
+
+
+-- Paul Rodger, archivemail author
+   Updated by: Nikolaus Schulz, maintainer
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..e6950ed
--- /dev/null
+++ b/TODO
@@ -0,0 +1,103 @@
+Integrate --debug-imap option into yet-to-be-implemented -vv switch?
+I had the idea to provide separate debugging info levels anyway, see --debug
+below.
+
+Gracefully close IMAP connection upon unexpected error (currently archivemail
+just terminates).
+
+LOCKING & Co:
+* Block signals while writing changed mailbox back.
+* Double-check the entire locking code.
+
+Seems like existing archives are not read or validated in any way. New archive
+data is blindly appended... Probably okay, but should be documented.
+
+IMAP SEARCH BEFORE disregards time and timezone information.
+This should at least be documented. E.g. I've found that '-d 0' didn't match
+all messages in an IMAP mailbox. This is because the SEARCH key is
+(BEFORE 14-Nov-2007) on 14 November, not matching messages that arrived today.
+(This problem is probably fixed for most use cases by the --all option.)
+(A small sketch of how the BEFORE key is built follows this file.)
+
+Document mbox format issues: link to
+http://homepages.tesco.net/~J.deBoynePollard/FGA/mail-mbox-formats.html,
+qmail mbox manpage, Debian manpage, RFC 4155. Document what mbox format we can
+read, and what we write.
+FIXME: we cannot yet parse rfc 2822 addr-spec stuff like quoted local-parts in
+return-path addresses.
+
+Minor annoyance: when a From_ line is generated, guess_delivery_time() reports
+the used date header a second time.
+
+Check sf.net and Debian BTS for new bugs. Again.
+
+IMAP: ensure mailbox archives are properly named. Currently IMAP folder names
+are mapped like this:
+
+    IMAP URL    | resulting mbox_archive
+    ------------+------------------------
+    test.box    | test.box_archive.gz
+    test/box    | box_archive.gz
+
+
+Implement --include-draft. But before, think about it again. (This is feature
+request #1569305.)
+
+Implement a fallback if an IMAP server doesn't support SEARCH. (Ouch!)
+
+Add IMAP tests to the testsuite (upload test messages with IMAP "APPEND
+date-string"). This should be done without any real network I/O.
+
+Try to port archivemail to email.message and the new mailboxes in Python 2.5.
+Are these flexible enough for our needs?
+
+Lay out what we want with respect to multiple selection criteria.
+Some make sense, but this easily gets too complex, and if nothing else it is
+a hassle to add all the options. Hm.
+
+Reject patch #1036022 "Added option to inverse date compare" after cooling down
+because the patch is both stupid (copy+paste code) and broken. Don't see why
+anyone should want this/we should support it.
+If this is reasonable *at all*, I think we'd better go for all the complexity
+to honour _two_ cut-off dates (see Debian bug "#184124: archivemail: -D and -d
+should not be incompatible", which is a comparably half-baked thought).
+
+Add --debug or -vv switch, and move the printing of diagnostic info for each
+message to --debug.
+
+Perhaps add some more nice stuff like printing of subject, sender...
+See tracker #868714 "added stats option to archivemail", which has a point.
+Message-Ids are useful for diagnosis, but not very nice to read for humans.
+
+Be a nicer citizen with respect to mailbox locking.
+
+Perhaps prune/shorten IMAP mailbox URLs in messages?
+They may be quite long and may contain the sensitive password.
+Also shows up in the process list...
+Perhaps find a clean, lean replacement for all that clutter in the IMAP URLs.
+
+Require --output-dir for IMAP archiving? Otherwise we just drop the archive
+in the current working directory.
+
+Check all items below, which are from the original author. :-)
+
+.archivemailrc support
+
+When you get a file-not-found in the 6th mailbox of 10, it aborts the whole
+run. Better to fail gracefully and keep going.
+
+Add more tests (see top of test_archivemail)
+
+We need some better checking to see if we are really looking at a valid
+mbox-format mailbox.
+
+Add an option to not cut threads.
+
+Add MMDF mailbox support
+
+Add Babyl mailbox support
+
+Add option to archive depending on mailbox size threshold
+- is this a good idea?
+
+Add option to archive depending on number of messages
+- is this a good idea?
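To make the SEARCH BEFORE note above concrete: the BEFORE key carries only a
date, so the server compares whole days of a message's internal date and
ignores the time of day and the timezone (RFC 3501). That is why '-d 0'
misses messages that arrived earlier on the same day. Below is a minimal
sketch of how such a key can be built; the helper name is made up, and
archivemail's real filter string also folds in --size and --filter-append.

    import time

    def before_search_key(days_old):
        # Only the date part reaches the server; hours, minutes and timezone
        # are never sent.  %b assumes an English locale, since IMAP expects
        # month abbreviations such as 'Nov'.
        cutoff = time.time() - days_old * 24 * 60 * 60
        return "(BEFORE %s)" % time.strftime("%d-%b-%Y", time.localtime(cutoff))

    print(before_search_key(0))   # on 14 November 2007: '(BEFORE 14-Nov-2007)'
    # A server-side search would then look like:
    #   typ, data = imap_connection.search(None, before_search_key(180))
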
diff --git a/archivemail b/archivemail new file mode 100755 index 0000000..26b9aca --- /dev/null +++ b/archivemail @@ -0,0 +1,1951 @@ +#! /usr/bin/env python +############################################################################ +# Copyright (C) 2002 Paul Rodger , +# (C) 2006 Peter Poeml , +# (C) 2006-2010 Nikolaus Schulz +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +############################################################################ +""" +Archive and compress old mail in mbox, MH or maildir-format mailboxes. +Website: http://archivemail.sourceforge.net/ +""" + +# global administrivia +__version__ = "archivemail v0.9.0" +__copyright__ = """\ +Copyright (C) 2002 Paul Rodger + (C) 2006 Peter Poeml , + (C) 2006-2011 Nikolaus Schulz +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.""" + +import sys + +def check_python_version(): + """Abort if we are running on python < v2.3""" + too_old_error = "This program requires python v2.3 or greater. " + \ + "Your version of python is:\n%s""" % sys.version + try: + version = sys.version_info # we might not even have this function! :) + if (version[0] < 2) or (version[0] == 2 and version[1] < 3): + print too_old_error + sys.exit(1) + except AttributeError: + print too_old_error + sys.exit(1) + +# define & run this early +# (IMAP over SSL requires Python >= 2.3) +check_python_version() + +import fcntl +import getopt +import gzip +import mailbox +import os +import pwd +import re +import rfc822 +import shutil +import signal +import stat +import string +import tempfile +import time +import urlparse +import errno +import socket +import locale + +# From_ mangling regex. +from_re = re.compile(r'^From ', re.MULTILINE) +imapsize_re = re.compile(r'^(?P[0-9]+) \(RFC822\.SIZE (?P[0-9]+)\)') + +userencoding = locale.getpreferredencoding() + +############## class definitions ############### + +class ArchivemailException(Exception): + pass +class UserError(ArchivemailException): + pass +class UnexpectedError(ArchivemailException): + pass +class LockUnavailable(ArchivemailException): + pass + +class Stats: + """Class to collect and print statistics about mailbox archival""" + __archived = 0 + __archived_size = 0 + __mailbox_name = None + __archive_name = None + __start_time = 0 + __total = 0 + __total_size = 0 + + def __init__(self, mailbox_name, final_archive_name): + """Constructor for a new set of statistics. 
+ + Arguments: + mailbox_name -- filename/dirname of the original mailbox + final_archive_name -- filename for the final 'mbox' archive, without + compression extension (eg .gz) + + """ + assert mailbox_name + assert final_archive_name + self.__start_time = time.time() + self.__mailbox_name = mailbox_name + self.__archive_name = final_archive_name + ".gz" + + def another_message(self, size): + """Add one to the internal count of total messages processed + and record message size.""" + self.__total = self.__total + 1 + self.__total_size = self.__total_size + size + + def another_archived(self, size): + """Add one to the internal count of messages archived + and record message size.""" + self.__archived = self.__archived + 1 + self.__archived_size = self.__archived_size + size + + def display(self): + """Print statistics about how many messages were archived""" + end_time = time.time() + time_seconds = end_time - self.__start_time + action = "archived" + if options.delete_old_mail: + action = "deleted" + if options.dry_run: + action = "I would have " + action + print "%s:\n %s %d of %d message(s) (%s of %s) in %.1f seconds" % \ + (self.__mailbox_name, action, self.__archived, self.__total, + nice_size_str(self.__archived_size), + nice_size_str(self.__total_size), time_seconds) + + +class StaleFiles: + """Class to keep track of files to be deleted on abnormal exit""" + dotlock_files = [] # dotlock files for source mbox and final archive + temp_mboxes = [] # temporary retain and archive mboxes + temp_dir = None # our tempfile directory container + + def clean(self): + """Delete any temporary files or lockfiles that exist""" + while self.dotlock_files: + dotlock = self.dotlock_files.pop() + vprint("removing stale dotlock file '%s'" % dotlock) + try: + os.remove(dotlock) + except (IOError, OSError): pass + while self.temp_mboxes: + mbox = self.temp_mboxes.pop() + vprint("removing stale temporary mbox '%s'" % mbox) + try: + os.remove(mbox) + except (IOError, OSError): pass + if self.temp_dir: + vprint("removing stale tempfile directory '%s'" % self.temp_dir) + try: + os.rmdir(self.temp_dir) + except OSError, e: + if e.errno == errno.ENOTEMPTY: # Probably a bug + user_warning("cannot remove temporary directory '%s', " + "directory not empty" % self.temp_dir) + except IOError: pass + else: self.temp_dir = None + + + +class Options: + """Class to store runtime options, including defaults""" + archive_prefix = None + archive_suffix = None + archive_default_suffix = "_archive" + archive_name = None + days_old_max = 180 + date_old_max = None + delete_old_mail = False + dry_run = False + filter_append = None + include_flagged = False + locking_attempts = 5 + lockfile_extension = ".lock" + lock_sleep = True + no_compress = False + only_archive_read = False + output_dir = None + pwfile = None + preserve_unread = False + mangle_from = True + quiet = False + read_buffer_size = 8192 + script_name = os.path.basename(sys.argv[0]) + min_size = None + verbose = False + debug_imap = 0 + warn_duplicates = False + copy_old_mail = False + archive_all = False + + def parse_args(self, args, usage): + """Set our runtime options from the command-line arguments. + + Arguments: + args -- this is sys.argv[1:] + usage -- a usage message to display on '--help' or bad arguments + + Returns the remaining command-line arguments that have not yet been + parsed as a string. 
+ + """ + try: + opts, args = getopt.getopt(args, '?D:S:Vd:hno:F:P:qs:p:a:uv', + ["date=", "days=", "delete", "dry-run", "help", + "include-flagged", "no-compress", "output-dir=", + "filter-append=", "pwfile=", "dont-mangle", + "preserve-unread", "quiet", "size=", "suffix=", + "prefix=", "archive-name=", "verbose", + "debug-imap=", "version", "warn-duplicate", + "copy", "all"]) + except getopt.error, msg: + user_error(msg) + + archive_by = None + + for o, a in opts: + if o == '--delete': + if self.copy_old_mail: + user_error("found conflicting options --copy and --delete") + self.delete_old_mail = True + if o == '--include-flagged': + self.include_flagged = True + if o == '--no-compress': + self.no_compress = True + if o == '--warn-duplicate': + self.warn_duplicates = True + if o in ('-D', '--date'): + if archive_by: + user_error("you cannot specify both -d and -D options") + archive_by = "date" + self.date_old_max = self.date_argument(a) + if o in ('-d', '--days'): + if archive_by: + user_error("you cannot specify both -d and -D options") + archive_by = "days" + self.days_old_max = string.atoi(a) + if o in ('-o', '--output-dir'): + self.output_dir = os.path.expanduser(a) + if o in ('-P', '--pwfile'): + self.pwfile = os.path.expanduser(a) + if o in ('-F', '--filter-append'): + self.filter_append = a + if o in ('-h', '-?', '--help'): + print usage + sys.exit(0) + if o in ('-n', '--dry-run'): + self.dry_run = True + if o in ('-q', '--quiet'): + self.quiet = True + if o in ('-s', '--suffix'): + self.archive_suffix = a + if o in ('-p', '--prefix'): + self.archive_prefix = a + if o in ('-a', '--archive-name'): + self.archive_name = os.path.expanduser(a) + if o in ('-S', '--size'): + self.min_size = string.atoi(a) + if o in ('-u', '--preserve-unread'): + self.preserve_unread = True + if o == '--dont-mangle': + self.mangle_from = False + if o in ('-v', '--verbose'): + self.verbose = True + if o == '--debug-imap': + self.debug_imap = int(a) + if o == '--copy': + if self.delete_old_mail: + user_error("found conflicting options --copy and --delete") + self.copy_old_mail = True + if o == '--all': + self.archive_all = True + if o in ('-V', '--version'): + print __version__ + "\n\n" + __copyright__ + sys.exit(0) + return args + + def sanity_check(self, args): + """Complain bitterly about our options now rather than later""" + if self.output_dir: + check_sane_destdir(self.output_dir) + if self.days_old_max < 0: + user_error("--days argument must be positive") + if self.days_old_max >= 10000: + user_error("--days argument must be less than 10000") + if self.min_size is not None and self.min_size < 1: + user_error("--size argument must be greater than zero") + if self.quiet and self.verbose: + user_error("you cannot use both the --quiet and --verbose options") + if self.pwfile: + if not os.path.isfile(self.pwfile): + user_error("pwfile %s does not exist" % self.pwfile) + if self.archive_name and len(args) > 1: + user_error("the --archive-name cannot be used with multiple " \ + "mailboxes") + + def date_argument(self, string): + """Converts a date argument string into seconds since the epoch""" + date_formats = ( + "%Y-%m-%d", # ISO format + "%d %b %Y" , # Internet format + "%d %B %Y" , # Internet format with full month names + ) + time.accept2dyear = False # I'm not going to support 2-digit years + for format in date_formats: + try: + date = time.strptime(string, format) + seconds = time.mktime(date) + return seconds + except (ValueError, OverflowError): + pass + user_error("cannot parse the date argument 
'%s'\n" + "The date should be in ISO format (eg '2002-04-23'),\n" + "Internet format (eg '23 Apr 2002') or\n" + "Internet format with full month names (eg '23 April 2002')" % + string) + + +class LockableMboxMixin: + """Locking methods for mbox files.""" + + def __init__(self, mbox_file, mbox_file_name): + self.mbox_file = mbox_file + self.mbox_file_name = mbox_file_name + self._locked = False + self._use_dotlock = True + + def lock(self): + """Lock this mbox with both a dotlock and a posix lock.""" + assert not self._locked + attempt = 1 + while True: + try: + self._posix_lock() + self._dotlock_lock() + break + except LockUnavailable, e: + self._posix_unlock() + attempt += 1 + if (attempt > options.locking_attempts): + unexpected_error(str(e)) + vprint("%s - sleeping..." % e) + time.sleep(options.lock_sleep) + except: + self._posix_unlock() + raise + self._locked = True + + def unlock(self): + """Unlock this mbox.""" + assert self._locked + self._dotlock_unlock() + self._posix_unlock() + self._locked = False + + def _posix_lock(self): + """Set an exclusive posix lock on the 'mbox' mailbox""" + vprint("trying to acquire posix lock on file '%s'" % self.mbox_file_name) + try: + fcntl.lockf(self.mbox_file, fcntl.LOCK_EX|fcntl.LOCK_NB) + except IOError, e: + if e.errno in (errno.EAGAIN, errno.EACCES): + raise LockUnavailable("posix lock for '%s' unavailable" % \ + self.mbox_file_name) + else: + raise + vprint("acquired posix lock on file '%s'" % self.mbox_file_name) + + def _posix_unlock(self): + """Unset any posix lock on the 'mbox' mailbox""" + vprint("dropping posix lock on file '%s'" % self.mbox_file_name) + fcntl.lockf(self.mbox_file, fcntl.LOCK_UN) + + def _dotlock_lock(self): + """Create a dotlock file for the 'mbox' mailbox""" + hostname = socket.gethostname() + pid = os.getpid() + box_dir, prelock_prefix = os.path.split(self.mbox_file_name) + prelock_suffix = ".%s.%s%s" % (hostname, pid, options.lockfile_extension) + lock_name = self.mbox_file_name + options.lockfile_extension + vprint("trying to create dotlock file '%s'" % lock_name) + try: + plfd, prelock_name = tempfile.mkstemp(prelock_suffix, prelock_prefix, + dir=box_dir) + except OSError, e: + if e.errno == errno.EACCES: + if not options.quiet: + user_warning("no write permissions: omitting dotlock for '%s'" % \ + self.mbox_file_name) + self._use_dotlock = False + return + raise + try: + try: + os.link(prelock_name, lock_name) + # We've got the lock. + except OSError, e: + if os.fstat(plfd)[stat.ST_NLINK] == 2: + # The Linux man page for open(2) claims that in this + # case we have actually succeeded to create the link, + # and this assumption seems to be folklore. + # So we've got the lock. 
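+                    # (For reference: this is the classic NFS-safe dotlock
+                    # recipe -- create a uniquely named temp file, hard-link
+                    # it to the lock name, and treat a link count of 2 on the
+                    # temp file as success even if link() reported an error,
+                    # since the reply to a successful link() can be lost
+                    # over NFS.)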
+ pass + elif e.errno == errno.EEXIST: + raise LockUnavailable("Dotlock for '%s' unavailable" % self.mbox_file_name) + else: + raise + _stale.dotlock_files.append(lock_name) + finally: + os.close(plfd) + os.unlink(prelock_name) + vprint("acquired lockfile '%s'" % lock_name) + + def _dotlock_unlock(self): + """Delete the dotlock file for the 'mbox' mailbox.""" + if not self._use_dotlock: + return + lock_name = self.mbox_file_name + options.lockfile_extension + vprint("removing lockfile '%s'" % lock_name) + os.remove(lock_name) + _stale.dotlock_files.remove(lock_name) + + def commit(self): + """Sync the mbox file to disk.""" + self.mbox_file.flush() + os.fsync(self.mbox_file.fileno()) + + def close(self): + """Close the mbox file""" + vprint("closing file '%s'" % self.mbox_file_name) + assert not self._locked + self.mbox_file.close() + + +class Mbox(mailbox.UnixMailbox, LockableMboxMixin): + """A mostly-read-only mbox with locking. The mbox content can only be + modified by overwriting the entire underlying file.""" + + def __init__(self, path): + """Constructor for opening an existing 'mbox' mailbox. + Extends constructor for mailbox.UnixMailbox() + + Named Arguments: + path -- file name of the 'mbox' file to be opened + """ + assert path + fd = safe_open_existing(path) + st = os.fstat(fd) + self.original_atime = st.st_atime + self.original_mtime = st.st_mtime + self.starting_size = st.st_size + self.mbox_file = os.fdopen(fd, "r+") + self.mbox_file_name = path + LockableMboxMixin.__init__(self, self.mbox_file, path) + mailbox.UnixMailbox.__init__(self, self.mbox_file) + + def reset_timestamps(self): + """Set the file timestamps to the original values""" + assert self.original_atime + assert self.original_mtime + assert self.mbox_file_name + os.utime(self.mbox_file_name, (self.original_atime, \ + self.original_mtime)) + + def get_size(self): + """Return the current size of the mbox file on disk""" + return os.path.getsize(self.mbox_file_name) + + def overwrite_with(self, mbox_filename): + """Overwrite the mbox content with the content of the given mbox file.""" + fin = open(mbox_filename, "r") + self.mbox_file.seek(0) + shutil.copyfileobj(fin, self.mbox_file) + self.mbox_file.truncate() + + +class ArchiveMbox(LockableMboxMixin): + """Simple append-only access to the archive mbox. Entirely content-agnostic.""" + + def __init__(self, path): + fd = safe_open(path) + self.mbox_file = os.fdopen(fd, "a") + LockableMboxMixin.__init__(self, self.mbox_file, path) + + def append(self, filename): + """Append the content of the given file to the mbox.""" + assert self._locked + fin = open(filename, "r") + oldsize = os.fstat(self.mbox_file.fileno()).st_size + try: + shutil.copyfileobj(fin, self.mbox_file) + except: + # We can safely abort here without data loss, because + # we have not yet changed the original mailbox + self.mbox_file.truncate(oldsize) + raise + fin.close() + + +class TempMbox: + """A write-only temporary mbox. No locking methods.""" + + def __init__(self, prefix=tempfile.template): + """Creates a temporary mbox file.""" + fd, filename = tempfile.mkstemp(prefix=prefix) + self.mbox_file_name = filename + _stale.temp_mboxes.append(filename) + self.mbox_file = os.fdopen(fd, "w") + # an empty gzip file is not really empty (it contains the gzip header + # and trailer), so we need to track manually if this mbox is empty + self.empty = True + + def write(self, msg): + """Write a rfc822 message object to the 'mbox' mailbox. 
+ If the rfc822 has no Unix 'From_' line, then one is constructed + from other headers in the message. + + Arguments: + msg -- rfc822 message object to be written + + """ + assert msg + assert self.mbox_file + + self.empty = False + vprint("saving message to file '%s'" % self.mbox_file_name) + unix_from = msg.unixfrom + if unix_from: + msg_has_mbox_format = True + else: + msg_has_mbox_format = False + unix_from = make_mbox_from(msg) + self.mbox_file.write(unix_from) + assert msg.headers + self.mbox_file.writelines(msg.headers) + self.mbox_file.write(os.linesep) + + # The following while loop is about twice as fast in + # practice to 'self.mbox_file.writelines(msg.fp.readlines())' + assert options.read_buffer_size > 0 + linebuf = "" + while True: + body = msg.fp.read(options.read_buffer_size) + if (not msg_has_mbox_format) and options.mangle_from: + # Be careful not to break pattern matching + splitindex = body.rfind(os.linesep) + nicebody = linebuf + body[:splitindex] + linebuf = body[splitindex:] + body = from_re.sub('>From ', nicebody) + if not body: + break + self.mbox_file.write(body) + if not msg_has_mbox_format: + self.mbox_file.write(os.linesep) + + def commit(self): + """Sync the mbox file to disk.""" + self.mbox_file.flush() + os.fsync(self.mbox_file.fileno()) + + def close(self): + """Close the mbox file""" + vprint("closing file '%s'" % self.mbox_file_name) + self.mbox_file.close() + + def saveas(self, filename): + """Rename this temporary mbox file to the given name, making it + permanent. Emergency use only.""" + os.rename(self.mbox_file_name, filename) + _stale.temp_mboxes.remove(self.mbox_file_name) + + def remove(self): + """Delete the temporary mbox file.""" + os.remove(self.mbox_file_name) + _stale.temp_mboxes.remove(self.mbox_file_name) + + +class CompressedTempMbox(TempMbox): + """A compressed version of a TempMbox.""" + + def __init__(self, prefix=tempfile.template): + TempMbox.__init__(self, prefix) + self.raw_file = self.mbox_file + self.mbox_file = gzip.GzipFile(mode="a", fileobj=self.mbox_file) + # Workaround that GzipFile.close() isn't idempotent in Python < 2.6 + # (python issue #2959). There is no GzipFile.closed, so we need a + # replacement. + self.gzipfile_closed = False + + def commit(self): + """Finish gzip file and sync it to disk.""" + # This method is currently not used + self.mbox_file.close() # close GzipFile, writing gzip trailer + self.gzipfile_closed = True + self.raw_file.flush() + os.fsync(self.raw_file.fileno()) + + def close(self): + """Close the gzip file.""" + if not self.gzipfile_closed: + self.mbox_file.close() + self.raw_file.close() + + +class IdentityCache: + """Class used to remember Message-IDs and warn if they are seen twice""" + seen_ids = {} + mailbox_name = None + + def __init__(self, mailbox_name): + """Constructor: takes the mailbox name as an argument""" + assert mailbox_name + self.mailbox_name = mailbox_name + + def warn_if_dupe(self, msg): + """Print a warning message if the message has already appeared""" + assert msg + message_id = msg.get('Message-ID') + assert message_id + if self.seen_ids.has_key(message_id): + user_warning("duplicate message id: '%s' in mailbox '%s'" % + (message_id, self.mailbox_name)) + self.seen_ids[message_id] = True + + +# global class instances +options = Options() # the run-time options object +_stale = StaleFiles() # remember what we have to delete on abnormal exit + + +def main(args = sys.argv[1:]): + global _stale + + # this usage message is longer than 24 lines -- bad idea? 
+ usage = """Usage: %s [options] mailbox [mailbox...] +Moves old mail in IMAP, mbox, MH or maildir-format mailboxes to an mbox-format +mailbox compressed with gzip. + +Options are as follows: + -d, --days=NUM archive messages older than NUM days (default: %d) + -D, --date=DATE archive messages older than DATE + -o, --output-dir=DIR directory to store archives (default: same as original) + -P, --pwfile=FILE file to read imap password from (default: None) + -F, --filter-append=STRING append arbitrary string to the IMAP filter string + -p, --prefix=NAME prefix for archive filename (default: none) + -s, --suffix=NAME suffix for archive filename (default: '%s') + -a, --archive-name=NAME specify complete archive filename + -S, --size=NUM only archive messages NUM bytes or larger + -n, --dry-run don't write to anything - just show what would be done + -u, --preserve-unread never archive unread messages + --dont-mangle do not mangle From_ in message bodies + --delete delete rather than archive old mail (use with caution!) + --copy copy rather than archive old mail + --include-flagged messages flagged important can also be archived + --all archive all messages + --no-compress do not compress archives with gzip + --warn-duplicate warn about duplicate Message-IDs in the same mailbox + -v, --verbose report lots of extra debugging information + --debug-imap=NUM set IMAP debugging output level (0 is none) + -q, --quiet quiet mode - print no statistics (suitable for crontab) + -V, --version display version information + -h, --help display this message + +Example: %s linux-kernel + This will move all messages older than %s days to a 'mbox' mailbox called + 'linux-kernel_archive.gz', deleting them from the original 'linux-kernel' + mailbox. If the 'linux-kernel_archive.gz' mailbox already exists, the + newly archived messages are appended. + +To archive IMAP mailboxes, format your mailbox argument like this: + imap://username:password@server/mailbox + (substitute 'imap' with 'imaps' for an SSL connection) + +Website: http://archivemail.sourceforge.net/ """ % \ + (options.script_name, options.days_old_max, options.archive_suffix, + options.script_name, options.days_old_max) + + args = options.parse_args(args, usage) + if len(args) == 0: + print usage + sys.exit(1) + + options.sanity_check(args) + + for mailbox_path in args: + archive(mailbox_path) + + +######## errors and debug ########## + +def vprint(string): + """Print the string argument if we are in verbose mode""" + if options.verbose: + print string + + +def unexpected_error(string): + """Print the string argument, a 'shutting down' message and abort. Raise + UnexpectedErrors if archivemail is run as a module. This function never + returns.""" + if not __name__ == '__main__': + raise UnexpectedError(string) + sys.stderr.write("%s: %s\n" % (options.script_name, string)) + sys.stderr.write("%s: unexpected error encountered - shutting down\n" % + options.script_name) + sys.exit(1) + + +def user_error(string): + """Print the string argument and abort. Raise UserError if archivemail is + run as a module. 
This function never returns.""" + if not __name__ == '__main__': + raise UserError(string) + sys.stderr.write("%s: %s\n" % (options.script_name, string)) + sys.exit(1) + + +def user_warning(string): + """Print the string argument""" + sys.stderr.write("%s: Warning - %s\n" % (options.script_name, string)) + +########### operations on a message ############ + +def make_mbox_from(message): + """Return a string suitable for use as a 'From_' mbox header for the + message. + + Arguments: + message -- the rfc822 message object + + """ + assert message + address = guess_return_path(message) + time_message = guess_delivery_time(message) + date = time.localtime(time_message) + assert date + date_string = time.asctime(date) + mbox_from = "From %s %s\n" % (address, date_string) + return mbox_from + + +def guess_return_path(message): + """Return a guess at the Return Path address of an rfc822 message""" + assert message + + for header in ('Return-path', 'From'): + address_header = message.get(header) + if address_header: + (name, address) = rfc822.parseaddr(address_header) + if address: + return address + # argh, we can't find any valid 'Return-path' guesses - just + # just use the current unix username like mutt does + login = pwd.getpwuid(os.getuid())[0] + assert login + return login + + +def guess_delivery_time(message): + """Return a guess at the delivery date of an rfc822 message""" + assert message + # try to guess the delivery date from various headers + # get more desparate as we go through the array + for header in 'Delivery-date', 'Received', 'Resent-Date', 'Date': + try: + if header == 'Received': + # This should be good enough for almost all headers in the wild; + # if we're guessing wrong, parsedate_tz() will fail graciously. + token = message.getrawheader(header).rsplit(';', 1)[-1] + else: + token = message.get(header) + date = rfc822.parsedate_tz(token) + if date: + time_message = rfc822.mktime_tz(date) + vprint("using valid time found from '%s' header" % header) + return time_message + except (AttributeError, IndexError, ValueError, OverflowError): pass + # as a second-last resort, try the date from the 'From_' line (ugly) + # this will only work from a mbox-format mailbox + if (message.unixfrom): + # Hmm. This will break with full-blown RFC 2822 addr-spec's. + header = message.unixfrom.split(None, 2)[-1] + # Interpret no timezone as localtime + date = rfc822.parsedate_tz(header) + if date: + try: + time_message = rfc822.mktime_tz(date) + vprint("using valid time found from unix 'From_' header") + return time_message + except (ValueError, OverflowError): pass + # the headers have no valid dates -- last resort, try the file timestamp + # this will not work for mbox mailboxes + try: + file_name = get_filename(message) + except AttributeError: + # we are looking at a 'mbox' mailbox - argh! + # Just return the current time - this will never get archived :( + vprint("no valid times found at all -- using current time!") + return time.time() + if not os.path.isfile(file_name): + unexpected_error("mailbox file name '%s' has gone missing" % \ + file_name) + time_message = os.path.getmtime(file_name) + vprint("using valid time found from '%s' last-modification time" % \ + file_name) + return time_message + + +def add_status_headers(message): + """ + Add Status and X-Status headers to a message from a maildir mailbox. + + Maildir messages store their information about being read/replied/etc in + the suffix of the filename rather than in Status and X-Status headers in + the message. 
In order to archive maildir messages into mbox format, it is + nice to preserve this information by putting it into the status headers. + + """ + status = "" + x_status = "" + file_name = get_filename(message) + match = re.search(":2,(.+)$", file_name) + if match: + flags = match.group(1) + for flag in flags: + if flag == "D": # (draft): the user considers this message a draft + pass # does this make any sense in mbox? + elif flag == "F": # (flagged): user-defined 'important' flag + x_status = x_status + "F" + elif flag == "R": # (replied): the user has replied to this message + x_status = x_status + "A" + elif flag == "S": # (seen): the user has viewed this message + status = status + "R" + elif flag == "T": # (trashed): user has moved this message to trash + pass # is this Status: D ? + else: + pass # no whingeing here, although it could be a good experiment + + # files in the maildir 'cur' directory are no longer new, + # they are the same as messages with 'Status: O' headers in mbox + last_dir = os.path.basename(os.path.dirname(file_name)) + if last_dir == "cur": + status = status + "O" + + # Overwrite existing 'Status' and 'X-Status' headers. They add no value in + # maildirs, and we better don't listen to them. + if status: + vprint("converting maildir status into Status header '%s'" % status) + message['Status'] = status + else: + del message['Status'] + if x_status: + vprint("converting maildir status into X-Status header '%s'" % x_status) + message['X-Status'] = x_status + else: + del message['X-Status'] + +def add_status_headers_imap(message, flags): + """Add Status and X-Status headers to a message from an imap mailbox.""" + status = "" + x_status = "" + for flag in flags: + if flag == "\\Draft": # (draft): the user considers this message a draft + pass # does this make any sense in mbox? + elif flag == "\\Flagged": # (flagged): user-defined 'important' flag + x_status = x_status + "F" + elif flag == "\\Answered": # (replied): the user has replied to this message + x_status = x_status + "A" + elif flag == "\\Seen": # (seen): the user has viewed this message + status = status + "R" + elif flag == "\\Deleted": # (trashed): user has moved this message to trash + pass # is this Status: D ? + else: + pass # no whingeing here, although it could be a good experiment + if not "\\Recent" in flags: + status = status + "O" + + # As with maildir folders, overwrite Status and X-Status headers + # if they exist. + vprint("converting imap status (%s)..." 
% " ".join(flags)) + if status: + vprint("generating Status header '%s'" % status) + message['Status'] = status + else: + vprint("not generating Status header") + del message['Status'] + if x_status: + vprint("generating X-Status header '%s'" % x_status) + message['X-Status'] = x_status + else: + vprint("not generating X-Status header") + del message['X-Status'] + +def is_flagged(message): + """return true if the message is flagged important, false otherwise""" + # MH and mbox mailboxes use the 'X-Status' header to indicate importance + x_status = message.get('X-Status') + if x_status and re.search('F', x_status): + vprint("message is important (X-Status header='%s')" % x_status) + return True + file_name = None + try: + file_name = get_filename(message) + except AttributeError: + pass + # maildir mailboxes use the filename suffix to indicate flagged status + if file_name and re.search(":2,.*F.*$", file_name): + vprint("message is important (filename info has 'F')") + return True + vprint("message is not flagged important") + return False + + +def is_unread(message): + """return true if the message is unread, false otherwise""" + # MH and mbox mailboxes use the 'Status' header to indicate read status + status = message.get('Status') + if status and re.search('R', status): + vprint("message has been read (status header='%s')" % status) + return False + file_name = None + try: + file_name = get_filename(message) + except AttributeError: + pass + # maildir mailboxes use the filename suffix to indicate read status + if file_name and re.search(":2,.*S.*$", file_name): + vprint("message has been read (filename info has 'S')") + return False + vprint("message is unread") + return True + + +def sizeof_message(message): + """Return size of message in bytes (octets).""" + assert message + file_name = None + message_size = None + try: + file_name = get_filename(message) + except AttributeError: + pass + if file_name: + # with maildir and MH mailboxes, we can just use the file size + message_size = os.path.getsize(file_name) + else: + # with mbox mailboxes, not so easy + message_size = 0 + if message.unixfrom: + message_size = message_size + len(message.unixfrom) + for header in message.headers: + message_size = message_size + len(header) + message_size = message_size + 1 # the blank line after the headers + start_offset = message.fp.tell() + message.fp.seek(0, 2) # seek to the end of the message + end_offset = message.fp.tell() + message.rewindbody() + message_size = message_size + (end_offset - start_offset) + return message_size + +def is_smaller(message, size): + """Return true if the message is smaller than size bytes, false otherwise""" + assert message + assert size > 0 + message_size = sizeof_message(message) + if message_size < size: + vprint("message is too small (%d bytes), minimum bytes : %d" % \ + (message_size, size)) + return True + else: + vprint("message is not too small (%d bytes), minimum bytes: %d" % \ + (message_size, size)) + return False + + +def should_archive(message): + """Return true if we should archive the message, false otherwise""" + if options.archive_all: + return True + old = False + time_message = guess_delivery_time(message) + if options.date_old_max == None: + old = is_older_than_days(time_message, options.days_old_max) + else: + old = is_older_than_time(time_message, options.date_old_max) + + # I could probably do this in one if statement, but then I wouldn't + # understand it. 
+ if not old: + return False + if not options.include_flagged and is_flagged(message): + return False + if options.min_size and is_smaller(message, options.min_size): + return False + if options.preserve_unread and is_unread(message): + return False + return True + + +def is_older_than_time(time_message, max_time): + """Return true if a message is older than the specified time, + false otherwise. + + Arguments: + time_message -- the delivery date of the message measured in seconds + since the epoch + max_time -- maximum time allowed for message + + """ + days_old = (max_time - time_message) / 24 / 60 / 60 + if time_message < max_time: + vprint("message is %.2f days older than the specified date" % days_old) + return True + vprint("message is %.2f days younger than the specified date" % \ + abs(days_old)) + return False + + +def is_older_than_days(time_message, max_days): + """Return true if a message is older than the specified number of days, + false otherwise. + + Arguments: + time_message -- the delivery date of the message measured in seconds + since the epoch + max_days -- maximum number of days before message is considered old + """ + time_now = time.time() + if time_message > time_now: + vprint("warning: message has date in the future") + return False + secs_old_max = (max_days * 24 * 60 * 60) + days_old = (time_now - time_message) / 24 / 60 / 60 + vprint("message is %.2f days old" % days_old) + if ((time_message + secs_old_max) < time_now): + return True + return False + +def build_imap_filter(): + """Return an imap filter string""" + + imap_filter = [] + if options.date_old_max == None: + time_now = time.time() + secs_old_max = (options.days_old_max * 24 * 60 * 60) + time_old = time.gmtime(time_now - secs_old_max) + else: + time_old = time.gmtime(options.date_old_max) + time_str = time.strftime('%d-%b-%Y', time_old) + imap_filter.append("BEFORE %s" % time_str) + + if not options.include_flagged: + imap_filter.append("UNFLAGGED") + if options.min_size: + imap_filter.append("LARGER %d" % options.min_size) + if options.preserve_unread: + imap_filter.append("SEEN") + if options.filter_append: + imap_filter.append(options.filter_append) + + return '(' + string.join(imap_filter, ' ') + ')' + +############### mailbox operations ############### + +def archive(mailbox_name): + """Archives a mailbox. + + Arguments: + mailbox_name -- the filename/dirname/url of the mailbox to be archived + """ + assert mailbox_name + + # strip any trailing slash (we could be archiving a maildir or MH format + # mailbox and somebody was pressing in bash) - we don't want to use + # the trailing slash in the archive name + mailbox_name = mailbox_name.rstrip("/") + assert mailbox_name + + set_signal_handlers() + os.umask(077) # saves setting permissions on mailboxes/tempfiles + + vprint("processing '%s'" % mailbox_name) + is_imap = urlparse.urlparse(mailbox_name)[0] in ('imap', 'imaps') + if not is_imap: + # Check if the mailbox exists, and refuse to mess with other people's + # stuff + try: + fuid = os.stat(mailbox_name).st_uid + except OSError, e: + user_error(str(e)) + else: + if fuid != os.getuid(): + user_error("'%s' is owned by someone else!" 
% mailbox_name) + + old_temp_dir = tempfile.tempdir + try: + # create a temporary directory for us to work in securely + tempfile.tempdir = None + new_temp_dir = tempfile.mkdtemp('archivemail') + assert new_temp_dir + _stale.temp_dir = new_temp_dir + tempfile.tempdir = new_temp_dir + vprint("set tempfile directory to '%s'" % new_temp_dir) + + if is_imap: + vprint("guessing mailbox is of type: imap(s)") + _archive_imap(mailbox_name) + elif os.path.isfile(mailbox_name): + vprint("guessing mailbox is of type: mbox") + _archive_mbox(mailbox_name) + elif os.path.isdir(mailbox_name): + cur_path = os.path.join(mailbox_name, "cur") + new_path = os.path.join(mailbox_name, "new") + if os.path.isdir(cur_path) and os.path.isdir(new_path): + vprint("guessing mailbox is of type: maildir") + _archive_dir(mailbox_name, "maildir") + else: + vprint("guessing mailbox is of type: MH") + _archive_dir(mailbox_name, "mh") + else: + user_error("'%s' is not a normal file or directory" % mailbox_name) + + # remove our special temp directory - hopefully empty + os.rmdir(new_temp_dir) + _stale.temp_dir = None + + finally: + tempfile.tempdir = old_temp_dir + clean_up() + +def _archive_mbox(mailbox_name): + """Archive a 'mbox' style mailbox - used by archive_mailbox()""" + assert mailbox_name + final_archive_name = make_archive_name(mailbox_name) + vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) + check_archive(final_archive_name) + stats = Stats(mailbox_name, final_archive_name) + cache = IdentityCache(mailbox_name) + original = Mbox(path=mailbox_name) + if options.dry_run or options.copy_old_mail: + retain = None + else: + retain = TempMbox(prefix="retain") + archive = prepare_temp_archive() + + original.lock() + msg = original.next() + if not msg and (original.starting_size > 0): + user_error("'%s' is not a valid mbox-format mailbox" % mailbox_name) + if msg and 'X-IMAP' in msg: + # Dovecot and UW-IMAP pseudo message for mailbox meta data + vprint("detected IMAP pseudo message") + if retain: + retain.write(msg) + msg = original.next() + while (msg): + msg_size = sizeof_message(msg) + stats.another_message(msg_size) + vprint("processing message '%s'" % msg.get('Message-ID')) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + if should_archive(msg): + stats.another_archived(msg_size) + if options.delete_old_mail: + vprint("decision: delete message") + else: + vprint("decision: archive message") + if archive: + archive.write(msg) + else: + vprint("decision: retain message") + if retain: + retain.write(msg) + msg = original.next() + vprint("finished reading messages") + if original.starting_size != original.get_size(): + unexpected_error("the mailbox '%s' changed size during reading!" % \ + mailbox_name) + # Write the new archive before modifying the mailbox, to prevent + # losing data if something goes wrong + commit_archive(archive, final_archive_name) + if retain: + pending_changes = original.mbox_file.tell() != retain.mbox_file.tell() + if pending_changes: + retain.commit() + retain.close() + vprint("writing back changed mailbox '%s'..." % \ + original.mbox_file_name) + # Prepare for recovery on error. + # FIXME: tempfile.tempdir is our nested dir. 
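+            # The recovery copy goes into our private temp directory under a
+            # unique, recognizable name, e.g. (illustrative)
+            # "archivemail.inbox.myhost-1000-12345".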
+ saved_name = "%s/%s.%s.%s-%s-%s" % \ + (tempfile.tempdir, options.script_name, + os.path.basename(original.mbox_file_name), + socket.gethostname(), os.getuid(), + os.getpid()) + try: + original.overwrite_with(retain.mbox_file_name) + original.commit() + except: + retain.saveas(saved_name) + print "Error writing back changed mailbox; saved good copy to " \ + "%s" % saved_name + raise + else: + retain.close() + vprint("no changes to mbox '%s'" % original.mbox_file_name) + retain.remove() + original.unlock() + original.close() + original.reset_timestamps() # Minor race here; mutt has this too. + if not options.quiet: + stats.display() + + +def _archive_dir(mailbox_name, type): + """Archive a 'maildir' or 'MH' style mailbox - used by archive_mailbox()""" + assert mailbox_name + assert type + final_archive_name = make_archive_name(mailbox_name) + vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name)) + check_archive(final_archive_name) + stats = Stats(mailbox_name, final_archive_name) + delete_queue = [] + + if type == "maildir": + original = mailbox.Maildir(mailbox_name) + elif type == "mh": + original = mailbox.MHMailbox(mailbox_name) + else: + unexpected_error("unknown type: %s" % type) + cache = IdentityCache(mailbox_name) + archive = prepare_temp_archive() + + for msg in original: + if not msg: + vprint("ignoring invalid message '%s'" % get_filename(msg)) + continue + msg_size = sizeof_message(msg) + stats.another_message(msg_size) + vprint("processing message '%s'" % msg.get('Message-ID')) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + if should_archive(msg): + stats.another_archived(msg_size) + if options.delete_old_mail: + vprint("decision: delete message") + else: + vprint("decision: archive message") + if archive: + if type == "maildir": + add_status_headers(msg) + archive.write(msg) + if not options.dry_run and not options.copy_old_mail: + delete_queue.append(get_filename(msg)) + else: + vprint("decision: retain message") + vprint("finished reading messages") + # Write the new archive before modifying the mailbox, to prevent + # losing data if something goes wrong + commit_archive(archive, final_archive_name) + for file_name in delete_queue: + vprint("removing original message: '%s'" % file_name) + try: os.remove(file_name) + except OSError, e: + if e.errno != errno.ENOENT: raise + if not options.quiet: + stats.display() + +def _archive_imap(mailbox_name): + """Archive an imap mailbox - used by archive_mailbox()""" + assert mailbox_name + import imaplib + import cStringIO + import getpass + + vprint("Setting imaplib.Debug = %d" % options.debug_imap) + imaplib.Debug = options.debug_imap + archive = None + imap_username, imap_password, \ + imap_server, imap_server_port, \ + imap_folder_pattern = parse_imap_url(mailbox_name) + if not imap_password: + if options.pwfile: + imap_password = open(options.pwfile).read().rstrip() + else: + if (not os.isatty(sys.stdin.fileno())) or options.quiet: + unexpected_error("No imap password specified") + imap_password = getpass.getpass('IMAP password: ') + + is_ssl = mailbox_name[:5].lower() == 'imaps' + if is_ssl: + vprint("establishing secure connection to server %s, port %s" % + (imap_server, imap_server_port)) + imap_srv = imaplib.IMAP4_SSL(imap_server, imap_server_port) + else: + vprint("establishing connection to server %s, port %s" % + (imap_server, imap_server_port)) + imap_srv = imaplib.IMAP4(imap_server, imap_server_port) + if "AUTH=CRAM-MD5" in imap_srv.capabilities: + vprint("authenticating (cram-md5) to 
server as %s" % imap_username) + result, response = imap_srv.login_cram_md5(imap_username, imap_password) + elif not "LOGINDISABLED" in imap_srv.capabilities: + vprint("logging in to server as %s" % imap_username) + result, response = imap_srv.login(imap_username, imap_password) + else: + user_error("imap server %s has login disabled (hint: " + "try ssl/imaps)" % imap_server) + + mailboxes = imap_find_mailboxes(imap_srv, imap_folder_pattern) + for imap_folder in mailboxes: + final_archive_name = make_archive_name(imap_folder) + vprint("archiving mailbox '%s' on IMAP server '%s' to '%s' ..." % + (imap_folder, imap_server, final_archive_name)) + check_archive(final_archive_name) + cur_mailbox = mailbox_name[:-len(imap_folder_pattern)] + imap_folder + stats = Stats(cur_mailbox, final_archive_name) + cache = IdentityCache(cur_mailbox) + + imap_smart_select(imap_srv, imap_folder) + total_msg_count = int(imap_srv.response("EXISTS")[1][0]) + vprint("folder has %d message(s)" % total_msg_count) + + # IIUIC the message sequence numbers are stable for the whole session, since + # we just send SEARCH, FETCH and STORE commands, which should prevent the + # server from sending untagged EXPUNGE responses -- see RFC 3501 (IMAP4rev1) + # 7.4.1 and RFC 2180 (Multi-Accessed Mailbox Practice). + # Worst thing should be that we bail out FETCHing a message that has been + # deleted. + + if options.archive_all: + message_list = [str(n) for n in range(1, total_msg_count+1)] + else: + imap_filter = build_imap_filter() + vprint("imap filter: '%s'" % imap_filter) + vprint("searching messages matching criteria") + result, response = imap_srv.search(None, imap_filter) + if result != 'OK': unexpected_error("imap search failed; server says '%s'" % + response[0]) + if response[0] is not None: + # response is a list with a single item, listing message + # sequence numbers like ['1 2 3 1016'] + message_list = response[0].split() + else: + # Broken server has sent no untagged response; assume empty result set. + message_list = [] + vprint("%d messages are matching filter" % len(message_list)) + + # First, gather data for the statistics. + if total_msg_count > 0 and not options.quiet: + vprint("fetching size of messages...") + result, response = imap_srv.fetch('1:*', '(RFC822.SIZE)') + if result != 'OK': unexpected_error("Failed to fetch message sizes; " + "server says '%s'" % response[0]) + # response is a list with entries like '1016 (RFC822.SIZE 3118)', + # where the first number is the message sequence number, the second is + # the size. + for x in response: + m = imapsize_re.match(x) + msn, msg_size = m.group('msn'), int(m.group('size')) + stats.another_message(msg_size) + if msn in message_list: + stats.another_archived(msg_size) + + if not options.dry_run: + if not options.delete_old_mail: + archive = prepare_temp_archive() + vprint("fetching messages...") + for msn in message_list: + # Fetching message flags and body together always finds \Seen + # set. To check \Seen, we must fetch the flags first. 
+ result, response = imap_srv.fetch(msn, '(FLAGS)') + if result != 'OK': unexpected_error("Failed to fetch message " + "flags; server says '%s'" % response[0]) + msg_flags = imaplib.ParseFlags(response[0]) + result, response = imap_srv.fetch(msn, '(RFC822)') + if result != 'OK': unexpected_error("Failed to fetch message; " + "server says '%s'" % response[0]) + msg_str = response[0][1].replace("\r\n", os.linesep) + msg = rfc822.Message(cStringIO.StringIO(msg_str)) + vprint("processing message '%s'" % msg.get('Message-ID')) + add_status_headers_imap(msg, msg_flags) + if options.warn_duplicates: + cache.warn_if_dupe(msg) + archive.write(msg) + commit_archive(archive, final_archive_name) + if not options.copy_old_mail: + vprint("Deleting %s messages" % len(message_list)) + # do not delete more than a certain number of messages at a time, + # because the command length is limited. This avoids that servers + # terminate the connection with EOF or TCP RST. + max_delete = 100 + for i in range(0, len(message_list), max_delete): + result, response = imap_srv.store( \ + string.join(message_list[i:i+max_delete], ','), + '+FLAGS.SILENT', '\\Deleted') + if result != 'OK': unexpected_error("Error while deleting " + "messages; server says '%s'" % response[0]) + vprint("Closing mailbox.") + imap_srv.close() + if not options.quiet: + stats.display() + vprint("Terminating connection.") + imap_srv.logout() + + +############### IMAP functions ############### + + +# First, some IMAP modified UTF-7 support functions. + +# The modified BASE64 alphabet. 64 characters, each one encodes 6 Bit. +mb64alpha = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+,' + +def isprint_ascii(char): + """Test for an ASCII printable character.""" + return 0x20 <= ord(char) and ord(char) <= 0x7e + +def mod_utf7_encode(ustr): + """Encode unicode string object in modified UTF-7.""" + + def mb64_encode(tomb64): + """Encode unicode string object as a modified UTF-7 shifted sequence + in modified BASE64.""" + u16be = tomb64.encode('utf_16_be') + mb64 = "" + # Process 24-bit blocks, encoding them in 6-bit steps. + for block in [u16be[i:i+3] for i in range(0, len(u16be), 3)]: + idx = 0 + shift = 2 + for octet in block: + mb64 += mb64alpha[idx | (ord(octet) >> shift)] + idx = (ord(octet) << (6-shift)) & 0x3f + shift += 2 + mb64 += mb64alpha[idx] + return mb64 + + mu7 = "" + tomb64 = u"" + for c in ustr: + if not isprint_ascii(c): + tomb64 += c + continue + if tomb64: + mu7 += '&' + mb64_encode(tomb64) + '-' + tomb64 = u"" + if c == '&': + mu7 += '&-' + else: + mu7 += str(c) + if tomb64: + mu7 += '&' + mb64_encode(tomb64) + '-' + return mu7 + +def mod_utf7_decode(mu7): + """Decode a modified UTF-7 encoded string to an unicode string object.""" + + def mb64_decode(mb64): + """Decode a modified UTF-7 shifted sequence from modified BASE64 to an + unicode string object.""" + if not mb64: + # A null shift '&-' decodes to '&'. + return u"&" + u16be = "" + # Process blocks of 4 BASE64 characters, decoding each char to 6 bits. 
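+        # Worked example: the shifted sequence "AOk" (i.e. the encoded
+        # fragment "&AOk-") yields the UTF-16BE bytes 0x00 0xE9 and thus
+        # decodes to u'\xe9' (LATIN SMALL LETTER E WITH ACUTE).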
+ for block in [mb64[i:i+4] for i in range(0, len(mb64), 4)]: + carrybits = mb64alpha.index(block[0]) << 2 + shift = 4 + for char in block[1:]: + bits = mb64alpha.index(char) + u16be += chr(carrybits | (bits >> shift)) + carrybits = (bits << (8-shift)) & 0xff + shift -= 2 + if carrybits: + raise ValueError("Ill-formed modified UTF-7 string: " + "trailing bits in shifted sequence") + return u16be.decode('utf_16_be') + + ustr = u"" + mb64 = "" + inmb64 = False + for octet in mu7: + if not isprint_ascii(octet): + raise ValueError("Ill-formed modified UTF-7 string: " + "contains non-printable ASCII" % ord(octet)) + if not inmb64: + if octet == '&': + inmb64 = True + else: + ustr += octet + continue + + if octet in mb64alpha: + mb64 += octet + continue + + if octet == '-': + inmb64 = False + ustr += mb64_decode(mb64) + mb64 = "" + else: + break # This triggers the exception below. + + if inmb64: + raise ValueError("Ill-formed modified UTF-7 string: " + "unterminated BASE64 sequence") + return ustr + + +def imap_quote(astring): + """Quote an IMAP `astring' string (see RFC 3501, section "Formal Syntax").""" + if astring.startswith('"') and astring.endswith('"'): + quoted = astring + else: + quoted = '"' + astring.replace('\\', '\\\\').replace('"', '\\"') + '"' + return quoted + +def imap_unquote(quoted): + """Un-quote a `quoted' IMAP string (see RFC 3501, section "Formal Syntax").""" + if not (quoted.startswith('"') and quoted.endswith('"')): + unquoted = quoted + else: + unquoted = re.sub(r'\\(\\|")', r'\1', quoted[1:-1]) + return unquoted + +def parse_imap_url(url): + """Parse IMAP URL and return username, password (if appliciable), servername + and foldername.""" + + def split_qstr(string, delim): + """Split string once at delim, keeping quoted substring intact. + Strip and unescape quotes where necessary.""" + rm = re.match(r'"(.+?(?"."|NIL)', response[0]) + if not m: + unexpected_error("imap_getdelim(): cannot parse '%s'" % response[0]) + delim = m.group('delim').strip('"') + vprint("Found mailbox hierarchy delimiter: '%s'" % delim) + if delim == "NIL": + return None + return delim + + +def imap_get_namespace(srv): + """Return the IMAP namespace prefixes and hierarchy delimiters.""" + assert 'NAMESPACE' in srv.capabilities + result, response = srv.namespace() + if result != 'OK': + unexpected_error("Cannot retrieve IMAP namespace; server says: '%s'" + % response[0]) + vprint("NAMESPACE response: %s" % repr(response[0])) + # Typical response is e.g. + # ['(("INBOX." ".")) NIL (("#shared." ".")("shared." "."))'] or + # ['(("" ".")) NIL NIL'], see RFC 2342. + # Make a reasonable guess parsing this beast. + try: + m = re.match(r'\(\("([^"]*)" (?:"(.)"|NIL)', response[0]) + nsprefix, hdelim = m.groups() + except: + print "Cannot parse IMAP NAMESPACE response %s" % repr(response) + raise + return nsprefix, hdelim + + +def imap_smart_select(srv, mailbox): + """Select the given mailbox on the IMAP server.""" + roflag = options.dry_run or options.copy_old_mail + # Work around python bug #1277098 (still pending in python << 2.5) + if not roflag: + roflag = None + if roflag: + vprint("examining imap folder '%s' read-only" % mailbox) + else: + vprint("selecting imap folder '%s'" % mailbox) + imap_mailbox = mod_utf7_encode(mailbox.decode(userencoding)) + result, response = srv.select(imap_quote(imap_mailbox), roflag) + if result != 'OK': + unexpected_error("selecting '%s' failed; server says: '%s'." 
\ + % (mailbox, response[0])) + if not roflag: + # Sanity check that we don't silently fail to delete messages. + # As to the following indices: IMAP4.response(key) returns + # a tuple (key, ['']) if the key is found, (key, [None]) + # otherwise. Imaplib just *loves* to nest trivial lists! + permflags = srv.response("PERMANENTFLAGS")[1][0] + if permflags: + permflags = permflags.strip('()').lower().split() + if not '\\deleted' in permflags: + unexpected_error("Server doesn't allow deleting messages in " \ + "'%s'." % mailbox) + elif "IMAP4REV1" in srv.capabilities: + vprint("Suspect IMAP4rev1 server, doesn't send PERMANENTFLAGS " \ + "upon SELECT") + + +def imap_find_mailboxes(srv, mailbox): + """Find matching mailboxes on the IMAP server, correcting an invalid + mailbox path if possible.""" + for curbox in imap_guess_mailboxnames(srv, mailbox): + if '%' in curbox or '*' in curbox: + vprint("Looking for mailboxes matching '%s'..." % curbox) + else: + vprint("Looking for mailbox '%s'..." % curbox) + curbox = mod_utf7_encode(curbox.decode(userencoding)) + result, response = srv.list(pattern=imap_quote(curbox)) + if result != 'OK': + unexpected_error("LIST command failed; " \ + "server says: '%s'" % response[0]) + # Say we queried for the mailbox "foo". + # Upon success, response is e.g. ['(\\HasChildren) "." foo']. + # Upon failure, response is [None]. Funky imaplib! + if response[0] != None: + break + else: + user_error("Cannot find mailbox '%s' on server." % mailbox) + mailboxes = [] + for mailbox_data in response: + if not mailbox_data: # imaplib sometimes returns an empty string + continue + try: + m = re.match(r'\((.*?)\) (?:"."|NIL) (.+)', mailbox_data) + except TypeError: + # May be a literal. For literals, imaplib returns a tuple like + # ('(\\HasNoChildren) "." {12}', 'with "quote"'). 
+ m = re.match(r'\((.*?)\) (?:"."|NIL) \{\d+\}$', mailbox_data[0]) + if m is None: + unexpected_error("cannot parse LIST reply %s" % + (mailbox_data,)) + attrs = m.group(1) + name = mailbox_data[1] + else: + attrs, name = m.groups() + name = imap_unquote(name) + try: + name = mod_utf7_decode(name) + except ValueError: + vprint("Mailbox name '%s' returned by server doesn't look like " + "modified UTF-7" % name) + name = name.decode('utf-8') + name = name.encode(userencoding) + if '\\noselect' in attrs.lower().split(): + vprint("skipping not selectable mailbox '%s'" % name) + continue + vprint("Found mailbox '%s'" % name) + mailboxes.append(name) + if not mailboxes: + user_error("No matching folder is selectable") + return mailboxes + + +def imap_guess_mailboxnames(srv, mailbox): + """Return a list of possible real IMAP mailbox names in descending order + of preference, compiled by prepending an IMAP namespace prefix if necessary, + and by translating hierarchy delimiters.""" + if 'NAMESPACE' in srv.capabilities: + nsprefix, hdelim = imap_get_namespace(srv) + else: + vprint("Server doesn't support NAMESPACE command.") + nsprefix = "" + hdelim = imap_getdelim(srv) + vprint("IMAP namespace prefix: '%s', hierarchy delimiter: '%s'" % \ + (nsprefix, hdelim)) + if mailbox.upper() == "INBOX" or \ + (hdelim is not None and mailbox.upper().startswith("INBOX" + hdelim)): + # INBOX is not a real mailbox name, so namespace prefixes do not apply + # to INBOX and its children + boxnames = [mailbox] + elif mailbox.startswith(nsprefix): + boxnames = [mailbox] + else: + boxnames = [nsprefix + mailbox] + if os.path.sep in mailbox and hdelim is not None: + mailbox = mailbox.replace(os.path.sep, hdelim) + if mailbox.upper().startswith("INBOX" + hdelim): + boxnames.append(mailbox) + else: + if mailbox.startswith(nsprefix): + boxnames.append(mailbox) + if nsprefix: + boxnames.append(nsprefix + mailbox) + return boxnames + + +############### misc functions ############### + + +def set_signal_handlers(): + """set signal handlers to clean up temporary files on unexpected exit""" + # Make sure we clean up nicely - we don't want to leave stale dotlock + # files about if something bad happens to us. This is quite + # important, even though procmail will delete stale files after a while. + signal.signal(signal.SIGHUP, clean_up_signal) # signal 1 + # SIGINT (signal 2) is handled as a python exception + signal.signal(signal.SIGQUIT, clean_up_signal) # signal 3 + signal.signal(signal.SIGTERM, clean_up_signal) # signal 15 + + +def clean_up(): + """Delete stale files""" + vprint("cleaning up ...") + _stale.clean() + + +def clean_up_signal(signal_number, stack_frame): + """Delete stale files -- to be registered as a signal handler. 
+ + Arguments: + signal_number -- signal number of the terminating signal + stack_frame -- the current stack frame + + """ + # this will run the above clean_up(), since unexpected_error() + # will abort with sys.exit() and clean_up will be registered + # at this stage + unexpected_error("received signal %s" % signal_number) + +def prepare_temp_archive(): + """Create temporary archive mbox.""" + if options.dry_run or options.delete_old_mail: + return None + if options.no_compress: + return TempMbox() + else: + return CompressedTempMbox() + +def commit_archive(archive, final_archive_name): + """Finalize temporary archive and write it to its final destination.""" + if not options.no_compress: + final_archive_name = final_archive_name + '.gz' + if archive: + archive.close() + if not archive.empty: + final_archive = ArchiveMbox(final_archive_name) + final_archive.lock() + try: + final_archive.append(archive.mbox_file_name) + final_archive.commit() + finally: + final_archive.unlock() + final_archive.close() + archive.remove() + +def make_archive_name(mailbox_name): + """Derive archive name and (relative) path from the mailbox name.""" + # allow the user to embed time formats such as '%B' in the archive name + if options.date_old_max == None: + tm = time.localtime(time.time() - options.days_old_max*24*60*60) + else: + tm = time.localtime(options.date_old_max) + prefix = suffix = "" + if options.archive_name: + archive_head = "" + archive_tail = time.strftime(options.archive_name, tm) + else: + if options.archive_prefix is None and options.archive_suffix is None: + suffix = options.archive_default_suffix + else: + if options.archive_prefix: + prefix = time.strftime(options.archive_prefix, tm) + if options.archive_suffix: + suffix = time.strftime(options.archive_suffix, tm) + archive_head, archive_tail = os.path.split(mailbox_name) + if not prefix: + # Don't create hidden archives, e.g. when processing Maildir++ + # subfolders + archive_tail = archive_tail.lstrip('.') + if options.output_dir: + archive_head = options.output_dir + archive_name = os.path.join(archive_head, prefix + archive_tail + suffix) + return archive_name + +def check_sane_destdir(dir): + """Do a very primitive check if the given directory looks like a reasonable + destination directory and bail out if it doesn't.""" + assert dir + if not os.path.isdir(dir): + user_error("output directory does not exist: '%s'" % dir) + if not os.access(dir, os.W_OK): + user_error("no write permission on output directory: '%s'" % dir) + +def check_archive(archive_name): + """Check if existing archive files are (not) compressed as expected and + check if we can work with the destination directory.""" + compressed_archive = archive_name + ".gz" + if options.no_compress: + if os.path.isfile(compressed_archive): + user_error("There is already a file named '%s'!\n" + "Have you been previously compressing this archive?\n" + "You probably should uncompress it manually, and try running me " + "again." % compressed_archive) + elif os.path.isfile(archive_name): + user_error("There is already a file named '%s'!\n" + "Have you been reading this archive?\n" + "You probably should re-compress it manually, and try running me " + "again." 
% archive_name) + dest_dir = os.path.dirname(archive_name) + if not dest_dir: + dest_dir = os.getcwd() + check_sane_destdir(dest_dir) + +def nice_size_str(size): + """Return given size in bytes as '12kB', '1.2MB'""" + kb = size / 1024.0 + mb = kb / 1024.0 + if mb >= 1.0: return str(round(mb, 1)) + 'MB' + if kb >= 1.0: return str(round(kb)) + 'kB' + return str(size) + 'B' + + +def get_filename(msg): + """If the given rfc822.Message can be identified with a file (no mbox), + return the filename, otherwise raise AttributeError.""" + try: + return msg.fp.name + except AttributeError: + # Ugh, that's ugly. msg.fp is not a plain file, it may be an + # instance of + # a. mailbox._Subfile + # (msg from mailbox.UnixMailbox, Python <= 2.4) + # File object is msg.fp.fp, we don't want that + # b. mailbox._PartialFile, subclass of mailbox._ProxyFile + # (msg from mailbox.UnixMailbox, Python >= 2.5) + # File object is msg.fp._file, we don't want that + # c. mailbox._ProxyFile + # (msg from mailbox.Maildir, Python >= 2.5) + # File object is msg.fp._file, we do want that. + if msg.fp.__class__ == mailbox._ProxyFile: + assert hasattr(mailbox, "_PartialFile") + return msg.fp._file.name + raise + +def safe_open_create(filename): + """Create and open a file in a NFSv2-safe way, and return a r/w file descriptor. + The new file is created with mode 600.""" + # This is essentially a simplified version of the dotlocking function. + vprint("Creating file '%s'" % filename) + dir, basename = os.path.split(filename) + # We rely on tempfile.mkstemp to create files safely and with 600 mode. + fd, pre_name = tempfile.mkstemp(prefix=basename+".pre-", dir=dir) + try: + try: + os.link(pre_name, filename) + except OSError, e: + if os.fstat(fd).st_nlink == 2: + pass + else: + raise + finally: + os.unlink(pre_name) + return fd + +def safe_open_existing(filename): + """Safely open an existing file, and return a r/w file descriptor.""" + lst = os.lstat(filename) + if stat.S_ISLNK(lst.st_mode): + unexpected_error("file '%s' is a symlink." % filename) + fd = os.open(filename, os.O_RDWR) + fst = os.fstat(fd) + if fst.st_nlink != 1: + unexpected_error("file '%s' has %d hard links." % \ + (filename, fst.st_nlink)) + if stat.S_ISDIR(fst.st_mode): + unexpected_error("file '%s' is a directory." % filename) + for i in stat.ST_DEV, stat.ST_INO, stat.ST_UID, stat.ST_GID, stat.ST_MODE, stat.ST_NLINK: + if fst[i] != lst[i]: + unexpected_error("file status changed unexpectedly") + return fd + +def safe_open(filename): + """Safely open a file, creating it if it doesn't exist, and return a + r/w file descriptor.""" + # This borrows from postfix code. + vprint("Opening archive...") + try: + fd = safe_open_existing(filename) + except OSError, e: + if e.errno != errno.ENOENT: raise + fd = safe_open_create(filename) + return fd + +# this is where it all happens, folks +if __name__ == '__main__': + main() diff --git a/archivemail.xml b/archivemail.xml new file mode 100644 index 0000000..9bc5642 --- /dev/null +++ b/archivemail.xml @@ -0,0 +1,794 @@ + + +lockf +2'> + + +gzip +1'> + + +procmail +1'> + + +python +1'> + + +crontab +5'> + + +mbox +5'> +]> + + + +5 July 2011 + + +archivemail +1 +archivemail user manual +archivemail 0.9.0 + + + +archivemail +archive and compress your old email + + + + + +archivemail + +MAILBOX + + + + + +Description + + +archivemail is a tool for archiving and compressing old +email in mailboxes. 
+By default it will read the mailbox MAILBOX, moving +messages that are older than the specified number of days (180 by default) to +a &mbox;-format mailbox in the same directory that is compressed with &gzip;. +It can also just delete old email rather than archive it. + + + +By default, archivemail derives the archive filename from +the mailbox name by appending an _archive suffix to the +mailbox name. For example, if you run archivemail on a +mailbox called exsouthrock, the archive will be created +with the filename exsouthrock_archive.gz. +This default behavior can be overridden with command line options, choosing +a custom suffix, a prefix, or a completely custom name for the archive. + + + +archivemail supports reading IMAP, +Maildir, MH and +mbox-format mailboxes, but always writes +mbox-format archives. + + + +Messages that are flagged important are not archived or deleted unless +explicitly requested with the option. +Also, archivemail can be configured not to archive unread +mail, or to only archive messages larger than a specified size. + + + +To archive an IMAP-format mailbox, use the format +imap://username:password@server/mailbox to specify +the mailbox. +archivemail will expand wildcards in +IMAP mailbox names according to +RFC 3501, which says: The +character "*" is a wildcard, and matches zero or more characters at this +position. The character "%" is similar to "*", but it does not match a +hierarchy delimiter. +You can omit the password from the URL; use the + option to make archivemail read +the password from a file, or alternatively just enter it upon request. +If the option is set, archivemail +does not look for a password in the URL, and the colon is +not considered a delimiter. +Substitute imap with +imaps, and archivemail will +establish a secure SSL connection. +See below for more IMAP peculiarities. + + + + +Options + + + + + + +Archive messages older than NUM +days. The default is 180. This option is incompatible with the + option below. + + + + + + +Archive messages older than DATE. +DATE can be a date string in ISO format (eg +2002-04-23), Internet format (eg 23 Apr +2002) or Internet format with full month names (eg +23 April 2002). Two-digit years are not supported. +This option is incompatible with the option above. + + + + + + +Use the directory name PATH to +store the mailbox archives. The default is the same directory as the mailbox +to be read. + + + + + + +Read IMAP password from file +FILE instead of from the command line. Note +that this will probably not work if you are archiving folders from +more than one IMAP account. + + + + + + +Append STRING to the +IMAP filter string. +For IMAP wizards. + + + + + + +Prefix NAME to the archive name. +NAME is expanded by the &python; function +time.strftime(), which means that you can specify special +directives in NAME to make an archive named after +the archive cut-off date. +See the discussion of the option for a list of valid +strftime() directives. +The default is not to add a prefix. + + + + + + + +Use the suffix NAME to create the filename used for +archives. The default is _archive, unless a prefix is +specified. + + +Like a prefix, the suffix NAME is expanded by the +&python; function time.strftime() with the archive +cut-off date. time.strftime() understands the following +directives: + + %a + + Locale's abbreviated weekday name. + + + %A + + Locale's full weekday name. + + + %b + + Locale's abbreviated month name. + + + %B + + Locale's full month name. 
+ + + %c + + Locale's appropriate date and time representation. + + + %d + + Day of the month as a decimal number [01,31]. + + + %H + + Hour (24-hour clock) as a decimal number [00,23]. + + + %I + + Hour (12-hour clock) as a decimal number [01,12]. + + + %j + + Day of the year as a decimal number [001,366]. + + + %m + + Month as a decimal number [01,12]. + + + %M + + Minute as a decimal number [00,59]. + + + %p + + Locale's equivalent of either AM or PM. + + + %S + + Second as a decimal number [00,61]. (1) + + + %U + + Week number of the year (Sunday as the first day of the week) + as a decimal number [00,53]. All days in a new year preceding + the first Sunday are considered to be in week 0. + + + %w + + Weekday as a decimal number [0(Sunday),6]. + + + %W + + Week number of the year (Monday as the first day of the week) + as a decimal number [00,53]. All days in a new year preceding + the first Sunday are considered to be in week 0. + + + %x + + Locale's appropriate date representation. + + + %X + + Locale's appropriate time representation. + + + %y + + Year without century as a decimal number [00,99]. + + + %Y + + Year with century as a decimal number. + + + %Z + + Time zone name (or by no characters if no time zone exists). + + + %% + + A literal % character. + + + + + + + + + +Use NAME as the archive name, +ignoring the name of the mailbox that is archived. +Like prefixes and suffixes, NAME is expanded by +time.strftime() with the archive cut-off date. +Because it hard-codes the archive name, this option cannot be used when +archiving multiple mailboxes. + + + + + + +Only archive messages that are NUM +bytes or greater. + + + + + + + +Don't write to any files -- just show what would have been done. This is +useful for testing to see how many messages would have been archived. + + + + + + + +Do not archive any messages that have not yet been read. +archivemail determines if a message in a +mbox-format or MH-format mailbox has +been read by looking at the Status header (if it exists). +If the status header is equal to RO or +OR then archivemail +assumes the message has been read. +archivemail determines if a maildir +message has been read by looking at the filename. +If the filename contains an S after +:2, then it assumes the message has been read. + + + + + + + + +Do not mangle lines in message bodies beginning with +From . +When archiving a message from a mailbox not in mbox +format, by default archivemail mangles such lines by +prepending a > to them, since mail user +agents might otherwise interpret these lines as message separators. +Messages from mbox folders are never mangled. See &mbox; +for more information. + + + + + + + + +Delete rather than archive old mail. Use this option with caution! + + + + + + + + +Copy rather than archive old mail. +Creates an archive, but the archived messages are not deleted from the +originating mailbox, which is left unchanged. +This is a complement to the option, and mainly +useful for testing purposes. +Note that multiple passes will create duplicates, since messages are blindly +appended to an existing archive. + + + + + + + + +Archive all messages, without distinction. + + + + + + + + +Normally messages that are flagged important are not archived or deleted. If +you specify this option, these messages can be archived or deleted just like +any other message. + + + + + + + + +Do not compress any archives. + + + + + + + + +Warn about duplicate Message-IDs that appear in the input +mailbox. 
+ + + + + + +Reports lots of extra debugging information about what is going on. + + + + + + + + +Set IMAP debugging level. This makes +archivemail dump its conversation with the +IMAP server and some internal IMAP +processing to stdout. Higher values for +NUM give more elaborate output. Set +NUM to 4 to see all exchanged +IMAP commands. (Actually, NUM +is just passed literally to imaplib.Debug.) + + + + + + + +Turns on quiet mode. Do not print any statistics about how many messages were +archived. This should be used if you are running +archivemail from cron. + + + + + + + +Display the version of archivemail and exit. + + + + + + + +Display brief summary information about how to run +archivemail. + + + + + + + +Notes + + +archivemail requires &python; version 2.3 or later. +When reading an mbox-format mailbox, +archivemail will create a lockfile with the extension +.lock so that &procmail; will not +deliver to the mailbox while it is being processed. It will also create an +advisory lock on the mailbox using &lockf;. The archive is locked in the same +way when it is updated. +archivemail will also complain and abort if a 3rd-party +modifies the mailbox while it is being read. + + + +archivemail will always attempt to preserve the last-access +and last-modify times of the input mailbox. Archive mailboxes are always +created with a mode of 0600. +If archivemail finds a pre-existing archive mailbox it will +append rather than overwrite that archive. +archivemail will refuse to operate on mailboxes that are +symbolic links. + + + +archivemail attempts to find the delivery date of a message +by looking for valid dates in the following headers, in order of precedence: +Delivery-date, +Received, +Resent-Date and +Date. +If it cannot find any valid date in these headers, it will use the +last-modified file timestamp on MH and +Maildir format mailboxes, or the date on the +From_ line on mbox-format mailboxes. + + + +When archiving mailboxes with leading dots in the name, +archivemail will strip the dots off the archive name, so +that the resulting archive file is not hidden. +This is not done if the or + option is used. +Should there really be mailboxes distinguished only by leading dots in the +name, they will thus be archived to the same archive file by default. + + + +A conversion from other formats to &mbox; will silently overwrite existing +Status and X-Status message headers. + + + +<acronym>IMAP</acronym> + +When archivemail processes an IMAP +folder, all messages in that folder will have their \Recent +flag unset, and they will probably not show up as new in your +user agent later on. +There is no way around this, it's just how IMAP works. +This does not apply, however, if you run archivemail with +the options or . + + +archivemail relies on server-side searches to determine the +messages that should be archived. +When matching message dates, IMAP servers refer to server +internal message dates, and these may differ from both delivery time of a +message and its Date header. +Also, there exist broken servers which do not implement server side searches. + +<acronym>IMAP</acronym> <acronym>URL</acronym>s + +archivemail's IMAP +URL parser was written with the RFC 2882 +(Internet Message Format) rules for the +local-part of email addresses in mind. +So, rather than enforcing an URL-style encoding of +non-ascii and reserved characters, it allows you to +double-quote the username and password. 
+If your username or password contains the delimiter characters +@ or :, just quote it like this: +imap://"username@bogus.com":"password"@imap.bogus.com/mailbox. +You can use a backslash to escape double-quotes that are part of a quoted +username or password. +Note that quoting only a substring will not work, and be aware that your shell +will probably remove unprotected quotes or backslashes. + + +Similarly, there is no need to percent-encode non-ascii +characters in IMAP mailbox names. +As long as your locale is configured properly, archivemail +should handle these without problems. +Note, however, that due to limitations of the IMAP +protocol, non-ascii characters do not mix well with +wildcards in mailbox names. + + +archivemail tries to be smart when handling mailbox paths. +In particular, it will automatically add an IMAP +NAMESPACE prefix to the mailbox path if necessary; and if +you are archiving a subfolder, you can use the slash as a path separator +instead of the IMAP server's internal representation. + + + + + + +Examples + + + +To archive all messages in the mailbox debian-user that +are older than 180 days to a compressed mailbox called +debian-user_archive.gz in the current directory: + +bash$ archivemail debian-user + + + + + + +To archive all messages in the mailbox debian-user that +are older than 180 days to a compressed mailbox called +debian-user_October_2001.gz (where the current month and +year is April, 2002) in the current directory: + +bash$ archivemail --suffix '_%B_%Y' debian-user + + + + + + +To archive all messages in the mailbox cm-melb that +are older than the first of January 2002 to a compressed mailbox called +cm-melb_archive.gz in the current directory: + +bash$ archivemail --date='1 Jan 2002' cm-melb + + + + + + +Exactly the same as the above example, using an ISO date +format instead: + +bash$ archivemail --date=2002-01-01 cm-melb + + + + + + +To delete all messages in the mailbox spam that +are older than 30 days: + +bash$ archivemail --delete --days=30 spam + + + + + + +To archive all read messages in the mailbox incoming that +are older than 180 days to a compressed mailbox called +incoming_archive.gz in the current directory: + +bash$ archivemail --preserve-unread incoming + + + + + + +To archive all messages in the mailbox received that +are older than 180 days to an uncompressed mailbox called +received_archive in the current directory: + +bash$ archivemail --no-compress received + + + + + + +To archive all mailboxes in the directory $HOME/Mail +that are older than 90 days to compressed mailboxes in the +$HOME/Mail/Archive directory: + +bash$ archivemail -d90 -o $HOME/Mail/Archive $HOME/Mail/* + + + + + + +To archive all mails older than 180 days from the given +IMAP INBOX to a compressed mailbox +INBOX_archive.gz in the +$HOME/Mail/Archive directory, quoting the password and +reading it from the environment variable PASSWORD: + + + +bash$ archivemail -o $HOME/Mail/Archive imaps://user:'"'$PASSWORD'"'@example.org/INBOX + + +Note the protected quotes. + + + + + +To archive all mails older than 180 days in subfolders of foo on the given IMAP +server to corresponding archives in the current working directory, reading the +password from the file ~/imap-pass.txt: + + +bash$ archivemail --pwfile=~/imap-pass.txt imaps://user@example.org/foo/* + + + + + +Tips + +Probably the best way to run archivemail is from your +&crontab; file, using the option. +Don't forget to try the and perhaps the + option for non-destructive testing. 
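+A purely illustrative crontab entry (the install path, mailbox location and
+option values here are assumptions; adjust them to your own setup) could look
+like this:
+
+30 2 * * * /usr/local/bin/archivemail --quiet --days=90 --output-dir=$HOME/Mail/Archive $HOME/Mail/inbox
+
+Running with --quiet keeps cron from mailing you statistics on every run, and
+trying the same command interactively with --dry-run first shows what would be
+archived without touching any files.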
+ + + + +Exit Status +Normally the exit status is 0. Nonzero indicates an unexpected error. + + + + +Bugs + +If an IMAP mailbox path contains slashes, the archive +filename will be derived from the basename of the mailbox. +If the server's folder separator differs from the Unix slash and is used in +the IMAP URL, however, the whole path +will be considered the basename of the mailbox. +E.g. the two URLs +imap://user@example.com/folder/subfolder and +imap://user@example.com/folder.subfolder will be +archived in subfolder_archive.gz and +folder.subfolder_archive.gz, respectively, although they +might refer to the same IMAP mailbox. + + +archivemail does not support reading +MMDF or Babyl-format mailboxes. In fact, +it will probably think it is reading an mbox-format mailbox +and cause all sorts of problems. + + + +archivemail is still too slow, but if you are running from +&crontab; you won't care. Archiving maildir-format +mailboxes should be a lot quicker than mbox-format +mailboxes since it is less painful for the original mailbox to be +reconstructed after selective message removal. + + + + +See Also + + &mbox; + &crontab; + &python; + &procmail; + + + + +<acronym>Url</acronym> +The archivemail home page is currently hosted at +sourceforge + + + + +Author + This manual page was written by Paul Rodger <paul at paulrodger +dot com>. Updated and supplemented by Nikolaus Schulz +microschulz@web.de + + + diff --git a/db2html.xsl b/db2html.xsl new file mode 100644 index 0000000..d3be5f6 --- /dev/null +++ b/db2html.xsl @@ -0,0 +1,10 @@ + + + + manpage.css + +

archivemail

+
+
+
diff --git a/db2man.xsl b/db2man.xsl new file mode 100644 index 0000000..bcc3f61 --- /dev/null +++ b/db2man.xsl @@ -0,0 +1,29 @@ + + + + + + + + + .TP + + + + + + + + + + + + + + + + + + + diff --git a/examples/archivemail_all b/examples/archivemail_all new file mode 100644 index 0000000..30cb05e --- /dev/null +++ b/examples/archivemail_all @@ -0,0 +1,31 @@ +#!/bin/sh +# +# This is an example shell script I use from my crontab(5) file to selectively +# archive some of my mailboxes. Most of these mailboxes come directly from +# procmail and are in maildir-format -- not that it should really matter. +# +# I probably could have done them all as: +# $ARCMAIL $HOME/Mail/* +# ...if I had enough disk space to keep mail uncompressed for 180 days :) +# +set -e + +ARCMAIL="/usr/local/bin/archivemail --quiet --output-dir=$HOME/Mail/Archive/ " + +$ARCMAIL --days 14 $HOME/Mail/debian-user \ + $HOME/Mail/linux-kernel \ + $HOME/Mail/python-list \ + $HOME/Mail/spam-l + +$ARCMAIL --days 30 --delete $HOME/Mail/duplicates + +$ARCMAIL --days 90 $HOME/Mail/bugtraq \ + $HOME/Mail/debian-devel \ + $HOME/Mail/debian-mentors \ + $HOME/Mail/spam + +$ARCMAIL $HOME/Mail/cm-melb \ + $HOME/Mail/exsouthrock \ + $HOME/Mail/received \ + $HOME/Mail/sent \ + $HOME/Mail/vim diff --git a/index.html b/index.html new file mode 100644 index 0000000..90bef89 --- /dev/null +++ b/index.html @@ -0,0 +1,192 @@ + + + + + archivemail – a tool for archiving and compressing old email + + + + + +

archivemail

+ +
+
+ Latest version: 0.9.0
+ Released on 2011-07-09 +
+

What is it?

+

+archivemail is a tool for archiving and compressing +old email in mailboxes. +It moves messages older than the specified number of +days to a separate mbox format mailbox that is +compressed with gzip. +It can also just delete old email rather than archive it. + +
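+For example, an illustrative run (the mailbox path here is made up) such as
+
+archivemail --days=90 ~/Mail/debian-user
+
+moves every message older than 90 days out of ~/Mail/debian-user into a
+compressed archive in the same directory, named debian-user_archive.gz by
+default.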

What can it do for me?

+

+Maybe some of your mailboxes are quite large (e.g., over 10,000 messages) and +take a while to load in your mail reader. Perhaps they are taking +up too much space on your disk. Archiving old messages to a separate, +compressed mailbox will mean: +

    +
  1. Your mail reader will get a huge performance boost when loading and reading + your mail. 
  2. You will be taking up less disk space, since old mail will be compressed. + (Mail usually compresses quite nicely.) +
  3. You won't be confronted with semi-obsolete mail all the time. +
+

+You can also use archivemail as a simple backup +tool. + +
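+For instance, the following sketch copies all messages into a compressed
+archive while leaving the mailbox itself untouched (--copy is documented in
+the manpage; the --all spelling of the archive-everything switch and the
+mailbox path are assumptions here):
+
+archivemail --copy --all ~/Mail/important
+
+Repeated runs of --copy append to an existing archive, so duplicates will
+accumulate.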

Features overview

+
    +
  • Supports archiving + IMAP, + mbox, + MH and + Maildir format + mailboxes. +
  • Old mail can be either archived or just deleted. +
  • The age in days that is considered old is configurable – it defaults + to 180 days. + You can also set an absolute limit date. +
  • Messages that are flagged important are not archived or deleted unless + explicitly requested. +
  • Can be configured to preserve unread mail. +
  • Can be configured to archive only messages larger than a specified size. 
  • Stores the compressed archive mailboxes in a directory of + your choice, with an extension of your choice. +
  • Easy read-only testing, not touching your valuable data. +
  • Supports IMAPS/SSL (see the example after this list). 
  • When archiving IMAP mailboxes, the message selection can be refined by + extending the underlying IMAP SEARCH command with arbitrary + search keys (you will have to cope with the raw IMAP protocol, though). +
+ +
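+As an example of the IMAPS support, the following command (with a placeholder
+account and server name) archives messages older than the default 180 days
+from an INBOX over SSL:
+
+archivemail imaps://alice@mail.example.org/INBOX
+
+If the password is neither embedded in the URL nor read from a file,
+archivemail asks for it interactively.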

Documentation

+

+The archivemail manpage is the primary documentation +for archivemail. +To see what has changed in the latest version, check the release notes. +If you want to have a closer look at the current development status, here's the + +CHANGELOG and the +TODO list, fresh from the repository. + +

Where can I get it?

+

+You can grab the latest version of archivemail +directly from the archivemail +download area at Sourceforge. +There should be up-to-date binary RPM packages at the +OpenSUSE build +service for SUSE Linux and Fedora Core. +Also, many Linux distributions provide packages; e.g. there is a +Debian +package. + +

+archivemail is written in Python, and hacking it is +easy and fun; you can check out the source from the git repository with +the following command: +

+git clone git://archivemail.git.sourceforge.net/gitroot/archivemail/archivemail +
+

+See also the short +introduction to git access at sourceforge. + + + +
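+If you plan to hack on the code, a quick sanity check is to run the bundled
+test suite from the top of your checkout (it expects the archivemail script
+to be in the current directory):
+
+./test_archivemail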

Getting involved

+ + +

Requirements

+

+archivemail requires Python 2.3 or newer. +It also uses some optional Python modules, but these should be pretty much +standard; if you nonetheless get an ImportError, please +report it. +
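+To check quickly whether your interpreter is recent enough, an illustrative
+one-liner like the following prints True on any suitable Python 2.x:
+
+python -c 'import sys; print sys.version_info >= (2, 3)'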

License

+

+This software is licensed under the terms of the +GNU GPL, either +version 2 of the license, or any later version. + +

Credits

+

+archivemail was written by Paul Rodger +<paul at paulrodger dot + com> +and is currently maintained by Peter Poeml <poeml + at suse dot de>, Nikolaus Schulz <microschulz@web.de> +and Brandon Knitter. + +


+

+ + + + SourceForge.net Logo + + + + + diff --git a/manpage.css b/manpage.css new file mode 100644 index 0000000..915cea2 --- /dev/null +++ b/manpage.css @@ -0,0 +1,15 @@ +@import "style.css"; +h2 { + font-variant: small-caps; + font-size: 170%; +} +.informalexample { + margin-bottom: 1.2em; +} +div.informalexample .screen { + margin-left: 2ex; +} + +a#strftime + dl dt { float: left; margin: 0.3ex 0; width: 1.5em; } +a#strftime + dl dd { float: left; margin: 0.3ex 0; margin-left: 1.2em; width: 90%; } +dt { clear: left; } diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..e4521c9 --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python + +import sys + +def check_python_version(): + """Abort if we are running on python < v2.0""" + too_old_error = """This program requires python v2.0 or greater. +Your version of python is: %s""" % sys.version + try: + version = sys.version_info # we might not even have this function! :) + if (version[0] < 2): + print too_old_error + sys.exit(1) + except AttributeError: + print too_old_error + sys.exit(1) + +# define & run this early - 'distutils.core' requires Python >= 2.0 +check_python_version() +from distutils.core import setup + +setup(name="archivemail", + version="0.9.0", + description="archive and compress old email", + license="GNU GPL", + url="http://archivemail.sourceforge.net/", + author="Paul Rodger", + author_email="paul@paulrodger.com", + maintainer="Nikolaus Schulz, Peter Poeml", + maintainer_email="nikosch@users.sourceforge.net, poeml@users.sourceforge.net", + scripts=["archivemail"], + data_files=[("share/man/man1", ["archivemail.1"])], + ) diff --git a/style.css b/style.css new file mode 100644 index 0000000..7893786 --- /dev/null +++ b/style.css @@ -0,0 +1,49 @@ +body { + padding: 2%; + line-height: 130%; + margin: 0; + /*color: #036;*/ +} +h1 { + font-size: 220%; + font-weight: bold; + padding: 0 0 0.4em 0.1em; + /*margin: 0 0 0.5em 0; */ + margin: 0; + /*border-bottom: 2px solid black;*/ +} +hr { + border: 1px #b8b8b8 solid; +} +h1 + hr { + margin-top: 0; + margin-bottom: 1.7em; +} +h2 { + margin: 1em 0 0.8em 0; + padding: 0; + font-size: 150%; +} +img {border: none;} +a { + text-decoration: underline; +} +a:link { + /*color: #0073c7;*/ + color: blue; + background-color: inherit; +} + +a:visited { + /*color: #5A88B5;*/ + color: #844084; + background-color: inherit; +} + +/* +a:hover, +a:active { + color: #0073c7; + background-color: #f0f0f0; +} +*/ diff --git a/test_archivemail b/test_archivemail new file mode 100755 index 0000000..ed033e2 --- /dev/null +++ b/test_archivemail @@ -0,0 +1,1771 @@ +#! /usr/bin/env python +############################################################################ +# Copyright (C) 2002 Paul Rodger +# (C) 2006-2011 Nikolaus Schulz +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +############################################################################ +""" +Unit-test archivemail using 'PyUnit'. + +TODO: add tests for: + * dotlock locks already existing + * archiving MH-format mailboxes + * a 3rd party process changing the mbox file being read + +""" + +import sys + +def check_python_version(): + """Abort if we are running on python < v2.4""" + too_old_error = "This test script requires python version 2.4 or later. " + \ + "Your version of python is:\n%s" % sys.version + try: + version = sys.version_info # we might not even have this function! :) + if (version[0] < 2) or (version[0] == 2 and version[1] < 4): + print too_old_error + sys.exit(1) + except AttributeError: + print too_old_error + sys.exit(1) + +# define & run this early because 'unittest' requires Python >= 2.1 +check_python_version() + +import copy +import fcntl +import filecmp +import os +import re +import shutil +import stat +import tempfile +import time +import unittest +import gzip +import cStringIO +import rfc822 +import mailbox + +from types import ModuleType +archivemail = ModuleType("archivemail") +try: + module_fp = open("archivemail", "r") +except IOError: + print "The archivemail script should be in the current directory in order" + print "to be imported and tested. Sorry." + sys.exit(1) +exec module_fp in archivemail.__dict__ + +# We want to iterate over messages in a compressed archive mbox and verify +# them. This involves seeking in the mbox. The gzip.Gzipfile.seek() in +# Python 2.5 doesn't understand whence; this is Python bug #1355023, triggered +# by mailbox._PartialFile.seek(). The bug is still pending as of Python +# 2.5.2. To work around it, we subclass gzip.GzipFile. +# +# It should be noted that seeking backwards in a GzipFile is emulated by +# re-reading the entire file from the beginning, which is extremely +# inefficient and won't work with large files; but our test archives are all +# small, so it's okay. + +class FixedGzipFile(gzip.GzipFile): + """GzipFile with seek method accepting whence parameter.""" + def seek(self, offset, whence=0): + try: + # Try calling gzip.GzipFile.seek with the whence parameter. + # For Python >= 2.7, it returns the new offset; pass that on. + return gzip.GzipFile.seek(self, offset, whence) + except TypeError: + if whence: + if whence == 1: + offset = self.offset + offset + else: + raise ValueError('Seek from end not supported') + return gzip.GzipFile.seek(self, offset) + +# precision of os.utime() when restoring mbox timestamps +utimes_precision = 5 + +class MessageIdFactory: + """Factory to create `uniqe' message-ids.""" + def __init__(self): + self.seq = 0 + def __call__(self): + self.seq += 1 + return "" % self.seq + +make_msgid = MessageIdFactory() + +class IndexedMailboxDir: + """An indexed mailbox directory, providing random message access by + message-id. 
Intended as a base class for a maildir and an mh subclass.""" + + def __init__(self, mdir_name): + assert tempfile.tempdir + self.root = tempfile.mkdtemp(prefix=mdir_name) + self.msg_id_dict = {} + self.deliveries = 0 + + def _add_to_index(self, msg_text, fpath): + """Add the given message to the index, for later random access.""" + # Extract the message-id as index key + msg_id = None + fp = cStringIO.StringIO(msg_text) + while True: + line = fp.readline() + # line empty means we didn't find a message-id + assert line + if line.lower().startswith("message-id:"): + msg_id = line.split(":", 1)[-1].strip() + assert msg_id + break + assert not self.msg_id_dict.has_key(msg_id) + self.msg_id_dict[msg_id] = fpath + + def get_all_filenames(self): + """Return all relative pathnames of files in this mailbox.""" + return self.msg_id_dict.values() + +class SimpleMaildir(IndexedMailboxDir): + """Primitive Maildir class, just good enough for generating short-lived + test maildirs.""" + + def __init__(self, mdir_name='maildir'): + IndexedMailboxDir.__init__(self, mdir_name) + for d in "cur", "tmp", "new": + os.mkdir(os.path.join(self.root, d)) + + def write(self, msg_str, new=True, flags=[]): + """Store a message with the given flags.""" + assert not (new and flags) + if new: + subdir = "new" + else: + subdir = "cur" + fname = self._mkname(new, flags) + relpath = os.path.join(subdir, fname) + path = os.path.join(self.root, relpath) + assert not os.path.exists(path) + f = open(path, "w") + f.write(msg_str) + f.close() + self._add_to_index(msg_str, relpath) + + def _mkname(self, new, flags): + """Generate a unique filename for a new message.""" + validflags = 'DFPRST' + for f in flags: + assert f in validflags + # This 'unique' name should be good enough, since nobody else + # will ever write messages to this maildir folder. + uniq = str(self.deliveries) + self.deliveries += 1 + if new: + return uniq + if not flags: + return uniq + ':2,' + finfo = "".join(sorted(flags)) + return uniq + ':2,' + finfo + + def get_message_and_mbox_status(self, msgid): + """For the Message-Id msgid, return the matching message in text + format and its status, expressed as a set of mbox flags.""" + fpath = self.msg_id_dict[msgid] # Barfs if not found + mdir_flags = fpath.rsplit('2,', 1)[-1] + flagmap = { + 'F': 'F', + 'R': 'A', + 'S': 'R' + } + mbox_flags = set([flagmap[x] for x in mdir_flags]) + if fpath.startswith("cur/"): + mbox_flags.add('O') + fp = open(os.path.join(self.root, fpath), "r") + msg = fp.read() + fp.close() + return msg, mbox_flags + + +class TestCaseInTempdir(unittest.TestCase): + """Base class for testcases that need to create temporary files. + All testcases that create temporary files should be derived from this + class, not directly from unittest.TestCase. + TestCaseInTempdir provides these methods: + + setUp() Creates a safe temporary directory and sets tempfile.tempdir. + + tearDown() Recursively removes the temporary directory and unsets + tempfile.tempdir. 
+ + Overriding methods should call the ones above.""" + temproot = None + + def setUp(self): + if not self.temproot: + assert not tempfile.tempdir + self.temproot = tempfile.tempdir = \ + tempfile.mkdtemp(prefix="test-archivemail") + + def tearDown(self): + assert tempfile.tempdir == self.temproot + if self.temproot: + shutil.rmtree(self.temproot) + tempfile.tempdir = self.temproot = None + + +############ Mbox Class testing ############## + +class TestMboxDotlock(TestCaseInTempdir): + def setUp(self): + super(TestMboxDotlock, self).setUp() + self.mbox_name = make_mbox() + self.mbox_mode = os.stat(self.mbox_name)[stat.ST_MODE] + self.mbox = archivemail.Mbox(self.mbox_name) + + def testDotlock(self): + """dotlock_lock/unlock should create/delete a lockfile""" + lock = self.mbox_name + ".lock" + self.mbox._dotlock_lock() + assert os.path.isfile(lock) + self.mbox._dotlock_unlock() + assert not os.path.isfile(lock) + + def testDotlockingSucceedsUponEACCES(self): + """A dotlock should silently be omitted upon EACCES.""" + archivemail.options.quiet = True + mbox_dir = os.path.dirname(self.mbox_name) + os.chmod(mbox_dir, 0500) + try: + self.mbox._dotlock_lock() + self.mbox._dotlock_unlock() + finally: + os.chmod(mbox_dir, 0700) + archivemail.options.quiet = False + +class TestMboxPosixLock(TestCaseInTempdir): + def setUp(self): + super(TestMboxPosixLock, self).setUp() + self.mbox_name = make_mbox() + self.mbox = archivemail.Mbox(self.mbox_name) + + def testPosixLock(self): + """posix_lock/unlock should create/delete an advisory lock""" + + # The following code snippet heavily lends from the Python 2.5 mailbox + # unittest. + # BEGIN robbery: + + # Fork off a subprocess that will lock the file for 2 seconds, + # unlock it, and then exit. + pid = os.fork() + if pid == 0: + # In the child, lock the mailbox. + self.mbox._posix_lock() + time.sleep(2) + self.mbox._posix_unlock() + os._exit(0) + + # In the parent, sleep a bit to give the child time to acquire + # the lock. + time.sleep(0.5) + # The parent's file self.mbox.mbox_file shares fcntl locks with the + # duplicated FD in the child; reopen it so we get a different file + # table entry. + file = open(self.mbox_name, "r+") + lock_nb = fcntl.LOCK_EX | fcntl.LOCK_NB + fd = file.fileno() + try: + self.assertRaises(IOError, fcntl.lockf, fd, lock_nb) + + finally: + # Wait for child to exit. Locking should now succeed. 
+ exited_pid, status = os.waitpid(pid, 0) + + fcntl.lockf(fd, lock_nb) + fcntl.lockf(fd, fcntl.LOCK_UN) + # END robbery + + +class TestMboxNext(TestCaseInTempdir): + def setUp(self): + super(TestMboxNext, self).setUp() + self.not_empty_name = make_mbox(messages=18) + self.empty_name = make_mbox(messages=0) + + def testNextEmpty(self): + """mbox.next() should return None on an empty mailbox""" + mbox = archivemail.Mbox(self.empty_name) + msg = mbox.next() + self.assertEqual(msg, None) + + def testNextNotEmpty(self): + """mbox.next() should a message on a populated mailbox""" + mbox = archivemail.Mbox(self.not_empty_name) + for count in range(18): + msg = mbox.next() + assert msg + msg = mbox.next() + self.assertEqual(msg, None) + + +############ TempMbox Class testing ############## + +class TestTempMboxWrite(TestCaseInTempdir): + def setUp(self): + super(TestTempMboxWrite, self).setUp() + + def testWrite(self): + """mbox.write() should append messages to a mbox mailbox""" + read_file = make_mbox(messages=3) + mbox_read = archivemail.Mbox(read_file) + mbox_write = archivemail.TempMbox() + write_file = mbox_write.mbox_file_name + for count in range(3): + msg = mbox_read.next() + mbox_write.write(msg) + mbox_read.close() + mbox_write.close() + assert filecmp.cmp(read_file, write_file, shallow=0) + + def testWriteNone(self): + """calling mbox.write() with no message should raise AssertionError""" + write = archivemail.TempMbox() + self.assertRaises(AssertionError, write.write, None) + +class TestTempMboxRemove(TestCaseInTempdir): + def setUp(self): + super(TestTempMboxRemove, self).setUp() + self.mbox = archivemail.TempMbox() + self.mbox_name = self.mbox.mbox_file_name + + def testMboxRemove(self): + """remove() should delete a mbox mailbox""" + assert os.path.exists(self.mbox_name) + self.mbox.remove() + assert not os.path.exists(self.mbox_name) + + + +########## options class testing ################# + +class TestOptionDefaults(unittest.TestCase): + def testVerbose(self): + """verbose should be off by default""" + self.assertEqual(archivemail.options.verbose, False) + + def testDaysOldMax(self): + """default archival time should be 180 days""" + self.assertEqual(archivemail.options.days_old_max, 180) + + def testQuiet(self): + """quiet should be off by default""" + self.assertEqual(archivemail.options.quiet, False) + + def testDeleteOldMail(self): + """we should not delete old mail by default""" + self.assertEqual(archivemail.options.delete_old_mail, False) + + def testNoCompress(self): + """no-compression should be off by default""" + self.assertEqual(archivemail.options.no_compress, False) + + def testIncludeFlagged(self): + """we should not archive flagged messages by default""" + self.assertEqual(archivemail.options.include_flagged, False) + + def testPreserveUnread(self): + """we should not preserve unread messages by default""" + self.assertEqual(archivemail.options.preserve_unread, False) + +class TestOptionParser(unittest.TestCase): + def setUp(self): + self.oldopts = copy.copy(archivemail.options) + + def testOptionDate(self): + """--date and -D options are parsed correctly""" + date_formats = ( + "%Y-%m-%d", # ISO format + "%d %b %Y" , # Internet format + "%d %B %Y" , # Internet format with full month names + ) + date = time.strptime("2000-07-29", "%Y-%m-%d") + unixdate = time.mktime(date) + for df in date_formats: + d = time.strftime(df, date) + for opt in '-D', '--date=': + archivemail.options.date_old_max = None + archivemail.options.parse_args([opt+d], "") + 
self.assertEqual(unixdate, archivemail.options.date_old_max) + + def testOptionPreserveUnread(self): + """--preserve-unread option is parsed correctly""" + archivemail.options.parse_args(["--preserve-unread"], "") + assert archivemail.options.preserve_unread + archivemail.options.preserve_unread = False + archivemail.options.parse_args(["-u"], "") + assert archivemail.options.preserve_unread + + def testOptionSuffix(self): + """--suffix and -s options are parsed correctly""" + for suffix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + archivemail.options.parse_args(["--suffix="+suffix], "") + self.assertEqual(archivemail.options.archive_suffix, suffix) + archivemail.options.archive_suffix = None + archivemail.options.parse_args(["-s", suffix], "") + self.assertEqual(archivemail.options.archive_suffix, suffix) + + def testOptionPrefix(self): + """--prefix and -p options are parsed correctly""" + for prefix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + archivemail.options.parse_args(["--prefix="+prefix], "") + self.assertEqual(archivemail.options.archive_prefix, prefix) + archivemail.options.archive_prefix = None + archivemail.options.parse_args(["-p", prefix], "") + self.assertEqual(archivemail.options.archive_prefix, prefix) + + def testOptionArchivename(self): + """--archive-name and -a options are parsed correctly""" + for name in ("custom", ".withdot", "custom_%Y", "%Y/joe"): + archivemail.options.parse_args(["--archive-name="+name], "") + self.assertEqual(archivemail.options.archive_name, name) + archivemail.options.archive_name = None + archivemail.options.parse_args(["-a", name], "") + self.assertEqual(archivemail.options.archive_name, name) + + def testOptionDryrun(self): + """--dry-run option is parsed correctly""" + archivemail.options.parse_args(["--dry-run"], "") + assert archivemail.options.dry_run + archivemail.options.preserve_unread = False + archivemail.options.parse_args(["-n"], "") + assert archivemail.options.dry_run + + def testOptionDays(self): + """--days and -d options are parsed correctly""" + archivemail.options.parse_args(["--days=11"], "") + self.assertEqual(archivemail.options.days_old_max, 11) + archivemail.options.days_old_max = None + archivemail.options.parse_args(["-d11"], "") + self.assertEqual(archivemail.options.days_old_max, 11) + + def testOptionDelete(self): + """--delete option is parsed correctly""" + archivemail.options.parse_args(["--delete"], "") + assert archivemail.options.delete_old_mail + + def testOptionCopy(self): + """--copy option is parsed correctly""" + archivemail.options.parse_args(["--copy"], "") + assert archivemail.options.copy_old_mail + + def testOptionOutputdir(self): + """--output-dir and -o options are parsed correctly""" + for path in "/just/some/path", "relative/path": + archivemail.options.parse_args(["--output-dir=%s" % path], "") + self.assertEqual(archivemail.options.output_dir, path) + archivemail.options.output_dir = None + archivemail.options.parse_args(["-o%s" % path], "") + self.assertEqual(archivemail.options.output_dir, path) + + def testOptionNocompress(self): + """--no-compress option is parsed correctly""" + archivemail.options.parse_args(["--no-compress"], "") + assert archivemail.options.no_compress + + def testOptionSize(self): + """--size and -S options are parsed correctly""" + size = "666" + archivemail.options.parse_args(["--size=%s" % size ], "") + self.assertEqual(archivemail.options.min_size, int(size)) + archivemail.options.parse_args(["-S%s" % size ], "") + self.assertEqual(archivemail.options.min_size, 
int(size)) + + def tearDown(self): + archivemail.options = self.oldopts + +########## archivemail.is_older_than_days() unit testing ################# + +class TestIsTooOld(unittest.TestCase): + def testVeryOld(self): + """with max_days=360, should be true for these dates > 1 year""" + for years in range(1, 10): + time_msg = time.time() - (years * 365 * 24 * 60 * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=360) + + def testOld(self): + """with max_days=14, should be true for these dates > 14 days""" + for days in range(14, 360): + time_msg = time.time() - (days * 24 * 60 * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=14) + + def testJustOld(self): + """with max_days=1, should be true for these dates >= 1 day""" + for minutes in range(0, 61): + time_msg = time.time() - (25 * 60 * 60) + (minutes * 60) + assert archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + + def testNotOld(self): + """with max_days=9, should be false for these dates < 9 days""" + for days in range(0, 9): + time_msg = time.time() - (days * 24 * 60 * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=9) + + def testJustNotOld(self): + """with max_days=1, should be false for these hours <= 1 day""" + for minutes in range(0, 60): + time_msg = time.time() - (23 * 60 * 60) - (minutes * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + + def testFuture(self): + """with max_days=1, should be false for times in the future""" + for minutes in range(0, 60): + time_msg = time.time() + (minutes * 60) + assert not archivemail.is_older_than_days(time_message=time_msg, + max_days=1) + +########## archivemail.parse_imap_url() unit testing ################# + +class TestParseIMAPUrl(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + archivemail.options.verbose = False + archivemail.options.pwfile = None + + urls_withoutpass = [ + ('imap://user@example.org@imap.example.org/upperbox/lowerbox', + ('user', None, 'example.org@imap.example.org', 143, + 'upperbox/lowerbox')), + ('imap://"user@example.org"@imap.example.org/upperbox/lowerbox', + ('user@example.org', None, 'imap.example.org', 143, + 'upperbox/lowerbox')), + ('imap://user@example.org"@imap.example.org/upperbox/lowerbox', + ('user', None, 'example.org"@imap.example.org', 143, + 'upperbox/lowerbox')), + ('imaps://"user@example.org@imap.example.org/upperbox/lowerbox', + ('"user', None, 'example.org@imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://"us\\"er@example.org"@imap.example.org/upperbox/lowerbox', + ('us"er@example.org', None, 'imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://user\\@example.org@imap.example.org/upperbox/lowerbox', + ('user\\', None, 'example.org@imap.example.org', 993, + 'upperbox/lowerbox')) + ] + urls_withpass = [ + ('imap://user@example.org:passwd@imap.example.org/upperbox/lowerbox', + ('user@example.org', 'passwd', 'imap.example.org', 143, + 'upperbox/lowerbox')), + ('imaps://"user@example.org:passwd@imap.example.org/upperbox/lowerbox', + ('"user@example.org', "passwd", 'imap.example.org', 993, + 'upperbox/lowerbox')), + ('imaps://u\\ser\\@example.org:"p@sswd"@imap.example.org/upperbox/lowerbox', + ('u\\ser\\@example.org', 'p@sswd', 'imap.example.org', 993, + 'upperbox/lowerbox')) + ] + # These are invalid when the password's not stripped. 
+ urls_onlywithpass = [ + ('imaps://"user@example.org":passwd@imap.example.org/upperbox/lowerbox', + ('user@example.org', "passwd", 'imap.example.org', + 'upperbox/lowerbox')) + ] + def testUrlsWithoutPwfile(self): + """Parse test urls with --pwfile option unset. This parses a password in + the URL, if present.""" + archivemail.options.pwfile = None + for mbstr in self.urls_withpass + self.urls_withoutpass: + url = mbstr[0] + result = archivemail.parse_imap_url(url) + self.assertEqual(result, mbstr[1]) + + def testUrlsWithPwfile(self): + """Parse test urls with --pwfile set. In this case the ':' character + loses its meaning as a delimiter.""" + archivemail.options.pwfile = "whocares.txt" + for mbstr in self.urls_onlywithpass: + url = mbstr[0] + self.assertRaises(archivemail.UnexpectedError, + archivemail.parse_imap_url, url) + + def testUrlsDefaultPorts(self): + """If an IMAP URL does not specify a server port, the standard ports + are used.""" + archivemail.options.pwfile = "doesnotexist.txt" + self.assertEqual(143, archivemail.parse_imap_url("imap://user@host/box")[3]) + self.assertEqual(993, archivemail.parse_imap_url("imaps://user@host/box")[3]) + + def testUrlsWithPassAndPortnumber(self): + """IMAP URLs with an embedded password and a server port number are + correctly parsed.""" + self.assertEqual(1234, archivemail.parse_imap_url("imap://user:pass@host:1234/box")[3]) + self.assertEqual(1234, archivemail.parse_imap_url("imap://user:pass@host:1234/box")[3]) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.verbose = False + archivemail.options.pwfile = None + +########## quoting and un-quoting of IMAP strings ########## + +class TestIMAPQuoting(unittest.TestCase): + stringlist = ( + ('{braces} and space', '"{braces} and space"'), + ('\\backslash', '"\\\\backslash"'), + ('with "quotes" inbetween', '"with \\"quotes\\" inbetween"'), + ('ending with "quotes"', '"ending with \\"quotes\\""'), + ('\\"backslash before quote', '"\\\\\\"backslash before quote"') + ) + + def testQuote(self): + for unquoted, quoted in self.stringlist: + self.assertEqual(archivemail.imap_quote(unquoted), quoted) + + def testUnquote(self): + for unquoted, quoted in self.stringlist: + self.assertEqual(unquoted, archivemail.imap_unquote(quoted)) + + +########## Modified UTF-7 support functions ########## + +class TestModUTF7(unittest.TestCase): + goodpairs = ( + (u"A\N{NOT IDENTICAL TO}A.", "A&ImI-A."), + (u"Hi Mom -\N{WHITE SMILING FACE}-!", "Hi Mom -&Jjo--!"), + (u"~peter/mail/\u53f0\u5317/\u65e5\u672c\u8a9e", + "~peter/mail/&U,BTFw-/&ZeVnLIqe-") + ) + + def testEncode(self): + """Ensure that encoding text in modified UTF-7 works properly.""" + for text, code in self.goodpairs: + self.assertEqual(archivemail.mod_utf7_encode(text), code) + + def testDecode(self): + """Ensure that decoding modified UTF-7 to text works properly.""" + for text, code in self.goodpairs: + self.assertEqual(archivemail.mod_utf7_decode(code), text) + + +########## acceptance testing ########### + +class TestArchive(TestCaseInTempdir): + """Base class defining helper functions for doing test archiving runs.""" + mbox = None # mbox file that will be processed by archivemail + good_archive = None # Uncompressed reference archive file to verify the + # archive after processing + good_mbox = None # Reference mbox file to verify the mbox after processing + + def verify(self): + assert os.path.exists(self.mbox) + if self.good_mbox is not None: + assertEqualContent(self.mbox, self.good_mbox) + else: + 
self.assertEqual(os.path.getsize(self.mbox), 0) + archive_name = self.mbox + "_archive" + if not archivemail.options.no_compress: + archive_name += ".gz" + iszipped = True + else: + assert not os.path.exists(archive_name + ".gz") + iszipped = False + if self.good_archive is not None: + assertEqualContent(archive_name, self.good_archive, iszipped) + else: + assert not os.path.exists(archive_name) + + def make_old_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with an old mbox by making an old mbox, + optionally an existing archive, and a reference archive to verify the + archive after archivemail has run.""" + self.mbox = make_mbox(body, headers, 181*24, messages) + archive_does_change = not (archivemail.options.dry_run or + archivemail.options.delete_old_mail) + mbox_does_not_change = archivemail.options.dry_run or \ + archivemail.options.copy_old_mail + if make_old_archive: + archive = archivemail.make_archive_name(self.mbox) + self.good_archive = make_archive_and_plain_copy(archive) + if archive_does_change: + append_file(self.mbox, self.good_archive) + elif archive_does_change: + self.good_archive = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_archive) + if mbox_does_not_change: + if archive_does_change and not make_old_archive: + self.good_mbox = self.good_archive + else: + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + + def make_mixed_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with a mixed mbox by making a mixed mbox, + optionally an existing archive, a reference archive to verify the + archive after archivemail has run, and likewise a reference mbox to + verify the mbox.""" + self.make_old_mbox(body, headers, messages=messages, make_old_archive=make_old_archive) + new_mbox_name = make_mbox(body, headers, 179*24, messages) + append_file(new_mbox_name, self.mbox) + if self.good_mbox is None: + self.good_mbox = new_mbox_name + else: + if self.good_mbox == self.good_archive: + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + else: + append_file(new_mbox_name, self.good_mbox) + + def make_new_mbox(self, body=None, headers=None, messages=1, make_old_archive=False): + """Prepare for a test run with a new mbox by making a new mbox, + optionally an exiting archive, and a reference mbox to verify the mbox + after archivemail has run.""" + self.mbox = make_mbox(body, headers, 179*24, messages) + self.good_mbox = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_mbox) + if make_old_archive: + archive = archivemail.make_archive_name(self.mbox) + self.good_archive = make_archive_and_plain_copy(archive) + + +class TestArchiveMbox(TestArchive): + """archiving should work based on the date of messages given""" + + def setUp(self): + self.oldopts = copy.copy(archivemail.options) + archivemail.options.quiet = True + super(TestArchiveMbox, self).setUp() + + def testOld(self): + """archiving an old mailbox""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOldFromInBody(self): + """archiving an old mailbox with 'From ' in the body""" + body = """This is a message with ^From at the start of a line +From is on this line +This is after the ^From line""" + self.make_old_mbox(messages=3, body=body) + archivemail.archive(self.mbox) + self.verify() + + def testDateSystem(self): + """test that the --date option works as expected""" + test_headers = ( + { 
+ 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : None, + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + ) + for headers in test_headers: + msg = make_message(default_headers=headers, wantobj=True) + date = time.strptime("2000-07-29", "%Y-%m-%d") + archivemail.options.date_old_max = time.mktime(date) + assert archivemail.should_archive(msg) + date = time.strptime("2000-07-27", "%Y-%m-%d") + archivemail.options.date_old_max = time.mktime(date) + assert not archivemail.should_archive(msg) + + def testMixed(self): + """archiving a mixed mailbox""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testNew(self): + """archiving a new mailbox""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOldExisting(self): + """archiving an old mailbox with an existing archive""" + self.make_old_mbox(messages=3, make_old_archive=True) + archivemail.archive(self.mbox) + self.verify() + + def testOldWeirdHeaders(self): + """archiving old mailboxes with weird headers""" + weird_headers = ( + { # we should archive because of the date on the 'From_' line + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : 'Friskhdfkjkh, 28 Jul 2002 1line noise6:11:36 +1000', + }, + { # we should archive because of the date on the 'From_' line + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : None, + }, + { # we should archive because of the date in 'Delivery-date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Frcorruptioni, 28 Jul 20line noise00 16:6 +1000', + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Delivery-date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Delivery-date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Resent-Date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : 'Frcorruptioni, 28 Jul 20line noise00 16:6 +1000', + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # we should archive because of the date in 'Resent-Date' + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2030', + 'Date' : None, + 'Resent-Date' : 'Fri, 28 Jul 2000 16:11:36 +1000', + }, + { # completely blank dates were crashing < version 0.4.7 + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : '', + }, + { # completely blank dates were crashing < version 0.4.7 + 'From_' : 'sender@dummy.domain Fri Jul 28 16:11:36 2000', + 'Date' : '', + 'Resent-Date' : '', + }, + ) + fd, self.mbox = tempfile.mkstemp() + fp = os.fdopen(fd, "w") + for headers in weird_headers: + msg_text = make_message(default_headers=headers) + fp.write(msg_text*2) + fp.close() + self.good_archive = tempfile.mkstemp()[1] + shutil.copyfile(self.mbox, self.good_archive) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options = self.oldopts + super(TestArchiveMbox, self).tearDown() + + +class TestArchiveMboxTimestamp(TestCaseInTempdir): + """original mbox timestamps should always be preserved""" + def setUp(self): + 
super(TestArchiveMboxTimestamp, self).setUp() + archivemail.options.quiet = True + self.mbox_name = make_mbox(messages=3, hours_old=(24 * 180)) + self.mtime = os.path.getmtime(self.mbox_name) - 66 + self.atime = os.path.getatime(self.mbox_name) - 88 + os.utime(self.mbox_name, (self.atime, self.mtime)) + + def testNew(self): + """mbox timestamps should not change after no archival""" + archivemail.options.days_old_max = 181 + archivemail.archive(self.mbox_name) + self.verify() + + def testOld(self): + """mbox timestamps should not change after archival""" + archivemail.options.days_old_max = 179 + archivemail.archive(self.mbox_name) + self.verify() + + def verify(self): + assert os.path.exists(self.mbox_name) + new_atime = os.path.getatime(self.mbox_name) + new_mtime = os.path.getmtime(self.mbox_name) + self.assertAlmostEqual(self.mtime, new_mtime, utimes_precision) + self.assertAlmostEqual(self.atime, new_atime, utimes_precision) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.days_old_max = 180 + os.remove(self.mbox_name) + super(TestArchiveMboxTimestamp, self).tearDown() + + +class TestArchiveMboxAll(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + archivemail.options.archive_all = True + + def testNew(self): + """new messages should be archived with --all""" + self.msg = make_message(hours_old=24*179, wantobj=True) + assert archivemail.should_archive(self.msg) + + def testOld(self): + """old messages should be archived with --all""" + self.msg = make_message(hours_old=24*181, wantobj=True) + assert archivemail.should_archive(self.msg) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_all = False + +class TestArchiveMboxPreserveUnread(unittest.TestCase): + """make sure the 'preserve_unread' option works""" + def setUp(self): + archivemail.options.quiet = True + archivemail.options.preserve_unread = True + self.msg = make_message(hours_old=24*181, wantobj=True) + + def testOldRead(self): + """old read messages should be archived with --preserve-unread""" + self.msg["Status"] = "RO" + assert archivemail.should_archive(self.msg) + + def testOldUnread(self): + """old unread messages should not be archived with --preserve-unread""" + self.msg["Status"] = "O" + assert not archivemail.should_archive(self.msg) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.preserve_unread = False + + +class TestArchiveMboxSuffix(unittest.TestCase): + """make sure the 'suffix' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testSuffix(self): + """archiving with specified --suffix arguments""" + for suffix in ("_static_", "_%B_%Y", "-%Y-%m-%d"): + mbox_name = "foobar" + archivemail.options.archive_suffix = suffix + days_old_max = 180 + parsed_suffix_time = time.time() - days_old_max*24*60*60 + parsed_suffix = time.strftime(suffix, + time.localtime(parsed_suffix_time)) + archive_name = mbox_name + parsed_suffix + self.assertEqual(archive_name, + archivemail.make_archive_name(mbox_name)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_suffix = None + +class TestArchiveMboxPrefix(unittest.TestCase): + """make sure the 'prefix' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testPrefix(self): + """archiving with specified --prefix arguments""" + for archive_prefix in ("_static_", "_%B_%Y", "-%Y-%m-%d", "%Y/%m/"): + archivemail.options.archive_prefix = archive_prefix + for mbox_name in 
"foobar", "/tmp/foobar", "schnorchz/foobar": + archive_dir, archive_base = os.path.split(mbox_name) + days = archivemail.options.days_old_max + tm = time.localtime(time.time() - days*24*60*60) + prefix = time.strftime(archive_prefix, tm) + archive_name = os.path.join(archive_dir, prefix + archive_base) + self.assertEqual(archive_name, + archivemail.make_archive_name(mbox_name)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_prefix = None + +class TestArchiveName(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + + def testArchiveName(self): + """test the --archive-name option""" + archive_names = ("custom", ".withdot", "custom_%Y", "%Y/joe") + mbox = "foobar" + for name in archive_names: + archivemail.options.archive_name = name + days = archivemail.options.days_old_max + tm = time.localtime(time.time() - days*24*60*60) + name = time.strftime(name, tm) + self.assertEqual(archivemail.make_archive_name(mbox), name) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_name = None + +class TestArchiveAffixes(unittest.TestCase): + def setUp(self): + self.mbox = "harbsch" + self.archive_prefix = "wurbl+" + self.archive_suffix = "+schronk&borsz" + archivemail.options.quiet = True + + def testDefaultPrefix(self): + """if no archive name affix is specified, the default archive suffix is appended""" + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.mbox + archivemail.options.archive_default_suffix) + + def testPrefixKillsDefaultSuffix(self): + """if an archive name prefix is specified, the default archive suffix is not appended""" + archivemail.options.archive_prefix = self.archive_prefix + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.archive_prefix + self.mbox) + + def testPrefixAndSuffix(self): + """specifying both an archive name prefix and suffix works""" + archivemail.options.archive_prefix = self.archive_prefix + archivemail.options.archive_suffix = self.archive_suffix + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.archive_prefix + self.mbox + self.archive_suffix) + + def tearDown(self): + archivemail.options.archive_prefix = None + archivemail.options.archive_suffix = None + archivemail.options.quiet = False + +class TestArchiveHiddenMbox(unittest.TestCase): + def setUp(self): + archivemail.options.quiet = True + self.mbox = ".upper.lower" + + def testHiddenMbox(self): + """leading dots are stripped from the archive name when no prefix is added""" + self.assertEqual(archivemail.make_archive_name(self.mbox), + self.mbox.lstrip('.') + + archivemail.options.archive_default_suffix) + + def testHiddenMboxPrefixedArchive(self): + """no dots are stripped from the archive name when a prefix is added""" + prefix = ".hidden_" + archivemail.options.archive_prefix = prefix + self.assertEqual(archivemail.make_archive_name(self.mbox), + prefix + self.mbox) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.archive_prefix = None + +class TestArchiveDryRun(TestArchive): + """make sure the 'dry-run' option works""" + def setUp(self): + super(TestArchiveDryRun, self).setUp() + archivemail.options.quiet = True + archivemail.options.dry_run = True + + def testOld(self): + """archiving an old mailbox with the 'dry-run' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.dry_run = False + archivemail.options.quiet = False + 
super(TestArchiveDryRun, self).tearDown() + + +class TestArchiveDelete(TestArchive): + """make sure the 'delete' option works""" + def setUp(self): + super(TestArchiveDelete, self).setUp() + archivemail.options.quiet = True + archivemail.options.delete_old_mail = True + + def testNew(self): + """archiving a new mailbox with the 'delete' option""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testMixed(self): + """archiving a mixed mailbox with the 'delete' option""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOld(self): + """archiving an old mailbox with the 'delete' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.delete_old_mail = False + archivemail.options.quiet = False + super(TestArchiveDelete, self).tearDown() + + +class TestArchiveCopy(TestArchive): + """make sure the 'copy' option works""" + def setUp(self): + super(TestArchiveCopy, self).setUp() + archivemail.options.quiet = True + archivemail.options.copy_old_mail = True + + def testNew(self): + """archiving a new mailbox with the 'copy' option""" + self.make_new_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testMixed(self): + """archiving a mixed mailbox with the 'copy' option""" + self.make_mixed_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def testOld(self): + """archiving an old mailbox with the 'copy' option""" + self.make_old_mbox(messages=3) + archivemail.archive(self.mbox) + self.verify() + + def tearDown(self): + archivemail.options.copy_old_mail = False + archivemail.options.quiet = False + super(TestArchiveCopy, self).tearDown() + + +class TestArchiveMboxFlagged(unittest.TestCase): + """make sure the 'include_flagged' option works""" + def setUp(self): + archivemail.options.include_flagged = False + archivemail.options.quiet = True + + def testOld(self): + """by default, old flagged messages should not be archived""" + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*181, wantobj=True) + assert not archivemail.should_archive(msg) + + def testIncludeFlaggedNew(self): + """new flagged messages should not be archived with include_flagged""" + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*179, wantobj=True) + assert not archivemail.should_archive(msg) + + def testIncludeFlaggedOld(self): + """old flagged messages should be archived with include_flagged""" + archivemail.options.include_flagged = True + msg = make_message(default_headers={"X-Status": "F"}, + hours_old=24*181, wantobj=True) + assert archivemail.should_archive(msg) + + def tearDown(self): + archivemail.options.include_flagged = False + archivemail.options.quiet = False + + +class TestArchiveMboxOutputDir(unittest.TestCase): + """make sure that the 'output-dir' option works""" + def setUp(self): + archivemail.options.quiet = True + + def testOld(self): + """archiving an old mailbox with a sepecified output dir""" + for dir in "/just/a/path", "relative/path": + archivemail.options.output_dir = dir + archive_dir = archivemail.make_archive_name("/tmp/mbox") + self.assertEqual(dir, os.path.dirname(archive_dir)) + + def tearDown(self): + archivemail.options.quiet = False + archivemail.options.output_dir = None + + +class TestArchiveMboxUncompressed(TestArchive): + """make sure that the 'no_compress' option works""" + mbox_name = None + new_mbox = None + old_mbox = None + copy_name = None + 
+
+    def setUp(self):
+        archivemail.options.quiet = True
+        archivemail.options.no_compress = True
+        super(TestArchiveMboxUncompressed, self).setUp()
+
+    def testOld(self):
+        """archiving an old mailbox uncompressed"""
+        self.make_old_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new mailbox uncompressed"""
+        self.make_new_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testMixed(self):
+        """archiving a mixed mailbox uncompressed"""
+        self.make_mixed_mbox(messages=3)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def testOldExists(self):
+        """archiving an old mailbox uncompressed with an existing archive"""
+        self.make_old_mbox(messages=3, make_old_archive=True)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.no_compress = False
+        super(TestArchiveMboxUncompressed, self).tearDown()
+
+
+class TestArchiveSize(unittest.TestCase):
+    """check that the 'size' argument works"""
+    def setUp(self):
+        archivemail.options.quiet = True
+        msg_text = make_message(hours_old=24*181)
+        self.msg_size = len(msg_text)
+        fp = cStringIO.StringIO(msg_text)
+        self.msg = rfc822.Message(fp)
+
+    def testSmaller(self):
+        """giving a size argument smaller than the message"""
+        archivemail.options.min_size = self.msg_size - 1
+        assert archivemail.should_archive(self.msg)
+
+    def testBigger(self):
+        """giving a size argument bigger than the message"""
+        archivemail.options.min_size = self.msg_size + 1
+        assert not archivemail.should_archive(self.msg)
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.min_size = None
+
+
+class TestXIMAPMessage(TestArchive):
+    """Test if IMAP pseudo messages in mboxes are properly handled."""
+    def setUp(self):
+        super(TestXIMAPMessage, self).setUp()
+        archivemail.options.quiet = True
+
+    def testXIMAPMbox(self):
+        """IMAP pseudo messages in an mbox are always preserved."""
+        self.good_mbox = make_mbox(hours_old=181*24, headers={'X-IMAP': 'dummytext'},
+            messages=1)
+        self.good_archive = make_mbox(hours_old=181*24, messages=3)
+        self.mbox = tempfile.mkstemp()[-1]
+        shutil.copyfile(self.good_mbox, self.mbox)
+        append_file(self.good_archive, self.mbox)
+        archivemail.archive(self.mbox)
+        self.verify()
+
+    def tearDown(self):
+        super(TestXIMAPMessage, self).tearDown()
+        archivemail.options.quiet = False
+
+
+############# Test archiving maildirs ###############
+
+class TestArchiveMailboxdir(TestCaseInTempdir):
+    """Base class defining helper functions for doing test archive runs with
+    maildirs."""
+    maildir = None            # Maildir that will be processed by archivemail
+    orig_maildir_obj = None   # A backup copy of the maildir, a SimpleMaildir object
+    remaining_msg = set()     # Filenames of maildir messages that should be preserved
+    number_archived = 0       # Number of messages that get archived
+    orig_archive = None       # An uncompressed copy of a pre-existing archive,
+                              # if one exists
+
+    def setUp(self):
+        super(TestArchiveMailboxdir, self).setUp()
+        self.orig_maildir_obj = SimpleMaildir()
+
+    def verify(self):
+        self._verify_remaining()
+        self._verify_archive()
+
+    def _verify_remaining(self):
+        """Verify that the preserved messages weren't altered."""
+        assert self.maildir
+        # Compare maildir with backup object.
+        dcmp = filecmp.dircmp(self.maildir, self.orig_maildir_obj.root)
+        # Top-level has only directories cur, new, tmp and must be unchanged.
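+        # (dircmp.left_list and right_list are the sorted top-level listings of
+        # the two trees, so this also catches anything added or removed there.)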
+ self.assertEqual(dcmp.left_list, dcmp.right_list) + found = set() + for d in dcmp.common_dirs: + dcmp2 = dcmp.subdirs[d] + # We need to verify three things. + # 1. directory is a subset of the original... + assert not dcmp2.left_only + # 2. all common files are identical... + self.assertEqual(dcmp2.common_files, dcmp2.same_files) + found = found.union([os.path.join(d, x) for x in dcmp2.common_files]) + # 3. exactly the `new' messages (recorded in self.remaining_msg) + # were preserved. + self.assertEqual(found, self.remaining_msg) + + def _verify_archive(self): + """Verify the archive correctness.""" + # TODO: currently make_archive_name does not include the .gz suffix. + # Is this something that should be fixed? + archive = archivemail.make_archive_name(self.maildir) + if archivemail.options.no_compress: + iszipped = False + else: + archive += '.gz' + iszipped = True + if self.number_archived == 0: + if self.orig_archive: + assertEqualContent(archive, self.orig_archive, iszipped) + else: + assert not os.path.exists(archive) + return + fp_new = fp_archive = tmp_archive_name = None + try: + if self.orig_archive: + new_size = os.path.getsize(archive) + # Brute force: split archive in old and new part and verify the + # parts separately. (Of course this destroys the archive.) + fp_archive = open(archive, "r+") + fp_archive.seek(self.orig_archive_size) + fd, tmp_archive_name = tempfile.mkstemp() + fp_new = os.fdopen(fd, "w") + shutil.copyfileobj(fp_archive, fp_new) + fp_new.close() + fp_archive.truncate(self.orig_archive_size) + fp_archive.close() + assertEqualContent(archive, self.orig_archive, iszipped) + new_archive = tmp_archive_name + else: + new_archive = archive + if archivemail.options.no_compress: + fp_archive = open(new_archive, "r") + else: + fp_archive = FixedGzipFile(new_archive, "r") + mb = mailbox.UnixMailbox(fp_archive) + found = 0 + for msg in mb: + self.verify_maildir_has_msg(self.orig_maildir_obj, msg) + found += 1 + self.assertEqual(found, self.number_archived) + finally: + if tmp_archive_name: + os.remove(tmp_archive_name) + if fp_new is not None: + fp_new.close() + if fp_archive is not None: + fp_archive.close() + + def verify_maildir_has_msg(self, maildir, msg): + """Assert that the given maildir has a copy of the rfc822 message.""" + mid = msg['Message-Id'] # Complains if there is no message-id + mdir_msg_str, mdir_flags = \ + maildir.get_message_and_mbox_status(mid) + mbox_flags = set(msg.get('status', '') + msg.get('x-status', '')) + self.assertEqual(mdir_flags, mbox_flags) + + headers = filter(lambda h: msg.isheader(h) not in ('status', 'x-status'), + msg.headers) + headers = "".join(headers) + msg.rewindbody() + # Discard last mbox LF which is not part of the message. 
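+        # (rewindbody() has left msg.fp at the start of the body, so read()
+        # returns the body text plus that trailing separator LF.)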
+ body = msg.fp.read()[:-1] + msg_str = headers + os.linesep + body + self.assertEqual(mdir_msg_str, msg_str) + + def add_messages(self, body=None, headers=None, hours_old=0, messages=1): + for count in range(messages): + msg = make_message(body, default_headers=headers, mkfrom=False, + hours_old=hours_old) + self.orig_maildir_obj.write(msg, new=False) + + def make_maildir(self, mkold, mknew, body=None, headers=None, messages=1, + make_old_archive=False): + mailbox_does_change = not (archivemail.options.dry_run or + archivemail.options.copy_old_mail) + archive_does_change = not (archivemail.options.dry_run or + archivemail.options.delete_old_mail) + if mknew: + self.add_messages(body, headers, 179*24, messages) + if archive_does_change and archivemail.options.archive_all: + self.number_archived += messages + if mailbox_does_change: + self.remaining_msg = set(self.orig_maildir_obj.get_all_filenames()) + if mkold: + self.add_messages(body, headers, 181*24, messages) + if archive_does_change: + self.number_archived += messages + if not mailbox_does_change: + self.remaining_msg = set(self.orig_maildir_obj.get_all_filenames()) + self.maildir = copy_maildir(self.orig_maildir_obj.root) + if make_old_archive: + archive = archivemail.make_archive_name(self.maildir) + self.orig_archive = make_archive_and_plain_copy(archive) + # FIXME: .gz extension handling is a mess II + if not archivemail.options.no_compress: + archive += '.gz' + self.orig_archive_size = os.path.getsize(archive) + +class TestEmptyMaildir(TestCaseInTempdir): + def setUp(self): + super(TestEmptyMaildir, self).setUp() + archivemail.options.quiet = True + + def testEmpty(self): + """Archiving an empty maildir should not result in an archive.""" + self.mdir = SimpleMaildir() + archivemail.archive(self.mdir.root) + assert not os.path.exists(self.mdir.root + '_archive.gz') + + def tearDown(self): + super(TestEmptyMaildir, self).tearDown() + archivemail.options.quiet = False + +class TestMaildir(TestArchiveMailboxdir): + def setUp(self): + super(TestMaildir, self).setUp() + archivemail.options.quiet = True + + def testOld(self): + self.make_maildir(True, False, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testNew(self): + self.make_maildir(False, True, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testMixed(self): + self.make_maildir(True, True, messages=3) + archivemail.archive(self.maildir) + self.verify() + + def testMixedExisting(self): + self.make_maildir(True, True, messages=3, make_old_archive=True) + archivemail.archive(self.maildir) + self.verify() + + def tearDown(self): + archivemail.options.quiet = False + super(TestMaildir, self).tearDown() + + +class TestMaildirPreserveUnread(TestCaseInTempdir): + """Test if the preserve_unread option works with maildirs.""" + def setUp(self): + super(TestMaildirPreserveUnread, self).setUp() + archivemail.options.quiet = True + archivemail.options.preserve_unread = True + + def testOldRead(self): + """--preserve-unread archives old read messages in a maildir.""" + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*181) + smd.write(msg, new=False, flags='S') + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert archivemail.should_archive(msg_obj) + + def testOldUnread(self): + """--preserve-unread preserves old unread messages in a maildir.""" + smd = SimpleMaildir("orig") + msg = make_message(hours_old=24*181) + smd.write(msg, new=False) + md = mailbox.Maildir(smd.root) + msg_obj = md.next() + assert not 
archivemail.should_archive(msg_obj)
+
+    def tearDown(self):
+        archivemail.options.quiet = False
+        archivemail.options.preserve_unread = False
+        super(TestMaildirPreserveUnread, self).tearDown()
+
+class TestMaildirAll(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirAll, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.archive_all = True
+
+    def testNew(self):
+        """New maildir messages should be archived with --all"""
+        self.add_messages(hours_old=24*179)
+        md = mailbox.Maildir(self.orig_maildir_obj.root)
+        msg_obj = md.next()
+        assert archivemail.should_archive(msg_obj)
+
+    def testOld(self):
+        """Old maildir messages should be archived with --all"""
+        self.add_messages(hours_old=24*181)
+        md = mailbox.Maildir(self.orig_maildir_obj.root)
+        msg_obj = md.next()
+        assert archivemail.should_archive(msg_obj)
+
+    def tearDown(self):
+        super(TestMaildirAll, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.archive_all = False
+
+class TestMaildirDryRun(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirDryRun, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.dry_run = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'dry-run' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirDryRun, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.dry_run = False
+
+class TestMaildirDelete(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirDelete, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.delete_old_mail = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'delete' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new maildir mailbox with the 'delete' option"""
+        self.make_maildir(False, True)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirDelete, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.delete_old_mail = False
+
+class TestMaildirCopy(TestArchiveMailboxdir):
+    def setUp(self):
+        super(TestMaildirCopy, self).setUp()
+        archivemail.options.quiet = True
+        archivemail.options.copy_old_mail = True
+
+    def testOld(self):
+        """archiving an old maildir mailbox with the 'copy' option"""
+        self.make_maildir(True, False)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def testNew(self):
+        """archiving a new maildir mailbox with the 'copy' option"""
+        self.make_maildir(False, True)
+        archivemail.archive(self.maildir)
+        self.verify()
+
+    def tearDown(self):
+        super(TestMaildirCopy, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.copy_old_mail = False
+
+class TestArchiveMaildirFlagged(TestCaseInTempdir):
+    """make sure the 'include_flagged' option works with maildir messages"""
+    def setUp(self):
+        super(TestArchiveMaildirFlagged, self).setUp()
+        archivemail.options.include_flagged = False
+        archivemail.options.quiet = True
+
+    def testOld(self):
+        """by default, old flagged maildir messages should not be archived"""
+        smd = SimpleMaildir("orig")
+        msg = make_message(hours_old=24*181)
+        smd.write(msg, new=False, flags='F')
+        md = mailbox.Maildir(smd.root)
+        msg_obj = md.next()
+        assert not archivemail.should_archive(msg_obj)
+
+    def testIncludeFlaggedNew(self):
+        """new flagged maildir messages should not be archived with
include_flagged"""
+        archivemail.options.include_flagged = True
+        smd = SimpleMaildir("orig")
+        msg = make_message(hours_old=24*179)
+        smd.write(msg, new=False, flags='F')
+        md = mailbox.Maildir(smd.root)
+        msg_obj = md.next()
+        assert not archivemail.should_archive(msg_obj)
+
+    def testIncludeFlaggedOld(self):
+        """old flagged maildir messages should be archived with include_flagged"""
+        archivemail.options.include_flagged = True
+        smd = SimpleMaildir("orig")
+        msg = make_message(hours_old=24*181)
+        smd.write(msg, new=False, flags='F')
+        md = mailbox.Maildir(smd.root)
+        msg_obj = md.next()
+        assert archivemail.should_archive(msg_obj)
+
+    def tearDown(self):
+        super(TestArchiveMaildirFlagged, self).tearDown()
+        archivemail.options.include_flagged = False
+        archivemail.options.quiet = False
+
+class TestArchiveMaildirSize(TestCaseInTempdir):
+    """check that the 'size' argument works with maildir messages"""
+    def setUp(self):
+        super(TestArchiveMaildirSize, self).setUp()
+        archivemail.options.quiet = True
+        msg = make_message(hours_old=24*181)
+        self.msg_size = len(msg)
+        smd = SimpleMaildir("orig")
+        smd.write(msg, new=False)
+        md = mailbox.Maildir(smd.root)
+        self.msg_obj = md.next()
+
+    def testSmaller(self):
+        """giving a size argument smaller than the maildir message"""
+        archivemail.options.min_size = self.msg_size - 1
+        assert archivemail.should_archive(self.msg_obj)
+
+    def testBigger(self):
+        """giving a size argument bigger than the maildir message"""
+        archivemail.options.min_size = self.msg_size + 1
+        assert not archivemail.should_archive(self.msg_obj)
+
+    def tearDown(self):
+        super(TestArchiveMaildirSize, self).tearDown()
+        archivemail.options.quiet = False
+        archivemail.options.min_size = None
+
+########## helper routines ############
+
+def make_message(body=None, default_headers={}, hours_old=None, mkfrom=False, wantobj=False):
+    headers = copy.copy(default_headers)
+    if not headers:
+        headers = {}
+    headers['Message-Id'] = make_msgid()
+    if not headers.has_key('Date'):
+        time_message = time.time() - (60 * 60 * hours_old)
+        headers['Date'] = time.asctime(time.localtime(time_message))
+    if not headers.has_key('From'):
+        headers['From'] = "sender@dummy.domain"
+    if not headers.has_key('To'):
+        headers['To'] = "recipient@dummy.domain"
+    if not headers.has_key('Subject'):
+        headers['Subject'] = "This is the subject"
+    if mkfrom and not headers.has_key('From_'):
+        headers['From_'] = "%s %s" % (headers['From'], headers['Date'])
+    if not body:
+        body = "This is the message body"
+
+    msg = ""
+    if headers.has_key('From_'):
+        msg = msg + ("From %s\n" % headers['From_'])
+        del headers['From_']
+    for key in headers.keys():
+        if headers[key] is not None:
+            msg = msg + ("%s: %s\n" % (key, headers[key]))
+    msg = msg + "\n\n" + body + "\n\n"
+    if not wantobj:
+        return msg
+    fp = cStringIO.StringIO(msg)
+    return rfc822.Message(fp)
+
+def append_file(source, dest):
+    """appends the file named 'source' to the file named 'dest'"""
+    assert os.path.isfile(source)
+    assert os.path.isfile(dest)
+    read = open(source, "r")
+    write = open(dest, "a+")
+    shutil.copyfileobj(read, write)
+    read.close()
+    write.close()
+
+
+def make_mbox(body=None, headers=None, hours_old=0, messages=1):
+    assert tempfile.tempdir
+    fd, name = tempfile.mkstemp()
+    file = os.fdopen(fd, "w")
+    for count in range(messages):
+        msg = make_message(body=body, default_headers=headers,
+            mkfrom=True, hours_old=hours_old)
+        file.write(msg)
+    file.close()
+    return name
+
+def make_archive_and_plain_copy(archive_name):
+    """Make an mbox archive of the given name like
archivemail may have
+    created it. Also make an uncompressed copy of this archive and return its
+    name."""
+    copy_fd, copy_name = tempfile.mkstemp()
+    copy_fp = os.fdopen(copy_fd, "w")
+    if archivemail.options.no_compress:
+        fd = os.open(archive_name, os.O_WRONLY|os.O_EXCL|os.O_CREAT)
+        fp = os.fdopen(fd, "w")
+    else:
+        archive_name += ".gz"
+        fd = os.open(archive_name, os.O_WRONLY|os.O_EXCL|os.O_CREAT)
+        rawfp = os.fdopen(fd, "w")
+        fp = gzip.GzipFile(fileobj=rawfp)
+    for count in range(3):
+        msg = make_message(hours_old=24*360)
+        fp.write(msg)
+        copy_fp.write(msg)
+    fp.close()
+    copy_fp.close()
+    if not archivemail.options.no_compress:
+        rawfp.close()
+    return copy_name
+
+def copy_maildir(maildir, prefix="tmp"):
+    """Create a copy of the given maildir and return the absolute path of the
+    new directory."""
+    newdir = tempfile.mkdtemp(prefix=prefix)
+    for d in "cur", "new", "tmp":
+        shutil.copytree(os.path.join(maildir, d), os.path.join(newdir, d))
+    return newdir
+
+def assertEqualContent(firstfile, secondfile, zippedfirst=False):
+    """Verify that the two files exist and have identical content. If zippedfirst
+    is True, assume that firstfile is gzip-compressed."""
+    assert os.path.exists(firstfile)
+    assert os.path.exists(secondfile)
+    if zippedfirst:
+        try:
+            fp1 = gzip.GzipFile(firstfile, "r")
+            fp2 = open(secondfile, "r")
+            assert cmp_fileobj(fp1, fp2)
+        finally:
+            fp1.close()
+            fp2.close()
+    else:
+        assert filecmp.cmp(firstfile, secondfile, shallow=0)
+
+def cmp_fileobj(fp1, fp2):
+    """Return whether reading the file objects yields identical content."""
+    bufsize = 8192
+    while True:
+        b1 = fp1.read(bufsize)
+        b2 = fp2.read(bufsize)
+        if b1 != b2:
+            return False
+        if not b1:
+            return True
+
+if __name__ == "__main__":
+    unittest.main()
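+
+# Usage note (assuming the file keeps its usual test_archivemail.py name and
+# that the archivemail script is importable as the `archivemail` module): this
+# suite is Python 2 only -- it relies on cStringIO, rfc822, mailbox.UnixMailbox
+# and dict.has_key(). Typical invocations:
+#
+#     python2 test_archivemail.py -v
+#     python2 test_archivemail.py TestMaildirCopy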