tools/cvs2cl/cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5 ##############################################################
   6 ###                                                        ###
   7 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   8 ###                                                        ###
   9 ##############################################################
  10
  11 ## $Revision: 1.1 $
  12 ## $Date: 2001-01-24 11:20:22 $
  13 ## $Author: bradleyb $
  14 ##
  15 ##   (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
  16 ##
  17 ##   (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.)
  18 ##
  19 ## cvs2cl.pl is free software; you can redistribute it and/or modify
  20 ## it under the terms of the GNU General Public License as published by
  21 ## the Free Software Foundation; either version 2, or (at your option)
  22 ## any later version.
  23 ##
  24 ## cvs2cl.pl is distributed in the hope that it will be useful,
  25 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  26 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  27 ## GNU General Public License for more details.
  28 ##
  29 ## You may have received a copy of the GNU General Public License
  30 ## along with cvs2cl.pl; see the file COPYING.  If not, write to the
  31 ## Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  32 ## Boston, MA 02111-1307, USA.
  33
  34
  35 \f
  36 use strict;
  37 use Text::Wrap;
  38 use Time::Local;
  39 use File::Basename;
  40
  41 \f
  42 # The Plan:
  43 #
  44 # Read in the logs for multiple files, spit out a nice ChangeLog that
  45 # mirrors the information entered during `cvs commit'.
  46 #
  47 # The problem presents some challenges. In an ideal world, we could
  48 # detect files with the same author, log message, and checkin time --
  49 # each <filelist, author, time, logmessage> would be a changelog entry.
  50 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  51 # so checkins can span a range of times.  Also, the directory structure
  52 # could be hierarchical.
  53 #
  54 # Another question is whether we really want to have the ChangeLog
  55 # exactly reflect commits. An author could issue two related commits,
  56 # with different log entries, reflecting a single logical change to the
  57 # source. GNU style ChangeLogs group these under a single author/date.
  58 # We try to do the same.
  59 #
  60 # So, we parse the output of `cvs log', storing log messages in a
  61 # multilevel hash that stores the mapping:
  62 #   directory => author => time => message => filelist
  63 # As we go, we notice "nearby" commit times and store them together
  64 # (i.e., under the same timestamp), so they appear in the same log
  65 # entry.
  66 #
  67 # When we've read all the logs, we twist this mapping into
  68 # a time => author => message => filelist mapping for each directory.
  69 #
  70 # If we're not using the `--distributed' flag, the directory is always
  71 # considered to be `./', even as descend into subdirectories.
  72
  73 \f
  74 ############### Globals ################
  75
  76
  77 # What we run to generate it:
  78 my $Log_Source_Command = "cvs log";
  79
  80 # In case we have to print it out:
  81 my $VERSION = '$Revision: 1.1 $';
  82 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
  83
  84 ## Vars set by options:
  85
  86 # Print debugging messages?
  87 my $Debug = 0;
  88
  89 # Just show version and exit?
  90 my $Print_Version = 0;
  91
  92 # Just print usage message and exit?
  93 my $Print_Usage = 0;
  94
  95 # Single top-level ChangeLog, or one per subdirectory?
  96 my $Distributed = 0;
  97
  98 # What file should we generate (defaults to "ChangeLog")?
  99 my $Log_File_Name = "ChangeLog";
 100
 101 # Expand usernames to email addresses based on a map file?
 102 my $User_Map_File = "";
 103
 104 # Output to a file or to stdout?
 105 my $Output_To_Stdout = 0;
 106
 107 # Eliminate empty log messages?
 108 my $Prune_Empty_Msgs = 0;
 109
 110 # Don't call Text::Wrap on the body of the message
 111 my $No_Wrap = 0;
 112
 113 # Separates header from log message.  Code assumes it is either " " or
 114 # "\n\n", so if there's ever an option to set it to something else,
 115 # make sure to go through all conditionals that use this var.
 116 my $After_Header = " ";
 117
 118 # Format more for programs than for humans.
 119 my $XML_Output = 0;
 120
 121 # Do some special tweaks for log data that was written in FSF
 122 # ChangeLog style.
 123 my $FSF_Style = 0;
 124
 125 # Show times in UTC instead of local time
 126 my $UTC_Times = 0;
 127
 128 # Show day of week in output?
 129 my $Show_Day_Of_Week = 0;
 130
 131 # Show revision numbers in output?
 132 my $Show_Revisions = 0;
 133
 134 # Show tags (symbolic names) in output?
 135 my $Show_Tags = 0;
 136
 137 # Show branches by symbolic name in output?
 138 my $Show_Branches = 0;
 139
 140 # Show only revisions on these branches or their ancestors.
 141 my @Follow_Branches;
 142
 143 # Don't bother with files matching this regexp.
 144 my @Ignore_Files;
 145
 146 # How exactly we match entries.  We definitely want "o",
 147 # and user might add "i" by using --case-insensitive option.
 148 my $Case_Insensitive = 0;
 149
 150 # Maybe only show log messages matching a certain regular expression.
 151 my $Regexp_Gate = "";
 152
 153 # Pass this global option string along to cvs, to the left of `log':
 154 my $Global_Opts = "";
 155
 156 # Pass this option string along to the cvs log subcommand:
 157 my $Command_Opts = "";
 158
 159 # Read log output from stdin instead of invoking cvs log?
 160 my $Input_From_Stdin = 0;
 161
 162 # Don't show filenames in output.
 163 my $Hide_Filenames = 0;
 164
 165 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 166 # times that span a range of time. We assume that checkins will last no
 167 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 168 # checkins will happen from the same users with the same message less
 169 # than $Max_Checkin_Duration seconds apart.
 170 my $Max_Checkin_Duration = 180;
 171
 172 # What to put at the front of [each] ChangeLog.
 173 my $ChangeLog_Header = "";
 174
 175 ## end vars set by options.
 176
 177 # In 'cvs log' output, one long unbroken line of equal signs separates
 178 # files:
 179 my $file_separator = "======================================="
 180                    . "======================================";
 181
 182 # In 'cvs log' output, a shorter line of dashes separates log messages
 183 # within a file:
 184 my $logmsg_separator = "----------------------------";
 185
 186
 187 ############### End globals ############
 188
 189
 190
 191 \f
 192 &parse_options ();
 193 &derive_change_log ();
 194
 195
 196 \f
 197 ### Everything below is subroutine definitions. ###
 198
 199 # Fills up a ChangeLog structure in the current directory.
 200 sub derive_change_log ()
 201 {
 202   # See "The Plan" above for a full explanation.
 203
 204   my %grand_poobah;
 205
 206   my $file_full_path;
 207   my $time;
 208   my $revision;
 209   my $author;
 210   my $msg_txt;
 211   my $detected_file_separator;
 212
 213   # We might be expanding usernames
 214   my %usermap;
 215
 216   # In general, it's probably not very maintainable to use state
 217   # variables like this to tell the loop what it's doing at any given
 218   # moment, but this is only the first one, and if we never have more
 219   # than a few of these, it's okay.
 220   my $collecting_symbolic_names = 0;
 221   my %symbolic_names;    # Where tag names get stored.
 222   my %branch_names;      # We'll grab branch names while we're at it.
 223   my %branch_numbers;    # Save some revisions for @Follow_Branches
 224   my @branch_roots;      # For showing which files are branch ancestors.
 225
 226   # Bleargh.  Compensate for a deficiency of custom wrapping.
 227   if (($After_Header ne " ") and $FSF_Style)
 228   {
 229     $After_Header .= "\t";
 230   }
 231
 232   if (! $Input_From_Stdin) {
 233     open (LOG_SOURCE, "$Log_Source_Command |")
 234         or die "unable to run \"${Log_Source_Command}\"";
 235   }
 236   else {
 237     open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
 238   }
 239
 240   %usermap = &maybe_read_user_map_file ();
 241
 242   while (<LOG_SOURCE>)
 243   {
 244     # If on a new file and don't see filename, skip until we find it, and
 245     # when we find it, grab it.
 246     if ((! (defined $file_full_path)) and /^Working file: (.*)/)
 247     {
 248       $file_full_path = $1;
 249       if (@Ignore_Files)
 250       {
 251         my $base;
 252         ($base, undef, undef) = fileparse ($file_full_path);
 253         # Ouch, I wish trailing operators in regexps could be
 254         # evaluated on the fly!
 255         if ($Case_Insensitive) {
 256           if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) {
 257             undef $file_full_path;
 258           }
 259         }
 260         elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) {
 261           undef $file_full_path;
 262         }
 263       }
 264       next;
 265     }
 266
 267     # Just spin wheels if no file defined yet.
 268     next if (! $file_full_path);
 269
 270     # Collect tag names in case we're asked to print them in the output.
 271     if (/^symbolic names:$/) {
 272       $collecting_symbolic_names = 1;
 273       next;  # There's no more info on this line, so skip to next
 274     }
 275     if ($collecting_symbolic_names)
 276     {
 277       # All tag names are listed with whitespace in front in cvs log
 278       # output; so if see non-whitespace, then we're done collecting.
 279       if (/^\S/) {
 280         $collecting_symbolic_names = 0;
 281       }
 282       else    # we're looking at a tag name, so parse & store it
 283       {
 284         # According to the Cederqvist manual, in node "Tags", tag
 285         # names must start with an uppercase or lowercase letter and
 286         # can contain uppercase and lowercase letters, digits, `-',
 287         # and `_'.  However, it's not our place to enforce that, so
 288         # we'll allow anything CVS hands us to be a tag:
 289         /^\s+([^:]+): ([\d.]+)$/;
 290         my $tag_name = $1;
 291         my $tag_rev  = $2;
 292
 293         # A branch number either has an odd number of digit sections
 294         # (and hence an even number of dots), or has ".0." as the
 295         # second-to-last digit section.  Test for these conditions.
 296         my $real_branch_rev = "";
 297         if (($tag_rev =~ /^(\d+\.\d+\.)+\d+$/)   # Even number of dots...
 298             and (! ($tag_rev =~ /^(1\.)+1$/)))   # ...but not "1.[1.]1"
 299         {
 300           $real_branch_rev = $tag_rev;
 301         }
 302         elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/)  # Has ".0."
 303         {
 304           $real_branch_rev = $1 . $3;
 305         }
 306         # If we got a branch, record its number.
 307         if ($real_branch_rev)
 308         {
 309           $branch_names{$real_branch_rev} = $tag_name;
 310           if (@Follow_Branches) {
 311             if (grep ($_ eq $tag_name, @Follow_Branches)) {
 312               $branch_numbers{$tag_name} = $real_branch_rev;
 313             }
 314           }
 315         }
 316         else {
 317           # Else it's just a regular (non-branch) tag.
 318           push (@{$symbolic_names{$tag_rev}}, $tag_name);
 319         }
 320       }
 321     }
 322     # End of code for collecting tag names.
 323
 324     # If have file name, but not revision, and see revision, then grab
 325     # it.  (We collect unconditionally, even though we may or may not
 326     # ever use it.)
 327     if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/))
 328     {
 329       $revision = $1;
 330
 331       if (@Follow_Branches)
 332       {
 333         foreach my $branch (@Follow_Branches)
 334         {
 335           # Special case for following trunk revisions
 336           if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/))
 337           {
 338             goto dengo;
 339           }
 340
 341           my $branch_number = $branch_numbers{$branch};
 342           if ($branch_number)
 343           {
 344             # Are we on one of the follow branches or an ancestor of
 345             # same?
 346             #
 347             # If this revision is a prefix of the branch number, or
 348             # possibly is less in the minormost number, OR if this
 349             # branch number is a prefix of the revision, then yes.
 350             # Otherwise, no.
 351             #
 352             # So below, we determine if any of those conditions are
 353             # met.
 354
 355             # Trivial case: is this revision on the branch?
 356             # (Compare this way to avoid regexps that screw up Emacs
 357             # indentation, argh.)
 358             if ((substr ($revision, 0, ((length ($branch_number)) + 1)))
 359                 eq ($branch_number . "."))
 360             {
 361               goto dengo;
 362             }
 363             # Non-trivial case: check if rev is ancestral to branch
 364             elsif ((length ($branch_number)) > (length ($revision)))
 365             {
 366               $revision =~ /^((?:\d+\.)+)(\d+)$/;
 367               my $r_left = $1;          # still has the trailing "."
 368               my $r_end = $2;
 369
 370               $branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/;
 371               my $b_left = $1;  # still has trailing "."
 372               my $b_mid  = $2;   # has no trailing "."
 373
 374               if (($r_left eq $b_left)
 375                   && ($r_end <= $b_mid))
 376               {
 377                 goto dengo;
 378               }
 379             }
 380           }
 381         }
 382       }
 383       else    # (! @Follow_Branches)
 384       {
 385         next;
 386       }
 387
 388       # Else we are following branches, but this revision isn't on the
 389       # path.  So skip it.
 390       undef $revision;
 391     dengo:
 392       next;
 393     }
 394
 395     # If we don't have a revision right now, we couldn't possibly
 396     # be looking at anything useful.
 397     if (! (defined ($revision))) {
 398       $detected_file_separator = /^$file_separator$/o;
 399       if ($detected_file_separator) {
 400         # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 401         goto CLEAR;
 402       }
 403       else {
 404         next;
 405       }
 406     }
 407
 408     # If have file name but not date and author, and see date or
 409     # author, then grab them:
 410     unless (defined $time)
 411     {
 412       if (/^date: .*/)
 413       {
 414         ($time, $author) = &parse_date_and_author ($_);
 415         if (defined ($usermap{$author}) and $usermap{$author}) {
 416           $author = $usermap{$author};
 417         }
 418       }
 419       else {
 420         $detected_file_separator = /^$file_separator$/o;
 421         if ($detected_file_separator) {
 422           # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 423           goto CLEAR;
 424         }
 425       }
 426       # If the date/time/author hasn't been found yet, we couldn't
 427       # possibly care about anything we see.  So skip:
 428       next;
 429     }
 430
 431     # A "branches: ..." line here indicates that one or more branches
 432     # are rooted at this revision.  If we're showing branches, then we
 433     # want to show that fact as well, so we collect all the branches
 434     # that this is the latest ancestor of and store them in
 435     # @branch_roots.  Just for reference, the format of the line we're
 436     # seeing at this point is:
 437     #
 438     #    branches:  1.5.2;  1.5.4;  ...;
 439     #
 440     # Okay, here goes:
 441
 442     if (/^branches:\s+(.*);$/)
 443     {
 444       if ($Show_Branches)
 445       {
 446         my $lst = $1;
 447         $lst =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
 448         if ($lst) {
 449           @branch_roots = split (/;\s+/, $lst);
 450         }
 451         else {
 452           undef @branch_roots;
 453         }
 454         next;
 455       }
 456       else
 457       {
 458         # Ugh.  This really bothers me.  Suppose we see a log entry
 459         # like this:
 460         #
 461         #    ----------------------------
 462         #    revision 1.1
 463         #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
 464         #    branches:  1.1.2;
 465         #    Intended first line of log message begins here.
 466         #    ----------------------------
 467         #
 468         # The question is, how we can tell the difference between that
 469         # log message and a *two*-line log message whose first line is
 470         #
 471         #    "branches:  1.1.2;"
 472         #
 473         # See the problem?  The output of "cvs log" is inherently
 474         # ambiguous.
 475         #
 476         # For now, we punt: we liberally assume that people don't
 477         # write log messages like that, and just toss a "branches:"
 478         # line if we see it but are not showing branches.  I hope no
 479         # one ever loses real log data because of this.
 480         next;
 481       }
 482     }
 483
 484     # If have file name, time, and author, then we're just grabbing
 485     # log message texts:
 486     $detected_file_separator = /^$file_separator$/o;
 487     if ($detected_file_separator && ! (defined $revision)) {
 488       # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 489       goto CLEAR;
 490     }
 491     unless ($detected_file_separator || /^$logmsg_separator$/o)
 492     {
 493       $msg_txt .= $_;   # Normally, just accumulate the message...
 494       next;
 495     }
 496     # ... until a msg separator is encountered:
 497     # Ensure the message contains something:
 498     if ((! $msg_txt)
 499         || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
 500         || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))
 501     {
 502       if ($Prune_Empty_Msgs) {
 503         goto CLEAR;
 504       }
 505       # else
 506       $msg_txt = "[no log message]\n";
 507     }
 508
 509     ### Store it all in the Grand Poobah:
 510     {
 511       my $dir_key;        # key into %grand_poobah
 512       my %qunk;           # complicated little jobbie, see below
 513
 514       # Each revision of a file has a little data structure (a `qunk')
 515       # associated with it.  That data structure holds not only the
 516       # file's name, but any additional information about the file
 517       # that might be needed in the output, such as the revision
 518       # number, tags, branches, etc.  The reason to have these things
 519       # arranged in a data structure, instead of just appending them
 520       # textually to the file's name, is that we may want to do a
 521       # little rearranging later as we write the output.  For example,
 522       # all the files on a given tag/branch will go together, followed
 523       # by the tag in parentheses (so trunk or otherwise non-tagged
 524       # files would go at the end of the file list for a given log
 525       # message).  This rearrangement is a lot easier to do if we
 526       # don't have to reparse the text.
 527       #
 528       # A qunk looks like this:
 529       #
 530       #   {
 531       #     filename    =>    "hello.c",
 532       #     revision    =>    "1.4.3.2",
 533       #     time        =>    a timegm() return value (moment of commit)
 534       #     tags        =>    [ "tag1", "tag2", ... ],
 535       #     branch      =>    "branchname" # There should be only one, right?
 536       #     branchroots =>    [ "branchtag1", "branchtag2", ... ]
 537       #   }
 538
 539       if ($Distributed) {
 540         # Just the basename, don't include the path.
 541         ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path);
 542       }
 543       else {
 544         $dir_key = "./";
 545         $qunk{'filename'} = $file_full_path;
 546       }
 547
 548       # This may someday be used in a more sophisticated calculation
 549       # of what other files are involved in this commit.  For now, we
 550       # don't use it, because the common-commit-detection algorithm is
 551       # hypothesized to be "good enough" as it stands.
 552       $qunk{'time'} = $time;
 553
 554       # We might be including revision numbers and/or tags and/or
 555       # branch names in the output.  Most of the code from here to
 556       # loop-end deals with organizing these in qunk.
 557
 558       $qunk{'revision'} = $revision;
 559
 560       # Grab the branch, even though we may or may not need it:
 561       $qunk{'revision'} =~ /((?:\d+\.)+)\d+/;
 562       my $branch_prefix = $1;
 563       $branch_prefix =~ s/\.$//;  # strip off final dot
 564       if ($branch_names{$branch_prefix}) {
 565         $qunk{'branch'} = $branch_names{$branch_prefix};
 566       }
 567
 568       # If there's anything in the @branch_roots array, then this
 569       # revision is the root of at least one branch.  We'll display
 570       # them as branch names instead of revision numbers, the
 571       # substitution for which is done directly in the array:
 572       if (@branch_roots) {
 573         my @roots = map { $branch_names{$_} } @branch_roots;
 574         $qunk{'branchroots'} = \@roots;
 575       }
 576
 577       # Save tags too.
 578       if (defined ($symbolic_names{$revision})) {
 579         $qunk{'tags'} = $symbolic_names{$revision};
 580         delete $symbolic_names{$revision};
 581       }
 582
 583       # Add this file to the list
 584       # (We use many spoonfuls of autovivication magic. Hashes and arrays
 585       # will spring into existence if they aren't there already.)
 586
 587       &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n");
 588
 589       # Store with the files in this commit.  Later we'll loop through
 590       # again, making sure that revisions with the same log message
 591       # and nearby commit times are grouped together as one commit.
 592       push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk);
 593     }
 594
 595   CLEAR:
 596     # Make way for the next message
 597     undef $msg_txt;
 598     undef $time;
 599     undef $revision;
 600     undef $author;
 601     undef @branch_roots;
 602
 603     # Maybe even make way for the next file:
 604     if ($detected_file_separator) {
 605       undef $file_full_path;
 606       undef %branch_names;
 607       undef %branch_numbers;
 608       undef %symbolic_names;
 609     }
 610   }
 611
 612   close (LOG_SOURCE);
 613
 614   ### Process each ChangeLog
 615
 616   while (my ($dir,$authorhash) = each %grand_poobah)
 617   {
 618     &debug ("DOING DIR: $dir\n");
 619
 620     # Here we twist our hash around, from being
 621     #   author => time => message => filelist
 622     # in %$authorhash to
 623     #   time => author => message => filelist
 624     # in %changelog.
 625     #
 626     # This is also where we merge entries.  The algorithm proceeds
 627     # through the timeline of the changelog with a sliding window of
 628     # $Max_Checkin_Duration seconds; within that window, entries that
 629     # have the same log message are merged.
 630     #
 631     # (To save space, we zap %$authorhash after we've copied
 632     # everything out of it.)
 633
 634     my %changelog;
 635     while (my ($author,$timehash) = each %$authorhash)
 636     {
 637       my $lasttime;
 638       my %stamptime;
 639       foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash))
 640       {
 641         my $msghash = $timehash->{$time};
 642         while (my ($msg,$qunklist) = each %$msghash)
 643         {
 644           my $stamptime = $stamptime{$msg};
 645           if ((defined $stamptime)
 646               and (($time - $stamptime) < $Max_Checkin_Duration)
 647               and (defined $changelog{$stamptime}{$author}{$msg}))
 648           {
 649             push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist);
 650           }
 651           else {
 652             $changelog{$time}{$author}{$msg} = $qunklist;
 653             $stamptime{$msg} = $time;
 654           }
 655         }
 656       }
 657     }
 658     undef (%$authorhash);
 659
 660     ### Now we can write out the ChangeLog!
 661
 662     my ($logfile_here, $logfile_bak, $tmpfile);
 663
 664     if (! $Output_To_Stdout) {
 665       $logfile_here =  $dir . $Log_File_Name;
 666       $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
 667       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
 668       $logfile_bak  = "${logfile_here}.bak";
 669
 670       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
 671     }
 672     else {
 673       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
 674     }
 675
 676     print LOG_OUT $ChangeLog_Header;
 677
 678     if ($XML_Output) {
 679       print LOG_OUT "<?xml version=\"1.0\"?>\n\n"
 680           . "<changelog xmlns=\"http://www.red-bean.com/xmlns/cvs2cl/\">\n\n";
 681     }
 682
 683     foreach my $time (sort {$main::b <=> $main::a} (keys %changelog))
 684     {
 685       my $authorhash = $changelog{$time};
 686       while (my ($author,$mesghash) = each %$authorhash)
 687       {
 688         # If XML, escape in outer loop to avoid compound quoting:
 689         if ($XML_Output) {
 690           $author = &xml_escape ($author);
 691         }
 692
 693         while (my ($msg,$qunklist) = each %$mesghash)
 694         {
 695           my $files               = &pretty_file_list ($qunklist);
 696           my $header_line;          # date and author
 697           my $body;                 # see below
 698           my $wholething;           # $header_line + $body
 699
 700           # Set up the date/author line.
 701           # kff todo: do some more XML munging here, on the header
 702           # part of the entry:
 703           my ($ignore,$min,$hour,$mday,$mon,$year,$wday)
 704               = $UTC_Times ? gmtime($time) : localtime($time);
 705
 706           # XML output includes everything else, we might as well make
 707           # it always include Day Of Week too, for consistency.
 708           if ($Show_Day_Of_Week or $XML_Output) {
 709             $wday = ("Sunday", "Monday", "Tuesday", "Wednesday",
 710                      "Thursday", "Friday", "Saturday")[$wday];
 711             $wday = ($XML_Output) ? "<weekday>${wday}</weekday>\n" : " $wday";
 712           }
 713           else {
 714             $wday = "";
 715           }
 716
 717           if ($XML_Output) {
 718             $header_line =
 719                 sprintf ("<date>%4u-%02u-%02u</date>\n"
 720                          . "${wday}"
 721                          . "<time>%02u:%02u</time>\n"
 722                          . "<author>%s</author>\n",
 723                          $year+1900, $mon+1, $mday, $hour, $min, $author);
 724           }
 725           else {
 726             $header_line =
 727                 sprintf ("%4u-%02u-%02u${wday} %02u:%02u  %s\n\n",
 728                          $year+1900, $mon+1, $mday, $hour, $min, $author);
 729           }
 730
 731           # Reshape the body according to user preferences.
 732           if ($XML_Output)
 733           {
 734             $msg = &preprocess_msg_text ($msg);
 735             $body = $files . $msg;
 736           }
 737           elsif ($No_Wrap)
 738           {
 739             $msg = &preprocess_msg_text ($msg);
 740             $files = wrap ("\t", "      ", "$files");
 741             $msg =~ s/\n(.*)/\n\t$1/g;
 742             unless ($After_Header eq " ") {
 743               $msg =~ s/^(.*)/\t$1/g;
 744             }
 745             $body = $files . $After_Header . $msg;
 746           }
 747           else  # do wrapping, either FSF-style or regular
 748           {
 749             if ($FSF_Style)
 750             {
 751               $files = wrap ("\t", "        ", "$files");
 752
 753               my $files_last_line_len = 0;
 754               if ($After_Header eq " ")
 755               {
 756                 $files_last_line_len = &last_line_len ($files);
 757                 $files_last_line_len += 1;  # for $After_Header
 758               }
 759
 760               $msg = &wrap_log_entry
 761                   ($msg, "\t", 69 - $files_last_line_len, 69);
 762               $body = $files . $After_Header . $msg;
 763             }
 764             else  # not FSF-style
 765             {
 766               $msg = &preprocess_msg_text ($msg);
 767               $body = $files . $After_Header . $msg;
 768               $body = wrap ("\t", "        ", "$body");
 769             }
 770           }
 771
 772           $wholething = $header_line . $body;
 773
 774           if ($XML_Output) {
 775             $wholething = "<entry>\n${wholething}</entry>\n";
 776           }
 777
 778           # One last check: make sure it passes the regexp test, if the
 779           # user asked for that.  We have to do it here, so that the
 780           # test can match against information in the header as well
 781           # as in the text of the log message.
 782
 783           # How annoying to duplicate so much code just because I
 784           # can't figure out a way to evaluate scalars on the trailing
 785           # operator portion of a regular expression.  Grrr.
 786           if ($Case_Insensitive) {
 787             unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) {
 788               print LOG_OUT "${wholething}\n";
 789             }
 790           }
 791           else {
 792             unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) {
 793               print LOG_OUT "${wholething}\n";
 794             }
 795           }
 796         }
 797       }
 798     }
 799
 800     if ($XML_Output) {
 801       print LOG_OUT "</changelog>\n";
 802     }
 803
 804     close (LOG_OUT);
 805
 806     if (! $Output_To_Stdout)
 807     {
 808       if (-f $logfile_here) {
 809         rename ($logfile_here, $logfile_bak);
 810       }
 811       rename ($tmpfile, $logfile_here);
 812     }
 813   }
 814 }
 815
 816
 817 sub parse_date_and_author ()
 818 {
 819   # Parses the date/time and author out of a line like:
 820   #
 821   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
 822
 823   my $line = shift;
 824
 825   my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~
 826       m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);#
 827           or  die "Couldn't parse date ``$line''";
 828   die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);
 829   # Kinda arbitrary, but useful as a sanity check
 830   my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900);
 831
 832   return ($time, $author);
 833 }
 834
 835
 836 # Here we take a bunch of qunks and convert them into printed
 837 # summary that will include all the information the user asked for.
 838 sub pretty_file_list ()
 839 {
 840   if ($Hide_Filenames and (! $XML_Output)) {
 841     return "";
 842   }
 843
 844   my $qunksref = shift;
 845   my @qunkrefs = @$qunksref;
 846   my @filenames;
 847   my $beauty = "";          # The accumulating header string for this entry.
 848   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 849   my %unanimous_tags;       # Tags found in all qunks
 850   my %all_branches;         # Branches found in any qunk
 851   my $common_dir = undef;   # Dir prefix common to all files ("" if none)
 852   my $fbegun = 0;           # Did we begin printing filenames yet?
 853
 854   # First, loop over the qunks gathering all the tag/branch names.
 855   # We'll put them all in non_unanimous_tags, and take out the
 856   # unanimous ones later.
 857   foreach my $qunkref (@qunkrefs)
 858   {
 859     # Keep track of whether all the files in this commit were in the
 860     # same directory, and memorize it if so.  We can make the output a
 861     # little more compact by mentioning the directory only once.
 862     if ((scalar (@qunkrefs)) > 1)
 863     {
 864       if (! (defined ($common_dir)))
 865       {
 866         my ($base, $dir);
 867         ($base, $dir, undef) = fileparse ($$qunkref{'filename'});
 868
 869         if ((! (defined ($dir)))  # this first case is sheer paranoia
 870             or ($dir eq "")
 871             or ($dir eq "./")
 872             or ($dir eq ".\\"))
 873         {
 874           $common_dir = "";
 875         }
 876         else
 877         {
 878           $common_dir = $dir;
 879         }
 880       }
 881       elsif ($common_dir ne "")
 882       {
 883         # Already have a common dir prefix, so how much of it can we preserve?
 884         $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir);
 885       }
 886     }
 887     else  # only one file in this entry anyway, so common dir not an issue
 888     {
 889       $common_dir = "";
 890     }
 891
 892     if (defined ($$qunkref{'branch'})) {
 893       $all_branches{$$qunkref{'branch'}} = 1;
 894     }
 895     if (defined ($$qunkref{'tags'})) {
 896       foreach my $tag (@{$$qunkref{'tags'}}) {
 897         $non_unanimous_tags{$tag} = 1;
 898       }
 899     }
 900   }
 901
 902   # Any tag held by all qunks will be printed specially... but only if
 903   # there are multiple qunks in the first place!
 904   if ((scalar (@qunkrefs)) > 1) {
 905     foreach my $tag (keys (%non_unanimous_tags)) {
 906       my $everyone_has_this_tag = 1;
 907       foreach my $qunkref (@qunkrefs) {
 908         if ((! (defined ($$qunkref{'tags'})))
 909             or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) {
 910           $everyone_has_this_tag = 0;
 911         }
 912       }
 913       if ($everyone_has_this_tag) {
 914         $unanimous_tags{$tag} = 1;
 915         delete $non_unanimous_tags{$tag};
 916       }
 917     }
 918   }
 919
 920   if ($XML_Output)
 921   {
 922     # If outputting XML, then our task is pretty simple, because we
 923     # don't have to detect common dir, common tags, branch prefixing,
 924     # etc.  We just output exactly what we have, and don't worry about
 925     # redundancy or readability.
 926
 927     foreach my $qunkref (@qunkrefs)
 928     {
 929       my $filename    = $$qunkref{'filename'};
 930       my $revision    = $$qunkref{'revision'};
 931       my $tags        = $$qunkref{'tags'};
 932       my $branch      = $$qunkref{'branch'};
 933       my $branchroots = $$qunkref{'branchroots'};
 934
 935       $filename = &xml_escape ($filename);   # probably paranoia
 936       $revision = &xml_escape ($revision);   # definitely paranoia
 937
 938       $beauty .= "<file>\n";
 939       $beauty .= "<name>${filename}</name>\n";
 940       $beauty .= "<revision>${revision}</revision>\n";
 941       if ($branch) {
 942         $branch   = &xml_escape ($branch);     # more paranoia
 943         $beauty .= "<branch>${branch}</branch>\n";
 944       }
 945       foreach my $tag (@$tags) {
 946         $tag = &xml_escape ($tag);  # by now you're used to the paranoia
 947         $beauty .= "<tag>${tag}</tag>\n";
 948       }
 949       foreach my $root (@$branchroots) {
 950         $root = &xml_escape ($root);  # which is good, because it will continue
 951         $beauty .= "<branchroot>${root}</branchroot>\n";
 952       }
 953       $beauty .= "</file>\n";
 954     }
 955
 956     # Theoretically, we could go home now.  But as long as we're here,
 957     # let's print out the common_dir and utags, as a convenience to
 958     # the receiver (after all, earlier code calculated that stuff
 959     # anyway, so we might as well take advantage of it).
 960
 961     if ((scalar (keys (%unanimous_tags))) > 1) {
 962       foreach my $utag ((keys (%unanimous_tags))) {
 963         $utag = &xml_escape ($utag);   # the usual paranoia
 964         $beauty .= "<utag>${utag}</utag>\n";
 965       }
 966     }
 967     if ($common_dir) {
 968       $common_dir = &xml_escape ($common_dir);
 969       $beauty .= "<commondir>${common_dir}</commondir>\n";
 970     }
 971
 972     # That's enough for XML, time to go home:
 973     return $beauty;
 974   }
 975
 976   # Else not XML output, so complexly compactify for chordate
 977   # consumption.  At this point we have enough global information
 978   # about all the qunks to organize them non-redundantly for output.
 979
 980   if ($common_dir) {
 981     # Note that $common_dir still has its trailing slash
 982     $beauty .= "$common_dir: ";
 983   }
 984
 985   if ($Show_Branches)
 986   {
 987     # For trailing revision numbers.
 988     my @brevisions;
 989
 990     foreach my $branch (keys (%all_branches))
 991     {
 992       foreach my $qunkref (@qunkrefs)
 993       {
 994         if ((defined ($$qunkref{'branch'}))
 995             and ($$qunkref{'branch'} eq $branch))
 996         {
 997           if ($fbegun) {
 998             # kff todo: comma-delimited in XML too?  Sure.
 999             $beauty .= ", ";
1000           }
1001           else {
1002             $fbegun = 1;
1003           }
1004           my $fname = substr ($$qunkref{'filename'}, length ($common_dir));
1005           $beauty .= $fname;
1006           $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically
1007
1008           if ($Show_Tags && (defined @{$$qunkref{'tags'}})) {
1009             my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
1010             if (@tags) {
1011               $beauty .= " (tags: ";
1012               $beauty .= join (', ', @tags);
1013               $beauty .= ")";
1014             }
1015           }
1016
1017           if ($Show_Revisions) {
1018             # Collect the revision numbers' last components, but don't
1019             # print them -- they'll get printed with the branch name
1020             # later.
1021             $$qunkref{'revision'} =~ /.+\.([\d]+)$/;
1022             push (@brevisions, $1);
1023
1024             # todo: we're still collecting branch roots, but we're not
1025             # showing them anywhere.  If we do show them, it would be
1026             # nifty to just call them revision "0" on a the branch.
1027             # Yeah, that's the ticket.
1028           }
1029         }
1030       }
1031       $beauty .= " ($branch";
1032       if (@brevisions) {
1033         if ((scalar (@brevisions)) > 1) {
1034           $beauty .= ".[";
1035           $beauty .= (join (',', @brevisions));
1036           $beauty .= "]";
1037         }
1038         else {
1039           $beauty .= ".$brevisions[0]";
1040         }
1041       }
1042       $beauty .= ")";
1043     }
1044   }
1045
1046   # Okay; any qunks that were done according to branch are taken care
1047   # of, and marked as printed.  Now print everyone else.
1048
1049   foreach my $qunkref (@qunkrefs)
1050   {
1051     next if (defined ($$qunkref{'printed'}));   # skip if already printed
1052
1053     if ($fbegun) {
1054       $beauty .= ", ";
1055     }
1056     else {
1057       $fbegun = 1;
1058     }
1059     $beauty .= substr ($$qunkref{'filename'}, length ($common_dir));
1060     # todo: Shlomo's change was this:
1061     # $beauty .= substr ($$qunkref{'filename'},
1062     #              (($common_dir eq "./") ? "" : length ($common_dir)));
1063     $$qunkref{'printed'} = 1;  # Set a mark bit.
1064
1065     if ($Show_Revisions || $Show_Tags)
1066     {
1067       my $started_addendum = 0;
1068
1069       if ($Show_Revisions) {
1070         $started_addendum = 1;
1071         $beauty .= " (";
1072         $beauty .= "$$qunkref{'revision'}";
1073       }
1074       if ($Show_Tags && (defined $$qunkref{'tags'})) {
1075         my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
1076         if ((scalar (@tags)) > 0) {
1077           if ($started_addendum) {
1078             $beauty .= ", ";
1079           }
1080           else {
1081             $beauty .= " (tags: ";
1082           }
1083           $beauty .= join (', ', @tags);
1084           $started_addendum = 1;
1085         }
1086       }
1087       if ($started_addendum) {
1088         $beauty .= ")";
1089       }
1090     }
1091   }
1092
1093   # Unanimous tags always come last.
1094   if ($Show_Tags && %unanimous_tags)
1095   {
1096     $beauty .= " (utags: ";
1097     $beauty .= join (', ', keys (%unanimous_tags));
1098     $beauty .= ")";
1099   }
1100
1101   # todo: still have to take care of branch_roots?
1102
1103   $beauty = "* $beauty:";
1104
1105   return $beauty;
1106 }
1107
1108
1109 sub common_path_prefix ()
1110 {
1111   my $path1 = shift;
1112   my $path2 = shift;
1113
1114   my ($dir1, $dir2);
1115   (undef, $dir1, undef) = fileparse ($path1);
1116   (undef, $dir2, undef) = fileparse ($path2);
1117
1118   # Transmogrify Windows filenames to look like Unix.
1119   # (It is far more likely that someone is running cvs2cl.pl under
1120   # Windows than that they would genuinely have backslashes in their
1121   # filenames.)
1122   $dir1 =~ tr#\\#/#;
1123   $dir2 =~ tr#\\#/#;
1124
1125   my $accum1 = "";
1126   my $accum2 = "";
1127   my $last_common_prefix = "";
1128
1129   while ($accum1 eq $accum2)
1130   {
1131     $last_common_prefix = $accum1;
1132     last if ($accum1 eq $dir1);
1133     my ($tmp1) = split (/\//, (substr ($dir1, length ($accum1))));
1134     my ($tmp2) = split (/\//, (substr ($dir2, length ($accum2))));
1135     $accum1 .= "$tmp1/" if ((defined ($tmp1)) and $tmp1);
1136     $accum2 .= "$tmp2/" if ((defined ($tmp2)) and $tmp2);
1137   }
1138
1139   return $last_common_prefix;
1140 }
1141
1142
1143 sub preprocess_msg_text ()
1144 {
1145   my $text = shift;
1146
1147   # Strip out carriage returns (as they probably result from DOSsy editors).
1148   $text =~ s/\r\n/\n/g;
1149
1150   # If it *looks* like two newlines, make it *be* two newlines:
1151   $text =~ s/\n\s*\n/\n\n/g;
1152
1153   if ($XML_Output)
1154   {
1155     $text = &xml_escape ($text);
1156     $text = "<msg>${text}</msg>\n";
1157   }
1158   elsif (! $No_Wrap)
1159   {
1160     # Strip off lone newlines, but only for lines that don't begin with
1161     # whitespace or a mail-quoting character, since we want to preserve
1162     # that kind of formatting.  Also don't strip newlines that follow a
1163     # period; we handle those specially next.  And don't strip
1164     # newlines that precede an open paren.
1165     1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g);
1166
1167     # If a newline follows a period, make sure that when we bring up the
1168     # bottom sentence, it begins with two spaces.
1169     1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g);
1170   }
1171
1172   return $text;
1173 }
1174
1175
1176 sub last_line_len ()
1177 {
1178   my $files_list = shift;
1179   my @lines = split (/\n/, $files_list);
1180   my $last_line = pop (@lines);
1181   return length ($last_line);
1182 }
1183
1184
1185 # A custom wrap function, sensitive to some common constructs used in
1186 # log entries.
1187 sub wrap_log_entry ()
1188 {
1189   my $text = shift;                  # The text to wrap.
1190   my $left_pad_str = shift;          # String to pad with on the left.
1191
1192   # These do NOT take left_pad_str into account:
1193   my $length_remaining = shift;      # Amount left on current line.
1194   my $max_line_length  = shift;      # Amount left for a blank line.
1195
1196   my $wrapped_text = "";             # The accumulating wrapped entry.
1197   my $user_indent = "";              # Inherited user_indent from prev line.
1198
1199   my $first_time = 1;                # First iteration of the loop?
1200   my $suppress_line_start_match = 0; # Set to disable line start checks.
1201
1202   my @lines = split (/\n/, $text);
1203   while (@lines)   # Don't use `foreach' here, it won't work.
1204   {
1205     my $this_line = shift (@lines);
1206     chomp $this_line;
1207
1208     if ($this_line =~ /^(\s+)/) {
1209       $user_indent = $1;
1210     }
1211     else {
1212       $user_indent = "";
1213     }
1214
1215     # If it matches any of the line-start regexps, print a newline now...
1216     if ($suppress_line_start_match)
1217     {
1218       $suppress_line_start_match = 0;
1219     }
1220     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1221            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1222            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1223            || ($this_line =~ /^(\s+)(\S+)/)
1224            || ($this_line =~ /^(\s*)- +/)
1225            || ($this_line =~ /^()\s*$/)
1226            || ($this_line =~ /^(\s*)\*\) +/)
1227            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1228     {
1229       # Make a line break immediately, unless header separator is set
1230       # and this line is the first line in the entry, in which case
1231       # we're getting the blank line for free already and shouldn't
1232       # add an extra one.
1233       unless (($After_Header ne " ") and ($first_time))
1234       {
1235         if ($this_line =~ /^()\s*$/) {
1236           $suppress_line_start_match = 1;
1237           $wrapped_text .= "\n${left_pad_str}";
1238         }
1239
1240         $wrapped_text .= "\n${left_pad_str}";
1241       }
1242
1243       $length_remaining = $max_line_length - (length ($user_indent));
1244     }
1245
1246     # Now that any user_indent has been preserved, strip off leading
1247     # whitespace, so up-folding has no ugly side-effects.
1248     $this_line =~ s/^\s*//;
1249
1250     # Accumulate the line, and adjust parameters for next line.
1251     my $this_len = length ($this_line);
1252     if ($this_len == 0)
1253     {
1254       # Blank lines should cancel any user_indent level.
1255       $user_indent = "";
1256       $length_remaining = $max_line_length;
1257     }
1258     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1259     {
1260       # Walk backwards from the end.  At first acceptable spot, break
1261       # a new line.
1262       my $idx = $length_remaining - 1;
1263       if ($idx < 0) { $idx = 0 };
1264       while ($idx > 0)
1265       {
1266         if (substr ($this_line, $idx, 1) =~ /\s/)
1267         {
1268           my $line_now = substr ($this_line, 0, $idx);
1269           my $next_line = substr ($this_line, $idx);
1270           $this_line = $line_now;
1271
1272           # Clean whitespace off the end.
1273           chomp $this_line;
1274
1275           # The current line is ready to be printed.
1276           $this_line .= "\n${left_pad_str}";
1277
1278           # Make sure the next line is allowed full room.
1279           $length_remaining = $max_line_length - (length ($user_indent));
1280
1281           # Strip next_line, but then preserve any user_indent.
1282           $next_line =~ s/^\s*//;
1283
1284           # Sneak a peek at the user_indent of the upcoming line, so
1285           # $next_line (which will now precede it) can inherit that
1286           # indent level.  Otherwise, use whatever user_indent level
1287           # we currently have, which might be none.
1288           my $next_next_line = shift (@lines);
1289           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1290             $next_line = $1 . $next_line if (defined ($1));
1291             # $length_remaining = $max_line_length - (length ($1));
1292             $next_next_line =~ s/^\s*//;
1293           }
1294           else {
1295             $next_line = $user_indent . $next_line;
1296           }
1297           if (defined ($next_next_line)) {
1298             unshift (@lines, $next_next_line);
1299           }
1300           unshift (@lines, $next_line);
1301
1302           # Our new next line might, coincidentally, begin with one of
1303           # the line-start regexps, so we temporarily turn off
1304           # sensitivity to that until we're past the line.
1305           $suppress_line_start_match = 1;
1306
1307           last;
1308         }
1309         else
1310         {
1311           $idx--;
1312         }
1313       }
1314
1315       if ($idx == 0)
1316       {
1317         # We bottomed out because the line is longer than the
1318         # available space.  But that could be because the space is
1319         # small, or because the line is longer than even the maximum
1320         # possible space.  Handle both cases below.
1321
1322         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1323         {
1324           # The line is simply too long -- there is no hope of ever
1325           # breaking it nicely, so just insert it verbatim, with
1326           # appropriate padding.
1327           $this_line = "\n${left_pad_str}${this_line}";
1328         }
1329         else
1330         {
1331           # Can't break it here, but may be able to on the next round...
1332           unshift (@lines, $this_line);
1333           $length_remaining = $max_line_length - (length ($user_indent));
1334           $this_line = "\n${left_pad_str}";
1335         }
1336       }
1337     }
1338     else  # $this_len < $length_remaining, so tack on what we can.
1339     {
1340       # Leave a note for the next iteration.
1341       $length_remaining = $length_remaining - $this_len;
1342
1343       if ($this_line =~ /\.$/)
1344       {
1345         $this_line .= "  ";
1346         $length_remaining -= 2;
1347       }
1348       else  # not a sentence end
1349       {
1350         $this_line .= " ";
1351         $length_remaining -= 1;
1352       }
1353     }
1354
1355     # Unconditionally indicate that loop has run at least once.
1356     $first_time = 0;
1357
1358     $wrapped_text .= "${user_indent}${this_line}";
1359   }
1360
1361   # One last bit of padding.
1362   $wrapped_text .= "\n";
1363
1364   return $wrapped_text;
1365 }
1366
1367
1368 sub xml_escape ()
1369 {
1370   my $txt = shift;
1371   $txt =~ s/&/&amp;/g;
1372   $txt =~ s/</&lt;/g;
1373   $txt =~ s/>/&gt;/g;
1374   return $txt;
1375 }
1376
1377
1378 sub maybe_read_user_map_file ()
1379 {
1380   my %expansions;
1381
1382   if ($User_Map_File)
1383   {
1384     open (MAPFILE, "<$User_Map_File")
1385         or die ("Unable to open $User_Map_File ($!)");
1386
1387     while (<MAPFILE>)
1388     {
1389       next if /^\s*#/;  # Skip comment lines.
1390       next if not /:/;  # Skip lines without colons.
1391
1392       # It is now safe to split on ':'.
1393       my ($username, $expansion) = split ':';
1394       chomp $expansion;
1395       $expansion =~ s/^'(.*)'$/$1/;
1396       $expansion =~ s/^"(.*)"$/$1/;
1397
1398       # If it looks like the expansion has a real name already, then
1399       # we toss the username we got from CVS log.  Otherwise, keep
1400       # it to use in combination with the email address.
1401
1402       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
1403         # Also, add angle brackets if none present
1404         if (! ($expansion =~ /<\S+@\S+>/)) {
1405           $expansions{$username} = "$username <$expansion>";
1406         }
1407         else {
1408           $expansions{$username} = "$username $expansion";
1409         }
1410       }
1411       else {
1412         $expansions{$username} = $expansion;
1413       }
1414     }
1415
1416     close (MAPFILE);
1417   }
1418
1419   return %expansions;
1420 }
1421
1422
1423 sub parse_options ()
1424 {
1425   # Check this internally before setting the global variable.
1426   my $output_file;
1427
1428   # If this gets set, we encountered unknown options and will exit at
1429   # the end of this subroutine.
1430   my $exit_with_admonishment = 0;
1431
1432   while (my $arg = shift (@ARGV))
1433   {
1434     if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) {
1435       $Print_Usage = 1;
1436     }
1437     elsif ($arg =~ /^--debug$/) {        # unadvertised option, heh
1438       $Debug = 1;
1439     }
1440     elsif ($arg =~ /^--version$/) {
1441       $Print_Version = 1;
1442     }
1443     elsif ($arg =~ /^-g$|^--global-opts$/) {
1444       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1445       # Don't assume CVS is called "cvs" on the user's system:
1446       $Log_Source_Command =~ s/(^\S*)/$1 $narg/;
1447     }
1448     elsif ($arg =~ /^-l$|^--log-opts$/) {
1449       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1450       $Log_Source_Command .= " $narg";
1451     }
1452     elsif ($arg =~ /^-f$|^--file$/) {
1453       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1454       $output_file = $narg;
1455     }
1456     elsif ($arg =~ /^--fsf$/) {
1457       $FSF_Style = 1;
1458     }
1459     elsif ($arg =~ /^-U$|^--usermap$/) {
1460       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1461       $User_Map_File = $narg;
1462     }
1463     elsif ($arg =~ /^-W$|^--window$/) {
1464       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1465       $Max_Checkin_Duration = $narg;
1466     }
1467     elsif ($arg =~ /^-I$|^--ignore$/) {
1468       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1469       push (@Ignore_Files, $narg);
1470     }
1471     elsif ($arg =~ /^-C$|^--case-insensitive$/) {
1472       $Case_Insensitive = 1;
1473     }
1474     elsif ($arg =~ /^-R$|^--regexp$/) {
1475       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1476       $Regexp_Gate = $narg;
1477     }
1478     elsif ($arg =~ /^--stdout$/) {
1479       $Output_To_Stdout = 1;
1480     }
1481     elsif ($arg =~ /^--version$/) {
1482       $Print_Version = 1;
1483     }
1484     elsif ($arg =~ /^-d$|^--distributed$/) {
1485       $Distributed = 1;
1486     }
1487     elsif ($arg =~ /^-P$|^--prune$/) {
1488       $Prune_Empty_Msgs = 1;
1489     }
1490     elsif ($arg =~ /^-S$|^--separate-header$/) {
1491       $After_Header = "\n\n";
1492     }
1493     elsif ($arg =~ /^--no-wrap$/) {
1494       $No_Wrap = 1;
1495     }
1496     elsif ($arg =~ /^--gmt$|^--utc$/) {
1497       $UTC_Times = 1;
1498     }
1499     elsif ($arg =~ /^-w$|^--day-of-week$/) {
1500       $Show_Day_Of_Week = 1;
1501     }
1502     elsif ($arg =~ /^-r$|^--revisions$/) {
1503       $Show_Revisions = 1;
1504     }
1505     elsif ($arg =~ /^-t$|^--tags$/) {
1506       $Show_Tags = 1;
1507     }
1508     elsif ($arg =~ /^-b$|^--branches$/) {
1509       $Show_Branches = 1;
1510     }
1511     elsif ($arg =~ /^-F$|^--follow$/) {
1512       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1513       push (@Follow_Branches, $narg);
1514     }
1515     elsif ($arg =~ /^--stdin$/) {
1516       $Input_From_Stdin = 1;
1517     }
1518     elsif ($arg =~ /^--header$/) {
1519       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1520       $ChangeLog_Header = &slurp_file ($narg);
1521       if (! defined ($ChangeLog_Header)) {
1522         $ChangeLog_Header = "";
1523       }
1524     }
1525     elsif ($arg =~ /^--xml$/) {
1526       $XML_Output = 1;
1527     }
1528     elsif ($arg =~ /^--hide-filenames$/) {
1529       $Hide_Filenames = 1;
1530       $After_Header = "";
1531     }
1532     else {
1533       # Just add a filename as argument to the log command
1534       $Log_Source_Command .= " $arg";
1535     }
1536   }
1537
1538   ## Check for contradictions...
1539
1540   if ($Output_To_Stdout && $Distributed) {
1541     print STDERR "cannot pass both --stdout and --distributed\n";
1542     $exit_with_admonishment = 1;
1543   }
1544
1545   if ($Output_To_Stdout && $output_file) {
1546     print STDERR "cannot pass both --stdout and --file\n";
1547     $exit_with_admonishment = 1;
1548   }
1549
1550   # Or if any other error message has already been printed out, we
1551   # just leave now:
1552   if ($exit_with_admonishment) {
1553     &usage ();
1554     exit (1);
1555   }
1556   elsif ($Print_Usage) {
1557     &usage ();
1558     exit (0);
1559   }
1560   elsif ($Print_Version) {
1561     &version ();
1562     exit (0);
1563   }
1564
1565   ## Else no problems, so proceed.
1566
1567   if ($Output_To_Stdout) {
1568     undef $Log_File_Name;       # not actually necessary
1569   }
1570   elsif ($output_file) {
1571     $Log_File_Name = $output_file;
1572   }
1573 }
1574
1575
1576 sub slurp_file ()
1577 {
1578   my $filename = shift || die ("no filename passed to slurp_file()");
1579   my $retstr;
1580
1581   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
1582   my $saved_sep = $/;
1583   undef $/;
1584   $retstr = <SLURPEE>;
1585   $/ = $saved_sep;
1586   close (SLURPEE);
1587   return $retstr;
1588 }
1589
1590
1591 sub debug ()
1592 {
1593   if ($Debug) {
1594     my $msg = shift;
1595     print STDERR $msg;
1596   }
1597 }
1598
1599
1600 sub version ()
1601 {
1602   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
1603 }
1604
1605
1606 sub usage ()
1607 {
1608   &version ();
1609   print <<'END_OF_INFO';
1610 Generate GNU-style ChangeLogs in CVS working copies.
1611
1612 Notes about the output format(s):
1613
1614    The default output of cvs2cl.pl is designed to be compact, formally
1615    unambiguous, but still easy for humans to read.  It is largely
1616    self-explanatory, I hope; the one abbreviation that might not be
1617    obvious is "utags".  That stands for "universal tags" -- a
1618    universal tag is one held by all the files in a given change entry.
1619
1620    If you need output that's easy for a program to parse, use the
1621    --xml option.  Note that with XML output, just about all available
1622    information is included with each change entry, whether you asked
1623    for it or not, on the theory that your parser can ignore anything
1624    it's not looking for.
1625
1626 Notes about the options and arguments (the actual options are listed
1627 last in this usage message):
1628
1629   * The -I and -F options may appear multiple times.
1630
1631   * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works).
1632     This is okay because no would ever, ever be crazy enough to name a
1633     branch "trunk", right?  Right.
1634
1635   * For the -U option, the UFILE should be formatted like
1636     CVSROOT/users. That is, each line of UFILE looks like this
1637        jrandom:jrandom@red-bean.com
1638     or maybe even like this
1639        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
1640     Don't forget to quote the portion after the colon if necessary.
1641
1642   * Many people want to filter by date.  To do so, invoke cvs2cl.pl
1643     like this:
1644        cvs2cl.pl -l "-d'DATESPEC'"
1645     where DATESPEC is any date specification valid for "cvs log -d".
1646     (Note that CVS 1.10.7 and below requires there be no space between
1647     -d and its argument).
1648
1649 Options/Arguments:
1650
1651   -h, -help, --help, or -?     Show this usage and exit
1652   --version                    Show version and exit
1653   -r, --revisions              Show revision numbers in output
1654   -b, --branches               Show branch names in revisions when possible
1655   -t, --tags                   Show tags (symbolic names) in output
1656   --stdin                      Read from stdin, don't run cvs log
1657   --stdout                     Output to stdout not to ChangeLog
1658   -d, --distributed            Put ChangeLogs in subdirs
1659   -f FILE, --file FILE         Write to FILE instead of "ChangeLog"
1660   --fsf                        Use this if log data is in FSF ChangeLog style
1661   -W SECS, --window SECS       Window of time within which log entries unify
1662   -U UFILE, --usermap UFILE    Expand usernames to email addresses from UFILE
1663   -R REGEXP, --regexp REGEXP   Include only entries that match REGEXP
1664   -I REGEXP, --ignore REGEXP   Ignore files whose names match REGEXP
1665   -C, --case-insensitive       Any regexp matching is done case-insensitively
1666   -F BRANCH, --follow BRANCH   Show only revisions on or ancestral to BRANCH
1667   -S, --separate-header        Blank line between each header and log message
1668   --no-wrap                    Don't auto-wrap log message (recommend -S also)
1669   --gmt, --utc                 Show times in GMT/UTC instead of local time
1670   -w, --day-of-week            Show day of week
1671   --header FILE                Get ChangeLog header from FILE ("-" means stdin)
1672   --xml                        Output XML instead of ChangeLog format
1673   --hide-filenames             Don't show filenames (ignored for XML output)
1674   -P, --prune                  Don't show empty log messages
1675   -g OPTS, --global-opts OPTS  Invoke like this "cvs OPTS log ..."
1676   -l OPTS, --log-opts OPTS     Invoke like this "cvs ... log OPTS"
1677   FILE1 [FILE2 ...]            Show only log information for the named FILE(s)
1678
1679 See http://www.red-bean.com/cvs2cl for maintenance and bug info.
1680 END_OF_INFO
1681 }
1682
1683 __END__
1684
1685 =head1 NAME
1686
1687 cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by
1688     running "cvs log" and parsing the output.  Shared log entries are
1689     unified in an intuitive way.
1690
1691 =head1 DESCRIPTION
1692
1693 This script generates GNU-style ChangeLog files from CVS log
1694 information.  Basic usage: just run it inside a working copy and a
1695 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1696 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1697
1698 See http://www.red-bean.com/cvs2cl for updates, and for instructions
1699 on getting anonymous CVS access to this script.
1700
1701 Maintainer: Karl Fogel <kfogel@red-bean.com>
1702 Please report bugs to <bug-cvs2cl@red-bean.com>.
1703
1704 =head1 README
1705
1706 This script generates GNU-style ChangeLog files from CVS log
1707 information.  Basic usage: just run it inside a working copy and a
1708 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1709 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1710
1711 See http://www.red-bean.com/cvs2cl for updates, and for instructions
1712 on getting anonymous CVS access to this script.
1713
1714 Maintainer: Karl Fogel <kfogel@red-bean.com>
1715 Please report bugs to <bug-cvs2cl@red-bean.com>.
1716
1717 =head1 PREREQUISITES
1718
1719 This script requires C<Text::Wrap>, C<Time::Local>, and
1720 C<File::Basename>.
1721 It also seems to require C<Perl 5.004_04> or higher.
1722
1723 =pod OSNAMES
1724
1725 any
1726
1727 =pod SCRIPT CATEGORIES
1728
1729 Version_Control/CVS
1730
1731 =cut
1732
1733
1734 -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
1735
1736 Note about a bug-slash-opportunity:
1737 -----------------------------------
1738
1739 There's a bug in Text::Wrap, which affects cvs2cl.  This script
1740 reveals it:
1741
1742   #!/usr/bin/perl -w
1743
1744   use Text::Wrap;
1745
1746   my $test_text =
1747   "This script demonstrates a bug in Text::Wrap.  The very long line
1748   following this paragraph will be relocated relative to the surrounding
1749   text:
1750
1751   ====================================================================
1752
1753   See?  When the bug happens, we'll get the line of equal signs below
1754   this paragraph, even though it should be above.";
1755
1756
1757   # Print out the test text with no wrapping:
1758   print "$test_text";
1759   print "\n";
1760   print "\n";
1761
1762   # Now print it out wrapped, and see the bug:
1763   print wrap ("\t", "        ", "$test_text");
1764   print "\n";
1765   print "\n";
1766
1767 If the line of equal signs were one shorter, then the bug doesn't
1768 happen.  Interesting.
1769
1770 Anyway, rather than fix this in Text::Wrap, we might as well write a
1771 new wrap() which has the following much-needed features:
1772
1773 * initial indentation, like current Text::Wrap()
1774 * subsequent line indentation, like current Text::Wrap()
1775 * user chooses among: force-break long words, leave them alone, or die()?
1776 * preserve existing indentation: chopped chunks from an indented line
1777   are indented by same (like this line, not counting the asterisk!)
1778 * optional list of things to preserve on line starts, default ">"
1779
1780 Note that the last two are essentially the same concept, so unify in
1781 implementation and give a good interface to controlling them.
1782
1783 And how about:
1784
1785 Optionally, when encounter a line pre-indented by same as previous
1786 line, then strip the newline and refill, but indent by the same.
1787 Yeah...