#!/usr/bin/perl -T
#
# darcs.cgi - the darcs repository viewer
#
# Copyright (c) 2004 Will Glozer
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#
# This program calls darcs (or its own subroutines) to generate XML
# which is rendered into HTML by XSLT.  It is capable of displaying
# the files in a repository, various patch histories, annotations, etc.
#

use strict;

use CGI qw( :standard );
use CGI::Util;
use File::Basename;
use File::stat;
use IO::File;
use POSIX;

## the following variables can be customized to reflect your system
## configuration by defining them appropriately in the file
## "/usr/local/share/darcs/darcs/cgi.conf".  The syntax accepts equals signs or simply
## blanks separating values from assignments.

$ENV{'PATH'} = read_conf('PATH', $ENV{'PATH'});

# path to executables, or just the executable if they are in $ENV{'PATH'}
my $darcs_program    = read_conf("darcs", "darcs");
my $xslt_program     = read_conf("xsltproc", "xsltproc");

# directory containing repositories
my $repository_root  = read_conf("reposdir", "/var/www");

# XSLT template locations
my $template_root = read_conf("xslt_dir", '/usr/local/share/darcs/xslt');

my $xslt_annotate = "$template_root/annotate.xslt";
my $xslt_browse   = "$template_root/browse.xslt";
my $xslt_patches  = "$template_root/patches.xslt";
my $xslt_repos    = "$template_root/repos.xslt";
my $xslt_rss      = "$template_root/rss.xslt";

my $xslt_errors   = "$template_root/errors.xslt";

# CSS stylesheet that XSLT templates refer to.  This is a HTTP request
# path, not a local file system path. The default will cause darcs.cgi
# to serve the stylesheet rather than the web server.
my $stylesheet = read_conf("stylesheet", "/cgi-bin/darcs.cgi/styles.css");

# location of the CSS stylesheet that darcs.cgi will serve if it
# receives a request for '/styles.css'
my $css_styles = read_conf("css_styles", '/usr/local/share/darcs/darcs/styles.css');

# location of the favicon that darcs.cgi will serve if it
# receives a request for '/[\w\-]+/favicon.ico'
my $favicon = read_conf("favicon", "/cgi-bin/favicon.ico");

# XML source for the error pages
my $xml_errors = "$template_root/errors.xml";

# encoding to include in XML declaration
my $xml_encoding = read_conf("xml_encoding", "UTF-8");

## end customization

# ----------------------------------------------------------------------

# read a value from the cgi.conf file.
{
  my(%conf);

  sub read_conf {
    my ($flag, $val) = @_;
    $val = "" if !defined($val);
    
    if (!%conf && open(CGI_CONF, '/usr/local/share/darcs/darcs/cgi.conf')) {
      while (<CGI_CONF>) {
        chomp;
	next if /^\s*(?:\#.*)?$/;   # Skip blank lines and comment lines
        if (/^\s*(\S+)\s*(?:\=\s*)?(\S+)\s*$/) {
           $conf{$1} = $2;
	   # print "read_conf: $1 = $2\n";
        } else {
           warn "read_conf: $_\n";
        }
      }
      close(CGI_CONF);
    }

    $val = $conf{$flag} if exists($conf{$flag});

    return $val;
  }
}

# open xsltproc to transform and output `xml' with stylesheet file `xslt'
sub transform {
    my ($xslt, $args, $content_type) = @_;

    $| = 1;
    printf "Content-type: %s\r\n\r\n", $content_type || "text/html";
    my $pipe = new IO::File "| $xslt_program $args $xslt -";
    $pipe->autoflush(0);
    return $pipe;
}

sub pristine_dir {
    my ($repo) = @_;
    my $pristine = "current";
    if (! -d "${repository_root}/${repo}/_darcs/$pristine") {
        $pristine = "pristine";
    }
    return "${repository_root}/${repo}/_darcs/$pristine";
}

# begin an XML document with a root element and the repository path
sub make_xml {
    my ($fh, $repo, $dir, $file) = @_;
    my ($full_path, $path) = '/';

    printf $fh qq(<?xml version="1.0" encoding="$xml_encoding"?>\n);

    printf $fh qq(<darcs repository="$repo" target="%s/%s%s">\n),
        $repo, ($dir ? "$dir/" : ''), ($file ? "$file" : '');

    print $fh qq(<path>\n);
    foreach $path (split('/', "$repo/$dir")) {
        $full_path .= "$path/";
        print $fh qq(<directory full-path="$full_path">$path</directory>\n);
    }
    if ($file) {
        print $fh qq(<file full-path="$full_path$file">$file</file>\n) if $file;
    }
    print $fh qq(</path>\n\n);
}

# finish XML output
sub finish_xml {
    my ($fh) = @_;
    print $fh "\n</darcs>\n";
    $fh->flush;
}

# run darcs and wrap the output in an XML document
sub darcs_xml {
    my ($fh, $repo, $cmd, $args, $dir, $file) = @_;

    make_xml($fh, $repo, $dir, $file);

    push(@$args, '--xml-output');
    darcs($fh, $repo, $cmd, $args, $dir, $file);

    finish_xml($fh);
}

# run darcs with output redirected to the specified file handle
sub darcs {
    my ($fh, $repo, $cmd, $args, $dir, $file) = @_;
    my (@darcs_argv) = ($darcs_program, $cmd, @$args);

    # push target only if there is one, otherwise darcs will get an empty param
    if ($dir || $file) {
        push(@darcs_argv, sprintf("%s%s%s", $dir, ($dir ? '/' : ''), $file));
    }

    my($pid) = fork;
    if ($pid) {
	# in the parent process
	my($status) = waitpid($pid, 0);
	die "$darcs_program exited with status $?\n" if $?;
    } elsif(defined($pid)) {
	# in the child process
	open(STDIN, '/dev/null');
	if (defined($fh)) {
	    open(STDOUT, '>&', $fh)
		|| die "can't dup to stdout: $!\n";
	}
	chdir "$repository_root/$repo"
	    || die "chdir: $repository_root/$repo: $!\n";
	exec @darcs_argv;
	die "can't exec ".$darcs_argv[0].": $!\n";
    } else {
	# fork failed
	die "can't fork: $!\n";
    }
}

# get a directory listing as XML output
sub dir_listing {
    my ($fh, $repo, $dir) = @_;
    make_xml($fh, $repo, $dir, '');

    print $fh "<files>\n";
    my $dir_ = pristine_dir ($repo) . "/$dir";
    opendir(DH, $dir_);
    while( defined (my $file_ = readdir(DH)) ) {
        next if $file_ =~ /^\.\.?$/;
        my $file = "$dir_/$file_";
        my $secs  = stat($file)->mtime;
        my $mtime = localtime($secs);
        my $ts = POSIX::strftime("%Y%m%d%H%M%S", gmtime $secs);

        my ($name, $type);

         if (-d $file) {
             ($name, $type) = (basename($file) . '/', 'directory');
         } else {
             ($name, $type) = (basename($file), 'file');
         }
         print $fh qq(  <$type name="$name" modified="$mtime" ts="$ts" />\n);
    }
    closedir(DH);
    print $fh "</files>\n";

    finish_xml($fh);
}

# get a repository listing as XML output
sub repo_listing {
    my($fh) = @_;

    make_xml($fh, "", "", "");

    print $fh "<repositories>\n";
    opendir(DH, $repository_root);
    while( defined (my $name = readdir(DH)) ) {
        next if $name =~ /^\.\.?$/;
        if (-d "$repository_root/$name/_darcs") {
            print $fh qq(  <repository name="$name" />\n);
        }
    }
    closedir(DH);
    print $fh "</repositories>\n";

    finish_xml($fh);
    return $fh;
}

# show an error page
sub show_error {
    my ($type, $code, $message) = @_;
    my $xml;

    # set the xslt processing arguments
    my $xslt_args = qq {
        --stringparam error-type '$type'
        --stringparam stylesheet '$stylesheet'
    };
    $xslt_args =~ s/\s+/ /gm;

    print "Status: $code $message\r\n\r\n";
    system("$xslt_program $xslt_args $xslt_errors $xml_errors");
}

# check if the requested resource has been modified since the client last
# saw it. If not send HTTP status code 304, otherwise set the Last-modified
# and Cache-control headers.
sub is_cached {
    my ($path) = @_;
    my ($stat) = stat($path);

    # stat may fail because the path was renamed or deleted but still referred
    # to by older darcs patches
    if ($stat) {
        my $last_modified = CGI::expires($stat->mtime);

        if (http('If-Modified-Since') eq $last_modified) {
            print("Status: 304 Not Modified\r\n\r\n");
            return 1;
        }

        print("Cache-control: max-age=0, must-revalidate\r\n");
        print("Last-modified: $last_modified\r\n");
    }
    return 0;
}

# safely extract a parameter from the http request.  This applies a regexp
# to the parameter which should group only the appropriate parameter value
sub safe_param {
    my ($param, $regex, $default) = @_;
    my $value = CGI::Util::unescape(param($param));
    return ($value =~ $regex) ? $1 : $default;
}

# common regular expressions for validating passed parameters
my $hash_regex = qr/^([\w\-.]+)$/;
my $path_regex = qr@^([^\\!\$\^&*()\[\]{}<>`|';"?\r\n]+)$@;

# respond to a CGI request
sub respond {
    # untaint the full URL to this CGI
    my $cgi_url = CGI::Util::unescape(url());
    $cgi_url =~ $path_regex or die qq(bad url "$cgi_url");
    $cgi_url = $1;

    # untaint script_name, reasonable to expect only \w, -, /, and . in the name
    my $script_name = CGI::Util::unescape(script_name());
    $script_name =~ qr~^([\w/.\-\~]+)$~ or die qq(bad script_name "$script_name");
    $script_name = $1;

    # untaint simple parameters, which can only have chars matching \w+
    my $cmd  = safe_param('c', '^(\w+)$', 'browse');
    my $sort = safe_param('s', '^(\w+)$', '');

    # set the xslt processing arguments
    my $xslt_args = qq {
        --stringparam cgi-program '$script_name'
        --stringparam cgi-url '$cgi_url'
        --stringparam sort-by '$sort'
        --stringparam stylesheet '$stylesheet'
    };
    $xslt_args =~ s/\s+/ /gm;

    my ($path) = CGI::Util::unescape(path_info());
    # don't allow ./ or ../ in paths
    $path =~ s|[.]+/||g;

    # check whether we're asking for styles.css
    if ($path eq '/styles.css') {
        return if is_cached($css_styles);

        open (STYLES_CSS, $css_styles) or die qq(couldn't open "${css_styles}");
        my $size = stat($css_styles)->size;

        print "Content-length: $size\r\n";
        print "Content-type: text/css\r\n\r\n";

        while (<STYLES_CSS>) {
          print $_;
        }
        close (STYLES_CSS);
        return;
    }

    # check whether we're asking for favicon.ico
    if ($path =~ '/[\w\-]+/favicon.ico') {
        return if is_cached($favicon);

        open (FAVICON, $favicon) or die qq(couldn't open "${favicon}");
        my $size = stat($favicon)->size;

        print "Content-length: $size\r\n";
        print "Content-type: image/x-icon\r\n\r\n";

        while (<FAVICON>) {
          print $_;
        }
        close (FAVICON);
        return;
    }

    # when no repository is requested display available repositories
    if (length($path) < 2) {
        my $fh = transform($xslt_repos, $xslt_args);
        repo_listing($fh);
        return;
    }

    # don't allow any shell meta characters in paths
    $path =~ $path_regex or die qq(bad path_info "$path");
    my @path = split('/', substr($1, 1));

    # split the path into a repository, directory, and file
    my ($repo, $dir, $file, @bits) = ('', '', '');
    while (@path > 0) {
        $repo = join('/', @path);
        # check if remaining path elements refer to a repo
        if (-d "${repository_root}/${repo}/_darcs") {
            if (@bits > 1) {
                $dir  = join('/', @bits[0..$#bits - 1]);
            }
            $file = $bits[$#bits];
            # check if last element of path, stored in $file, is really a dir
            if (-d (pristine_dir ($repo) . "/${dir}/${file}")) {
                $dir = ($dir ? "$dir/$file" : $file);
                $file = '';
            }
            last;
        } else {
            $repo = '';
            unshift(@bits, pop @path);
        }
    }

    # make sure the repository exists
    unless ($repo) {
        show_error('invalid-repository', '404', 'Invalid repository');
        return;
    }

    # don't generate output unless the requested path has been
    # modified since the client last saw it.
    return if is_cached(pristine_dir ($repo) . "/$dir/$file");

    # untaint patches and tags. Tags can have arbitrary values, so
    # never pass these unquoted, on pain of pain!
    my $patch = safe_param('p', $hash_regex);
    my $tag   = safe_param('t', '^(.+)$');

    my @darcs_args;
    push(@darcs_args, '--match', "hash $patch") if $patch;
    push(@darcs_args, '-t', $tag) if $tag;

    # process the requested command
    if ($cmd eq 'browse') {
        my $fh = transform($xslt_browse, $xslt_args);
        dir_listing($fh, $repo, $dir);
    } elsif ($cmd eq 'patches') {
        # patches as an option is used to support "--patches"
        if (my $patches = safe_param('patches','^(.+)$')) {
            push @darcs_args, '--patches', $patches;
        }

        my $fh = transform($xslt_patches, $xslt_args);
        darcs_xml($fh, $repo, "changes", \@darcs_args, $dir, $file);
    } elsif ($cmd eq 'annotate') {
        push(@darcs_args, '--summary');

        my $creator_hash  = safe_param('ch', $hash_regex);
        my $original_path = safe_param('o', $path_regex);
        my $fh = transform($xslt_annotate, $xslt_args);

        # use the creator hash and original file name when available so
        # annotations can span renames
        if ($creator_hash ne '' && $original_path ne '') {
            push(@darcs_args, '--creator-hash', $creator_hash);
            darcs_xml($fh, $repo, "annotate", \@darcs_args, '', $original_path);
        } else {
            darcs_xml($fh, $repo, "annotate", \@darcs_args, $dir, $file);
        }
    } elsif ($cmd eq 'diff') {
        push(@darcs_args, '-u');
        print "Content-type: text/plain\r\n\r\n";
        darcs(undef, $repo, "diff", \@darcs_args, $dir, $file);
    } elsif ($cmd eq 'rss') {
        push(@darcs_args, '--last', '25');

        my $fh = transform($xslt_rss, $xslt_args, "application/rss+xml");
        darcs_xml($fh, $repo, "changes", \@darcs_args, $dir, $file);
    } else {
        show_error('invalid-command', '400', 'Invalid command');
    }
}

# run a self-test when the --check argument is supplied
if ($ARGV[0] eq '--check') {
    (read_conf("css_styles", "abc") ne "abc") ||
        die "cannot read config file: $!\n";

    (`$darcs_program`) ||
        die "cannot execute darcs as '$darcs_program': $!\n";
    (`$xslt_program`) ||
        die "cannot execute xstlproc as '$xslt_program': $!\n";

    (-d $repository_root && -r $repository_root) ||
        die "cannot read repository root directory '$repository_root': $!\n";
    (-d $template_root && -r $template_root) ||
        die "cannot read template root directory '$template_root': $!\n";
    (-f $css_styles) ||
        die "cannot read css stylesheet '$css_styles': $!\n";
    (-f $xml_errors) ||
        die "cannot read error messages '$xml_errors': $!\n";

    exit 0;
}

# handle the CGI request
respond();

