#!/usr/bin/perl

# Copyright (c) 2004 by Charles A. Loomis, Jr. and Le Centre National de
# la Recherche Scientifique (CNRS).  All rights reserved.

# The software was distributed with and is covered by the European
# DataGrid License.  A copy of this license can be found in the included
# LICENSE file.  This license can also be obtained from
# http://www.eu-datagrid.org/license.

use strict;

my $ignore_missing_dep = 0;

while ( ( $#ARGV > 0 ) && ( $ARGV[0] =~ /^-/ ) ) {
  if ( $ARGV[0] eq '-h' ) {
    info();
    exit(0);
  }
  elsif ( $ARGV[0] eq '-i' ) {
    $ignore_missing_dep = 1;
  }
  shift;
}

# There must be at least the file containing the provides database and
# at least one directory.
if ( $#ARGV < 1 ) {
  info();
  exit(1);
}

# Check all is well with the DB file.
my $providedb = shift(@ARGV);
if ( !-f $providedb ) {
  print STDERR "First argument must be provides DB file.\n";
  info();
  exit(1);
}

# Read in the database and put into hash.
my %provides;
my $distrib_arch;
open IN, "<$providedb";
while (<IN>) {
  chomp;
  m%^\s*(\S+)\s*\|\s*(\S+)\s*\|\s*(\S+)\s*$%;
  if ( defined( $provides{$1} ) && ( $provides{$1}->{rpm} != $2 ) ) {
    print STDERR "Duplicate provides with different value:\n";
    print STDERR "  Existing value: $provides{$1}->{rpm}\n";
    print STDERR "  New value:      $2\n\n";
  }
  $provides{$1}->{rpm} = $2;

  #unless (defined($provides{$1}->{archs})) {
  #    $provides{$1}->{archs} = {};
  #}
  $provides{$1}->{archs}->{$3} = 1;

  if ( ( $1 eq "kernel" ) && ( $2 eq "kernel" ) ) {
    if ( $3 eq "i686" ) {
      $distrib_arch = "i386";
    }
    else {
      $distrib_arch = $3;
    }

    #print STDERR "Distribution architecture = $distrib_arch\n";
  }
}
close IN;

# Collect all of the RPMs.
my $error = 0;
my @rpms  = ();
foreach my $dir (@ARGV) {

  # Is it really a directory?
  unless ( -d $dir ) {
    print STDERR "$dir is not a directory\n";
    $error = 1;
  }

  # Open it up and collect all rpms.
  opendir DIR, $dir;
  my @files = grep /\.rpm$/, map "$dir/$_", readdir DIR;

  # Push each onto the list.
  foreach my $file (@files) {
    push @rpms, $file;
  }
}

# Don't do anything if there was an error.
exit(1) if ($error);

# Collect all of the packages with their dependencies.  Duplicates
# are output to the file.  It is the responsibility of the stylesheet
# to do something sensible with the duplicates.
my %all;

# Loop over each rpm generating the package dependency information.
print "<pkgs>\n";
foreach my $rpm (@rpms) {

  # Get info from the RPM.
  my $rpminfo = `rpm -qp --queryformat '%{name} %{version} %{release} %{arch}\n' $rpm 2>/dev/null`;
  chomp($rpminfo);
  my ( $rpmname, $rpmver, $rpmrel, $rpmarch ) = split '\s+', $rpminfo;

  # Generate the dependency information for this RPM.
  # For each dependency, search the RPM providing the dependency
  # (as indicated by %provides) with the appropriate arch match.
  my %deps;
  open IN, "rpm -qp --requires $rpm 2>/dev/null |";
  while (<IN>) {
    chomp;
    m%^\s*(\S+)(\s+.*)?%;
    if ( defined( $provides{$1} ) && ( $provides{$1}->{rpm} ne $rpmname ) ) {
      my $deparch = undef;
      my @arch_candidates;

      # If rpmarch is i386, be sure to search for i686 first
      push @arch_candidates, "i686" if $rpmarch eq "i386";
      # Add same arch as the processed RPM
      push @arch_candidates, $rpmarch;
      # If rpmarch is i686, look also for i386
      push @arch_candidates, "i386" if $rpmarch eq "i686";
      # Next add distribution arch is not the same as the currently processed RPM
      push @arch_candidates, $distrib_arch if $rpmarch ne $distrib_arch;
      # Hack for SL 3.05 x86-64
      push @arch_candidates, "i386"
        if ( $rpmarch eq "x86_64" ) and ( $rpmname eq "openoffice.org-style-gnome" );
      # And last, add noarch (should not conflict with anything else
      push @arch_candidates, "noarch" if $rpmarch ne "noarch";
      foreach my $arch (@arch_candidates) {
        if ( defined( $provides{$1}->{archs}->{$arch} ) ) {
          $deparch = $arch;
          last;
        }
      }
      unless ( defined($deparch) ) {
        print STDERR "No valid arch found for $rpmname ($rpmarch) dependency $1\n";
        print STDERR "Available archs for $1 : " . join(',', keys(%{$provides{$1}->{archs}})) . "\n";
        unless ($ignore_missing_dep) {
          print STDERR "Use option -i to ignore missing dependencies.\n";
          exit(2);
        }
      }
      $deps{ $provides{$1}->{rpm} } = $deparch;
    }
  }
  close IN;

  # Generate the entry in the XML file.
  my $value = "  <pkg>\n"
    . "    <id>$rpmname</id>\n"
    . "    <version>$rpmver-$rpmrel</version>\n"
    . "    <arch>$rpmarch</arch>\n";
  foreach ( sort keys %deps ) {
    $value .= "    <dep><rpm>$_</rpm><arch>$deps{$_}</arch></dep>\n";
  }
  $value .= "  </pkg>\n";

  # Save it.
  print $value;

}

# Generate distribution architecture entry

if ($distrib_arch) {
  my $value = "  <arch>\n" . "    <id>$distrib_arch</id>\n" . "  </arch>\n";
  print $value;
}

print "</pkgs>\n";

exit;

# Print out information on how to use this script.
sub info {

  print << "EOF"

This script takes a file generated from the rpmProvides.pl script
containing the package dependency information and a list of
directories containing RPMs.  It generates an XML file suitable 
for use with the comps.xsl stylesheet.   

The script extracts the necessary information via the rpm command.
Consequently, this must be available from the standard PATH.
Extracting the necessary information may take some time for a large
number of RPMs.

Usage:

./rpmRequires.pl [-i] <provides DB file> <directory> [<directories...>]

    -i : don't consider missing dependencies as an error. Just ignore them.

Giving no arguments prints this help message.

IMPORTANT NOTE: Duplicates (files provided by 2 different packages)are
    not output from this script.  It is the responsibility of the 
    consumer to do something sensible with these duplicates (often they
    can be ignored).

EOF
    ;

}
