#!/usr/bin/perl -w
# Copyright (c) Dave Horsfall.
# stolen from slapd debian package and extended for skolelinux 
# by Andreas Schuldei <andreas@debian.org>
# License remains GPL
#
# RDNCHK
#
# Given a slapcat input file, check for mismatched DN/RDN pairs etc.
# Optionally make fixes (use with care).
#
# The data structure is a hash of references to hashes of anonymous lists:
#
#   $entries{$dn} =	# $dn has been normalised
#   {
#     origDN => "original DN",
#     attr1 => [ "value1-a", "value1-b" ],
#     attr2 => [ "value2" ]
#   }
#
# which is accessed as (e.g):
#
#   @{entries{$dn}{"attr1"}}
#
# to return an array of the value(s) of $dn's attr1.
#
# Note that this structure is optimised for access to the DNs, *not*
# for searches.
#
# The DN is low-cased and leading/trailing/multiple spaces stripped
# (and the original stored for posterity).
#
# I assume that caseIgnoreMatch applies across the board, as otherwise
# it's too damned difficult.  This only fails, in practice, for encoded
# fields such as passwords, but I'm not looking at those (passwords are
# rarely, if ever, a candidate for being an RDN).  Remember: the specific
# purpose of this program is to perform a quick but reasonably thorough
# check for DN/RDN consistency, and it sorta grew from there.
#
# We can't use Perl Net::LDAP::LDIF, because it's not a core module
# (too hard to maintain our remote branches when upgrading).
#
# TODO:
#	Check custom stuff:
#
#	    ciDefPrinter is single-value per ciPrinterClass.
#	    Fundamentally difficult, because these are keys
#	    into printcap, not LDAP.
#

# Things to add for skolelinux:
# can we add samba attributes and mappings per default?

use Data::Dumper;
use Getopt::Long;
use MIME::Base64;
use strict;
use diagnostics;

my $origDN = '.origDN';    # Attribute stores original DN
my $maxID = 0;
my %flags;         # like "have_nextID", "have_capability"
my (%users, %groups);
my %privGroups;    # to keep track of users and their priv groups
my %classGroups;   # for groups that could be classes
my %authGroups;    # for groups that could be authority groups
my @ou_list = qw/people attic machines pam domains group variables/; # list of organisational units needed
my @authority_list = qw/teachers students admins jradmins/; # list of organisational units needed

# the command line options

my ($opt_dump, $opt_fix, $opt_inheritance, $opt_suffix, $opt_write,
    $opt_no_auth, $opt_org);

# some big hashes
my (%entries, %schema, @single);

my $suffix;

&parse_options;
$opt_write = 1 if $opt_fix;

#
# Process each entry.
# A list (returned in @_) holds each line, with the DN first.
#
while ( @_ = &GetEntry )    # Loop per entry (exit on EOF)
{
    my $dn = shift @_;

    # Check if base64 encoded
    next if !$dn =~ /^dn::? /i;
    my $encoded;
    if ( $dn =~ /^dn:: /i ) {
        $dn =~ s/dn:: (.*)/$1/;
        $dn = decode_base64($dn);
        $dn =~ s/\s$//;
        $encoded = 1;
    }
    else {
        $dn =~ s/dn: (.*)/$1/;
        $encoded = 0;
    }
    my $cdn = &canon($dn);
    $entries{$cdn}{$origDN} = $dn;
    $entries{$cdn}{"encoded"} = $encoded;

    #
    # Infer the suffix.
    # Assume it's the shortest DN.
    #
    if ( !$opt_suffix ) {
        $suffix = $cdn
	    if ( !defined $suffix ) || ( length $cdn < length $suffix );
    }

    #
    # Extract the first component (the RDN)
    # for later tests.
    #
    my ( $rdn, undef ) = split ( /,/, $cdn );
    my ( $rdnattr, $rdnval ) = split ( /=/, $rdn );

    #
    # Get the attributes/values.
    # Attributes are low-cased.
    #
    for (@_) {
        my ( $attr, $val ) = split ( /\s/, $_, 2 );    # In case of "::"
        $attr =~ s/://;
        if ( $attr =~ /:/ )                         # Must be binary (base-64)
        {
            $attr =~ s/://;
            $val = &demime($val);
        }
        push @{ $entries{$cdn}{ lc $attr } }, $val;
    }

    # 
    # check for old  nextID
    #
    if( $cdn =~ /^uid=nextid,ou=variables,/i ) {
	delete $entries{$cdn};
	next;
	# kill old nextID on sight. it generates 
	# annoying messages, since it is quite inconsistent.
    }

    #
    # Does the RDN exist?
    #
    if ( !defined @{ $entries{$cdn}{$rdnattr} } ) {
        print STDERR "dn: $dn\nMissing RDN";
        if ($opt_fix) {
            push @{ $entries{$cdn}{$rdnattr} }, $rdnval;
            print STDERR "; inserted \"$rdnattr=$rdnval\"";
        }
        print STDERR "\n\n";
    }

    #
    # And how many?  Multiples are permitted
    # in some contexts, but not in ours.
    #
    my $attrs = $entries{$cdn}{$rdnattr};    # Actually a reference
    my $nrdn  = @{$attrs};
    if ( $nrdn > 1 ) {
        print STDERR "dn: $dn\nMultiple RDNs: \"@{$attrs}[0]\"";
        for ( my $i = 1 ; $i < $nrdn ; $i++ ) {
            print STDERR ", \"@{$attrs}[$i]\"";
        }
        if ($opt_fix) {
            print STDERR "; using \"$rdnval\"";
            $entries{$cdn}{$rdnattr} = [$rdnval];
        }
        print STDERR "\n\n";
    }

    #
    # Do they match?
    #
    if ( defined @{$attrs} && $rdnval ne &canon( @{$attrs}[0] ) ) {
        print STDERR "dn: $dn\nMismatched RDN: \"$rdnattr=@{$attrs}[0]\"";
        if ($opt_fix) {
            print STDERR "; using \"$rdnval\"";
            $entries{$cdn}{$rdnattr} = [$rdnval];
        }
        print STDERR "\n\n";
    }

    #
    # Check single-value attributes.
    #
    foreach my $attr (@single) {
        my $nval  = 0;
        my $attrs = $entries{$cdn}{ lc $attr };
        $nval = @{$attrs} if defined @{$attrs};
        if ( $nval > 1 ) {
            print STDERR
              "dn: $dn\nMultiple attrs for \"$attr\": \"@{$attrs}[0]\"";
            for ( my $i = 1 ; $i < $nval ; $i++ ) {
                print STDERR ", \"@{$attrs}[$i]\"";
            }
            if ($opt_fix) {
                print STDERR "; using \"@{$attrs}[0]\"";
                $entries{$cdn}{ lc $attr } = [ @{$attrs}[0] ];
            }
            print STDERR "\n\n";
        }
    }

    #
    # Check the objectclass inheritance and hirarcy.
    #
    if ($opt_inheritance)    # Will soon be mandatory
    {
	my $obj_ref = objlist2hash( $entries{$cdn}{'objectclass'} );
	$obj_ref->{top} = 1 # it might be that top is not there yet. 
	    unless $obj_ref->{alias};

	# remove unknown object classes
	foreach my $i ( keys %$obj_ref ) {
            next if $i eq "top";    # top is topless :-)
            unless ( $schema{objectclass}{$i} ) {
		# check if objectclass is known in the first place
                print STDERR "dn: $dn\nUnknown objectclass: \"$i\"";
                if ($opt_fix) {
                    print STDERR "; ignored";
                    delete $obj_ref->{$i};
                }
                print STDERR "\n\n";
            }
	}
	
	#
        # check if we have one and only one structural 
	# object class and remove superfluous object classes.
	#
	my %structural_objectclasses;
	foreach my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    
	    if ( $schema{objectclass}{$i}{structural} ) {
		$structural_objectclasses{$i}=1;
	    }
	}
	if  (1 < scalar keys %structural_objectclasses ) {
	    print STDERR "dn: $dn\nMore then one structural objectclass:";
	    for my $structural_objectclass ( keys %structural_objectclasses ) {
		print STDERR " \"$structural_objectclass\"";
	    }
	    print STDERR ".";
	    my $removable_objectclasses_ref = 
		resolve_structural_clash ( \%structural_objectclasses, $entries{$cdn} );
	    if ($opt_fix) {
		print STDERR " Removing ";
		for my $obj_class ( @$removable_objectclasses_ref ) {
		    print STDERR " \"$obj_class\"";
		    delete $obj_ref->{$obj_class};
		}
		print STDERR ".\n\n";
	    }
	}
	#
	# Now we find and add missing superior objectclasses
	#
	foreach my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    for my $sup ( @{ $schema{objectclass}{$i}{sup} } ) {
		unless ( $obj_ref->{$sup} ) {
		    print STDERR "dn: $dn\nNo sup for \"$i\"";
		    if ($opt_fix) {
			print STDERR "; inserted";
			$obj_ref->{$sup} = 1;
		    }
		    print STDERR "\n\n";
		}
	    }  
	} 

	#
	# see if all mandatory attributes are there
	#
	my %must;
	for my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    
	    for my $attrib_must ( @{ $schema{objectclass}{$i}{must} } ) {
		$must{ $attrib_must } = 1;
	    }
	}
	my %must_missing;
      MUST:
	for my $i ( keys %must ) {
	    next if ($i eq "cn"            or # there is no schema entry for cn!
		     $i eq "objectclass");    # or for objectclass
	    for my $name ( @{ $schema{attributetype}{$i}{names} } ) {
		next MUST if $entries{$cdn}{$name};
	    }
	    $must_missing{$i} = 1;
	}
	for my $i ( keys %must_missing ) {
	    print STDERR "dn: $dn\nAttribut \"$i\": mandatory but missing";
	    if ($opt_fix) {
		print STDERR "; inserted";
		$entries{$cdn}{$i} = [ "" ]; # FIXME: figure out proper syntax
	    }
	    print STDERR "\n\n";
	}

	# see if any attributes are orphans
	# everything is allowed with extensibleobject. skip this case
	unless ( $obj_ref->{extensibleobject} ) { 

	    my %attrib_all = %must;
	    foreach my $i ( keys %$obj_ref ) {
		
		next if $i eq "top";    # top is topless :-)
		
		for my $attrib_may ( @{ $schema{objectclass}{$i}{may} } ) {
		    $attrib_all{ $attrib_may } = 1;
		}
	    }
	    my %attrib_orphan;
	    for my $attrib ( keys %{ $entries{$cdn} } ) {
		next if ".origDN"     eq $attrib or
		    "encoded"         eq $attrib or
		    "objectclass"     eq $attrib or
		    "creatorsname"    eq $attrib or
		    "createtimestamp" eq $attrib or
		    "modifiersname"   eq $attrib or
		    "modifytimestamp" eq $attrib or
		    "cdn"             eq $attrib or
		    "dn"              eq $attrib;
		unless ( $attrib_all{ $attrib } ) {
		    $attrib_orphan{ $attrib } = 1;
		}
	    }
	    for my $i ( keys %attrib_orphan ) {
		print STDERR "dn: $dn\nAttribut \"$i\": is not part of objectclasses";
		for my $objclss ( keys %$obj_ref) {
		    print STDERR  " \"$objclss\""; 
		}
		if ($opt_fix) {
		    print STDERR "; removed";
		    delete $entries{$cdn}{$i}; 
		}
		print STDERR "\n\n";
	    }
	} # extensibleobject.
	# at this point we should have an entry with all needed sups, nicely cleand up 
	objhash2list( $entries{$cdn}{objectclass} , $obj_ref ); # back to the old format
    } # inheritance

    #
    # check for organisational dn
    #
    if( $cdn =~ /^dc=/ ) {
	$flags{base} = $cdn;
    } 

    #
    # check for necessary organisationalUnits
    #
    for my $ou (@ou_list){
	if ($ou eq "machines" and $cdn =~ /^ou=$ou,ou=people/) {
	    $flags{$ou} = $cdn;
	    last;
	}
	elsif( $cdn =~ /^ou=$ou,/ ) {
	    
            $flags{$ou} = $cdn;
            last;
        }
    } 

    #
    # check for the used authorityGroups
    #
    for my $auth_group (@authority_list){
        if( $cdn =~ /^cn=$auth_group,ou=group,/i ) {
            $flags{$auth_group} = $cdn;
            last;
        }
    } 

    # 
    # check for nextID
    #
    if( $cdn =~ /^cn=nextid,ou=variables,/i ) {
	$flags{nextID} = $cdn;
    }
    
    #
    # check for capabilities
    #
    if( $cdn =~ /^cn=capabilities,ou=variables,/i ) {
	$flags{capabilities} = $cdn;
    }

    #
    # add lisGroup to groups
    # and check group for gidNumber/maxID
    #
    if( $cdn =~ /ou=group/i ) {
        #
        # check for capabilities
        #
        if( $cdn =~ /^cn=genagegp,ou=group,/i and  
            $entries{$cdn}{grouptype}[0] eq "age_group" ) {
            $flags{generic_age_group} = $cdn;
        }
        
	my $obj_ref = objlist2hash( $entries{$cdn}{objectclass} );

        # make the checks easier, this has no effect if not
        # written back (if the cases below dont apply.
	delete $obj_ref->{top} if $obj_ref->{top};
	
        # some old ldif: 
        #   posixGroup switches to lisAclGroup+lisGroup
	if ( ( $obj_ref->{posixgroup}        and 
	       (1 == keys %$obj_ref)
	       ) or 
	     ( $obj_ref->{posixgroup}        and
	       $obj_ref->{sambagroupmapping} and
	       (2 == keys %$obj_ref)
	       )
	     ) 
	{
	    $entries{$cdn}{"grouptype"} = [ "dontcare" ] 
		unless $entries{$cdn}{"grouptype"}; 
	    $obj_ref->{lisgroup}    = 1;
	    $obj_ref->{top}         = 1;
	    objhash2list( $entries{$cdn}{objectclass}, $obj_ref );
	}
	
	if ( $entries{$cdn}{"gidnumber"} ) {
	    # save entry for later
	    $groups{$cdn} = 1;
	    # search for highes ID
	    $maxID = $entries{$cdn}{"gidnumber"}[0] 
		if ( $entries{$cdn}{"gidnumber" }[0] > $maxID ); 
	}
    }

    #
    # check account for uidNumber/maxID
    #
    if( $cdn =~ /ou=people/i ) {
	if ( $entries{$cdn}{"uidnumber"} ) {
	    # save entry for later
	    $users{$entries{$cdn}{"gidnumber"}[0]} = $cdn;
	    # search for highes ID
	    $maxID = $entries{$cdn}{"uidnumber"}[0]
	    if ( $entries{$cdn}{"uidnumber"}[0] > $maxID ); 

	    my $obj_ref = objlist2hash( $entries{$cdn}{objectclass} );
	    $obj_ref->{top}           = 1;
	    $obj_ref->{shadowaccount} = 1;
	    delete $obj_ref->{account};
	    objhash2list( $entries{$cdn}{objectclass}, $obj_ref );
	}
    }
}    # main loop

#
# Make sure each entry has a parent.
# For now, we kill orphans on sight...
#
$suffix = $opt_suffix if $opt_suffix;
foreach my $thisdn ( keys %entries ) {

    my $i = $thisdn;
    $i =~ s/[^,]*,//;
    if ( !$entries{$i} && $thisdn ne &canon($suffix) ) {
        print STDERR "dn: $thisdn\nOrphan";
        if ($opt_fix) {
            print STDERR "; deleted";
            delete $entries{$thisdn};
        }
        print STDERR "\n\n";
    }

    # Fix up the suffix dn if it's our mess, adding a structural objectclass.
    if ( $thisdn eq &canon($suffix) ) {
	my $obj_ref = objlist2hash( $entries{$thisdn}{'objectclass'} );
	if ( ( 1 == keys %$obj_ref  
	       and $obj_ref->{dcobject} )
	     or 
	     ( 2 == keys %$obj_ref  
	       and $obj_ref->{dcobject} 
	       and $obj_ref->{top} )
	     )
        {
            if ( defined($opt_org) ) {
                push ( @{ $entries{$thisdn}{'objectclass'} }, 'organization' );
                push ( @{ $entries{$thisdn}{'o'} },           $opt_org );
            }
            else {
                push ( @{ $entries{$thisdn}{'objectclass'} }, 'domain' );
            }
        }

        # check for $classes == dcObject.
    }
}

for my $ou (@ou_list){
    unless( $flags{$ou} ) {
        my $base = $flags{base};
	if ($ou eq "machines") {
	    $base = "ou=people,$base";
	}
        my $dn = "ou=$ou,$base";
        $entries{$dn}{objectclass} = ['organizationalUnit',"top"];
        $entries{$dn}{ou} = [ $ou ];
        $entries{$dn}{"encoded"} = 0;
        $entries{$dn}{$origDN} = $dn;
    }
} 

unless ($opt_no_auth) {
    for my $auth_group (@authority_list){
	unless( $flags{$auth_group} ) {
	    my $base = $flags{base};
	    my $dn = "cn=$auth_group,ou=Group,$base";
	    $entries{$dn}{objectclass} = ["posixGroup", "top", "lisGroup", "lisAclGroup"];
	    $entries{$dn}{cn} = [ $auth_group ];
	    $entries{$dn}{grouptype} = [ "authority_group" ];
	    $entries{$dn}{member} = [ "" ];
	    $entries{$dn}{gidnumber} = [ ++$maxID ];
	    $entries{$dn}{description} = [ $auth_group ];
	    $entries{$dn}{"encoded"} = 0;
	    $entries{$dn}{$origDN} = $dn;
	}
    } 
}

if( $flags{generic_age_group} ) {
    delete $entries{$flags{generic_age_group}};
}


GROUP_LOOP:
foreach my $group (keys %groups) {

    ## newer ldifs with grouptype
    
    if ($entries{$group}{grouptype} and 
	$entries{$group}{grouptype} eq 'authority_group') {
	upgrade_authority_group ($entries{$group});
	delete $groups{$group}; # remove the solved cases
	next GROUP_LOOP;
    }

    ## older ldifs without grouptype
 
    #dont fiddle with groups that have a valid type allready
    if ($entries{$group}{grouptype} and
	$entries{$group}{grouptype}[0] ne 'dontcare') {
	delete $groups{$group}; # remove the solved cases
#	print STDERR  Dumper( $entries{$group} );
	next GROUP_LOOP;
    }

    # check for private groups
    my $gidnumber = $entries{$group}{gidnumber}[0];
    # check if a user with the same id exists
    if ($users{$gidnumber} and 
	($entries{$users{$gidnumber}}{uid}[0] eq $entries{$group}{cn}[0])){
	$entries{$group}{grouptype} = ["private"]; #set the correct type
	delete $groups{$group}; # remove the solved cases
	next GROUP_LOOP;
    }

    # check for authority groups
    my $cn = $entries{$group}{"cn"}[0];
    if ( ($cn eq "teachers") or
	 ($cn eq "admins") or
	 ($cn eq "jradmins") or
	 ($cn eq "students"))
    {
	$entries{$group}{grouptype} = ["authority_group"]; #set the correct type
	upgrade_authority_group ($entries{$group});
	delete $groups{$group}; # remove the solved cases
	next GROUP_LOOP;
    }

    # and the rest are classes?

}

# set the remaining groups to type school_class 
foreach my $group (keys %groups) {
    $entries{$group}{grouptype} = ["school_class"]; #set the correct type
    delete $groups{$group}; # remove the solved cases
    next;    
} # now we should have no groups left...


# create nextID, or check its value if it exists.
if($flags{nextID}) {
    my $nextid_dn = $flags{nextID};
    my $nextID = $entries{$nextid_dn}{gidnumber}[0];
    unless ( $nextID > $maxID ) {
	$entries{$nextid_dn}{gidnumber}[0] = ++$maxID;
    }
}
else { # create a nextid entry
    my $base = $flags{base};
    my $dn = "cn=nextID,ou=Variables,$base";
    $entries{$dn}{objectclass} = ["posixGroup","top"];
    $entries{$dn}{cn} = ["nextID"];
    $entries{$dn}{structuralObjectClass} = ["posixGroup"];
    $entries{$dn}{gidnumber} = [ ++$maxID ];
    $entries{$dn}{"encoded"} = 0;
    $entries{$dn}{$origDN} = $dn;
}

if ( $flags{capabilities} ){
    # update capabilities if necessary
    my $dn = $flags{capabilities};
    my %caps;
    
    #split up capability array into hash, with version as value
    foreach my $cap ( @{$entries{$dn}{capability}} ) {
	($cap, my $ver) = split(/ /, $cap);
	$caps{$cap} = $ver;
    }
    
    # increase or create the capabilities we know of today...
    $caps{nextID}       = "1" 
	if ( !$caps{nextID}       or $caps{nextID}       < 1);
    $caps{groupType}    = "1" 
	if ( !$caps{groupType}    or $caps{groupType}    < 1);
    $caps{aclGroup}     = "1" 
	if ( !$caps{aclGroup}     or $caps{aclGroup}     < 1);
#    $caps{ageGroup}     = "2" 
#	if ( !$caps{ageGroup}     or $caps{ageGroup}     < 2);
    $caps{attic}     = "1" 
	if ( !$caps{attic}        or $caps{attic}        < 1);
    $caps{capabilities} = "1" 
	if ( !$caps{capabilities} or $caps{capabilities} < 1);
    
    # put the hash into a list again and store the data
    my @capabilities;
    foreach my $key (keys %caps) {
	push @capabilities, "$key " . $caps{$key}; 
    }
    $entries{$dn}{capability} = \@capabilities;
    
}
else { # create a capabilities field from scratch
    my $base = $flags{base};
    my $dn = "cn=capabilities,ou=Variables,$base";
    $entries{$dn}{objectclass} = ["lisLdapCapabilities","top"];
    $entries{$dn}{cn} = ["capabilities"];
    $entries{$dn}{structuralObjectClass} = ["lisLdapCapabilities"];
    $entries{$dn}{capability} = [ "nextID 1", 
				  "groupType 1", 
#				  "ageGroup 2", 
				  "attic 1", 
				  "capabilities 1",
                                  "aclGroup 1",
				 ]; 
    $entries{$dn}{"encoded"} = 0;
    $entries{$dn}{$origDN} = $dn;
}

print STDERR Dumper(%entries) if $opt_dump;

#
# Write out (possibly fixed) file if requested.
#
# The DN keys are sorted by length, which ensures that
# parents come before children.
#
if ($opt_write) {
    foreach my $dn ( sort { length($a) <=> length($b) } keys %entries ) {
        &write_out($dn);
    }
}

exit 0;

###########################################################################

#
# Canonicalise a string.
# Delete leading/trailing blanks around commas, and lowcase.
#
sub canon {
    ($_) = @_;
    s/\s+/ /g;    # Catch tabs as well
    s/ ,/,/g;
    s/, /,/g;
    lc;
}

#
# Write an entry to standard output.
#
# Ought to wrap at 78 cols as well.
#
sub write_out {
    my ($dn) = @_;
    my $odn = $entries{$dn}{$origDN};

    if ( $entries{$dn}{"encoded"} == 1 ) {
        my $encoded = encode_base64( $odn, "" );
        print "dn:: $encoded\n";
    }
    else {
        print "dn: $odn\n";
    }
    foreach my $attr ( keys %{ $entries{$dn} } ) {
        next if $attr eq $origDN or $attr eq "encoded" ;
        foreach my $value ( @{ $entries{$dn}{$attr} } ) {
            print "$attr:";

            if ( $value and ( $attr =~ /userpassword/i
                || $value =~ /(^[ :]|[\x00-\x1f\x7f-\xff])/ )  )
            {
                print ": ", &enmime( $value, "" );
            }
            elsif ( defined $value and "" ne $value) { 
                print " $value";
            }
            print "\n";

        }
    }
#    print STDERR  "\n";
    print "\n";
}

sub INIT {

    my $schema_ref = parse_schemas();
    %schema = %$schema_ref;

    #
    # Single-value attributes.
    #
    @single = (
        "ciAppType",    "ciDBPath", "ciDomainName", "ciLdapEnabled",
        "ciLdapServer", "ciOSType", "ciPortNum",    "ciPrinterClass",
        "ciRegion",     "ciStatus",
    );

    
    #
    # Random stuff.
    #
    $/ = "";    # Read input in paragraph mode
}

#
# Process options.
#
sub parse_options {
    $SIG{'__WARN__'} = sub { die $_[0] };    # Exit on bad options

    Getopt::Long::Configure("bundling");     # Old-style (-xyz, --word)
    GetOptions(
        "--dump" => \$opt_dump,    # Dump data structure
        "-D"     => \$opt_dump,

        "--fix" => \$opt_fix,      # Fix errors if possible
        "-f"    => \$opt_fix,      # (also implies "write")

        "--inheritance" => \$opt_inheritance,    # Check obj inheritance
        "-i"            => \$opt_inheritance,    # (too many false alarms)

        "--suffix=s" => \$opt_suffix,            # Specify directory suffix
        "-s=s"       => \$opt_suffix,

        "--write" => \$opt_write,                # Write ordered file
        "-w"      => \$opt_write,

        "--no-add-auth-groups" => \$opt_no_auth, # dont add authority groups
        "-n"      => \$opt_no_auth,

        "--org=s" => \$opt_org,                  # Organization to use for
        "-o=s"    => \$opt_org,                  # fixing up the suffix
    );
}

#
# Get a complete entry as a list of lines.
# We use the trick of setting the input delimiter
# to "", to read a paragraph at a time, so we can
# join continued lines.
#
sub GetEntry {
    my @a;
    do {
        $_ = (<>);
        return () if !defined;    # EOF
        s/$/\n/;                  # In case we strip last newline below
        s/#.*\n//g;               # Comments
        chomp;                    # Always strips >= 2 newlines
        s/\n //g;                 # Join lines
        @a = split /\n/;
    } while ( @a < 2 );    # Skips phantom entries (caused by comments)
    return @a;
}

#
# Given a string, return a de-mimed version.
# Can't use MIME::Base64 because it's not a core module.
# Instead, I pinched the code from it...
#
sub demime {
    local ($^W) = 0;    # unpack("u",...) gives bogus warning in 5.00[123]

    my $str = shift;
    $str =~ tr|A-Za-z0-9+=/||cd;    # remove non-base64 chars
    if ( length($str) % 4 ) {
        require Carp;
        Carp::carp("Length of base64 data not a multiple of 4");
    }
    $str =~ s/=+$//;                # remove padding
    $str =~ tr|A-Za-z0-9+/| -_|;    # convert to uuencoded format

    return join '',
      map( unpack( "u", chr( 32 + length($_) * 3 / 4 ) . $_ ),
      $str =~ /(.{1,60})/gs );
}

#
# En-mime same.
# I didn't write this bletcherous code either.
#
sub enmime {
    my $res = "";
    my $eol = $_[1];
    $eol = "\n" unless defined $eol;
    pos( $_[0] ) = 0;    # ensure start at the beginning

    $res = join '',
      map( pack( 'u', $_ ) =~ /^.(\S*)/, ( $_[0] =~ /(.{1,45})/gs ) );

    $res =~ tr|` -_|AA-Za-z0-9+/|;    # `# help emacs
                                      # fix padding at the end
    my $padding = ( 3 - length( $_[0] ) % 3 ) % 3;
    $res =~ s/.{$padding}$/'=' x $padding/e if $padding;

    # break encoded string into lines of no more than 76 characters each
    if ( length $eol ) {
        $res =~ s/(.{1,76})/$1$eol/g;
    }
    return $res;
}

sub upgrade_authority_group {
    my ($group_ref) = @_;
    
    my $obj_ref = objlist2hash( $group_ref->{objectclass} );
    
    # make the checks easier:
    delete $obj_ref->{top} if $obj_ref->{top};

    # some old ldif: 
    #   posixGroup switches to lisAclGroup+lisGroup
    if ( ( $obj_ref->{posixgroup}        and
	   $obj_ref->{lisgroup}          and 
	    2 == keys %$obj_ref 
	   ) or 
	 ( $obj_ref->{posixgroup}        and
	   $obj_ref->{lisgroup}          and 
	   $obj_ref->{sambagroupmapping} and 
	   3 == keys %$obj_ref 
	   ) 
	 ) 
    {
	delete $obj_ref->{posixgroup};
	$obj_ref->{lisaclgroup} = 1;
	$obj_ref->{lisgroup}    = 1; # FIXME unnecessary, but does not hurt either
	$obj_ref->{top}         = 1;
	objhash2list( $group_ref->{objectclass} , $obj_ref );
	for my $memberUid ( @{$group_ref->{memberuid}} ) {
	    push ( @{ $group_ref->{member} }
		   , "uid=$memberUid,ou=People," . $flags{base} );
        }
        unless ( defined $group_ref->{member} ) {
            $group_ref->{member} = [ "" ]
        }
    } 
    # initial converstion from posixgroup to lisgroup
    # FIXME: isnt this partly done earlier on, allready?
    # this should be done more consistent.
    elsif ( ( $obj_ref->{posixgroup}        and
	      $obj_ref->{sambagroupmapping} and 
	      2 == keys %$obj_ref 
	      ) or
	    ( $obj_ref->{posixgroup}        and
	      1 == keys %$obj_ref 
	      )
	    )
    {
	delete $obj_ref->{posixgroup};
	$obj_ref->{lisaclgroup} = 1;
	$obj_ref->{lisgroup}    = 1;
	$obj_ref->{top}         = 1;
	objhash2list( $group_ref->{objectclass}, $obj_ref );
	for my $memberUid ( @{ $group_ref->{memberuid} } ) {
	    push ( @{ $group_ref->{member} }
		   , "uid=$memberUid,ou=People," . $flags{base} );
        }
        unless ( defined $group_ref->{member} ) {
            $group_ref->{member} = [ "" ]
        }
    } 
    elsif ( ( $obj_ref->{lisaclgroup}       and
	      $obj_ref->{lisgroup}          and 
	      2 == keys %$obj_ref  
	      ) or
	    ( $obj_ref->{lisaclgroup}       and
	      $obj_ref->{lisgroup}          and 
	      $obj_ref->{sambagroupmapping} and 
	      3 == keys %$obj_ref  
	      )
	    ) 
    {
	# check if we have members 
	unless ( $group_ref->{member} ) {
	    for my $memberUid ( @{$group_ref->{memberuid}} ) {
		push ( @{ $group_ref->{'member'} }
		       , "uid=$memberUid,ou=People," . $flags{base} );
	    }
	}
        unless ( $group_ref->{member} ) {
            $group_ref->{member} = [ "" ]
        }
    }
    else {
	die "unknown ldif configuration in $! " . Dumper ($group_ref); 
    }
    $group_ref->{structuralobjectclass} = [ "lisaclgroup" ];
}

sub read_config {
    my ($file) = @_;
    
    open CONFIG,   "<  $file"  or die "can't open $file: $!";

    my %config;
    while ( <CONFIG> ) {
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	my ($var, $value) = split(/\s*=\s*/, $_, 2);
	$config{$var} = $value;
    }
    
    close CONFIG;
    
    return \%config;
}

sub read_slapd_config {
    my ($file) = @_;
    
    open CONFIG,   "<  $file"  or die "can't open $file: $!";

    my $seperator = $/; # save the seperator since it is non-standard
    undef $/;
    my $whole_file =  <CONFIG>; # sluuuurp
    $whole_file =~ s/\n(?!\n)\s+/ /g; # merge logical line as the ldap config parser does
    $/= $seperator; # restore the original line seperator;

    my @whole_file = split (/\n/, $whole_file);


    my %config;
    while ( @whole_file ) {
	$_ = pop @whole_file;
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	my ($var, $value) = split(/\s+/, $_, 2);
	push @{ $config{$var} }, $value; 

	# this does not keep the order of the configuration easily
	# accessable, which is bad for things like databases and
	# suffixes but does not matter for the include lines of the
	# schemas. With some efford one could pirce together the order
	# by getting it from the order in the anonymous arrays.

    }
    
    close CONFIG;
    
    return \%config;
}

sub extract_system_schemas {
    my ($core_schema, $schema_raw_ref) = @_;

    while ( $core_schema ) {
	$core_schema =~ s/^.*?\n\# system schema\n\#(.*?\))(\n\n.*)$/$2/s;
	last unless $1;
	my $core_def = $1;
	$core_def =~ s/\n(?!\n)\#\s+/ /g;
	chomp $core_def;
	$core_def =~s/^\s+//;
	$core_def =~s/\s+$//;
	next unless length $core_def;
	push @$schema_raw_ref, "$core_def\n"; 
    }
}

sub read_schema {
    my ($file) = @_;
    
    open SCHEMA,   "<  $file"  or die "can't open $file: $!";

    my $seperator = $/; # save the seperator since it is non-standard
    undef $/;
    my $whole_file =  <SCHEMA>; # sluuuurp

    my @schema_raw;    
    if ($file eq "/etc/ldap/schema/core.schema") {
	# There are some system schema entries which are hard coded
	# into openLdap.  They are marked "system schema" in the
	# core.schema. we try to detect them and remove the comments
	# in front of those.
	extract_system_schemas( $whole_file, \@schema_raw);
	
    }

    $whole_file =~ s/\n(?!\n)\s+/ /g; 
    # this is dubious, since we should watch not lines starting with 
    # whitespaces but balance the parantecies. but this works well.
    $/= $seperator; # restore the original line seperator;

    
    my @whole_file = split (/\n/, $whole_file);

    
    while ( @whole_file ) {
	$_ = pop @whole_file;
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	push @schema_raw, "$_\n"; 
    }
    
    close SCHEMA;
    return \@schema_raw;
}


sub find_slapd_config {

    my $defaults = "/etc/default/slapd";
    my $slapd_defaults_ref;
    
    if ( -f $defaults ) {
	$slapd_defaults_ref = read_config( $defaults );	
    }

    unless ( $slapd_defaults_ref->{SLAPD_CONF} and 
	 -f $slapd_defaults_ref->{SLAPD_CONF} ) 
    {
	$slapd_defaults_ref->{SLAPD_CONF} = "/etc/ldap/slapd.conf";
    }
	
    return $slapd_defaults_ref->{SLAPD_CONF};

}

sub parse_slapd_config {
    
    my ($slapd_config_file) = @_;
    
    my $slapd_config_href = read_slapd_config( $slapd_config_file );
    
    return $slapd_config_href;
}

sub find_active_schemas {
    my ($slapd_config_href) = @_;

    return \@{ $slapd_config_href->{include} }; 
}
sub preprocess_schemas {
    my ( $schemas_list_ref ) = @_;
    
    my @schemas_raw;
    
    for my $file ( @{$schemas_list_ref} ) {
	push @schemas_raw, @{ read_schema( $file ) }; 
    }
    return \@schemas_raw;
}

sub get_used_schemas {

    my $slapd_config_path = find_slapd_config();
    my $slapd_config_href = parse_slapd_config( $slapd_config_path );
    my $schemas_list_aref = find_active_schemas( $slapd_config_href );
    my $schemas_raw_aref = preprocess_schemas( $schemas_list_aref );
    return $schemas_raw_aref;
}

sub parse_schemas {

    my $schemas_raw_aref = get_used_schemas();
    
    my %schema;
    while ( @$schemas_raw_aref ) {
	$_= pop @$schemas_raw_aref;
	chomp;

	#poor man`s parser

	my ( $type ) =
	    /^(\w+)\s/;
	
	my ( $structural ) = 
	    /^.*\s(STRUCTURAL)\s.*$/;
	
	my ( $auxiliary ) = 	
	    /^.*\s(AUXILIARY)\s.*$/;
	
	my ( $description ) = 
	    /^.*\s+DESC\s+\'([^\']+)\'.*$/;

	my ( $syntax ) = 	
	    /^.*\s+SYNTAX\s+([\d\.\{\}]+).*$/;


	my @names;
 	if ( /^.*\s+NAME\s+\(\s*\'([\w\s\']+)\'\s*\).*$/  ) {
	     @names = split(/\'\s+\'/, lc $1);
	}
	elsif ( /^.*\s+NAME\s+\'(\w+)\'\s.*$/ ) {
	    push @names, lc $1;
	}
	
	my @sup;
	if ( /^.*\s+SUP\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @sup = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+SUP\s+(\w+)\s.*$/ ) {
	    push @sup, lc $1;
	}
	
	my @must;
	if ( /^.*\s+MUST\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @must = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+MUST\s+(\w+)\s.*$/ ) {
	    push @must, lc $1 ;
	}
	
	my @may;
	if ( /^.*\s+MAY\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @may = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+MAY\s+(\w+)\s.*$/ ) {
	    push @may, lc $1 ;
	}

	unless ($type eq "attributetype" or 
		$type eq "objectclass"   or 
		@names ) 
	{
	    print STDERR "$_\n"; 
	}
	else {
	    for my $name ( @names ) {
		$name = lc $name;

		$schema{$type}{$name}{names}       = \@names;
		$schema{$type}{$name}{description}= $description 
		    if $description;
		$schema{$type}{$name}{syntax}     = $syntax
		    if $syntax;
		$schema{$type}{$name}{structural} = 1
		    if $structural;
		$schema{$type}{$name}{auxiliary}  = 1
		    if $auxiliary;
		$schema{$type}{$name}{must}       = \@must 
		    if @must;
		$schema{$type}{$name}{may}        = \@may
		    if @may;
		$schema{$type}{$name}{sup}        = \@sup 
		    if @sup;
	    }
	}
    }
    return \%schema;
}

sub objlist2hash {
    my ($list_ref) = @_;
    
    my %objectclass;
    for my $objclass ( @$list_ref ) {
	$objclass = lc $objclass;
	$objectclass{$objclass } = 1;
    }
    
    return \%objectclass;
}

sub objhash2list {
    my ($array_ref , $obj_hash_ref ) = @_;
    
    @$array_ref = keys %$obj_hash_ref;
}

sub resolve_structural_clash {
    my ( $structural_objectclasses_ref, $entry_ref ) = @_;

    my @removable_objectclasses;

    # remove automountmap
    # i dont know good heuristics to decide which one i 
    # should remove, so this is hard coded.
    # what other common cases are there?
    if ($structural_objectclasses_ref->{automountmap}       and
	$structural_objectclasses_ref->{organizationalunit} and 
	2 == keys %$structural_objectclasses_ref ) {
	push @removable_objectclasses, "automountmap";
    }
    return \@removable_objectclasses;
}
