#!/usr/local/bin/perl -w
# -*- perl -*-
######################################################################
# gffstats.pl -- Simple program to print statistics
# Copyright (c) 2007 Tero Kivinen
# All Rights Reserved.
######################################################################
# Program: gffstats.pl
# $Source: /u/samba/nwn/bin/RCS/gffstats.pl,v $
# Author : $Author: kivinen $
#
# (C) Tero Kivinen 2007
#
# Creation : 21:22 Jan 6 2008 kivinen
# Last Modification : 21:42 Jan 6 2008 kivinen
# Last check in : $Date: 2008/01/06 19:43:52 $
# Revision number : $Revision: 1.1 $
# State : $State: Exp $
# Version : 1.30
# Edit time : 19 min
#
# Description : Simple program to print statistics
#
# $Log: gffstats.pl,v $
# Revision 1.1 2008/01/06 19:43:52 kivinen
# Created.
#
# $EndLog$
#
#
#
######################################################################
# initialization

require 5.6.0;
package GffStats;
use strict;
use Getopt::Long;
use File::Glob ':glob';
use Gff;
use GffRead;
use Time::HiRes qw(time);
use Pod::Usage;

# Option defaults.  These may be overridden first by the rc file and
# then by the command line.
$Opt::verbose = 0;
$Opt::exclude = undef;
$Opt::include = undef;
$Opt::print_filename = 0;
$Opt::print_basename = 0;
$Opt::only_total = 0;

######################################################################
# Get version information

# The program reads its own source and builds the version string from
# the fields in the header comment above, so those header lines must
# stay one field per line (the edit time pattern is anchored to end of
# line).
{
    open(my $self, '<', $0) || die "Cannot open myself from $0 : $!";
    local $/;                   # slurp mode, restored at end of block
    $Prog::program = <$self>;
    close($self);
}

if ($Prog::program =~ /\$revision:\s*([\d.]*)\s*\$/i) {
    $Prog::revision = $1;
} else {
    $Prog::revision = "?.?";
}

if ($Prog::program =~ /version\s*:\s*([\d.]*\.)*([\d]*)\s/mi) {
    $Prog::save_version = $2;
} else {
    $Prog::save_version = "??";
}

if ($Prog::program =~ /edit\s*time\s*:\s*([\d]*)\s*min\s*$/mi) {
    $Prog::edit_time = $1;
} else {
    $Prog::edit_time = "??";
}

$Prog::version = "$Prog::revision.$Prog::save_version.$Prog::edit_time";
$Prog::progname = $0;
$Prog::progname =~ s/^.*[\/\\]//g;

$| = 1;

######################################################################
# Read rc-file

if (defined($ENV{'HOME'})) {
    read_rc_file("$ENV{'HOME'}/.gffmatchrc");
}

######################################################################
# Option handling

Getopt::Long::Configure("no_ignore_case");

# The short forms follow the documentation at the end of this file:
# -e is --exclude and -i is --include.
if (!GetOptions("config=s" => \$Opt::config,
                "verbose|v+" => \$Opt::verbose,
                "help|h" => \$Opt::help,
                "exclude|e=s" => \$Opt::exclude,
                "include|i=s" => \$Opt::include,
                "print-filename|f" => \$Opt::print_filename,
                "print-basename|b" => \$Opt::print_basename,
                "total|t" => \$Opt::only_total,
                "version|V" => \$Opt::version) ||
    defined($Opt::help)) {
    usage();
}

if (defined($Opt::version)) {
    print("\u$Prog::progname version " .
          "$Prog::version by Tero Kivinen.\n");
    exit(0);
}

# This is a loop because a config file may itself set the "config"
# option, which chains to a further file.
while (defined($Opt::config)) {
    my($tmp);
    $tmp = $Opt::config;
    undef $Opt::config;
    if (-f $tmp) {
        read_rc_file($tmp);
    } else {
        # Report the saved name; $Opt::config has already been cleared.
        die "Config file $tmp not found: $!";
    }
}

######################################################################
# Main loop

my($i, $t0, %args);

%args = (include => $Opt::include,
         exclude => $Opt::exclude);

# Expand wildcards ourselves if any argument still contains them.
if (join(";", @ARGV) =~ /[*?]/) {
    my(@argv);
    foreach $i (@ARGV) {
        push(@argv, bsd_glob($i));
    }
    @ARGV = @argv;
}

foreach $i (@ARGV) {
    my($gff, $j, $total, $cnt);

    $args{'filename'} = $i;
    $t0 = time();

    # Prefix for every output line: full name, base name or nothing.
    if (defined($Opt::print_filename) && $Opt::print_filename) {
        $main::file = $i . ": ";
    } elsif (defined($Opt::print_basename) && $Opt::print_basename) {
        $main::file = $i;
        $main::file =~ s/^.*[\/\\]//g;
        $main::file .= ": ";
    } else {
        $main::file = "";
    }

    if ($Opt::verbose) {
        print("Reading file $i...\n");
    }
    $gff = GffRead::read(%args);
    if ($Opt::verbose) {
        printf("Read done, %g seconds\n", time() - $t0);
    }

    $total = 0;
    foreach $j (sort keys %{$gff}) {
        next if ($j eq '');
        next if ($j =~ /^ /);
        next if ($j =~ /____((struct_|file_|)type|file_version)$/);
        next if (ref($$gff{$j}) eq 'HASH');
        # Plain (non-reference) values have nothing to count, and
        # dereferencing them as an array is fatal under "use strict".
        next if (!ref($$gff{$j}));
        $cnt = scalar(@{$$gff{$j}});
        $total += $cnt;
        if ($cnt != 0 && !$Opt::only_total) {
            printf("%s%s:\t%d\n", $main::file, $j, $cnt);
        }
    }
    printf("%s%s%s%d\n", $main::file,
           ($Opt::only_total ? '' : 'Total:'),
           (($Opt::only_total && $main::file eq '') ? '' : "\t"),
           $total);
}

exit 0;

######################################################################
# Read rc file
#
# read_rc_file($file) reads option settings from $file into the
# corresponding $Opt::<name> variables (name is lowercased).  A file
# that cannot be opened is silently skipped.  Recognized line forms:
#
#   name            sets $Opt::name to 1
#   name = "value"  sets $Opt::name to value (quotes removed)
#   name = value    sets $Opt::name to value
#
# A line ending in a backslash is joined with the following line.  In
# values the two character sequences \n and \t are turned into newline
# and tab.  Lines matching none of the forms are ignored.

sub read_rc_file {
    my($file) = @_;
    my($fh, $line, $next, $space);

    open($fh, '<', $file) || return;

    while (defined($line = <$fh>)) {
        chomp $line;

        # Join continuation lines.  The pieces are separated by one
        # space if there was whitespace on either side of the break.
        while ($line =~ /\\$/) {
            $space = ($line =~ /\s+\\$/) ? 1 : 0;
            $line =~ s/\s*\\$//g;
            $next = <$fh>;
            # Continuation on the last line of the file: nothing follows.
            $next = '' if (!defined($next));
            chomp $next;
            if ($next =~ s/^\s+//g) {
                $space = 1;
            }
            $line .= ($space ? " " : "") . $next;
        }

        if ($line =~ /^\s*([a-zA-Z0-9_]+)\s*$/) {
            _set_opt($1, 1);
        } elsif ($line =~ /^\s*([a-zA-Z0-9_]+)\s*=\s*\"([^\"]*)\"\s*$/ ||
                 $line =~ /^\s*([a-zA-Z0-9_]+)\s*=\s*(.*)\s*$/) {
            my($key, $value) = ($1, $2);
            $value =~ s/\\n/\n/g;
            $value =~ s/\\t/\t/g;
            _set_opt($key, $value);
        }
    }
    close($fh);
    return;
}

# _set_opt($name, $value) assigns $value to the package variable
# $Opt::<lowercased name>.  The caller only passes names consisting of
# letters, digits and underscores.

sub _set_opt {
    my($name, $value) = @_;

    no strict 'refs';
    ${'Opt::' . lc($name)} = $value;
    return;
}

######################################################################
# Usage
#
# Prints the usage text generated from the POD below and exits.

sub usage {
    Pod::Usage::pod2usage(0);
}

=head1 NAME

gffstats - print statistics of gff structure

=head1 SYNOPSIS

gffstats [B<--help>|B<-h>] [B<--version>|B<-V>] [B<--verbose>|B<-v>]
    [B<--config> I<config-file>]
    [B<--print-filename>|B<-f>]
    [B<--print-basename>|B<-b>]
    [B<--total>|B<-t>]
    [B<--exclude>|B<-e> I<regexp>]
    [B<--include>|B<-i> I<regexp>]
    I<filename> ...

gffstats B<--help>

=head1 DESCRIPTION

B<gffstats> prints the number of items in each of the toplevel gff
structures. If the B<-t> option is given then it prints only the total
sum of objects in arrays on the toplevel.

=head1 OPTIONS

=over 4

=item B<--help> B<-h>

Prints out the usage information.

=item B<--version> B<-V>

Prints out the version information.

=item B<--verbose> B<-v>

Enables the verbose prints. This option can be given multiple times,
and each time it enables more verbose prints.

=item B<--config> I<config-file>

All options given by the command line can also be given in the
configuration file. This option is used to read another configuration
file in addition to the default configuration file.

=item B<--print-filename> B<-f>

Prefix the output with the full file name.

=item B<--print-basename> B<-b>

Prefix the output with the base filename, i.e. the file name where the
path component is removed.

=item B<--total> B<-t>

Only print total number of objects.

=item B<--exclude> B<-e> I<regexp>

Exclude the given regexp when reading the data in. This will skip the
whole structure behind the given structure, meaning that B<--include>
cannot be used to get parts of that back. This can be used to speed up
the processing if only specific parts of the tree are required.
Normally this should be something like I<^/Creature List> meaning that
all creature list information is skipped when reading gff.

=item B<--include> B<-i> I<regexp>

Only include the given regexp when reading the data in. This will skip
all other structures which do not match the regexp.
This can be used to speed up the processing if only specific parts of
the tree are required. Normally this should be something like
I<^/Creature List> meaning that only creature list information is read
in.

=back

=head1 EXAMPLES

    gffstats.pl *.git
    gffstats.pl -t *.git

=head1 FILES

=over 6

=item ~/.gffmatchrc

Default configuration file.

=back

=head1 SEE ALSO

gffprint(1), gffmatch(1), Gff(3), and GffRead(3).

=head1 AUTHOR

Tero Kivinen.

=head1 HISTORY

This program was created when someone on nwn2cr channel wanted to know
how many objects area files have.