#!/usr/bin/perl
#
# $Id: resolve_refs.pl,v 1.1 2001/02/14 16:06:53 alberto Exp alberto $
#
# ADS reference resolver script
#
# Reads reference strings (one per line) from the files named on the
# command line, or from STDIN, sends them to the ADS reference resolver
# through ADS::RefResolver::Client, and prints one record per resolved
# entry to STDOUT.  Progress and summary messages go to STDERR.
#
# NOTE: to each bibcode returned by this script corresponds a URL of
#           http://$server/cgi-bin/bib_query?BIBCODE
#       this is the most general ADS URL for the document identified by
#       BIBCODE and is guaranteed to work in the future.
#
# Requires the libwww-perl-5 available from http://www.linpro.no/lwp/
# Written by Alberto Accomazzi
#
# $Log: resolve_refs.pl,v $
# Revision 1.1  2001/02/14 16:06:53  alberto
# Initial revision
#
# Revision 1.2  2000/09/22 17:55:53  alberto
# Written up a usage message, added -help option, moved the formatting
# code into a subroutine to facilitate plug-in replacement.
#
# Revision 1.1  2000/09/22 03:19:41  alberto
# Initial revision
#
#

use strict;
use warnings;
use ADS::RefResolver::Client;

# customizeable variables:
my $server             = "adsabs.harvard.edu";  # server used to compose URLs
my $default_confidence = 1;                     # default minimum confidence level
my $confidence         = $default_confidence;   # minimum confidence level in effect
my $debug              = 0;                     # set to 1 to enable debugging output

# name of this script with any leading directory stripped; used as the
# prefix of every diagnostic message
my $script = $0;
$script =~ s:^.*/::;

# "name/major.minor" string built from the RCS keywords (not used below)
my $version = sprintf("%s/%d.%02d",
                      q$RCSfile: resolve_refs.pl,v $ =~ /:\s*(\w+)/,
                      q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
my $method = "POST";

# read options from command line; stop at the first argument that does
# not look like an option (the @ARGV test avoids matching against undef
# when no arguments are left)
while (@ARGV && $ARGV[0] =~ /^-\w/) {
    my $opt = shift(@ARGV);
    if ($opt =~ /^-confidence/) {
        my $value = shift(@ARGV);
        # a missing or non-numeric threshold would silently compare as 0
        # and let every record through, so reject it up front
        usage("option \"$opt\" requires a numeric argument\n")
            unless (defined($value) and
                    $value =~ /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$/);
        $confidence = $value;
    }
    elsif ($opt =~ /^-debug/) {
        $debug++;
    }
    elsif ($opt =~ /^-get/) {
        $method = 'GET';
    }
    elsif ($opt =~ /^-h/) {
        # -help
        usage();
    }
    else {
        # usage() prepends "$script: " itself, so do not repeat it here
        usage("unknown option \"$opt\"\n");
    }
}

my $resolver = ADS::RefResolver::Client->new(Debug      => $debug,
                                             HttpMethod => $method)
    or die "$script: error creating resolver object";

print STDERR "$script: reading from STDIN...\n" unless (@ARGV);

# read all references in an array (each entry keeps its trailing newline)
my @refs = <>;
usage("no input references specified!\n") unless (@refs);
warn "$script: read ", scalar(@refs), " input references\n";

my @output = $resolver->resolve(@refs);

# walk the results in parallel with the input references: the i-th
# result is paired with the i-th reference string
my $resolved = 0;
for my $i (0 .. $#output) {
    my $in      = $refs[$i];
    my $out     = $output[$i];
    my $bibcode = $out->{bibcode};
    my $conf    = $out->{confidence};

    my $out_string = format_record($bibcode, $conf, $in);
    print $out_string if ($out_string);

    # an undefined confidence counts as 0
    $conf = 0 unless (defined($conf));
    $resolved++ if ($conf >= $confidence);
}
warn "$script: resolved ", $resolved, " bibcodes with ADS\n";


# this is the function that formats the output records; the input
# arguments are: (bibcode, confidence, reference_string).
# Whatever is returned by this function is printed to STDOUT.
# To avoid printing a record, just return an empty string.
sub format_record {
    my ($bib, $conf, $orig) = @_;

    # treat missing fields as empty/zero so that they format the same
    # way as before without triggering "uninitialized" warnings
    $bib  = '' unless (defined($bib));
    $conf = 0  unless (defined($conf));
    $orig = '' unless (defined($orig));

    # replace bibcode with blank string of equal length if the
    # confidence is below the minimum threshold
    $bib = " " x 19 unless ($conf >= $confidence);

    # now just return the formatted record
    return "$bib\t$orig";
}

# here is another possible formatting function that one could use
# to quickly see what exactly comes back: it simply returns the
# input fields separated by tabs
sub display_record {
    return join("\t", @_);
}

# print an optional error message (prefixed with the script name)
# followed by the usage text to STDERR, then exit with status 1
sub usage {
    print STDERR "$script: ", @_ if (@_);
    print STDERR <<"EOF";
Usage: $script [OPTIONS] [FILE ...]
This script uses the ADS reference resolver to identify bibliographic
records in the ADS which correspond to the list of reference strings
found in the input files (STDIN if no input files are specified).
The references should be specified as plain ASCII strings, one per line.
The output format (which can be easily modified) consists of a bibcode
identifier followed by a TAB followed by the original reference string
for successfully matched records.  If a record could not be matched
with sufficient confidence, a blank string will replace the bibcode.

OPTIONS:
    -confidence NUMBER
        return only bibcodes which have a confidence score
        greater or equal to NUMBER (default: $default_confidence)
    -debug
        print debugging information to STDERR
    -get
        use an HTTP GET method (rather than POST) to send query
        to the reference resolver
    -help
        print this usage message

For more information about the reference resolver itself, please see
http://adsabs.harvard.edu/pubs/resolver/

EOF
    exit(1);
}