The Perl SOAP server below takes a list of identifiers as input, performs a binary search for each of them on the external table datalinks.tab to decide whether the dataset is known, and then returns a list of structured records as described below. You should use this script as a template and adapt it to your own needs in order to verify the existence of data identifiers in your own archive.
#!/usr/bin/perl
#
# Sample Perl SOAP script to verify the existence of a particular
# dataset at this center.
#
# The server accepts a list of identifiers as input and returns a list
# of structured records as output:
#
# input => input query string (identifier)
# result => integer value:
# 0 - The identifier has an unrecognizable syntax.
# 1 - The identifier is valid and known to the system
# -1 - The identifier has a valid syntax but is not
# known to the system
# url => Link to data associated with dataset (optional)
#
# Strictures apply lexically to the rest of this file, including the
# DataVerifier package below.
use strict;
use warnings;

use SOAP::Transport::HTTP;

# Handle the incoming CGI request, dispatching SOAP calls to the
# DataVerifier package defined below.
SOAP::Transport::HTTP::CGI
    -> dispatch_to('DataVerifier')
    -> handle;

package DataVerifier;

# Loaded after the package statement so that look() is imported into
# DataVerifier, where verify_id() uses it.
use Search::Dict;
# Check every identifier passed in and report on each one.
#
# Arguments: the first argument (the invocant, unused), followed by
#            zero or more dataset identifiers.
# Returns:   a reference to an array holding one hash per identifier,
#            in input order, with the keys
#              input  => the identifier exactly as received
#              result =>  0 if syntax_error() rejects the identifier,
#                         1 if verify_id() finds it,
#                        -1 otherwise
#              url    => value of get_url() for identifiers that were
#                        found, undef otherwise
sub verify {
    my ($self, @dataids) = @_;
    my @results;

    for my $dataid (@dataids) {
        my $result = -1;    # assume failure
        my $url;

        if (syntax_error($dataid)) {
            $result = 0;
        }
        elsif (verify_id($dataid)) {
            $result = 1;
            # Only a dataset that is actually known can have a data
            # link, so the lookup belongs here and not in the
            # syntax-error branch.
            $url = get_url($dataid);
        }

        push @results, {
            input  => $dataid,
            result => $result,
            url    => $url,
        };
    }

    return \@results;
}
# Syntax check for a dataset identifier: an identifier is rejected when
# it contains a whitespace character anywhere.
# Returns the outcome of the pattern match, i.e. true on a syntax error.
sub syntax_error {
    my ($candidate) = @_;
    return $candidate =~ /\s/;
}
# Dataset identifier verifier function.
# datalinks.tab is a sorted file containing a list of
# dataset identifiers, which could be in one of two forms:
#   - fully qualified: center_id:data_id
#   - center-specific: data_id
# We try to match them in both cases, although we should really
# check and make sure that for fully-qualified identifiers the
# center_id matches our center's identification so that there is
# no possibility of confusion -- maybe this should be enforced
# by the spec or protocol implementation.
#
# Argument: the dataset identifier to look up.
# Returns:  1 if a line of datalinks.tab (with surrounding whitespace
#           removed) equals the identifier, either as given or with
#           everything up to and including its first colon removed;
#           0 otherwise.
# Dies:     if datalinks.tab cannot be opened.
sub verify_id {
    my $dataid = shift;
    my $datafile = "datalinks.tab";

    # Nothing to look up; also avoids matching against undef below.
    return 0 unless defined $dataid;

    # Try the identifier as given first, then (if it carries a data
    # center prefix) the identifier with that prefix stripped.
    my @candidates = ($dataid);
    my $local_id = $dataid;
    push @candidates, $local_id if $local_id =~ s/^.*?://;

    open(my $df, '<', $datafile) or die "error opening file $datafile: $!";

    my $found = 0;
    for my $candidate (@candidates) {
        # look() positions the handle at the first line >= $candidate
        # and returns -1 if it could not do so.
        next if look($df, $candidate, 0, 0) < 0;

        # The search may land at end of file, in which case there is
        # no line to compare against.
        my $entry = <$df>;
        next unless defined $entry;

        $entry =~ s/^\s+|\s+$//g;
        if ($entry eq $candidate) {
            $found = 1;
            last;
        }
    }

    close($df);
    return $found;
}
# Placeholder for the data-link lookup: no link is available yet, so
# the caller always receives an undefined value.
sub get_url {
    my $no_link = undef;
    return $no_link;
}
We provide a sample client
script that queries the server script above.
#!/usr/bin/env perl
# Usage: dataverify-client.pl datasetid [...]
#
# Sends the dataset identifiers given on the command line to the
# verification service and prints the returned records as XML.
# Dies with the fault code and fault string if the service reports a
# SOAP fault.

# Warnings are enabled with a pragma rather than a "-w" switch on the
# interpreter line: switches after "env perl" are not passed on
# portably.
use strict;
use warnings;

use SOAP::Lite;
use XML::Simple;

# Without identifiers there is nothing to verify.
die "Usage: $0 datasetid [...]\n" unless @ARGV;

# this is the SOAP server that does the verification for you
my $service = 'http://ads.harvard.edu/ws/dataverify';

my $response = SOAP::Lite
    -> uri('http://ads.harvard.edu/DataVerifier')
    -> proxy($service)
    -> verify(@ARGV);

if ($response->fault) {
    die $response->faultcode, ', ', $response->faultstring;
}

print XMLout($response->result, noattr => 1);
Example of client usage:
alberto@adsfore-366: perl ./dataverify-client.pl Y0Q70101T
<opt>
  <anon>
    <input>Y0Q70101T</input>
    <result>1</result>
  </anon>
</opt>