#!/usr/bin/perl -Tw
# Copyright (C) 1996 Steven E. Brenner
# $Header: /ga/proj/scop/dev/RCS/search_cdb.cgi,v 1.2 2000/04/09 18:53:09 loredana Exp loredana $
#
# Modified 1997 Bartlett G Ailey, additional functions at the bottom of the file.
# 20000409.llc doesn't work
#
# 20000909.llc the content of QUERY_STRING is truncated at the first blank
# "search.cgi?pdb=1mbd, 2cro" behaves as "search.cgi?pdb=1mbd," (cgi bug?)
#
# 20000915.llc rewritten to use new stable identifiers' based indices,
# fix bugs etc.
#
# 20010405.llc &srch modified to copy with d1g2c.a
# sunid, sccs enabled
# 20010408.llc accept astral identifiers
#
# TODO: check sccs, sunid works properly, add comments for new indices
#
# Needs:
# All indicies in subdirectory 'search':
#
# TODO: update list of indices
#
# index.ec.pdb ec->pdb
# index.pdb.scop entry->sid
# index.scop word->sid
# sid.scop sid->page
# sid.title.scop sid->title
# pdb.sprsde obsolete pdb id ->replacement (pdbsprsde)
#
# Directories
#
# dir.dom.scop.txt - domains in scop # obsolete
# dir.lin.scop.txt - classification # obsolete
# dir.cla.scop.txt - classification # replace dir.dom
# dir.des.scop.txt - classification # and dir.lin
# dir.hie.scop.txt - hierarchy (sunid) # scop hierarchy
# dir.pdb.scop.txt - pdb entries in scop
# dir.ec.scop.txt - ec codes in scop
use strict;
require "./cgi-lib.pl";
use Search::Dict;
use CDB_File;
my ($Date, $End, %lev, $debug, %SrchFunc, %Dirs, $MatchMsg, $NoMatchMsg, %in, $nvar,$scopver,$scopverstr,$scopverstr2);
my (%sunid, %obshist, %pdbpx, %obs);
$| = 1;
$Date = "\$Date";
$End = "\$";
%lev = ( 'rt', 0, 'cl', '1', 'cf', '2', 'sf', '3', 'fa', '4', 'dm', '5', 'sp', '5', 'px', '6');
$debug = 0;
# Each search func accepts a $key and returns a list of @sids
# or never returns at all
%SrchFunc =
('dir', \&ProcDir,
'sid', \&ProcSid,
'key', \&ProcKeys,
# 'key', \&ProcKey, # replaced by &ProcKeys
'pdb', \&ProcPdb,
'ec' , \&ProcEc,
'sccs' ,\&ProcSccs,
'sunid' ,\&ProcSunid,
'newfa',\&ProcNew,
'newsf',\&ProcNew,
'newcf',\&ProcNew,
'newcl',\&ProcNew
);
# 20030513.llc modified to keep parseable files in parse
# 20031210.llc dir.pdb.scop.txt and dir.ec.scop.txt not distributed anymore
%Dirs =
('pdb', '../parse/dir.obs.txt',
'ec' , '../parse/dir.obs.txt',
'dom', '../parse/dir.obs.txt',
'lin', '../parse/dir.obs.txt',
'cla', '../parse/dir.cla.scop.txt',
'des', '../parse/dir.des.scop.txt',
'hie', '../parse/dir.hie.scop.txt',
'com', '../parse/dir.com.scop.txt'
);
MAIN:
{
open (fd0,"search/scopver.txt") ||
&CgiDie ("scopver.txt not found.");
$scopver = ;
chomp $scopver;
close fd0;
if ($nvar=&ReadParse(\%in)) {
if(defined($in{'ver'})){
$scopver=$in{'ver'};
}
$scopverstr=";ver=$scopver";
$scopverstr2="?ver=$scopver";
#&CgiDie ("$scopver $scopverstr $scopverstr2");
if ($nvar==1 && $in{'ver'}) {
&PrintQueryForm;
} else {
&Process(%in);
}
} else {
&PrintQueryForm;
}
}
sub Process
# 20000909.llc modified to deal with sunid instead of sid
# try to guess the kind of search in case type of search = 'key'
{
my (%in) = @_;
my ($loc, # location where to return pages
@sunids, # scop unique identifiers to return
$srchtype, # search type being tested
$srch, # type of search to be done
$key, # parameter to the search
$func, # function to call to process search
$search_type, # can be either scop or pdb
@match,
);
chdir('search') || &CgiError("Couldn't change to search directory: $!\n");
$loc = &SetLoc;
foreach $srchtype (keys %SrchFunc) {
if (defined $in{$srchtype}) {
if (defined $key) {
&CgiDie ("Illegal Parameters",
"Only one search type may be specified at a time");
} else {
$srch = $srchtype;
$in{$srch} =~ s/^\s+//; $in{$srch} =~ s/\s+$//;
# 20010408.llc: accept astral identifiers (aeid)
if ($in{$srch} =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/)
{ $in{$srch} = "d$1"; }
# 20010703.llc: accept astral identifiers (genetic domains)
if ($in{$srch} =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/)
{ $in{$srch} = "d$1"; }
$key = $in{$srch};
$search_type = $in{'search_type'};
# &CgiDie("|$search_type| |$in{$srch}|");
if ($search_type eq 'pdb') {
print "Location: http://www.ebi.ac.uk/msd-srv/msdlite/apps/query?textSearch=$in{$srch}&show=scopId\n\n";
return 0;
}
# try to guess the search type
if ($srch eq 'key') { $srch = &srch ($in{$srch}); }
# 20010706.llc: rename 'tlev' if necessary
$in{'lev'} = $in{'tlev'} if defined $in{'tlev'};
if ((defined $in{'lev'}) && ($srch eq 'key')) {
&CgiDie ("Invalid request: lev option not defined for keyword search.\n",
&PrintVariables(%in));
}
$func = $SrchFunc{$srch};
($srch eq 'key') ? (@match = &$func($key, %in))
: (@sunids = &$func($key, %in));
}
}
}
if (!defined $key) {
&CgiDie ("Invalid Request",&PrintVariables(%in));
}
if (defined $in{'lev'}) { @sunids = &DispLev(@sunids); }
@match = &SunidToPage(@sunids) if ($srch ne 'key');
@match = &RemoveDups(@match);
&ShowResults($srch, $key, $loc, @match);
}
# try to guess the search type, based on the first keyword
sub srch
{
my ($srch) = @_;
$_ = $srch; s/\s+//; $_ = lc $_;
($_) = split (/[,]/,$srch);
s/\s+//; $_ = lc $_;
$srch = $_; my $found = 0;
tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
&CgiDie ("Couldn't open sunid.pge.cdb: $!\n");
tie %pdbpx, 'CDB_File', "pdb.px.cdb" ||
&CgiDie ("Couldn't open pdb.px.cdb: $!\n");
# 20010405.llc modified to account for d1g2c.a:
$found = $srch =~ /^d[\ds][\da-z]{3}[\w\.][0-9_a-z]$/;
return 'sid' if $found;
# 20010408.llc: accept astral identifiers (aeid)
$found = $srch =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/;
return 'sid' if $found;
# 20010703.llc: accept astral identifiers (genetic domains)
$found = $srch =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/;
return 'sid' if $found;
# NOTE: this shadows pdb id (like 1914), use search.cgi?pdb=1914
$found = (($srch =~ /^\d+$/) and (defined $sunid{$srch}));
return 'sunid' if $found;
$found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $pdbpx{$srch}));
return 'pdb' if $found;
$found = ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/);
return 'sccs' if $found;
$found = ($srch =~ /^\d+(\.\d+){3}$/);
return 'ec' if $found;
# 20020506.llc: history
return 'newfa' if ($srch =~ /newfa/);
return 'newsf' if ($srch =~ /newsf/);
return 'newcf' if ($srch =~ /newcf/);
return 'newcl' if ($srch =~ /newcl/);
# 20030501.llc
# obsolete entries history
my %obshist;
tie %obshist, 'CDB_File', "hist.obs.cdb" ||
&CgiDie ("Couldn't open hist.obs.cdb: $!\n");
$found = (defined $obshist{$srch});
$found = ($found and ($srch =~ /^\d+$/));
return 'sunid' if $found;
$found = ($found and ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/));
return 'sccs' if $found;
tie %obs, 'CDB_File', "pdb.sprsde.cdb" ||
&CgiDie ("Couldn't open pdb.sprsde.cdb $!\n");
$found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $obs{$srch}));
return 'pdb' if $found;
return 'key';
}
sub ProcDir
# 20030513.llc modified to keep parseable files at MRC in "parse" dir
{
my ($dir) = @_;
my ($fn);
$dir =~ tr/A-Z/a-z/;
if (defined ($fn = $Dirs{$dir})) {
$fn .= "_$scopver" unless $fn =~ /obs.txt/;
print "Content-type: text/plain\n\n";
&PrintFile($fn);
} else {
&CgiError("Parseable files available at http://scop.mrc-lmb.cam.ac.uk/scop/parse");
# &CgiError("Directory type $dir not supported");
}
exit;
}
# Doesn't deal with missing leading 0's.
# Doesn't do any error checking at all!
sub ProcSid
# 20000911.llc modified to deal with px
# 20020122.llc: history
{
my ($sids) = @_;
my (@sids, $sid, @sunids, %sid, %obshist);
$sids =~ s/\s+//g;
$sids = lc $sids;
@sids = split(',',$sids);
# verify sids
tie %sid, 'CDB_File', "pxidx.cdb" ||
&CgiDie ("Couldn't open pxidx.cdb: $!\n");
# obsolete entries history
tie %obshist, 'CDB_File', "hist.obs.cdb" ||
&CgiDie ("Couldn't open hist.obs.cdb: $!\n");
foreach $sid (@sids) {
if (defined $sid{$sid}) {
&CgiError("$sid gets sunid $sid{$sid}\n") if $debug;
push (@sunids, $sid{$sid});
} else {
if (defined $obshist{$sid}) {
$MatchMsg .= $obshist{$sid};
}
}
}
untie %sid;
return @sunids;
}
#sub ProcEc
# 20000909.llc modified to deal with px
# 20031210.llc obsolete starting with scop 1.65
#{
# my ($ec) = @_;
# my (@ecs, @pdbs, @pxs);
# $ec =~ s/\s+//g;
# @ecs = split(/,/,$ec);
# @pdbs = &EcToPdb(@ecs);
# @pxs = &PdbToPx(@pdbs);
# return (@pxs);
#}
sub ProcEc
{
my (@ecs) = @_;
my ($ec, %ecpx, @pxs);
tie %ecpx, 'CDB_File', "ec.px.cdb" ||
&CgiDie ("Couldn't open ec.px.cdb: $!\n");
foreach $ec (@ecs) {
push (@pxs, split (/,/,$ecpx{$ec}));
}
untie %ecpx;
return (@pxs);
}
sub ProcSccs
# 20020122.llc: history
{
my ($sccs) = @_;
my (@sccs, %sccssunid, @sunids, @matches, %obshist);
$sccs =~ s/\s+//g;
@sccs = split(/,/,$sccs);
tie %sccssunid, 'CDB_File', "sccs.sunid.cdb" ||
&CgiDie ("Couldn't open sccs.sunid.cdb: $!\n");
# obsolete entries history
tie %obshist, 'CDB_File', "hist.obs.cdb" ||
&CgiDie ("Couldn't open hist.obs.cdb: $!\n");
foreach $sccs (@sccs) {
if (defined $sccssunid{$sccs}) {
&CgiError("$sccs gets sunid $sccssunid{$sccs}\n") if $debug;
push (@sunids, $sccssunid{$sccs});
} else {
if (defined $obshist{$sccs}) {
$MatchMsg .= $obshist{$sccs};
}
}
}
untie %sccssunid; untie %obshist;
return (@sunids);
}
sub ProcSunid
# 20020122.llc history
{
my ($sunids) = @_;
my (@sunids, @sunid0, %sunid, %obshist, $sunid);
$sunids =~ s/\s+//g;
@sunids = split(',',$sunids);
# verify sunids
tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
&CgiDie ("Couldn't open sunid.pge.cdb: $!\n");
# obsolete entries history
tie %obshist, 'CDB_File', "hist.obs.cdb" ||
&CgiDie ("Couldn't open hist.obs.cdb: $!\n");
@sunid0 = ();
foreach $sunid (@sunids) {
if (defined $sunid{$sunid}) {
push (@sunid0, $sunid);
} else {
if (defined $obshist{$sunid}) {
$MatchMsg .= $obshist{$sunid};
}
}
}
untie %sunid; untie %obshist;
return @sunid0;
}
sub ProcPdb
# 20000909.llc modified to use px instead of sid
{
my ($pdb) = @_;
my (@pdbs,@pxs);
$pdb =~ tr/A-Z/a-z/;
$pdb =~ s/\s+//g;
@pdbs = split(/,/,$pdb);
@pxs = &PdbToPx(@pdbs);
return (@pxs);
}
sub ProcKey
{
my ($key, %in) = @_;
my ($mkey, $match, $line, $file, @files);
local (*INDEX);
$key =~ tr/A-Z/a-z/;
#TODO# fix this, it causes errors when searching with "[52630 for example DONE?
$key =~ s/\s*$//; $key =~ s/\[//; $key =~ s/\]//;
$mkey = $key;
$mkey .= '\s' unless ($key =~ s/\+$//);
open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n");
&look(*INDEX,$key);
while (($line = ) =~ /^$mkey/) {
($match, $file) = split (/\s+/,$line);
push (@files,"$file");
}
close (INDEX) || &CgiDie ("Couldn't close index: $!\n");
return @files;
}
sub ProcNew
{
my ($key) = @_;
my $key0 = "$key-$scopver";
my %s;
$s{newfa} = 'new families';
$s{newsf} = 'new superfamilies';
$s{newcf} = 'new folds';
$s{newcl} = 'new classes';
my %new; my ($new, $sunid, @sunids);
open (fd0, "new-clfa.txt") || &CgiDie ("Couldn't open new-clfa.txt");
while () {
chomp;
($new, $sunid) = split (/ /,$_,2);
$sunid = -1 if not defined ($sunid);
$new{$new} = $sunid;
}
close fd0;
if ($new{"$key0"} == -1) {
$MatchMsg .= "no $s{$key} in $scopver";
} elsif (not defined $new{"$key0"}) {
$MatchMsg .= "no information available about $s{$key} in $scopver";
} else {
@sunids = split (/ /,$new{$key0});
# &CgiDie ("$key $new{$key}","@sunids");
}
return (@sunids);
}
# display output
sub ShowResults
# 20020514.llc modified to sort according to master file
# (as reflected by pcn)
{
my ($srch, $key, $loc, @match) = @_;
my ($name, $title, $file, $file0, @s, $i, $match);
my $pad = 4; # padding length for pcn
if (@match == 0) {
&ShowNoMatch($srch, $key);
} elsif ((@match == 1) && (!defined $MatchMsg)) {
&CgiError("Match = 1", "@match") if $debug;
($name,$title,$file) = split(/\t/,$match[0]);
$file =~ s/\s*//g;
print "Location: $loc" , "data/$file\n\n";
} else {
print &PrintHeader;
print "SCOP: Search Results for $key";
print &ScopHeader;
&CgiError ("Match != 1", "@match") if $debug;
print "Search Results for \"$key\" [scop $scopver]
" ;
my %h = ();
foreach $match (@match) {
($name,$title,$file) = split(/\t/,$match);
$file =~ s/\s*//g;
# format $file0 for easy sorting
# NOTE: sort will be based on master file order (reflected by pcn)
# and not on sccs, even if it appears in the title (sccs ARE NOT ordered)
@s = split (/\./,$file);
for ($i=0; $i<=$#s; $i++) {
$s[$i] = ' ' x (4 - length $s[$i]) . $s[$i];
}
$file0 = join (".", @s);
$h{$file0}{file} = $file;
$h{$file0}{title} = $title;
}
foreach (sort keys %h) {
print "$h{$_}{title}
\n";
}
print "$MatchMsg" if defined $MatchMsg;
print &ScopFooter;
}
}
sub ShowNoMatch
{
my ($srch, $key) = @_;
# &CgiError("Match = 0", "@match") if $debug;
print &PrintHeader;
print "SCOP: Search Results: None";
print &ScopHeader;
print qq!No matches for "$key" (search type "$srch")
\n!;
print "$MatchMsg
\n" if defined $MatchMsg;
print $NoMatchMsg if defined $NoMatchMsg;
print qq!!;
$key =~ tr/A-Z/a-z/;
if (($key =~ /^[\ds][\da-z]{3}/) && ($MatchMsg !~ /replaced/)) {
print "You can also ";
print qq!search for structural matches!;
print " of PDB $key in SCOP using SSM\n";
print "
(If nothing seems to happen, check behind this window)";
}
print &ScopFooter;
}
sub ScopHeader
{
return <Structural Classification of Proteins
END
}
sub ScopFooter
{
return <
Copyright © 1994-2009 The scop authors /
scop\@mrc-lmb.cam.ac.uk
June 2009
END
}
sub PrintQueryForm {
print &PrintHeader;
print "SCOP: Search Form";
print &ScopHeader;
print <Search the scop database [scop $scopver]
EOT
print &ScopFooter;
}
sub PrintFile {
my ($fn) = @_;
my ($buf);
local (*FILE);
open(FILE, $fn) || &CgiError ("Couldn't open file $fn: $!");
while (read(FILE, $buf, 16384)) {
print $buf;
}
close(FILE);
}
sub SetLoc
{
my ($loc);
if (defined $in{'loc'}) {
$loc = "$in{'loc'}";
$loc .= '/' unless $loc =~ /\/$/;
} else {
my ($path) = $ENV{"SCRIPT_NAME"} =~ /(.*)\//;
my $port = $ENV{'SERVER_PORT'} == 80 ? "" : ":" . $ENV{'SERVER_PORT'};
$loc = "http://" . $ENV{"SERVER_NAME"} . $port . $path . "/";
}
return $loc;
}
# Translate sunids to pages
sub SunidToPage
{
my (@sunids) = @_;
my ($sunid,@match,%sunid,%pgetit,$pge,$title,%sunid2sccs);
tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
&CgiDie ("Couldn't open sunid.pge.cdb: $!\n");
tie %pgetit, 'CDB_File', "pge.title.cdb" ||
&CgiDie ("Couldn't open pge.title.cdb: $!\n");
tie %sunid2sccs, 'CDB_File', "sunid.sccs.cdb" ||
&CgiDie ("Couldn't open sunid.sccs.cdb: $!\n");
foreach $sunid (@sunids) {
$pge = $sunid{$sunid};
$title = $pgetit{$pge} if $pge;
if (defined $sunid2sccs{$sunid}) { $title .= " [$sunid2sccs{$sunid}]"; }
push (@match, $sunid."\t".$title."\t".$pge) if $pge;
&CgiError ("Dealing with sunid :$sunid: :$title: :$pge:" ) if
$debug;
}
untie %sunid; untie %pgetit; untie %sunid2sccs;
return @match;
}
# Translate sids to pages
# Problems: doesn't munge whitespace, missing 0s. :
sub SidToPage
# 20000909.llc obsolete (see SunidToPage)
{
my (@sids) = @_;
my ($sid,@match,%sid,%sidtit);
tie %sid, 'CDB_File', "sid.scop.cdb" ||
&CgiDie ("Couldn't open sid.scop.cdb: $!\n");
tie %sidtit, 'CDB_File', "sid.title.scop.cdb" ||
&CgiDie ("Couldn't open sid.title.scop.cdb: $!\n");
foreach $sid (@sids) {
push (@match, $sid."\t".$sidtit{$sid}."\t".$sid{$sid}) if $sid{$sid};
&CgiError ("Dealing with sid :$sid: :$sidtit{$sid}: :$sid{$sid}:" ) if
$debug;
}
return @match;
}
sub DispLev
{
my (@sunids) = @_;
my ($lev, %px2scls, @sunid0, $sunid, $newsunid);
$lev = $in{'lev'} || $in{'tlev'};
$lev =~ tr/A-Z/a-z/;
$lev =~ s/\s//g;
# for internal consistence (scopm), 'pr' is called 'dm'
# for users is 'pr'
# indices are based on 'dm', therefore:
$lev =~ s/pr/dm/;
if (!defined $lev{$lev}) {
&CgiDie ("Invalid level request (lev)",&PrintVariables(%in));
}
tie %px2scls, 'CDB_File', "px.scls.cdb" ||
&CgiDie ("Couldn't open px.scls.cdb: $!\n");
foreach $sunid (@sunids) {
($newsunid) = ($px2scls{$sunid} =~ /$lev=(\d+)/);
push (@sunid0, $newsunid);
}
untie %px2scls;
return (@sunid0);
}
# convert to appropriate level
sub SetDispLev
# 20000909.llc obsolete (see DispLev)
{
my (@match) = @_;
my ($lev,$name,$title,$file,$dots,$i,@file,%sidtit);
# 20000406.llc modified to cope with:
# WARNING WARNING WARNING WARNING: This will only work when:
# 1) Parameter is EXACTLY correct and
# 2) The level that would have been returned is further down in the
# tree than the level being requested with the 'key' or 'pdb'
# parameter.
# To be fixed:
# BUG BUG BUG: The '$title' field contains text of level that will be
# returned. This text is not modified to reflect the level
# which is produced by this routine
$lev = $in{'lev'};
$lev =~ tr/A-Z/a-z/;
$lev =~ s/\s//g;
if (!defined $lev{$lev}) {
&CgiDie ("Invalid level request (lev)",&PrintVariables(%in));
}
tie %sidtit, 'CDB_File', "sid.title.scop.cdb" ||
&CgiDie ("Couldn't open sid.title.scop.cdb: $!\n");
$dots = $lev{$lev} + 1;
for $i (0 .. $#match) {
($name,$title,$file) = split(/\t/,$match[$i]);
@file = split(/\./, $file);
$file = join ('.', @file[0 .. $dots], 'html');
$title = $sidtit{$name};
$match[$i] = join ("\t", $name,$title,$file);
}
untie %sidtit;
return @match;
}
sub RemoveDups
# 20020514.llc modified to respect original order (no sorting)
{
my(@match) = @_;
my($name,$title,$file,%match,$key,$match,@match0);
# remove duplicate pages (generated from different structures on one page)
foreach $match (@match) {
($name,$title,$file) = split(/\t/,$match);
if (not defined $match{$file}) {
push (@match0, "\t$title\t$file\t");
}
$match{$file}=$title;
}
@match = @match0;
# @match = ();
# foreach $key (keys(%match)) {
# push(@match,"\t$match{$key}\t$key\t");
# }
return @match;
}
#sub EcToPdb
# 20031210.llc obsolete starting with scop 1.65
#{
# my (@ecs) = @_;
# my ($ec, %ecpdb,@pdbs);
#
# tie %ecpdb, 'CDB_File', "ec.pdb.cdb" ||
# &CgiDie ("Couldn't open ec.pdb.cdb: $!\n");
#
# foreach $ec (@ecs) {
# push (@pdbs, split ("\0",$ecpdb{$ec}));
# }
#
# untie %ecpdb;
#
# return (@pdbs);
#}
sub PdbToPx
{
my (@pdbs) = @_;
my (%pdbpx, $pdb, @pxs, @matches);
my (%obs, $newid, $date, $lev);
tie %pdbpx, 'CDB_File', "pdb.px.cdb" ||
&CgiDie ("Couldn't open pdb.px.cdb: $!\n");
tie %obs, 'CDB_File', "pdb.sprsde.cdb" ||
&CgiDie ("Couldn't open pdb.sprsde.cdb $!\n");
foreach $pdb (@pdbs) {
if (defined $pdbpx{$pdb}) {
push (@pxs, split (/,/,$pdbpx{$pdb}));
} else {
if ($newid = $obs{$pdb}) {
$date = $obs{"$pdb-date"};
$lev =''; if (defined $in{'lev'}) { $lev = '&lev=' . $in{'lev'}; }
$MatchMsg .= "Entry $pdb was replaced by entry " .
qq!$newid on $date.
\n!;
}
}
}
untie %pdbpx; untie %obs;
return @pxs;
}
sub PdbToSid
# 20000909.llc obsolete (see PdbToPx)
{
my (@pdbs) = @_;
my (%db,$pdb,@sids,@matches,$line,$code);
local (*PDBIDX);
open (PDBIDX, "index.pdb.scop") or
&CgiDie("Couldn't open index.pdb.scop: $!\n");
foreach $pdb (@pdbs) {
&look (*PDBIDX, $pdb);
$line = ;
($code, @matches) = split (/[\s\0]+/, $line);
&CgiError("$pdb gets sid @matches\n") if $debug;
push (@sids,@matches) if $code eq $pdb;
}
close (PDBIDX);
return @sids;
}
sub SearchScop
# 20000912.llc modified to deal with new index.scop
{
my ($mkey, $key) = @_;
my (@files, $line, $match, $file, %pgetit);
local (*INDEX);
open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n");
&look(*INDEX,$key);
while (($line = ) =~ /^$mkey/) {
($match, $file) = split (/\s+/,$line);
push (@files,"$file");
}
close (INDEX) || &CgiDie ("Couldn't close index: $!\n");
return (@files);
}
sub ProcKeys {
my ($keys, %in) = @_;
my (%pgetit, @files, $file, %files, @words, $word, @match, $s);
my $ex = "
For keyword search, enter one or more [+|-]keywords.
Examples:
-
immunoglobulin returns a (list of)
SCOP page(s) referring to immunoglobulin
-
immun+ returns a (list of)
SCOP page(s) containing all
completions of immun
-
immunoglobulin +variable returns a (list of)
SCOP page(s) in which both
immunoglobulin and variable appear
-
immunoglobulin -variable returns a (list of)
SCOP page(s) corresponding to
immunoglobulin but not containing variable
";
local (*INDEX);
tie %pgetit, 'CDB_File', "pge.title.cdb" ||
&CgiDie ("Couldn't open pge.title.cdb: $!\n");
$_ = $keys; s/\s+/ /g; s/^ //; s/ $//;
@words = split (/ /,lc($_));
@files = &ProcKey(shift @words, %in);
%files = (); foreach (@files) { $files{$_}++; }
foreach (@words) {
$s = substr($_,0,1); s/^\+//; s/^\-//; $word = $_;
if ($s eq '-') {
@files = &ProcKey($word, %in);
foreach (@files) { delete $files{$_} if defined $files{$_}; }
} elsif ($s eq '+') {
@files = &ProcKey($word, %in);
@files = grep (defined $files{$_}, @files);
%files = (); foreach (@files) { $files{$_}++; }
} else { &CgiDie
("Invalid request: use '+' or '-' prefix for multiple word search.\n",
&PrintVariables(%in),$ex);
}
}
@match = ();
foreach $file (keys %files) {
push (@match,"-1\t$pgetit{$file}\t$file");
}
return @match;
}