#!/usr/bin/perl -Tw # Copyright (C) 1996 Steven E. Brenner # $Header: /ga/proj/scop/dev/RCS/search_cdb.cgi,v 1.2 2000/04/09 18:53:09 loredana Exp loredana $ # # Modified 1997 Bartlett G Ailey, additional functions at the bottom of the file. # 20000409.llc doesn't work # # 20000909.llc the content of QUERY_STRING is truncated at the first blank # "search.cgi?pdb=1mbd, 2cro" behaves as "search.cgi?pdb=1mbd," (cgi bug?) # # 20000915.llc rewritten to use new stable identifiers' based indices, # fix bugs etc. # # 20010405.llc &srch modified to copy with d1g2c.a # sunid, sccs enabled # 20010408.llc accept astral identifiers # # TODO: check sccs, sunid works properly, add comments for new indices # # Needs: # All indicies in subdirectory 'search': # # TODO: update list of indices # # index.ec.pdb ec->pdb # index.pdb.scop entry->sid # index.scop word->sid # sid.scop sid->page # sid.title.scop sid->title # pdb.sprsde obsolete pdb id ->replacement (pdbsprsde) # # Directories # # dir.dom.scop.txt - domains in scop # obsolete # dir.lin.scop.txt - classification # obsolete # dir.cla.scop.txt - classification # replace dir.dom # dir.des.scop.txt - classification # and dir.lin # dir.hie.scop.txt - hierarchy (sunid) # scop hierarchy # dir.pdb.scop.txt - pdb entries in scop # dir.ec.scop.txt - ec codes in scop use strict; require "./cgi-lib.pl"; use Search::Dict; use CDB_File; my ($Date, $End, %lev, $debug, %SrchFunc, %Dirs, $MatchMsg, $NoMatchMsg, %in, $nvar,$scopver,$scopverstr,$scopverstr2); my (%sunid, %obshist, %pdbpx, %obs); $| = 1; $Date = "\$Date"; $End = "\$"; %lev = ( 'rt', 0, 'cl', '1', 'cf', '2', 'sf', '3', 'fa', '4', 'dm', '5', 'sp', '5', 'px', '6'); $debug = 0; # Each search func accepts a $key and returns a list of @sids # or never returns at all %SrchFunc = ('dir', \&ProcDir, 'sid', \&ProcSid, 'key', \&ProcKeys, # 'key', \&ProcKey, # replaced by &ProcKeys 'pdb', \&ProcPdb, 'ec' , \&ProcEc, 'sccs' ,\&ProcSccs, 'sunid' ,\&ProcSunid, 'newfa',\&ProcNew, 'newsf',\&ProcNew, 'newcf',\&ProcNew, 'newcl',\&ProcNew ); # 20030513.llc modified to keep parseable files in parse # 20031210.llc dir.pdb.scop.txt and dir.ec.scop.txt not distributed anymore %Dirs = ('pdb', '../parse/dir.obs.txt', 'ec' , '../parse/dir.obs.txt', 'dom', '../parse/dir.obs.txt', 'lin', '../parse/dir.obs.txt', 'cla', '../parse/dir.cla.scop.txt', 'des', '../parse/dir.des.scop.txt', 'hie', '../parse/dir.hie.scop.txt', 'com', '../parse/dir.com.scop.txt' ); MAIN: { open (fd0,"search/scopver.txt") || &CgiDie ("scopver.txt not found."); $scopver = <fd0>; chomp $scopver; close fd0; if ($nvar=&ReadParse(\%in)) { if(defined($in{'ver'})){ $scopver=$in{'ver'}; } $scopverstr=";ver=$scopver"; $scopverstr2="?ver=$scopver"; #&CgiDie ("$scopver $scopverstr $scopverstr2"); if ($nvar==1 && $in{'ver'}) { &PrintQueryForm; } else { &Process(%in); } } else { &PrintQueryForm; } } sub Process # 20000909.llc modified to deal with sunid instead of sid # try to guess the kind of search in case type of search = 'key' { my (%in) = @_; my ($loc, # location where to return pages @sunids, # scop unique identifiers to return $srchtype, # search type being tested $srch, # type of search to be done $key, # parameter to the search $func, # function to call to process search $search_type, # can be either scop or pdb @match, ); chdir('search') || &CgiError("Couldn't change to search directory: $!\n"); $loc = &SetLoc; foreach $srchtype (keys %SrchFunc) { if (defined $in{$srchtype}) { if (defined $key) { &CgiDie ("Illegal Parameters", "Only one search type may be specified at a time"); } else { $srch = $srchtype; $in{$srch} =~ s/^\s+//; $in{$srch} =~ s/\s+$//; # 20010408.llc: accept astral identifiers (aeid) if ($in{$srch} =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/) { $in{$srch} = "d$1"; } # 20010703.llc: accept astral identifiers (genetic domains) if ($in{$srch} =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/) { $in{$srch} = "d$1"; } $key = $in{$srch}; $search_type = $in{'search_type'}; # &CgiDie("|$search_type| |$in{$srch}|"); if ($search_type eq 'pdb') { print "Location: http://www.ebi.ac.uk/msd-srv/msdlite/apps/query?textSearch=$in{$srch}&show=scopId\n\n"; return 0; } # try to guess the search type if ($srch eq 'key') { $srch = &srch ($in{$srch}); } # 20010706.llc: rename 'tlev' if necessary $in{'lev'} = $in{'tlev'} if defined $in{'tlev'}; if ((defined $in{'lev'}) && ($srch eq 'key')) { &CgiDie ("Invalid request: lev option not defined for keyword search.\n", &PrintVariables(%in)); } $func = $SrchFunc{$srch}; ($srch eq 'key') ? (@match = &$func($key, %in)) : (@sunids = &$func($key, %in)); } } } if (!defined $key) { &CgiDie ("Invalid Request",&PrintVariables(%in)); } if (defined $in{'lev'}) { @sunids = &DispLev(@sunids); } @match = &SunidToPage(@sunids) if ($srch ne 'key'); @match = &RemoveDups(@match); &ShowResults($srch, $key, $loc, @match); } # try to guess the search type, based on the first keyword sub srch { my ($srch) = @_; $_ = $srch; s/\s+//; $_ = lc $_; ($_) = split (/[,]/,$srch); s/\s+//; $_ = lc $_; $srch = $_; my $found = 0; tie %sunid, 'CDB_File', "sunid.pge.cdb" || &CgiDie ("Couldn't open sunid.pge.cdb: $!\n"); tie %pdbpx, 'CDB_File', "pdb.px.cdb" || &CgiDie ("Couldn't open pdb.px.cdb: $!\n"); # 20010405.llc modified to account for d1g2c.a: $found = $srch =~ /^d[\ds][\da-z]{3}[\w\.][0-9_a-z]$/; return 'sid' if $found; # 20010408.llc: accept astral identifiers (aeid) $found = $srch =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/; return 'sid' if $found; # 20010703.llc: accept astral identifiers (genetic domains) $found = $srch =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/; return 'sid' if $found; # NOTE: this shadows pdb id (like 1914), use search.cgi?pdb=1914 $found = (($srch =~ /^\d+$/) and (defined $sunid{$srch})); return 'sunid' if $found; $found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $pdbpx{$srch})); return 'pdb' if $found; $found = ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/); return 'sccs' if $found; $found = ($srch =~ /^\d+(\.\d+){3}$/); return 'ec' if $found; # 20020506.llc: history return 'newfa' if ($srch =~ /newfa/); return 'newsf' if ($srch =~ /newsf/); return 'newcf' if ($srch =~ /newcf/); return 'newcl' if ($srch =~ /newcl/); # 20030501.llc # obsolete entries history my %obshist; tie %obshist, 'CDB_File', "hist.obs.cdb" || &CgiDie ("Couldn't open hist.obs.cdb: $!\n"); $found = (defined $obshist{$srch}); $found = ($found and ($srch =~ /^\d+$/)); return 'sunid' if $found; $found = ($found and ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/)); return 'sccs' if $found; tie %obs, 'CDB_File', "pdb.sprsde.cdb" || &CgiDie ("Couldn't open pdb.sprsde.cdb $!\n"); $found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $obs{$srch})); return 'pdb' if $found; return 'key'; } sub ProcDir # 20030513.llc modified to keep parseable files at MRC in "parse" dir { my ($dir) = @_; my ($fn); $dir =~ tr/A-Z/a-z/; if (defined ($fn = $Dirs{$dir})) { $fn .= "_$scopver" unless $fn =~ /obs.txt/; print "Content-type: text/plain\n\n"; &PrintFile($fn); } else { &CgiError("Parseable files available at http://scop.mrc-lmb.cam.ac.uk/scop/parse"); # &CgiError("Directory type $dir not supported"); } exit; } # Doesn't deal with missing leading 0's. # Doesn't do any error checking at all! sub ProcSid # 20000911.llc modified to deal with px # 20020122.llc: history { my ($sids) = @_; my (@sids, $sid, @sunids, %sid, %obshist); $sids =~ s/\s+//g; $sids = lc $sids; @sids = split(',',$sids); # verify sids tie %sid, 'CDB_File', "pxidx.cdb" || &CgiDie ("Couldn't open pxidx.cdb: $!\n"); # obsolete entries history tie %obshist, 'CDB_File', "hist.obs.cdb" || &CgiDie ("Couldn't open hist.obs.cdb: $!\n"); foreach $sid (@sids) { if (defined $sid{$sid}) { &CgiError("$sid gets sunid $sid{$sid}\n") if $debug; push (@sunids, $sid{$sid}); } else { if (defined $obshist{$sid}) { $MatchMsg .= $obshist{$sid}; } } } untie %sid; return @sunids; } #sub ProcEc # 20000909.llc modified to deal with px # 20031210.llc obsolete starting with scop 1.65 #{ # my ($ec) = @_; # my (@ecs, @pdbs, @pxs); # $ec =~ s/\s+//g; # @ecs = split(/,/,$ec); # @pdbs = &EcToPdb(@ecs); # @pxs = &PdbToPx(@pdbs); # return (@pxs); #} sub ProcEc { my (@ecs) = @_; my ($ec, %ecpx, @pxs); tie %ecpx, 'CDB_File', "ec.px.cdb" || &CgiDie ("Couldn't open ec.px.cdb: $!\n"); foreach $ec (@ecs) { push (@pxs, split (/,/,$ecpx{$ec})); } untie %ecpx; return (@pxs); } sub ProcSccs # 20020122.llc: history { my ($sccs) = @_; my (@sccs, %sccssunid, @sunids, @matches, %obshist); $sccs =~ s/\s+//g; @sccs = split(/,/,$sccs); tie %sccssunid, 'CDB_File', "sccs.sunid.cdb" || &CgiDie ("Couldn't open sccs.sunid.cdb: $!\n"); # obsolete entries history tie %obshist, 'CDB_File', "hist.obs.cdb" || &CgiDie ("Couldn't open hist.obs.cdb: $!\n"); foreach $sccs (@sccs) { if (defined $sccssunid{$sccs}) { &CgiError("$sccs gets sunid $sccssunid{$sccs}\n") if $debug; push (@sunids, $sccssunid{$sccs}); } else { if (defined $obshist{$sccs}) { $MatchMsg .= $obshist{$sccs}; } } } untie %sccssunid; untie %obshist; return (@sunids); } sub ProcSunid # 20020122.llc history { my ($sunids) = @_; my (@sunids, @sunid0, %sunid, %obshist, $sunid); $sunids =~ s/\s+//g; @sunids = split(',',$sunids); # verify sunids tie %sunid, 'CDB_File', "sunid.pge.cdb" || &CgiDie ("Couldn't open sunid.pge.cdb: $!\n"); # obsolete entries history tie %obshist, 'CDB_File', "hist.obs.cdb" || &CgiDie ("Couldn't open hist.obs.cdb: $!\n"); @sunid0 = (); foreach $sunid (@sunids) { if (defined $sunid{$sunid}) { push (@sunid0, $sunid); } else { if (defined $obshist{$sunid}) { $MatchMsg .= $obshist{$sunid}; } } } untie %sunid; untie %obshist; return @sunid0; } sub ProcPdb # 20000909.llc modified to use px instead of sid { my ($pdb) = @_; my (@pdbs,@pxs); $pdb =~ tr/A-Z/a-z/; $pdb =~ s/\s+//g; @pdbs = split(/,/,$pdb); @pxs = &PdbToPx(@pdbs); return (@pxs); } sub ProcKey { my ($key, %in) = @_; my ($mkey, $match, $line, $file, @files); local (*INDEX); $key =~ tr/A-Z/a-z/; #TODO# fix this, it causes errors when searching with "[52630 for example DONE? $key =~ s/\s*$//; $key =~ s/\[//; $key =~ s/\]//; $mkey = $key; $mkey .= '\s' unless ($key =~ s/\+$//); open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n"); &look(*INDEX,$key); while (($line = <INDEX>) =~ /^$mkey/) { ($match, $file) = split (/\s+/,$line); push (@files,"$file"); } close (INDEX) || &CgiDie ("Couldn't close index: $!\n"); return @files; } sub ProcNew { my ($key) = @_; my $key0 = "$key-$scopver"; my %s; $s{newfa} = 'new families'; $s{newsf} = 'new superfamilies'; $s{newcf} = 'new folds'; $s{newcl} = 'new classes'; my %new; my ($new, $sunid, @sunids); open (fd0, "new-clfa.txt") || &CgiDie ("Couldn't open new-clfa.txt"); while (<fd0>) { chomp; ($new, $sunid) = split (/ /,$_,2); $sunid = -1 if not defined ($sunid); $new{$new} = $sunid; } close fd0; if ($new{"$key0"} == -1) { $MatchMsg .= "no $s{$key} in $scopver"; } elsif (not defined $new{"$key0"}) { $MatchMsg .= "no information available about $s{$key} in $scopver"; } else { @sunids = split (/ /,$new{$key0}); # &CgiDie ("$key $new{$key}","@sunids"); } return (@sunids); } # display output sub ShowResults # 20020514.llc modified to sort according to master file # (as reflected by pcn) { my ($srch, $key, $loc, @match) = @_; my ($name, $title, $file, $file0, @s, $i, $match); my $pad = 4; # padding length for pcn if (@match == 0) { &ShowNoMatch($srch, $key); } elsif ((@match == 1) && (!defined $MatchMsg)) { &CgiError("Match = 1", "@match") if $debug; ($name,$title,$file) = split(/\t/,$match[0]); $file =~ s/\s*//g; print "Location: $loc" , "data/$file\n\n"; } else { print &PrintHeader; print "<head><title>SCOP: Search Results for $key</title></head><body>"; print &ScopHeader; &CgiError ("Match != 1", "@match") if $debug; print "<h1>Search Results for \"$key\" [scop $scopver]</h1>" ; my %h = (); foreach $match (@match) { ($name,$title,$file) = split(/\t/,$match); $file =~ s/\s*//g; # format $file0 for easy sorting # NOTE: sort will be based on master file order (reflected by pcn) # and not on sccs, even if it appears in the title (sccs ARE NOT ordered) @s = split (/\./,$file); for ($i=0; $i<=$#s; $i++) { $s[$i] = ' ' x (4 - length $s[$i]) . $s[$i]; } $file0 = join (".", @s); $h{$file0}{file} = $file; $h{$file0}{title} = $title; } foreach (sort keys %h) { print "<a href=\"$loc" , "data/$h{$_}{file}\">$h{$_}{title}</a><br>\n"; } print "$MatchMsg" if defined $MatchMsg; print &ScopFooter; } } sub ShowNoMatch { my ($srch, $key) = @_; # &CgiError("Match = 0", "@match") if $debug; print &PrintHeader; print "<head><title>SCOP: Search Results: None</title></head><body>"; print &ScopHeader; print qq!<h1>No matches for "$key" (search type "$srch")</h1>\n!; print "$MatchMsg<br>\n" if defined $MatchMsg; print $NoMatchMsg if defined $NoMatchMsg; print qq!<h2>You may try <a href="search.cgi$scopverstr2">another search</a></h2>!; $key =~ tr/A-Z/a-z/; if (($key =~ /^[\ds][\da-z]{3}/) && ($MatchMsg !~ /replaced/)) { print "You can also "; print qq!<a href="http://www.ebi.ac.uk/msd-srv/ssm/cgi-bin/ssmserver?q=$key;a=SCOP;qp=70;tp=50;view=backbone;scopf=on" target=extlnk onFocus=true>search for structural matches</a>!; print " of PDB $key in SCOP using <a href=http://www.ebi.ac.uk/msd-srv/ssm target=extlnk onFocus=true>SSM</a>\n"; print "<br>(If nothing seems to happen, check behind this window)"; } print &ScopFooter; } sub ScopHeader { return <<END; <i>Structural Classification of Proteins</i><hr> <a href="index.html" onMouseOver="window.status='scop home'; return true"><img height=40 width=40 src="I/r.scop.gif" alt="home "></a><a href="http://scop.mrc-lmb.cam.ac.uk/scop/mail.cgi" onMouseOver="window.status='send mail to scop authors'; return true"><img height=40 width=40 src="I/r.mail.gif" alt="mail "></a><a href="help.html" onMouseOver="window.status='scop help and information'; return true"><img height=40 width=40 src="I/r.ques.gif" alt="help "></a> END } sub ScopFooter { return <<END; <hr> <address>Copyright © 1994-2009 The scop authors / scop\@mrc-lmb.cam.ac.uk</address> June 2009 END } sub PrintQueryForm { print &PrintHeader; print "<head><title>SCOP: Search Form</title></head><body>"; print &ScopHeader; print <<EOT; <h1>Search the scop database [scop $scopver]</h1> <form method=GET action="search.cgi"> <input type="hidden" name="ver" value="$scopver"> You can use this search engine to search the <font size=-1>SCOP</font></b> database using several access methods (including <i>sunid, sid, sccs,</i> <font size=-1>PDB</font></b> identifiers, and any word that appears in any of the <font size=-1>SCOP</font></b> pages) as well as more sophisticated options. Please read the <a href="http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html#search">release notes</a> for a detailed explanation and examples. This kind of search is internal to a <font size=-1>SCOP</font></b> release and therefore will always provide complete results. <p> By checking the <font size=-1>PDB</font></b> box, you can also search <font size=-1>SCOP</font></b> using the external MSDlite search engine for words that appear in several <i>text fields</i> in the corresponding <font size=-1>PDB</font></b> file (including header, author names, abstract, and MeSH terms from the primary citation). Please refer to <a href=http://www.ebi.ac.uk/msd-srv/msdlite/index.html><font size=-1>MSD</font>lite</a> for more details. <p> <input name="key"> <p> <input type=radio name="search_type" value="scop" checked> Search the <b><font size=-1>SCOP</font></b> database.<br> <input type=radio name="search_type" value="pdb"> Search the <b><font size=-1>PDB</font></b> database using <a href=http://www.ebi.ac.uk/msd-srv/msdlite/index.html><font size=-1>MSD</font>lite</a>.<br> <input type=submit value="Retrieve information"><br> <input type="reset" value="Clear"> the search form. </p> </form> EOT print &ScopFooter; } sub PrintFile { my ($fn) = @_; my ($buf); local (*FILE); open(FILE, $fn) || &CgiError ("Couldn't open file $fn: $!"); while (read(FILE, $buf, 16384)) { print $buf; } close(FILE); } sub SetLoc { my ($loc); if (defined $in{'loc'}) { $loc = "$in{'loc'}"; $loc .= '/' unless $loc =~ /\/$/; } else { my ($path) = $ENV{"SCRIPT_NAME"} =~ /(.*)\//; my $port = $ENV{'SERVER_PORT'} == 80 ? "" : ":" . $ENV{'SERVER_PORT'}; $loc = "http://" . $ENV{"SERVER_NAME"} . $port . $path . "/"; } return $loc; } # Translate sunids to pages sub SunidToPage { my (@sunids) = @_; my ($sunid,@match,%sunid,%pgetit,$pge,$title,%sunid2sccs); tie %sunid, 'CDB_File', "sunid.pge.cdb" || &CgiDie ("Couldn't open sunid.pge.cdb: $!\n"); tie %pgetit, 'CDB_File', "pge.title.cdb" || &CgiDie ("Couldn't open pge.title.cdb: $!\n"); tie %sunid2sccs, 'CDB_File', "sunid.sccs.cdb" || &CgiDie ("Couldn't open sunid.sccs.cdb: $!\n"); foreach $sunid (@sunids) { $pge = $sunid{$sunid}; $title = $pgetit{$pge} if $pge; if (defined $sunid2sccs{$sunid}) { $title .= " [$sunid2sccs{$sunid}]"; } push (@match, $sunid."\t".$title."\t".$pge) if $pge; &CgiError ("Dealing with sunid :$sunid: :$title: :$pge:" ) if $debug; } untie %sunid; untie %pgetit; untie %sunid2sccs; return @match; } # Translate sids to pages # Problems: doesn't munge whitespace, missing 0s. : sub SidToPage # 20000909.llc obsolete (see SunidToPage) { my (@sids) = @_; my ($sid,@match,%sid,%sidtit); tie %sid, 'CDB_File', "sid.scop.cdb" || &CgiDie ("Couldn't open sid.scop.cdb: $!\n"); tie %sidtit, 'CDB_File', "sid.title.scop.cdb" || &CgiDie ("Couldn't open sid.title.scop.cdb: $!\n"); foreach $sid (@sids) { push (@match, $sid."\t".$sidtit{$sid}."\t".$sid{$sid}) if $sid{$sid}; &CgiError ("Dealing with sid :$sid: :$sidtit{$sid}: :$sid{$sid}:" ) if $debug; } return @match; } sub DispLev { my (@sunids) = @_; my ($lev, %px2scls, @sunid0, $sunid, $newsunid); $lev = $in{'lev'} || $in{'tlev'}; $lev =~ tr/A-Z/a-z/; $lev =~ s/\s//g; # for internal consistence (scopm), 'pr' is called 'dm' # for users is 'pr' # indices are based on 'dm', therefore: $lev =~ s/pr/dm/; if (!defined $lev{$lev}) { &CgiDie ("Invalid level request (lev)",&PrintVariables(%in)); } tie %px2scls, 'CDB_File', "px.scls.cdb" || &CgiDie ("Couldn't open px.scls.cdb: $!\n"); foreach $sunid (@sunids) { ($newsunid) = ($px2scls{$sunid} =~ /$lev=(\d+)/); push (@sunid0, $newsunid); } untie %px2scls; return (@sunid0); } # convert to appropriate level sub SetDispLev # 20000909.llc obsolete (see DispLev) { my (@match) = @_; my ($lev,$name,$title,$file,$dots,$i,@file,%sidtit); # 20000406.llc modified to cope with: # WARNING WARNING WARNING WARNING: This will only work when: # 1) Parameter is EXACTLY correct and # 2) The level that would have been returned is further down in the # tree than the level being requested with the 'key' or 'pdb' # parameter. # To be fixed: # BUG BUG BUG: The '$title' field contains text of level that will be # returned. This text is not modified to reflect the level # which is produced by this routine $lev = $in{'lev'}; $lev =~ tr/A-Z/a-z/; $lev =~ s/\s//g; if (!defined $lev{$lev}) { &CgiDie ("Invalid level request (lev)",&PrintVariables(%in)); } tie %sidtit, 'CDB_File', "sid.title.scop.cdb" || &CgiDie ("Couldn't open sid.title.scop.cdb: $!\n"); $dots = $lev{$lev} + 1; for $i (0 .. $#match) { ($name,$title,$file) = split(/\t/,$match[$i]); @file = split(/\./, $file); $file = join ('.', @file[0 .. $dots], 'html'); $title = $sidtit{$name}; $match[$i] = join ("\t", $name,$title,$file); } untie %sidtit; return @match; } sub RemoveDups # 20020514.llc modified to respect original order (no sorting) { my(@match) = @_; my($name,$title,$file,%match,$key,$match,@match0); # remove duplicate pages (generated from different structures on one page) foreach $match (@match) { ($name,$title,$file) = split(/\t/,$match); if (not defined $match{$file}) { push (@match0, "\t$title\t$file\t"); } $match{$file}=$title; } @match = @match0; # @match = (); # foreach $key (keys(%match)) { # push(@match,"\t$match{$key}\t$key\t"); # } return @match; } #sub EcToPdb # 20031210.llc obsolete starting with scop 1.65 #{ # my (@ecs) = @_; # my ($ec, %ecpdb,@pdbs); # # tie %ecpdb, 'CDB_File', "ec.pdb.cdb" || # &CgiDie ("Couldn't open ec.pdb.cdb: $!\n"); # # foreach $ec (@ecs) { # push (@pdbs, split ("\0",$ecpdb{$ec})); # } # # untie %ecpdb; # # return (@pdbs); #} sub PdbToPx { my (@pdbs) = @_; my (%pdbpx, $pdb, @pxs, @matches); my (%obs, $newid, $date, $lev); tie %pdbpx, 'CDB_File', "pdb.px.cdb" || &CgiDie ("Couldn't open pdb.px.cdb: $!\n"); tie %obs, 'CDB_File', "pdb.sprsde.cdb" || &CgiDie ("Couldn't open pdb.sprsde.cdb $!\n"); foreach $pdb (@pdbs) { if (defined $pdbpx{$pdb}) { push (@pxs, split (/,/,$pdbpx{$pdb})); } else { if ($newid = $obs{$pdb}) { $date = $obs{"$pdb-date"}; $lev =''; if (defined $in{'lev'}) { $lev = '&lev=' . $in{'lev'}; } $MatchMsg .= "Entry $pdb was replaced by entry " . qq!<a href="search.cgi?pdb=$newid$scopverstr$lev">$newid</a> on $date.</p>\n!; } } } untie %pdbpx; untie %obs; return @pxs; } sub PdbToSid # 20000909.llc obsolete (see PdbToPx) { my (@pdbs) = @_; my (%db,$pdb,@sids,@matches,$line,$code); local (*PDBIDX); open (PDBIDX, "index.pdb.scop") or &CgiDie("Couldn't open index.pdb.scop: $!\n"); foreach $pdb (@pdbs) { &look (*PDBIDX, $pdb); $line = <PDBIDX>; ($code, @matches) = split (/[\s\0]+/, $line); &CgiError("$pdb gets sid @matches\n") if $debug; push (@sids,@matches) if $code eq $pdb; } close (PDBIDX); return @sids; } sub SearchScop # 20000912.llc modified to deal with new index.scop { my ($mkey, $key) = @_; my (@files, $line, $match, $file, %pgetit); local (*INDEX); open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n"); &look(*INDEX,$key); while (($line = <INDEX>) =~ /^$mkey/) { ($match, $file) = split (/\s+/,$line); push (@files,"$file"); } close (INDEX) || &CgiDie ("Couldn't close index: $!\n"); return (@files); } sub ProcKeys { my ($keys, %in) = @_; my (%pgetit, @files, $file, %files, @words, $word, @match, $s); my $ex = " <p>For keyword search, enter one or more [+|-]keywords.</p> Examples: <ul> <li> <tt><b>immunoglobulin</b></tt> returns a (list of) <b><font size=-1>SCOP</font></b> page(s) referring to <tt>immunoglobulin</tt> <li> <tt><b>immun+</b></tt> returns a (list of) <b><font size=-1>SCOP</font></b> page(s) containing all completions of <tt>immun</tt> <li> <tt><b>immunoglobulin +variable</b></tt> returns a (list of) <b><font size=-1>SCOP</font></b> page(s) in which both <tt>immunoglobulin</tt> and <tt>variable</tt> appear <li> <tt><b>immunoglobulin -variable</b></tt> returns a (list of) <b><font size=-1>SCOP</font></b> page(s) corresponding to <tt>immunoglobulin</tt> but not containing <tt>variable</tt></p> </ul>"; local (*INDEX); tie %pgetit, 'CDB_File', "pge.title.cdb" || &CgiDie ("Couldn't open pge.title.cdb: $!\n"); $_ = $keys; s/\s+/ /g; s/^ //; s/ $//; @words = split (/ /,lc($_)); @files = &ProcKey(shift @words, %in); %files = (); foreach (@files) { $files{$_}++; } foreach (@words) { $s = substr($_,0,1); s/^\+//; s/^\-//; $word = $_; if ($s eq '-') { @files = &ProcKey($word, %in); foreach (@files) { delete $files{$_} if defined $files{$_}; } } elsif ($s eq '+') { @files = &ProcKey($word, %in); @files = grep (defined $files{$_}, @files); %files = (); foreach (@files) { $files{$_}++; } } else { &CgiDie ("Invalid request: use '+' or '-' prefix for multiple word search.\n", &PrintVariables(%in),$ex); } } @match = (); foreach $file (keys %files) { push (@match,"-1\t$pgetit{$file}\t$file"); } return @match; }