#!/usr/bin/perl -Tw
# Copyright (C) 1996 Steven E. Brenner  
# $Header: /ga/proj/scop/dev/RCS/search_cdb.cgi,v 1.2 2000/04/09 18:53:09 loredana Exp loredana $
#
# Modified 1997 Bartlett G Ailey, additional functions at the bottom of the file.
# 20000409.llc doesn't work
#
# 20000909.llc the content of QUERY_STRING is truncated at the first blank
# "search.cgi?pdb=1mbd, 2cro" behaves as "search.cgi?pdb=1mbd," (cgi bug?)
#
# 20000915.llc rewritten to use new stable identifiers' based indices, 
# fix bugs etc.
#
# 20010405.llc &srch modified to copy with d1g2c.a
#	       sunid, sccs enabled
# 20010408.llc accept astral identifiers
#
# TODO: check sccs, sunid works properly, add comments for new indices
# 
# Needs:
# All indicies in subdirectory 'search':
#
# TODO: update list of indices
#
# index.ec.pdb  ec->pdb
# index.pdb.scop  entry->sid
# index.scop  word->sid
# sid.scop sid->page
# sid.title.scop sid->title
# pdb.sprsde  obsolete pdb id ->replacement (pdbsprsde)
#
# Directories
#
# dir.dom.scop.txt  - domains in scop		# obsolete
# dir.lin.scop.txt  - classification 		# obsolete
# dir.cla.scop.txt  - classification 		# replace dir.dom
# dir.des.scop.txt  - classification 		# and dir.lin
# dir.hie.scop.txt  - hierarchy (sunid) 	# scop hierarchy
# dir.pdb.scop.txt  - pdb entries in scop	
# dir.ec.scop.txt   - ec codes in scop

use strict;
require "./cgi-lib.pl";
use Search::Dict;
use CDB_File;

my ($Date, $End, %lev, $debug, %SrchFunc, %Dirs, $MatchMsg, $NoMatchMsg, %in, $nvar,$scopver,$scopverstr,$scopverstr2);
my (%sunid, %obshist, %pdbpx, %obs);

$| = 1;

$Date = "\$Date";
$End = "\$";
%lev = ( 'rt', 0, 'cl', '1', 'cf', '2', 'sf', '3', 'fa', '4', 'dm', '5', 'sp', '5', 'px', '6');

$debug = 0;

# Each search func accepts a $key and returns a list of @sids
# or never returns at all
%SrchFunc = 
  ('dir', \&ProcDir,
   'sid', \&ProcSid,
   'key', \&ProcKeys,
#   'key', \&ProcKey,		# replaced by &ProcKeys
   'pdb', \&ProcPdb,
   'ec' , \&ProcEc,
   'sccs' ,\&ProcSccs,		
   'sunid' ,\&ProcSunid,
   'newfa',\&ProcNew,	 
   'newsf',\&ProcNew,	 
   'newcf',\&ProcNew,	 
   'newcl',\&ProcNew	 
   );

# 20030513.llc modified to keep parseable files in parse
# 20031210.llc dir.pdb.scop.txt and dir.ec.scop.txt not distributed anymore
%Dirs = 
  ('pdb', '../parse/dir.obs.txt',
   'ec' , '../parse/dir.obs.txt',
   'dom', '../parse/dir.obs.txt',
   'lin', '../parse/dir.obs.txt',
   'cla', '../parse/dir.cla.scop.txt',
   'des', '../parse/dir.des.scop.txt',
   'hie', '../parse/dir.hie.scop.txt',
   'com', '../parse/dir.com.scop.txt'
   );


MAIN:
{
  open (fd0,"search/scopver.txt") ||         
    &CgiDie ("scopver.txt not found.");
  $scopver = <fd0>;
  chomp $scopver;
  close fd0;

  if ($nvar=&ReadParse(\%in)) {
    if(defined($in{'ver'})){
      $scopver=$in{'ver'};
    } 
    $scopverstr=";ver=$scopver";
    $scopverstr2="?ver=$scopver";
#&CgiDie ("$scopver $scopverstr $scopverstr2");

    if ($nvar==1 && $in{'ver'}) {
      &PrintQueryForm;
    } else {
      &Process(%in);
    }
  } else {
      &PrintQueryForm;
  }
}          

sub Process 
# 20000909.llc modified to deal with sunid instead of sid
# try to guess the kind of search in case type of search = 'key'  
{
  my (%in) = @_;
  
  my    ($loc,       # location where to return pages
         @sunids,    # scop unique identifiers to return
         $srchtype,  # search type being tested
         $srch,      # type of search to be done
         $key,       # parameter to the search
         $func,      # function to call to process search
         $search_type,    # can be either scop or pdb
         @match,
         );

  chdir('search') || &CgiError("Couldn't change to search directory: $!\n");
  $loc = &SetLoc;


  foreach $srchtype (keys %SrchFunc) {

    if (defined $in{$srchtype}) {
      if (defined $key) {
        &CgiDie ("Illegal Parameters", 
                 "Only one search type may be specified at a time");
      } else {

        $srch = $srchtype;
        $in{$srch} =~ s/^\s+//; $in{$srch} =~ s/\s+$//;

	# 20010408.llc: accept astral identifiers (aeid)
        if ($in{$srch} =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/) 
          { $in{$srch} = "d$1"; } 
	# 20010703.llc: accept astral identifiers (genetic domains)
        if ($in{$srch} =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/) 
          { $in{$srch} = "d$1"; } 

        $key = $in{$srch};

	$search_type = $in{'search_type'};
#        &CgiDie("|$search_type| |$in{$srch}|");

        if ($search_type eq 'pdb') {
          print "Location: http://www.ebi.ac.uk/msd-srv/msdlite/apps/query?textSearch=$in{$srch}&show=scopId\n\n";
	  return 0;
        }

        # try to guess the search type
	if ($srch eq 'key') { $srch = &srch ($in{$srch}); }

	# 20010706.llc: rename 'tlev' if necessary
	$in{'lev'} = $in{'tlev'} if defined $in{'tlev'};

	if ((defined $in{'lev'}) && ($srch eq 'key')) { 
          &CgiDie ("Invalid request: lev option not defined for keyword search.\n",
             &PrintVariables(%in));
        }

        $func = $SrchFunc{$srch};
        ($srch eq 'key') ? (@match = &$func($key, %in))
			 : (@sunids = &$func($key, %in));

      }
    }
  }

  if (!defined $key) {
    &CgiDie ("Invalid Request",&PrintVariables(%in));
  }

  if (defined $in{'lev'}) { @sunids = &DispLev(@sunids); }

  @match = &SunidToPage(@sunids) if ($srch ne 'key');
  @match = &RemoveDups(@match);
  &ShowResults($srch, $key, $loc, @match);   

}

# try to guess the search type, based on the first keyword
sub srch
{
  my ($srch) = @_;
  $_ = $srch; s/\s+//; $_ = lc $_;
  ($_) = split (/[,]/,$srch);
  s/\s+//; $_ = lc $_; 
  $srch = $_; my $found = 0; 

  tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
    &CgiDie ("Couldn't open sunid.pge.cdb: $!\n");

  tie %pdbpx, 'CDB_File', "pdb.px.cdb" ||
    &CgiDie ("Couldn't open pdb.px.cdb: $!\n");
  
# 20010405.llc modified to account for d1g2c.a:
  $found = $srch =~ /^d[\ds][\da-z]{3}[\w\.][0-9_a-z]$/;
  return 'sid' if $found;

# 20010408.llc: accept astral identifiers (aeid)
  $found = $srch =~ /^e([\ds][\da-z]{3}\.[0-9_a-z])[0-9a-z]/; 
  return 'sid' if $found; 

# 20010703.llc: accept astral identifiers (genetic domains)
  $found = $srch =~ /^g([\ds][\da-z]{3}[\w\.][0-9_a-z])$/; 
  return 'sid' if $found; 

# NOTE: this shadows pdb id (like 1914), use search.cgi?pdb=1914
  $found = (($srch =~ /^\d+$/) and (defined $sunid{$srch}));  
  return 'sunid' if $found;

  $found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $pdbpx{$srch}));
  return 'pdb' if $found;

  $found = ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/);
  return 'sccs' if $found;

  $found = ($srch =~ /^\d+(\.\d+){3}$/);
  return 'ec' if $found;

# 20020506.llc: history
  return 'newfa' if ($srch =~ /newfa/);
  return 'newsf' if ($srch =~ /newsf/);
  return 'newcf' if ($srch =~ /newcf/);
  return 'newcl' if ($srch =~ /newcl/);

# 20030501.llc
# obsolete entries history
  my %obshist;

  tie %obshist, 'CDB_File', "hist.obs.cdb" ||
    &CgiDie ("Couldn't open hist.obs.cdb: $!\n");
  $found = (defined $obshist{$srch});

  $found = ($found and ($srch =~ /^\d+$/));  
  return 'sunid' if $found;

  $found = ($found and ($srch =~ /^[a-z]{1,2}(\.\d+){0,3}$/));
  return 'sccs' if $found;

  tie %obs, 'CDB_File', "pdb.sprsde.cdb" ||
    &CgiDie ("Couldn't open pdb.sprsde.cdb $!\n");

  $found = (($srch =~ /^[\ds][\da-z]{3}$/) and (defined $obs{$srch}));
  return 'pdb' if $found;

  return 'key';
}

sub ProcDir
# 20030513.llc modified to keep parseable files at MRC in "parse" dir
{
  my ($dir) = @_;
  my ($fn);

  $dir =~ tr/A-Z/a-z/; 
  if (defined ($fn = $Dirs{$dir})) {
    $fn .= "_$scopver" unless $fn =~ /obs.txt/;
    print "Content-type: text/plain\n\n";
    &PrintFile($fn);
  } else {
   &CgiError("Parseable files available at http://scop.mrc-lmb.cam.ac.uk/scop/parse");
#    &CgiError("Directory type $dir not supported");
  }
   
  exit;
}

# Doesn't deal with missing leading 0's.
# Doesn't do any error checking at all!
sub ProcSid
# 20000911.llc modified to deal with px
# 20020122.llc: history
{
  my ($sids) = @_;
  my (@sids, $sid, @sunids, %sid, %obshist);
  $sids =~ s/\s+//g;
  $sids = lc $sids;
  @sids = split(',',$sids);

  # verify sids
  tie %sid, 'CDB_File', "pxidx.cdb" ||
    &CgiDie ("Couldn't open pxidx.cdb: $!\n");

  # obsolete entries history
  tie %obshist, 'CDB_File', "hist.obs.cdb" ||
    &CgiDie ("Couldn't open hist.obs.cdb: $!\n");

  foreach $sid (@sids) {
    if (defined $sid{$sid}) {
      &CgiError("$sid gets sunid $sid{$sid}\n") if $debug;
      push (@sunids, $sid{$sid});
    } else {
      if (defined $obshist{$sid}) {
        $MatchMsg .= $obshist{$sid};
      }
    }
  }
  untie %sid;
  return @sunids;
}


#sub ProcEc
# 20000909.llc modified to deal with px
# 20031210.llc obsolete starting with scop 1.65
#{
#  my ($ec) = @_;
#  my (@ecs, @pdbs, @pxs);
#  $ec =~ s/\s+//g;
#  @ecs = split(/,/,$ec);
#  @pdbs = &EcToPdb(@ecs);
#  @pxs = &PdbToPx(@pdbs);
#  return (@pxs);
#}
sub ProcEc
{
  my (@ecs) = @_;
  my ($ec, %ecpx, @pxs);

  tie %ecpx, 'CDB_File', "ec.px.cdb" ||
      &CgiDie ("Couldn't open ec.px.cdb: $!\n");

  foreach $ec (@ecs) {
    push (@pxs, split (/,/,$ecpx{$ec}));
  }

  untie %ecpx;

  return (@pxs);
}

sub ProcSccs
# 20020122.llc: history
{
  my ($sccs) = @_;
  my (@sccs, %sccssunid, @sunids, @matches, %obshist);
  $sccs =~ s/\s+//g;
  @sccs = split(/,/,$sccs);

  tie %sccssunid, 'CDB_File', "sccs.sunid.cdb" ||
      &CgiDie ("Couldn't open sccs.sunid.cdb: $!\n");

  # obsolete entries history
  tie %obshist, 'CDB_File', "hist.obs.cdb" ||
    &CgiDie ("Couldn't open hist.obs.cdb: $!\n");
 
  foreach $sccs (@sccs) {
    if (defined $sccssunid{$sccs}) {
      &CgiError("$sccs gets sunid $sccssunid{$sccs}\n") if $debug;
      push (@sunids, $sccssunid{$sccs});
    } else {
      if (defined $obshist{$sccs}) {
        $MatchMsg .= $obshist{$sccs};
      }
    }
  }

  untie %sccssunid; untie %obshist;

  return (@sunids);
}

sub ProcSunid
# 20020122.llc history
{
  my ($sunids) = @_;
  my (@sunids, @sunid0, %sunid, %obshist, $sunid);
  $sunids =~ s/\s+//g;
  @sunids = split(',',$sunids);

  # verify sunids
  tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
    &CgiDie ("Couldn't open sunid.pge.cdb: $!\n");
  # obsolete entries history
  tie %obshist, 'CDB_File', "hist.obs.cdb" ||
    &CgiDie ("Couldn't open hist.obs.cdb: $!\n");

  @sunid0 = ();
  foreach $sunid (@sunids) {
    if (defined $sunid{$sunid}) {
      push (@sunid0, $sunid);
    } else {
      if (defined $obshist{$sunid}) {
        $MatchMsg .= $obshist{$sunid};
      }
    }
  }

  untie %sunid; untie %obshist;

  return @sunid0;
}

sub ProcPdb
# 20000909.llc modified to use px instead of sid
{
  my ($pdb) = @_;
  my (@pdbs,@pxs);

  $pdb =~ tr/A-Z/a-z/;
  $pdb =~ s/\s+//g;
  @pdbs = split(/,/,$pdb);
  @pxs = &PdbToPx(@pdbs);

  return (@pxs);
}

sub ProcKey
{
  my ($key, %in) = @_;
  my ($mkey, $match, $line, $file, @files);
  local (*INDEX);

  $key =~ tr/A-Z/a-z/;
#TODO# fix this, it causes errors when searching with "[52630 for example DONE?
  $key =~ s/\s*$//; $key =~ s/\[//; $key =~ s/\]//;
  $mkey = $key;
  $mkey .= '\s' unless  ($key =~ s/\+$//);
  open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n");

  &look(*INDEX,$key);

  while (($line = <INDEX>) =~ /^$mkey/) {
    ($match, $file) = split (/\s+/,$line);
    push (@files,"$file");
  }
  close (INDEX) || &CgiDie ("Couldn't close index: $!\n");

  return @files;
}

sub ProcNew
{
  my ($key) = @_;
  my $key0 = "$key-$scopver";

  my %s;
  $s{newfa} = 'new families';
  $s{newsf} = 'new superfamilies';
  $s{newcf} = 'new folds';
  $s{newcl} = 'new classes';

  my %new; my ($new, $sunid, @sunids);
  open (fd0, "new-clfa.txt") || &CgiDie ("Couldn't open new-clfa.txt");
  while (<fd0>) {
    chomp;
    ($new, $sunid) = split (/ /,$_,2);
    $sunid = -1 if not defined ($sunid);
    $new{$new} = $sunid;
  }
  close fd0;

  if ($new{"$key0"} == -1)  {
     $MatchMsg .= "no $s{$key} in $scopver";
  } elsif (not defined $new{"$key0"}) {
      $MatchMsg .= "no information available about $s{$key} in $scopver";
  } else {
    @sunids = split (/ /,$new{$key0});
# &CgiDie ("$key $new{$key}","@sunids");
  }
  return (@sunids);
}

# display output
sub ShowResults
# 20020514.llc modified to sort according to master file
# (as reflected by pcn)
{
  my ($srch, $key, $loc, @match) = @_;
  my ($name, $title, $file, $file0, @s, $i, $match);
  my $pad = 4;	     # padding length for pcn

  if (@match == 0) {
    &ShowNoMatch($srch, $key);
  } elsif ((@match == 1) && (!defined $MatchMsg)) {
    &CgiError("Match = 1", "@match") if $debug;
    ($name,$title,$file) = split(/\t/,$match[0]);
    $file =~ s/\s*//g;
    print "Location: $loc" , "data/$file\n\n";

  } else {
    print &PrintHeader;
    print "<head><title>SCOP: Search Results for $key</title></head><body>";
    print &ScopHeader;
    &CgiError ("Match != 1", "@match") if $debug;
    print "<h1>Search Results for \"$key\" [scop $scopver]</h1>" ;
    my %h = ();
    foreach $match (@match) {
      ($name,$title,$file) = split(/\t/,$match);
      $file =~ s/\s*//g;
# format $file0 for easy sorting
# NOTE: sort will be based on master file order (reflected by pcn)
# and not on sccs, even if it appears in the title (sccs ARE NOT ordered)
      @s = split (/\./,$file);
      for ($i=0; $i<=$#s; $i++) {
        $s[$i] = ' ' x (4 - length $s[$i]) . $s[$i];
      }
      $file0 = join (".", @s);
      $h{$file0}{file} = $file;
      $h{$file0}{title} = $title;
    }
    
    foreach (sort keys %h) {
      print "<a href=\"$loc" , "data/$h{$_}{file}\">$h{$_}{title}</a><br>\n";
    }
    print "$MatchMsg" if defined $MatchMsg;
    print &ScopFooter;
  }
}

sub ShowNoMatch
{
  my ($srch, $key) = @_;
#  &CgiError("Match = 0", "@match") if $debug;
  print &PrintHeader;
  print "<head><title>SCOP: Search Results: None</title></head><body>";
  print &ScopHeader;
  print qq!<h1>No matches for "$key" (search type "$srch")</h1>\n!;
  print "$MatchMsg<br>\n" if defined $MatchMsg;
  print $NoMatchMsg if defined $NoMatchMsg;
  print qq!<h2>You may try <a href="search.cgi$scopverstr2">another search</a></h2>!;
  $key =~ tr/A-Z/a-z/;
  if (($key =~ /^[\ds][\da-z]{3}/) && ($MatchMsg !~ /replaced/)) {
    print "You can also ";
    print qq!<a href="http://www.ebi.ac.uk/msd-srv/ssm/cgi-bin/ssmserver?q=$key;a=SCOP;qp=70;tp=50;view=backbone;scopf=on" target=extlnk onFocus=true>search for structural matches</a>!;
    print " of PDB $key in SCOP using <a href=http://www.ebi.ac.uk/msd-srv/ssm target=extlnk onFocus=true>SSM</a>\n";
    print "<br>(If nothing seems to happen, check behind this window)"; 
  }
  print &ScopFooter;
}

sub ScopHeader
{
  return <<END;
<i>Structural Classification of Proteins</i><hr>
<a href="index.html" onMouseOver="window.status='scop home'; return true"><img
height=40 width=40 src="I/r.scop.gif" alt="home "></a><a href="http://scop.mrc-lmb.cam.ac.uk/scop/mail.cgi"
onMouseOver="window.status='send mail to scop authors'; return true"><img
height=40 width=40 src="I/r.mail.gif" alt="mail "></a><a href="help.html"
onMouseOver="window.status='scop help and information';
return true"><img height=40 width=40 src="I/r.ques.gif" alt="help "></a> 
END

}


sub ScopFooter
{
    return <<END;
<hr>
<address>Copyright  &#169; 1994-2009 The scop authors /
scop\@mrc-lmb.cam.ac.uk</address>
June 2009
END
}

sub PrintQueryForm {
  print &PrintHeader;
  print "<head><title>SCOP: Search Form</title></head><body>";
  print &ScopHeader;

  print <<EOT;

<h1>Search the scop database [scop $scopver]</h1>

<form method=GET action="search.cgi">
<input type="hidden" name="ver" value="$scopver">

You can use this search engine to search the 
<font size=-1>SCOP</font></b> database using several access methods (including 
<i>sunid,
sid, sccs,</i> <font size=-1>PDB</font></b> identifiers, and 
any word that appears in any of
the <font size=-1>SCOP</font></b> pages) as well as 
more sophisticated options. Please read the
<a href="http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html#search">release notes</a> for a detailed explanation and examples. This kind of search is internal to a
<font size=-1>SCOP</font></b> release and therefore will always provide complete
results.
<p>
By checking the <font size=-1>PDB</font></b> box, you can also search 
<font size=-1>SCOP</font></b> using the external MSDlite search engine 
for words that appear in several <i>text fields</i>
in the corresponding <font size=-1>PDB</font></b> file (including header, author
names, abstract, and MeSH terms from the primary citation). 
Please refer to <a href=http://www.ebi.ac.uk/msd-srv/msdlite/index.html><font size=-1>MSD</font>lite</a> for more details.

<p>
<input name="key">

<p>
<input type=radio name="search_type" value="scop" checked> Search the 
<b><font size=-1>SCOP</font></b> database.<br>
<input type=radio name="search_type" value="pdb"> Search the
<b><font size=-1>PDB</font></b> database using 
<a href=http://www.ebi.ac.uk/msd-srv/msdlite/index.html><font size=-1>MSD</font>lite</a>.<br>
 <input type=submit value="Retrieve information"><br>
 <input type="reset" value="Clear"> the search form.
</p>

</form>

EOT
  print &ScopFooter;

}

sub PrintFile {
  my ($fn) = @_;
  my ($buf);
  local (*FILE);

  open(FILE, $fn) || &CgiError ("Couldn't open file $fn: $!");
  while (read(FILE, $buf, 16384)) {
    print $buf;
  }
  close(FILE);
}
 

sub SetLoc
{
  my ($loc);

  if (defined $in{'loc'}) {
    $loc = "$in{'loc'}";
    $loc .= '/' unless $loc =~ /\/$/;
  } else {
    my ($path) = $ENV{"SCRIPT_NAME"} =~ /(.*)\//;
    my $port = $ENV{'SERVER_PORT'} == 80 ? "" : ":" . $ENV{'SERVER_PORT'};

    $loc = "http://" . $ENV{"SERVER_NAME"} . $port . $path . "/";
  }
  return $loc;
}


#  Translate sunids to pages
sub SunidToPage
{
  my (@sunids) = @_;
  my ($sunid,@match,%sunid,%pgetit,$pge,$title,%sunid2sccs);
  
  tie %sunid, 'CDB_File', "sunid.pge.cdb" ||
     &CgiDie ("Couldn't open sunid.pge.cdb: $!\n");

  tie %pgetit, 'CDB_File', "pge.title.cdb" || 
     &CgiDie ("Couldn't open pge.title.cdb: $!\n");

  tie %sunid2sccs, 'CDB_File', "sunid.sccs.cdb" || 
     &CgiDie ("Couldn't open sunid.sccs.cdb: $!\n");


  foreach $sunid (@sunids) {

    $pge = $sunid{$sunid}; 
    $title = $pgetit{$pge} if $pge; 
    if (defined $sunid2sccs{$sunid}) { $title .= " [$sunid2sccs{$sunid}]"; }

    push (@match, $sunid."\t".$title."\t".$pge) if $pge;

    &CgiError ("Dealing with sunid :$sunid: :$title: :$pge:" ) if
      $debug;
  }

  untie %sunid; untie %pgetit; untie %sunid2sccs;

  return @match;
}

#  Translate sids to pages
#  Problems: doesn't munge whitespace, missing 0s. :
sub SidToPage
# 20000909.llc obsolete (see SunidToPage)
{
  my (@sids) = @_;
  my ($sid,@match,%sid,%sidtit);
  
  tie %sid, 'CDB_File', "sid.scop.cdb" ||
     &CgiDie ("Couldn't open sid.scop.cdb: $!\n");
  tie %sidtit, 'CDB_File', "sid.title.scop.cdb" || 
     &CgiDie ("Couldn't open sid.title.scop.cdb: $!\n");
  foreach $sid (@sids) {
    push (@match, $sid."\t".$sidtit{$sid}."\t".$sid{$sid}) if $sid{$sid};
    &CgiError ("Dealing with sid :$sid: :$sidtit{$sid}: :$sid{$sid}:" ) if
      $debug;
  }

  return @match;
}

sub DispLev
{
  my (@sunids) = @_;
  my ($lev, %px2scls, @sunid0, $sunid, $newsunid);

  $lev = $in{'lev'} || $in{'tlev'};
  $lev =~ tr/A-Z/a-z/;
  $lev =~ s/\s//g;
  # for internal consistence (scopm), 'pr' is called 'dm'
  # for users is 'pr'
  # indices are based on 'dm', therefore:
  $lev =~ s/pr/dm/;

  if (!defined $lev{$lev}) { 
    &CgiDie ("Invalid level request (lev)",&PrintVariables(%in));
  }

  tie %px2scls, 'CDB_File', "px.scls.cdb" || 
     &CgiDie ("Couldn't open px.scls.cdb: $!\n");
  foreach $sunid (@sunids) {  
    ($newsunid) = ($px2scls{$sunid} =~ /$lev=(\d+)/);
    push (@sunid0, $newsunid); 
  }

  untie %px2scls;  

  return (@sunid0);
}


# convert to appropriate level
sub SetDispLev
# 20000909.llc obsolete (see DispLev)
{ 
  my (@match) = @_;
  my ($lev,$name,$title,$file,$dots,$i,@file,%sidtit);

# 20000406.llc modified to cope with:

# WARNING WARNING WARNING WARNING:  This will only work when:
#   1) Parameter is EXACTLY correct    and
#   2) The level that would have been returned is further down in the 
#      tree than the level being requested with the 'key' or 'pdb' 
#      parameter.  

# To be fixed:

# BUG BUG BUG: The '$title' field contains text of level that will be
#    returned.  This text is not modified to reflect the level
#    which is produced by this routine

  $lev = $in{'lev'};
  $lev =~ tr/A-Z/a-z/;
  $lev =~ s/\s//g;

  if (!defined $lev{$lev}) { 
    &CgiDie ("Invalid level request (lev)",&PrintVariables(%in));
  }

  tie %sidtit, 'CDB_File', "sid.title.scop.cdb" || 
     &CgiDie ("Couldn't open sid.title.scop.cdb: $!\n");

  $dots = $lev{$lev} + 1;
  for $i (0 .. $#match) {
    ($name,$title,$file) = split(/\t/,$match[$i]);
    @file = split(/\./, $file);
    $file = join ('.', @file[0 .. $dots], 'html');
    $title = $sidtit{$name}; 
    $match[$i] = join ("\t", $name,$title,$file);
  }

  untie %sidtit;

  return @match;
}

sub RemoveDups
# 20020514.llc modified to respect original order (no sorting)
{
  my(@match) = @_;
  my($name,$title,$file,%match,$key,$match,@match0);
# remove duplicate pages (generated from different structures on one page)
  foreach $match (@match) {
    ($name,$title,$file) = split(/\t/,$match);
    if (not defined $match{$file}) {
      push (@match0, "\t$title\t$file\t");
    }
    $match{$file}=$title;
  }
  @match = @match0;

#  @match = ();
#  foreach $key (keys(%match)) {
#    push(@match,"\t$match{$key}\t$key\t");
#  }

  return @match;
}


#sub EcToPdb
# 20031210.llc obsolete starting with scop 1.65
#{
#  my (@ecs) = @_;
#  my ($ec, %ecpdb,@pdbs);
#
#  tie %ecpdb, 'CDB_File', "ec.pdb.cdb" ||
#      &CgiDie ("Couldn't open ec.pdb.cdb: $!\n");
#
#  foreach $ec (@ecs) {
#    push (@pdbs, split ("\0",$ecpdb{$ec}));
#  }
#
#  untie %ecpdb;
#
#  return (@pdbs);
#}

sub PdbToPx 
{
  my (@pdbs) = @_;
  my (%pdbpx, $pdb, @pxs, @matches);
  my (%obs, $newid, $date, $lev);

  tie %pdbpx, 'CDB_File', "pdb.px.cdb" ||
    &CgiDie ("Couldn't open pdb.px.cdb: $!\n");
 
  tie %obs, 'CDB_File', "pdb.sprsde.cdb" ||
    &CgiDie ("Couldn't open pdb.sprsde.cdb $!\n");
  
  foreach $pdb (@pdbs) {
    if (defined $pdbpx{$pdb}) {
      push (@pxs, split (/,/,$pdbpx{$pdb}));
    } else {
      if ($newid = $obs{$pdb}) {
        $date = $obs{"$pdb-date"}; 
	$lev =''; if (defined $in{'lev'}) { $lev = '&lev=' . $in{'lev'}; }
        $MatchMsg .= "Entry $pdb was replaced by entry " .
          qq!<a href="search.cgi?pdb=$newid$scopverstr$lev">$newid</a> on $date.</p>\n!;
      }
    }
  }

  untie %pdbpx; untie %obs;

  return @pxs;
}

sub PdbToSid
# 20000909.llc obsolete (see PdbToPx)
{
  my (@pdbs) = @_;
  my (%db,$pdb,@sids,@matches,$line,$code);
  local (*PDBIDX);

  open (PDBIDX, "index.pdb.scop") or 
    &CgiDie("Couldn't open index.pdb.scop: $!\n");

  foreach $pdb (@pdbs) {
    &look (*PDBIDX, $pdb);
    $line = <PDBIDX>;
    ($code, @matches) = split (/[\s\0]+/, $line);
    &CgiError("$pdb gets sid @matches\n") if $debug;
    push (@sids,@matches) if $code eq $pdb;
  }
  close (PDBIDX);

  return @sids;
}

sub SearchScop
# 20000912.llc modified to deal with new index.scop
{
  my ($mkey, $key) = @_;
  my (@files, $line, $match, $file, %pgetit);
  local (*INDEX);

  open (INDEX,"index.scop") || &CgiDie ("Couldn't open index.scop: $!\n");

  &look(*INDEX,$key);

  while (($line = <INDEX>) =~ /^$mkey/) {
    ($match, $file) = split (/\s+/,$line);
    push (@files,"$file");
  }
  close (INDEX) || &CgiDie ("Couldn't close index: $!\n");

  return (@files);
}

sub ProcKeys { 

  my ($keys, %in) = @_;
  my (%pgetit, @files, $file, %files, @words, $word, @match, $s);
  my $ex = "
<p>For keyword search, enter one or more [+|-]keywords.</p> 
Examples:
<ul>
<li>
<tt><b>immunoglobulin</b></tt> returns a (list of) 
<b><font size=-1>SCOP</font></b> page(s) referring to <tt>immunoglobulin</tt>
<li>
<tt><b>immun+</b></tt> returns a (list of) 
<b><font size=-1>SCOP</font></b> page(s) containing all
completions of <tt>immun</tt>
<li>
<tt><b>immunoglobulin +variable</b></tt> returns a (list of) 
<b><font size=-1>SCOP</font></b> page(s) in which both 
 <tt>immunoglobulin</tt> and <tt>variable</tt> appear
<li>
<tt><b>immunoglobulin -variable</b></tt> returns a (list of) 
<b><font size=-1>SCOP</font></b> page(s) corresponding to 
 <tt>immunoglobulin</tt> but not containing <tt>variable</tt></p>
</ul>";


  local (*INDEX);

  tie %pgetit, 'CDB_File', "pge.title.cdb" || 
    &CgiDie ("Couldn't open pge.title.cdb: $!\n");

  $_ = $keys; s/\s+/ /g; s/^ //; s/ $//; 
  @words = split (/ /,lc($_));

  @files = &ProcKey(shift @words, %in);
  %files = (); foreach (@files) { $files{$_}++; }

  foreach (@words) {
    $s = substr($_,0,1); s/^\+//; s/^\-//; $word = $_;
    if ($s eq '-')  {  
      @files = &ProcKey($word, %in);
      foreach (@files) { delete $files{$_} if defined $files{$_}; }
    } elsif ($s eq '+') {
        @files = &ProcKey($word, %in);
        @files = grep (defined $files{$_}, @files);
        %files = (); foreach (@files) { $files{$_}++; }
    } else { &CgiDie 
             ("Invalid request: use '+' or '-' prefix for multiple word search.\n",
               &PrintVariables(%in),$ex); 
    } 
  }

  @match = ();
  foreach $file (keys %files) {
    push (@match,"-1\t$pgetit{$file}\t$file");
  } 

  return @match;
}