#!/bin/env icarus
# IARC TP53 Mutations Database (Rel. 14)
# Prevalence data set
# Author: Paolo Romano (paolo.romano@istge.it)
# Created: Dec 3rd, 2002
# Modified by Domenico Marra on 2003/07/24
# Modified by Paolo Romano on 2007/02/06
# Modified by Paolo Romano on 2008/04/08

# $fn: lookup table from the field labels that appear in the flat file
# (left of ':') to the short token names (right of ':') used in $rules.
# The `fields` production consults it through `$Wrt:$fn.$Ct`, and the
# indexing / HTML productions select tokens by these short names
# (e.g. `c:prev_id`).
# Entries that are commented out below are disabled; note that productions
# named stopog, prevalence and morgro still exist in $rules and refer to
# those token names.
$fn={
 Prevalence_id:prev_id
 Topography:topog
# Short_topo:stopog
 Topo_code:topo_c
 Morphology:morpho
 Morpho_code:morph_cod
 Population:popul
 Country:cnty
 Sample_analyzed:sanal
 Sample_mutated:sammu
 Comment:comment
 Ref_id:ref_id
 Cross_ref_id:cross
 Title:title
 Authors:authors
 Year:year
 Journal:journal
 Volume:volume
 Start_page:start
 End_page:end
 PubMed_entry:pub_e
 Ref_comment:refco
 Tissue_processing:tissue
 Start_material:start_material
 Material_sequenced:material_sequenced
 Prescreening:prescreening
 Exon2:exon2
 Exon3:exon3
 Exon4:exon4
 Exon5:exon5
 Exon6:exon6
 Exon7:exon7
 Exon8:exon8
 Exon9:exon9
 Exon10:exon10
 Exon11:exon11
# Prevalence:prevalence
# Morpho_group:morgro
}

$rules={
  # entry: reads the raw file text and delimits one database entry.
  # Leading lines that do not begin with the literal 'Prevalence_id' are
  # consumed without being written. An entry then starts at a 'Prevalence_id'
  # line and has every following line appended to it until the next line that
  # begins with 'Prevalence_id'.
  # `$entryFip=$Fip` saves $Fip at the start of the entry -- presumably the
  # file position used to locate the entry later; confirm against the Icarus
  # reference.
  entry:	~ {$In:[file:text] $Out pre $Skip:0}
		  ('Prevalence_id' {$Not} ln)*
		  ('Prevalence_id' {$entryFip=$Fip $Wrt} ln {$App}
		  ('Prevalence_id' {$Not} ln {$App})*)?
		~
# fields

  # fields: splits one `entry` into per-field tokens.
  # Alternative 1: a non-empty line starting with '/' is written as it stands.
  # Alternative 2: a leading `word` (the field label) is written under the
  # token name that $fn maps the label to; one or more `ln` chunks are then
  # appended, each accepted only where the next character is neither '/' nor
  # an upper-case letter A-Z (so the field ends where the next label or a
  # '/' line begins).
  # NOTE(review): the exact effect of `$Skip:1` on leading blanks is not
  # visible here -- confirm against the Icarus reference.
  fields:	~ {$In:entry $Out $Skip:1}
                  (/\/[^\n]+\n/ {$Wrt} |
                  word {$Wrt:$fn.$Ct} 
                  (/[\/A-Z]/ {$Not} ln {$App})+)+ ~
 

# Indexing productions: each one takes field tokens and emits index terms
  # prev_id: index production for the prev_id field.
  # Skips the field label (`tag`) and any following blanks, then writes the
  # numeric identifier that follows.
  prev_id:	~ {$In:[fields c:prev_id] $Out}
		  tag / /* number {$Wrt} ~

  # numbers: shared index production for the numeric fields
  # (year, volume, start, end, ref_id, sanal, sammu).
  # After the label and optional blanks, an optional run of digits is matched
  # and emitted with `$Uniq:$Itc` -- presumably under the name of the input
  # token being processed; confirm against the Icarus reference.
  numbers:	~ {$In:[fields c:{year volume start end ref_id sanal sammu}] $Out}
		  tag ' '* number? {$Uniq:$Itc} ~ 

  # unique1: index production for the morph_cod field.
  # After the label and optional blanks, either a run of `not_word`
  # characters is matched with no action, or the non-empty rest of the line
  # (`all`, newline excluded) is emitted as a single value with `$Uniq:$Itc`.
  unique1:	~ {$In:[fields c:{morph_cod}] $Out}
		  tag ' '*  (not_word | all {$Uniq:$Itc})~ 

  # topog: index production for the topog field.
  # After the label and optional blanks, a bare newline (empty value) is
  # matched with no action; otherwise the rest of the line is taken as one
  # value via `ln` (which includes the trailing newline) and emitted with
  # `$Uniq:$Itc`.
  topog: 	~ {$In:[fields c:topog] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # topogsort: same input (topog field) and same pattern as the `topog`
  # production; only the production name differs.
  # NOTE(review): presumably feeds a separate index used for sorting --
  # confirm in the library definition that references this production.
  topogsort: 	~ {$In:[fields c:topog] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # topo_c: index production for the topo_c field.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  topo_c: 	~ {$In:[fields c:topo_c] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # morpho: index production for the morpho field.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  morpho: 	~ {$In:[fields c:morpho] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # popul: index production for the popul field.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  popul: 	~ {$In:[fields c:popul] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # cnty: index production for the cnty field.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  cnty: 	~ {$In:[fields c:cnty] $Out} 
		  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~ 

  # stopog: production for the stopog token.
  # After the label and optional blanks, an empty value is matched with no
  # action; otherwise the rest of the line is written with a plain `$Wrt`.
  # NOTE(review): the Short_topo:stopog mapping is commented out in $fn, so
  # `fields` as written does not appear to produce a stopog token -- confirm
  # whether this production is still needed.
  stopog:       ~ {$In:[fields c:stopog] $Out}
                  tag ' '* (/\n/ |  ln {$Wrt})~ 

  # prevalence: production for the prevalence token.
  # After the label and optional blanks, an empty value is matched with no
  # action; otherwise the rest of the line is written with a plain `$Wrt`.
  # NOTE(review): the Prevalence:prevalence mapping is commented out in $fn,
  # so `fields` as written does not appear to produce this token -- confirm
  # whether this production is still needed.
  prevalence:       ~ {$In:[fields c:prevalence] $Out}
                  tag ' '* (/\n/ |  ln {$Wrt})~
  
  # morgro: index production for the morgro token.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  # NOTE(review): the Morpho_group:morgro mapping is commented out in $fn,
  # so `fields` as written does not appear to produce this token -- confirm
  # whether this production is still needed.
  morgro:       ~ {$In:[fields c:morgro] $Out}
                  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~

  # refco: index production for the refco field.
  # An empty value (bare newline) is matched with no action; otherwise the
  # rest of the line (`ln`, newline included) is emitted with `$Uniq:$Itc`.
  refco:       ~ {$In:[fields c:refco] $Out}
                  tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~

  # pub_e: index production for the pub_e field.
  # After the label and optional blanks, a run of digits is required and is
  # emitted with `$Uniq:$Itc`. Unlike most sibling productions, this pattern
  # has no alternative for an empty value.
  # The commented-out line kept below is an earlier variant that accepted an
  # empty value or a 'PM:' / 'PU:' prefix before the number.
  pub_e:       ~ {$In:[fields c:pub_e] $Out}
		  tag ' '* number {$Uniq:$Itc }  ~
#		  tag ' '* (/\n/|('PM:'|'PU:') ' '* number {$Uniq:$Itc })  ~

  # unique: shared word-level index production for the free-text fields
  # (title, cross, authors, journal, comment, tissue, start_material,
  # material_sequenced, prescreening and exon2..exon11).
  # After the label and optional blanks, the value is consumed as a sequence
  # of `word` and `punct` runs: each word is emitted with `$Uniq:$Itc`, while
  # punctuation / whitespace runs are matched with no action.
  unique:	~ {$In:[fields c:{title cross authors journal comment tissue 
                                  start_material material_sequenced prescreening
                                  exon2 exon3 exon4 exon5 exon6 exon7 exon8 
                                  exon9 exon10 exon11}] $Out}
		  tag ' '* (word {$Uniq:$Itc}| punct)* ~ 

# HTML rendering productions (t:html views of the field tokens)

 # h_top: HTML rendering of the prev_id field, the first row of an entry.
 # The header tests `$ParInt:isTable` and invokes `$Fail` -- presumably the
 # production is skipped when the entry is shown in table view; confirm
 # against the Icarus reference.
 # The leading label word is replaced by: a full-width title row containing
 # `$entry.libName`, followed by a row whose first cell holds the label
 # ($Ct) in bold italics and whose second cell is left open. The remainder
 # of the token is then re-emitted followed by `</TD></TR>` to close that
 # cell and row.
 h_top:      ~ {$In:[fields c:prev_id t:html] pre if:$ParInt:isTable $Fail}
               word {$Rep:
                     |</TR><TR>
                     |<TD colspan=2 bgcolor=\"#ffffff\">
                     |<font color=\"#000066\"><B><center>
                     |($entry.libName)</center></B></TD>
                     |</TR>
                     |<TR><TD bgcolor=\"#ffffff\">
                     |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                     |<TD bgcolor=\"#ffffff\">
                    } 
                     /.*/ {$Rep:"$Ct</TD></TR>"}  ~

 
 # h_ref: HTML rendering of the ref_id field.
 # When `$isTable==0`, the literal label 'Ref_id' is replaced by a table row
 # start: a label cell (bold italic $Ct) plus an opened value cell.
 # The number that follows (after an optional punctuation run) is replaced
 # by `$Hlink:[tp53_som_refR p:{$Ct $Ct}]` -- presumably a hyperlink to the
 # matching record in the tp53_som_refR library; confirm the link name in
 # the SRS configuration.
 # No closing `</TD></TR>` is emitted by this production.
 h_ref: ~ {$In:[fields c:ref_id t:html] pre if:$ParInt:isTable $Fail }
             'Ref_id' {if:$isTable==0
               $Rep:
                 |<TR><TD bgcolor=\"#ffffff\">
                 |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                 |<TD bgcolor=\"#ffffff\">
               }
             punct? number {$Rep:$Hlink:[tp53_som_refR p:{$Ct $Ct}]} ~


# h_cross_ref: HTML rendering of the cross field.
# When `$isTable==0`, the literal label 'Cross_ref_id' is replaced by a table
# row start: a label cell (bold italic $Ct) plus an opened value cell.
# The pattern then requires exactly one `' '` followed by either a literal
# '-' (left untouched) or a number, which is replaced by
# `$Hlink:[tp53_som_refR p:{$Ct $Ct}]` -- presumably a hyperlink to that
# record in the tp53_som_refR library; confirm in the SRS configuration.
h_cross_ref: ~ {$In:[fields c:cross t:html] pre if:$ParInt:isTable $Fail }
             'Cross_ref_id' {if:$isTable==0
               $Rep:
                 |<TR><TD bgcolor=\"#ffffff\">
                 |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                 |<TD bgcolor=\"#ffffff\">
               }
             ' '  ('-'|number {$Rep:$Hlink:[tp53_som_refR p:{$Ct $Ct}]}) ~

# h_pubmed: HTML rendering of the pub_e field.
# When `$isTable==0`, the literal label 'PubMed_entry' is replaced by a table
# row start: a label cell (bold italic $Ct) plus an opened value cell.
# After any number of blanks, the value is either a literal '-' (left
# untouched) or a number, which is replaced by
# `$Hlink:[medlineR p:{$Ct $Ct}]` -- presumably a hyperlink to the record
# with that identifier via the medlineR link; confirm in the SRS
# configuration.
h_pubmed: ~ {$In:[fields c:pub_e t:html] pre if:$ParInt:isTable $Fail }
             'PubMed_entry' {if:$isTable==0
               $Rep:
                 |<TR><TD bgcolor=\"#ffffff\">
                 |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                 |<TD bgcolor=\"#ffffff\">
               }
              ' '*  ('-'|number {$Rep:$Hlink:[medlineR p:{$Ct $Ct}]}) ~
 
 # h_fields: generic HTML rendering; its input selects `fields` tokens with
 # no class filter (only `t:html`).
 # When `$isTable==0`, the leading label word is replaced by a table row
 # start: a label cell (bold italic $Ct) plus an opened value cell. The rest
 # of the token is then re-emitted followed by `</TD></TR>` to close the row.
 # NOTE(review): how this production interacts with the field-specific
 # h_top / h_ref / h_cross_ref / h_pubmed productions is decided outside
 # this file section -- confirm in the library definition.
 h_fields:      ~ {$In:[fields t:html] pre if:$ParInt:isTable $Fail}
                        word {if:$isTable==0
                        $Rep:
                     |<TR><TD bgcolor=\"#ffffff\">
                     |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                     |<TD bgcolor=\"#ffffff\">
                     } 
                         /.*/ {$Rep:"$Ct</TD></TR>"}  ~

 # t_fields: for every `fields` token, skips the label (`tag`) and writes the
 # text matched by /.*/ with `$Wrt:$Itc` -- presumably under the name of the
 # input token; confirm against the Icarus reference.
 t_fields:     ~ {$In:[fields] $Out} tag /.*/ {$Wrt:$Itc} ~



# definitions

  # Terminal (lexical) definitions shared by the productions in this file.
  #   tag      - a field label: letters, digits and '_'.
  #   ln       - the rest of a line INCLUDING its newline; may be otherwise empty.
  #   word     - same pattern as `tag`; used for values and for labels.
  #   word2    - letters plus '+' and '-'; not referenced by any production
  #              visible in this file.
  #   word3    - letters, digits, '.', '+', '-' (and a backslash, as written);
  #              not referenced by any production visible in this file.
  #   number   - one or more decimal digits.
  #   not_word - a run of characters that are not letters, digits, '_' or '-'.
  #   punct    - a run of characters that are not letters, digits or '_'
  #              (so, unlike not_word, it does match '-').
  #   all      - the non-empty rest of a line EXCLUDING the newline.
  tag:          ~ /[0-9a-zA-Z_]+/ ~
  ln:		~ /[^\n]*\n/ ~
  word:		~ /[0-9a-zA-Z_]+/ ~
  word2:	~ /[a-zA-Z+-]+/ ~
  word3:	~ /[0-9a-zA-Z\\.+-]+/ ~
  number:	~ /[0-9]+/ ~
  not_word:	~ /[^a-zA-Z0-9_-]+/ ~ 
  punct:	~ /[^a-zA-Z0-9_]+/ ~  
  all:		~ /[^\n]+/ ~ 

}

	# debugging