#!/bin/env icarus
# IARC TP53 Mutations Database (Rel. 14)
# Prevalence data set
# Author: Paolo Romano (paolo.romano@istge.it)
# Created: Dec 3rd, 2002
# Modified by Domenico Marra on 2003/07/24
# Modified by Paolo Romano on 2007/02/06
# Modified by Paolo Romano on 2008/04/08
# $fn: lookup table from the field names as they appear in the flat file
# (left of the colon) to the short field codes used by the productions in
# $rules (right of the colon). The `fields` production consults it as
# `$fn.$Ct` when it writes a field token.
# Entries that are commented out (Short_topo, Prevalence, Morpho_group) have
# no code in this table, although productions named stopog, prevalence and
# morgro still exist in $rules.
$fn={
Prevalence_id:prev_id
Topography:topog
# Short_topo:stopog
Topo_code:topo_c
Morphology:morpho
Morpho_code:morph_cod
Population:popul
Country:cnty
Sample_analyzed:sanal
Sample_mutated:sammu
Comment:comment
Ref_id:ref_id
Cross_ref_id:cross
Title:title
Authors:authors
Year:year
Journal:journal
Volume:volume
Start_page:start
End_page:end
PubMed_entry:pub_e
Ref_comment:refco
Tissue_processing:tissue
Start_material:start_material
Material_sequenced:material_sequenced
Prescreening:prescreening
Exon2:exon2
Exon3:exon3
Exon4:exon4
Exon5:exon5
Exon6:exon6
Exon7:exon7
Exon8:exon8
Exon9:exon9
Exon10:exon10
Exon11:exon11
# Prevalence:prevalence
# Morpho_group:morgro
}
$rules={
# entry: splits the raw text file into entries.
# - Leading lines that do not start with 'Prevalence_id' are consumed
#   without being written.
# - An entry starts at a line beginning with 'Prevalence_id' ({$Wrt}); that
#   line and every following line up to, but not including, the next
#   'Prevalence_id' line are appended to it ({$App}).
# NOTE(review): `$entryFip=$Fip` presumably stores the file position of the
# entry start, and `{$Not}` presumably acts as a negative look-ahead --
# confirm against the Icarus reference.
entry: ~ {$In:[file:text] $Out pre $Skip:0}
('Prevalence_id' {$Not} ln)*
('Prevalence_id' {$entryFip=$Fip $Wrt} ln {$App}
('Prevalence_id' {$Not} ln {$App})*)?
~
# fields
# fields: splits one `entry` token into field tokens. Two alternatives are
# repeated until the entry is exhausted:
#   1. a line that begins with '/' is written as it is;
#   2. a leading word (the field name) is written under the code found in
#      $fn for that name, and one or more lines are appended to it as long
#      as the text at the current position does not begin with '/' or an
#      upper-case letter A-Z (the first appended line is the remainder of
#      the field-name line itself).
# NOTE(review): a continuation line that starts with an upper-case letter
# would therefore be read as a new field name -- confirm the data never
# contains such lines.
fields: ~ {$In:entry $Out $Skip:1}
(/\/[^\n]+\n/ {$Wrt} |
word {$Wrt:$fn.$Ct}
(/[\/A-Z]/ {$Not} ln {$App})+)+ ~
#indexing
# prev_id: index production for the Prevalence_id field. Skips the field
# name (tag) and any spaces, then writes the number that follows.
prev_id: ~ {$In:[fields c:prev_id] $Out}
tag / /* number {$Wrt} ~
# numbers: index production shared by the numeric fields year, volume,
# start, end, ref_id, sanal and sammu. After the field name and optional
# spaces an optional run of digits is emitted with {$Uniq:$Itc}.
# NOTE(review): {$Uniq:$Itc} presumably emits a de-duplicated token under
# the code of the input field token -- confirm against the Icarus reference.
numbers: ~ {$In:[fields c:{year volume start end ref_id sanal sammu}] $Out}
tag ' '* number? {$Uniq:$Itc} ~
# unique1: index production for the morph_cod field. After the field name
# and optional spaces, either a run of non-word characters (not_word) is
# consumed with no command attached, or the rest of the line (all) is
# emitted as one token with {$Uniq:$Itc}.
unique1: ~ {$In:[fields c:{morph_cod}] $Out}
tag ' '* (not_word | all {$Uniq:$Itc})~
# topog: index production for the topog (Topography) field. After the field
# name and optional spaces, either a bare newline is consumed (empty value,
# no command attached) or the remainder of the line, including its newline
# as matched by `ln`, is emitted as a single token with {$Uniq:$Itc}.
topog: ~ {$In:[fields c:topog] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# topogsort: second production over the topog field; its input selector and
# body are identical to those of `topog`.
# NOTE(review): presumably kept as a separate name so that another
# definition file can refer to it (e.g. for sorting) -- confirm before
# merging it with `topog`.
topogsort: ~ {$In:[fields c:topog] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# topo_c: index production for the topo_c (Topo_code) field. After the field
# name and optional spaces, an empty value (bare newline) is consumed with
# no command attached; otherwise the rest of the line is emitted as one
# token with {$Uniq:$Itc}.
topo_c: ~ {$In:[fields c:topo_c] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# morpho: index production for the morpho (Morphology) field. After the
# field name and optional spaces, an empty value (bare newline) is consumed
# with no command attached; otherwise the rest of the line is emitted as one
# token with {$Uniq:$Itc}.
morpho: ~ {$In:[fields c:morpho] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# popul: index production for the popul (Population) field. After the field
# name and optional spaces, an empty value (bare newline) is consumed with
# no command attached; otherwise the rest of the line is emitted as one
# token with {$Uniq:$Itc}.
popul: ~ {$In:[fields c:popul] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# cnty: index production for the cnty (Country) field. After the field name
# and optional spaces, an empty value (bare newline) is consumed with no
# command attached; otherwise the rest of the line is emitted as one token
# with {$Uniq:$Itc}.
cnty: ~ {$In:[fields c:cnty] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# stopog: production over field tokens with code stopog. After the field
# name and optional spaces, an empty value is consumed with no command
# attached; otherwise the rest of the line is written with {$Wrt} (not
# {$Uniq:$Itc} as in the neighbouring productions).
# NOTE(review): the `Short_topo:stopog` line in $fn is commented out, so
# presumably no field token carries this code any more and this production
# never receives input -- verify before relying on it.
stopog: ~ {$In:[fields c:stopog] $Out}
tag ' '* (/\n/ | ln {$Wrt})~
# prevalence: production over field tokens with code prevalence. After the
# field name and optional spaces, an empty value is consumed with no command
# attached; otherwise the rest of the line is written with {$Wrt}.
# NOTE(review): the `Prevalence:prevalence` line in $fn is commented out, so
# presumably no field token carries this code any more and this production
# never receives input -- verify before relying on it.
prevalence: ~ {$In:[fields c:prevalence] $Out}
tag ' '* (/\n/ | ln {$Wrt})~
# morgro: production over field tokens with code morgro. After the field
# name and optional spaces, an empty value is consumed with no command
# attached; otherwise the rest of the line is emitted with {$Uniq:$Itc}.
# NOTE(review): the `Morpho_group:morgro` line in $fn is commented out, so
# presumably no field token carries this code any more and this production
# never receives input -- verify before relying on it.
morgro: ~ {$In:[fields c:morgro] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# refco: index production for the refco (Ref_comment) field. After the field
# name and optional spaces, an empty value (bare newline) is consumed with
# no command attached; otherwise the rest of the line is emitted as one
# token with {$Uniq:$Itc}.
refco: ~ {$In:[fields c:refco] $Out}
tag ' '* (/\n/ | ln {$Uniq:$Itc}) ~
# pub_e: index production for the pub_e (PubMed_entry) field. After the
# field name and optional spaces a run of digits is required and is emitted
# with {$Uniq:$Itc}; a value that does not start with a digit is not matched
# by this pattern.
# The commented-out line below is an alternative body that would also accept
# an empty value or a 'PM:' / 'PU:' prefix before the number.
pub_e: ~ {$In:[fields c:pub_e] $Out}
tag ' '* number {$Uniq:$Itc } ~
# tag ' '* (/\n/|('PM:'|'PU:') ' '* number {$Uniq:$Itc }) ~
# unique: word-level index production shared by the free-text fields title,
# cross, authors, journal, comment, tissue, start_material,
# material_sequenced, prescreening and exon2..exon11. After the field name
# and optional spaces the value is read as a sequence of words and
# punctuation runs: every word is emitted with {$Uniq:$Itc}, punctuation
# (any run of characters outside [a-zA-Z0-9_]) is consumed with no command
# attached.
unique: ~ {$In:[fields c:{title cross authors journal comment tissue
start_material material_sequenced prescreening
exon2 exon3 exon4 exon5 exon6 exon7 exon8
exon9 exon10 exon11}] $Out}
tag ' '* (word {$Uniq:$Itc}| punct)* ~
#HTML stuff...
# h_top: HTML rendering of the prev_id field, i.e. the field that starts
# every entry. The leading word (the field name) is replaced by:
#   - a closing </TR> and a new row whose single two-column cell shows
#     ($entry.libName) centred and bold;
#   - the opening of the field's own row: a cell with the field name ($Ct)
#     in bold italics, followed by the opening <TD> of the value cell.
# The remainder of the token is then re-emitted followed by </TD></TR>,
# which closes the value cell and the row.
# The library-name cell now closes its <font> element before </TD>; the
# previous text left it open, unlike the field-name cell below it.
# NOTE(review): `if:$ParInt:isTable $Fail` presumably reads the integer
# parameter isTable; unlike the other h_* productions the replacement here
# is not guarded by `if:$isTable==0` -- confirm that this is intended.
h_top: ~ {$In:[fields c:prev_id t:html] pre if:$ParInt:isTable $Fail}
word {$Rep:
|</TR><TR>
|<TD colspan=2 bgcolor=\"#ffffff\">
|<font color=\"#000066\"><B><center>
|($entry.libName)</center></B></font></TD>
|</TR>
|<TR><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/.*/ {$Rep:"$Ct</TD></TR>"} ~
# h_ref: HTML rendering of the ref_id field. When $isTable is 0 the literal
# field name 'Ref_id' is replaced by the opening of a table row: a cell with
# the field name ($Ct) in bold italics and the opening <TD> of the value
# cell. An optional punctuation run is skipped and the number that follows
# is replaced by $Hlink:[tp53_som_refR p:{$Ct $Ct}].
# NOTE(review): $Hlink presumably builds a hyperlink into the tp53_som_refR
# library keyed by the number -- confirm against the Icarus reference.
# NOTE(review): unlike h_top and h_fields, nothing in this production emits
# the closing </TD></TR>; confirm whether that is supplied elsewhere.
h_ref: ~ {$In:[fields c:ref_id t:html] pre if:$ParInt:isTable $Fail }
'Ref_id' {if:$isTable==0
$Rep:
|<TR><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
punct? number {$Rep:$Hlink:[tp53_som_refR p:{$Ct $Ct}]} ~
# h_cross_ref: HTML rendering of the cross (Cross_ref_id) field. When
# $isTable is 0 the literal field name 'Cross_ref_id' is replaced by the
# opening of a table row (field-name cell plus the opening <TD> of the value
# cell). After a single space the value is either '-' (left unchanged) or a
# number, which is replaced by $Hlink:[tp53_som_refR p:{$Ct $Ct}].
# NOTE(review): exactly one space is required between the field name and the
# value here, whereas h_pubmed accepts any number of spaces (' '*) --
# confirm that the data always uses a single space.
h_cross_ref: ~ {$In:[fields c:cross t:html] pre if:$ParInt:isTable $Fail }
'Cross_ref_id' {if:$isTable==0
$Rep:
|<TR><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
' ' ('-'|number {$Rep:$Hlink:[tp53_som_refR p:{$Ct $Ct}]}) ~
# h_pubmed: HTML rendering of the pub_e (PubMed_entry) field. When $isTable
# is 0 the literal field name 'PubMed_entry' is replaced by the opening of a
# table row (field-name cell plus the opening <TD> of the value cell). After
# optional spaces the value is either '-' (left unchanged) or a number,
# which is replaced by $Hlink:[medlineR p:{$Ct $Ct}].
# NOTE(review): $Hlink presumably builds a hyperlink into the medlineR
# library keyed by the number -- confirm against the Icarus reference.
h_pubmed: ~ {$In:[fields c:pub_e t:html] pre if:$ParInt:isTable $Fail }
'PubMed_entry' {if:$isTable==0
$Rep:
|<TR><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
' '* ('-'|number {$Rep:$Hlink:[medlineR p:{$Ct $Ct}]}) ~
# h_fields: generic HTML rendering applied to every field token (the input
# selector names no field code). When $isTable is 0 the leading word (the
# field name) is replaced by the opening of a table row: a cell with the
# field name ($Ct) in bold italics and the opening <TD> of the value cell.
# The remainder matched by /.*/ is re-emitted followed by </TD></TR>.
h_fields: ~ {$In:[fields t:html] pre if:$ParInt:isTable $Fail}
word {if:$isTable==0
$Rep:
|<TR><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/.*/ {$Rep:"$Ct</TD></TR>"} ~
# t_fields: for every field token, skips the field name (tag) and writes
# whatever /.*/ matches after it with {$Wrt:$Itc}. Spaces between the field
# name and the value are not skipped separately, so they are part of the
# written text.
# NOTE(review): $Itc presumably is the code of the input field token, so the
# output keeps the field's code -- confirm against the Icarus reference.
t_fields: ~ {$In:[fields] $Out} tag /.*/ {$Wrt:$Itc} ~
# definitions
# Lexical building blocks shared by the productions above.
# tag: a field name -- one or more ASCII letters, digits or underscores.
tag: ~ /[0-9a-zA-Z_]+/ ~
# ln: the rest of the current line, including its terminating newline (may
# be just the newline).
ln: ~ /[^\n]*\n/ ~
# word: one or more ASCII letters, digits or underscores (same pattern as
# `tag`).
word: ~ /[0-9a-zA-Z_]+/ ~
# word2: one or more letters, '+' or '-'. Not referenced by any production
# in this file.
word2: ~ /[a-zA-Z+-]+/ ~
# word3: one or more letters, digits, '.', '+' or '-'. Not referenced by any
# production in this file.
# NOTE(review): the class is written with `\\.`; depending on how Icarus
# un-escapes regular expressions this may also admit a literal backslash --
# confirm before using it.
word3: ~ /[0-9a-zA-Z\\.+-]+/ ~
# number: one or more decimal digits.
number: ~ /[0-9]+/ ~
# not_word: one or more characters that are not letters, digits, '_' or '-'.
not_word: ~ /[^a-zA-Z0-9_-]+/ ~
# punct: one or more characters that are not letters, digits or '_'
# (this includes spaces and newlines).
punct: ~ /[^a-zA-Z0-9_]+/ ~
# all: the rest of the current line without its newline; at least one
# character is required.
all: ~ /[^\n]+/ ~
}
# debugging