*=*=*=*= ToTag.html =*=*=*=*
Integer Function ToTag(length,word,tag)
implicit integer(a-z)
parameter (mxnoun=1000,mxlen=50,fnice=1.0)
common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
character*(mxlen) noun(mxnoun)
character*(mxlen) clower,clotag
integer len_noun(mxnoun),weight_noun(mxnoun)
integer freq_noun(mxnoun)
character*(*) word,tag
c
totag = length
tag = word
if(length.eq.0) return
clotag = clower(word,length)
do 1 i=1,nnoun
if(len_noun(i).ne.length) goto 1
if(noun(i)(:length).ne.clotag(:length)) goto 1
c
c if first occurence, make reference, otherwise link
c
if(weight_noun(i).eq.0) then
weight_noun(i) = 1
tag = 'clotag(:length)//
& '">'//word(:length)//''
totag = 15 + 2*length
else if(weight_noun(i).eq.1) then
c
c link
c
tag = 'clotag(:length)//
& '">'//word(:length)//''
totag = 16 + 2*length
else if(weight_noun(i).eq.2) then
c
c external filename
c
tag = clotag(:length)//
& '">'//word(:length)//''
totag = 15 + 2*length
endif
return
1 continue
end
*=*=*=*= Txt2HTML.html =*=*=*=*
Program Txt2HTML
implicit integer(a-z)
parameter (mxlin=250,mxlen=50,mxnoun=1000,mxpar=20000)
parameter (mxpass=2,mxtag=100)
parameter (fnice=1.0)
common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
c
character*(mxtag) tag
character*(mxlin) cline
character*(mxlen) noun(mxnoun)
character*(mxlen) word,lastword,nextword,sfile
character*(mxpar) para,temp,chtml
character*16 cpunct /'-:;,/.?!"''(){}[]'/
character*10 tchar
character*3 ccthree
character*1 c,cc,cclast
c
integer len_noun(mxnoun),weight_noun(mxnoun)
integer freq_noun(mxnoun)
c
real f
c
logical lpunct,lalpha,lnumbr,lclaus
c
external lenocc,totag
c
c statement functions
c
lclaus(c) = index(cpunct,c).gt.5
lalpha(c) = (lge(c,'a').and.lle(c,'z')).or.
& (lge(c,'A').and.lle(c,'Z'))
lnumbr(c) = lge(c,'0').and.lle(c,'9')
c
c call getarg(1,sfile,status)
c if(status.le.0) goto 900
c open(1,file=sfile(:status),status='old',err=900)
open(1,status='old',readonly,err=900)
c open(7,file='user',title='Candidate Nouns',iofocus=.true.)
c
nnoun = 0
lpar = 0
pass = 1
c
c Read the file and accumulate the paragraphs
c
1 read(1,'(a)',end=2,err=2) cline
c
ll = lenocc(cline)
c
if(ll.le.0.and.lpar.ne.0) then
c
c empty line ... process words in preceding paragraph
c
lhtml = 0
lwordl = 0
lwordn = 0
lword = 0
lastword = ' '
nextword = ' '
word = ' '
mark = 1
markl = 0
markn = 0
ipos = 0
cc = char(1)
3 ipos = ipos+1
cclast = cc
if(ipos.gt.lpar) goto 4
cc = para(ipos:ipos)
if(cc.eq.char(9)) cc = ' '
good3 = 1
if(ipos.gt.1.and.ipos.lt.lpar) then
ccthree=para(ipos-1:ipos+1)
if(lalpha(cclast).or.lnumbr(cclast)) good3=2
if(lalpha(para(ipos+1:ipos+1)).or.
& lnumbr(para(ipos+1:ipos+1))) good3=good3+1
endif
c
c add characters to nextword
c
if(lalpha(cc).or.lnumbr(cc)) then
if(lwordn.lt.mxlen) then
lwordn = lwordn+1
nextword(lwordn:lwordn) = cc
endif
goto 3
c
c special for files ... assumes structure like A.B !
c
else if(cc.eq.'.'.and.good3.eq.3) then
if(lwordn.lt.mxlen) then
lwordn = lwordn+1
nextword(lwordn:lwordn) = cc
endif
goto 3
else if(ccthree(2:3).eq.':\'.and.lalpha(cclast)) then
if(lwordn.lt.mxlen-1) then
lwordn = lwordn+2
nextword(lwordn-1:lwordn) = ':\'
ipos = ipos+1
endif
goto 3
else if(cc.eq.'\'.and.index(nextword(:lwordn),'\').ne.0) then
if(lwordn.lt.mxlen) then
lwordn = lwordn+1
nextword(lwordn:lwordn) = cc
endif
goto 3
else if(pass.lt.mxpass) then
markn = 0
if(lclaus(cc)) markn = 1
c
c end of word.
c
if(lword*lwordn.ne.0.and.pass.lt.mxpass) then
call context(lastword(:lwordl),
& word(:lword),
& nextword(:lwordn),markl)
c shift words left
endif
if(lwordn.eq.0) goto 3
lastword = word
lwordl = lword
word = nextword
lword = lwordn
lwordn = 0
c
c mark signifies whether "word" was preceded by a clause end
c
markl = mark
mark = markn
goto 3
else if(pass.eq.mxpass) then
c
c Last pass .... pipe out HTML
c
lastword = word
lwordl = lword
word = nextword
lword = lwordn
lwordn = 0
markl = mark
mark = markn
c
c check for tag
c
ltag = 0
if(lword.ne.0) ltag = totag(lword,word,tag)
temp = chtml
if(lhtml+ltag+1.ge.70) then
write(6,*) chtml(:lhtml)
lhtml = 0
endif
c
c check for translation chars
c
tchar(1:1) = cc
lt = 1
if(cc.eq.'<') then tchar = '<'
lt = 5
else if(cc.eq.'>') then
tchar = '>'
lt = 5
else if(cc.eq.'&') then
tchar = '&'
lt = 5
endif
if(ltag.eq.0) then
chtml = temp(:lhtml)//tchar(:lt)
lhtml = lhtml+lt
else if(cc.ne.' '.or.cclast.ne.cc) then
chtml = temp(:lhtml)//tag(:ltag)//tchar(:lt)
lhtml = lhtml+ltag+lt
else
chtml = temp(:lhtml)//tag(:ltag)
lhtml = lhtml+ltag
endif
goto 3
endif
4 lpar = 0
if(lwordl*lword.ne.0.and.pass.lt.mxpass)
& call context(lastword(:lwordl),word(:lword),' ',markl)
if(pass.eq.mxpass.and.ll.eq.0) then
write(6,*) chtml(:lhtml)
lhtml = 0
write(6,*) '
' endif else c c add line to para c temp = para para = temp(:lpar)//' '//cline(:ll)
lpar = lpar + ll
+ 1 endif goto 1 c c End of pass ... c 2 continue if(pass.lt.mxpass) then write(7,*) ' A total of ',nnoun,'
candidate nouns at pass ',pass write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c pass
= pass+1 c c Prepare final weights for tagging pass c if(pass.eq.mxpass.and.nnoun.gt.0)
then inoun = 1 6 if(inoun.le.nnoun) then f = real(weight_noun(inoun))/real(freq_noun(inoun)) weight_noun(inoun)
= 0 c c For filenames, set the weight to be 2 c if(index(noun(inoun),'.').ne.0)
weight_noun(inoun)=2 if(index(noun(inoun),'\').ne.0) weight_noun(inoun)=2 if(f.lt.fnice)
then c c remove from list c write(7,*) ' Rejecting ',noun(inoun),f do i=inoun+1,nnoun noun(i-1) = noun(i) weight_noun(i-1) = weight_noun(i)
freq_noun(i-1) = freq_noun(i) len_noun(i-1) =
len_noun(i) enddo nnoun
= nnoun-1 else inoun = inoun + 1 endif goto 6 endif write(7,*) ' A total of ',nnoun,' filtered nouns' write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c if(pass.le.mxpass) then lpar = 0 rewind(1) goto 1
endif write(6,*) chtml(:lhtml) stop 900
write(6,*) ' Error opening input file' end *=*=*=*= context.html =*=*=*=*
subroutine context(lastword,word,nextword,mark)
implicit integer(a-z)
parameter (mxnoun=1000,mxlen=50)
common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
character*(mxlen) lowlastword,lowword,lownextword
character*(mxlen) noun(mxnoun)
character*(mxlen) clower
character*(*) lastword,word,nextword
character*(1) c
external clower
logical lcapit
integer len_noun(mxnoun),weight_noun(mxnoun),freq_noun(mxnoun)
c
lcapit(c) = lge(c,'A').and.lle(c,'Z')
c
if (nnoun.ge.mxnoun) return
if (word.eq.' ') return
lw = lenocc(word)
lowword = clower(word,lw)
c
c Get index of this noun, if it exists
c
ifound = 0
do 1 i=1,nnoun
if(noun(i).eq.lowword) then
ifound = i
goto 2
endif
1 continue
2 continue
c
c Check the candidate word as a plural
c
if(lowword(lw:lw).eq.'s'.and.lw.gt.2) then
weight = 4
do i=1,nnoun
if(lowword(:lw-1).eq.noun(i)) then
weight_noun(i) = weight_noun(i)+1
freq_noun(i) = freq_noun(i)+1
goto 100
endif
enddo
endif
lwl = lenocc(lastword)
lwn = lenocc(nextword)
lownextword = clower(nextword,lwn)
lowlastword = clower(lastword,lwl)
c
c If word contains . or \ then it is a file
c
weight = 50
if(index(word(:lw),'.').ne.0.or.index(word(:lw),'\')) goto 100
c
c If not start of clause, and first letter is capital, then noun
c
weight = 10
if(mark.eq.0.and.lcapit(word(1:1))) goto 100
c
c If start of clause, and first two letters are capital, then noun
c
weight = 5
if(mark.ne.0.and.lw.gt.1.and.lcapit(word(2:2))) goto 100
c
c
c Check if word on right is a noun
c
weight = 0
do i=1,nnoun
if(lownextword.eq.noun(i)) then
weight = -2
goto 100
endif
enddo
weight = 2
if (lowlastword.eq.'the') goto 100
weight = 1
if (lowlastword.eq.'a') goto 100
c
c probably not a noun
c
weight = -1
c
100 continue
if(ifound.ne.0) then
weight_noun(ifound)=weight_noun(ifound)+weight
freq_noun(ifound)=freq_noun(ifound)+1
endif
c
c reduce weight on left if that is supposed to be a noun, too
c
do i=1,nnoun
if(lowlastword.eq.noun(i)) then
weight_noun(i) = weight_noun(i)-2
endif
end do
if(weight.le.0.or.ifound.ne.0) return
nnoun = nnoun+1
noun(nnoun) = lowword
len_noun(nnoun) = lw
weight_noun(nnoun) = weight
freq_noun(nnoun) = 1
end
c *=*=*=*= Lenocc.html =*=*=*=*
Integer Function Lenocc(c)
Character*(*) c
do lenocc=len(c),1,-1
if(c(lenocc:lenocc).ne.' ') return
enddo
lenocc = 0
end
*=*=*=*= clower.html =*=*=*=*
character*(*) function clower(cword,l)
character*(*) cword
character*(26) lc /'abcdefghijklmnopqrstuvwxyz'/
character*(26) uc /'ABCDEFGHIJKLMNOPQRSTUVWXYZ'/
clower = cword
do i=1,l
ip = index(uc,cword(i:i))
if(ip.ne.0) clower(i:i) = lc(ip:ip)
enddo
end