/*	authoritative file: /pub/Develop/projects/vipsi/libs/TEXT.vl

	Text utilities collected in the list variable TEXT:
	a console spin wheel, whitespace cropping, word splitting
	(with and without html tag awareness), joining and name normalization.

	NOTE(review): this file expects the container TEXT and, for
	TEXT.SplitIntoWords(), the string HTML.TagsAny to be defined elsewhere.
*/


/* ---- spin wheel ("busy" indicator) state ---------------------------- */

var TEXT.SpinAgain = 0		// earliest time ('now' value) for the next animation step
var TEXT.SpinDelay = 0.1	// delay added to 'now' between two animation steps
var TEXT.SpinIndex = 1		// index of the wheel character printed last: 1..4
var TEXT.SpinCount = 0		// number of calls to SpinWheel() since last SpinReset()


/*	reset the call counter of the spin wheel
*/
proc TEXT.SpinReset()
{
	SpinCount=0
}


/*	advance the spin wheel, but not before SpinAgain:
	prints the next character of "|/-\" followed by a backspace,
	so that the next character overwrites it.
	returns the incremented call counter.
*/
proc TEXT.SpinWheel()
{
	if now>=SpinAgain
		SpinAgain = now + SpinDelay
		SpinIndex %= 4				// 1..4 -> 1,2,3,0
		SpinIndex += 1				//      -> 2,3,4,1
		put "|/-\\"[SpinIndex],"\b"
	then
	return ++SpinCount
}


/*	remove trailing characters <= " " (white space and control codes)
*/
proc TEXT.CropSpaces_R ( t )
{
	var j = count t
	do
		while j && t[j]<=" "
		j--
	loop
	return t[to j]
}


/*	remove leading characters <= " " (white space and control codes)
*/
proc TEXT.CropSpaces_L ( t )
{
	var j = 1
	do
		while j<=count t && t[j]<=" "
		j++
	loop
	return t[j to]
}


/*	remove leading and trailing characters <= " "
*/
proc TEXT.CropSpaces ( t )
{
	var a=1, e=count t
	do
		while e && t[e]<=" "		// crop right side first
		e--
	loop
	do
		while a<=e && t[a]<=" "		// then left side, never beyond e
		a++
	loop
	return t[a to e]
}


/* ---- split a text into words ----------------------------------------

	in:  "Ein Beispielsatz."
	out: { "","Ein"," ","Beispielsatz","." }	// { gap, word, gap, ... gap }

	first and last gap are always present but may be "".
	(( => count result is always odd ))

		"***"	-> { "***" }
		""		-> { "" }
		"a"		-> { "","a","" }
		" a"	-> { " ","a","" }
		"a "	-> { "","a"," " }
*/

// characters which may start a word:
var TEXT.anf = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ" # "abcdefghijklmnopqrstuvwxyzäöü" # "#<"
// characters which may occur inside a word:
var TEXT.mid = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ" # "abcdefghijklmnopqrstuvwxyzäöü" # "0123456789" # "ß-_"
// characters which may end a word:
var TEXT.fin = "ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ" # "abcdefghijklmnopqrstuvwxyzäöü" # "0123456789" # "ß_>"

proc TEXT.SplitWordsNoTags ( text )
{
	var result = {}
	text = " " # text # " "			// sentinels --> simplify loop; removed again at the end
	var wrd = 1, gap = 1			// word == [wrd ... [gap
	var len = count text

	do
		// 1. gap:
		wrd = gap
		do							// skip gap
			while ++wrd<=len
			until find(TEXT.anf,text[wrd])
		loop						// wrd -> start of word
		result ##= text[gap to wrd-1]
		while wrd <= len

		// 2. word:
		gap = wrd
		do							// skip word
			while ++gap<=len
			while find(TEXT.mid,text[gap])
		loop
		// adjust the end: drop a last char which may not end a word,
		// else include a directly following char which may end a word
		if !find( TEXT.fin,text[gap-1] ) gap--
		elif gap<=len && find(TEXT.fin,text[gap]) gap++
		then						// gap -> start of gap
		result ##= text[wrd to gap-1]
	loop

	// remove the sentinel spaces from the first and the last gap:
	var n = count result
	result[1] = result[1][2 to]
	result[n] = result[n][to count(result[n])-1 ]
	return result
}


/* ---- split a text into words, html aware -----------------------------

	html tags:	are kept as words of their own and thus act as word separators
	links:		<a href=...> ... </a>
				a link is treated as one single word
				=> prevents the insertion of automatic links into the link!

	in:  "Ein <b>Beispielsatz</b>."
	out: { "","Ein"," ","<b>","","Beispielsatz","","</b>","." }	// { sep, word, sep, ... sep }
	(( example assumes that "b" and "/b" are listed in HTML.TagsAny ))
*/
proc TEXT.SplitIntoWords ( text )
{
	var result = { "" }

//	convert text from html			// &quot; -> " etc.; changes text length + text contents!
									// the anchor/link creation should probably take place
									// _before_ the html conversion!

	// split text without tags and append it to result.
	// the last gap of result and the first gap of the new list are joined.
	proc append_to_result ( text )
	{
		var r = count result
		result ##= TEXT.SplitWordsNoTags( text )
		result[r] #= result[r+1]	// join gaps
		del result[r+1]
	}

	do
		var i = find(text,"<")
		while i
		var j = find(text,">",i)
		while j
		i = rfind(text,"<",j)				// last "<" before the ">"

		append_to_result( text[to i-1] )	// append text before html tag
		var tag = text[i to j]				// the tag "<...>"
		text = text[j+1 to]					// text after tag

		if j-i >= 160						// unlikely to be really a tag
			append_to_result( tag )			// "<" and ">" will be handled as sep's
			next
		then

		var lctag = lowerstr(tag)
		var j = 1 + (lctag[2]=="/")
		do while lctag[++j]>="a" loop		// skip the letters of the tag name
		var id = lctag[2 to j-1]			// id := "a" or "/a" or "pre" etc.

		if !find(HTML.TagsAny," "#id#" ")	// no html tag
			append_to_result( tag )			// handle '<' and '>' as sep's
			next
		then

		// html tag:
		if id!="a" || !find(lctag,"href=")	// not a <a href=...> tag ((aka 'link'))
			result ##= tag ## ""			// preserve entire html tag as word
			next
		then

		// <a href=...> ... </a> must be kept as entity:
		// FIX: the search needle was the empty string; the offsets below
		// (i+3 = last char of the match, i+4 = first char behind it)
		// require the 4 character closing tag "</a>".
		i = find(lowerstr(text),"</a>")
		if i
			tag #= text[to i+3]				// link text + "</a>"
			text = text[i+4 to]
		else
			log nl, "'</a>' missing: ", tag, " "
		then
		result ##= tag ## ""
	loop

	append_to_result( text )				// remaining text behind the last tag
	return result
}


/* ---- join the items of a list into one text ---------------------------

	liste:	list of texts
	*:		optional separator which is inserted between the items
*/
proc TEXT.JoinText ( liste, * )		// opt. separator
{
	if count locals>1
		if count liste == 0 return "" then	// then liste[1] does not exist!
		rename locals[2] = "sep"
		var z = liste[1]
		liste = sep # liste			// operation on whole list: prepend sep to every item
		liste[1] = z				// but the first item gets no separator
	then

	var text = ""
	var i = 0
	do
		while ++i <= count liste
		text #= liste[i]
	loop
	return text
}


/* ---- normalize names ----------------------------------------------------

	in:  separated words as from TEXT.SplitIntoWords()
	     (( separators already removed, see TEXT.RemoveOddItems() ))

	in:  { "ein", "Anker" }
	out: "ein_ank"
*/
proc TEXT.NormalizeName ( liste )
{
	var result = ""
	var i = 0
	do
		while ++i <= count liste
		var word = lowerstr(liste[i])

	//	if i!=1 && i!=count(liste) && find(" der die das a the "," "#word#" ")
	//		next
	//	then

		// strip common word endings from words with 3 or more characters:
		if count word>=3 && find( " er en em es ", " "#rightstr(word,2)#" " )
			word = word[to count word -2]
		elif count word>=3 && find( "es", rightstr(word,1) )	// ends with "e" or "s"
			word = word[to count word -1]
		then
		result #= "_" # word
	loop
	return result[2 to]				// w/o the leading "_"
}


/* ---- remove odd items from list -------------------------------------------

	-> removes the separators from a split-into-words list
	these are items liste[1],[3],[5],…
*/
proc TEXT.RemoveOddItems ( liste )
{
	var i=0
	do
		while ++i <= count liste
		del liste[i]				// the following items move down by 1 =>
	loop							// ++i addresses the next originally odd item
	return liste
}


/*	for vipsi < 0.8.8
*/
/*
try
proc spacestr(len)
{
	var spc = "          "
	do
		while count spc < len
		spc #= spc[to len-count spc]
	loop
	return spc[to len]
}
then
*/

end 0