(:
Hello - I saw some discussions about full text indexing. Dumbo here cannot work out how distance matching works.
Also, is it somehow possible to traverse up and down a full-text index word list from a hit position, rather than having to spend time on, say, reversing strings? Or is this not possible due to how the word indexing works? If so, can I preprocess https://github.com/pierrec/lz4/blob/master/fuzz/corpus/Mark.Twain-Tom.Sawyer_long.txt to break it into words?
I was considering making something like DTSearch (but more flexible) before I realised how difficult this is.
Many thanks
Dumbo
:)
import module namespace functx = 'http://www.functx.com';
(: Full-text input, presumably the document behind the 'xvue_textIndex' database queried below:
   <x><content><![CDATA[ https://github.com/pierrec/lz4/blob/master/fuzz/corpus/Mark.Twain-Tom.Sawyer_long.txt ]]></content></x> :)

(: $f - parent elements of every index hit containing both "queer" and "enterprises", wrapped in <x> :)
let $f := <x>{ft:search('xvue_textIndex',("enterprises", "queer"), map { 'mode': 'all' })/parent::* }</x>

(: Candidate option maps for ft:contains. Only $options1 is in use; the distance variants
   are kept for reference because they did not produce results in the author's tests.
   Open question: why do $options2, $options3 and $options4 fail?
   NOTE(review): $options2 and $options3 pass "max" as a string - confirm whether an integer is required. :)
let $options1 := map { 'mode': 'all'}
let $options2 := map { 'mode': 'all', "distance": map { "max": "5","unit": "words" }}
let $options3 := map { 'mode': 'all words', "distance": map { "max": "5","unit": "words" }}
let $options4 := map { 'mode': 'all words', "distance": map { "max": 5, "unit": "words" }}
let $options := $options1

(: $g - the matching elements of $f with every matched token wrapped in a <mark> element.
   Hoped-for result with a working distance option: a single <mark>queer enterprises</mark>;
   without it each word is marked separately and needs further post-processing. :)
let $g := <y>{ft:mark( $f//*[ft:contains(text(), ('queer','enterprises'), $options)], 'mark')}</y>

(: Context size around each match, in characters.
   Ideally this would be expressed in words before and after the match. :)
let $charbefore := 30
let $charafter := 30

(: $h - one <a> element per <mark>, holding the match plus its surrounding text.
   The siblings of a mark are a mix of plain text nodes and other <mark> elements,
   so they are joined into one string before being cut down to the context size.
   Joining the large sibling sequences is the slow part of this query.
   Open question: can the full-text index deliver this context faster,
   for example by walking a word list from the match position? :)
let $h :=
  for $w in $g//mark
  let $before := string-join($w/preceding-sibling::node())
  let $after := string-join($w/following-sibling::node())
  return <a><preceedingWords>{
      (: Take the tail of the preceding text directly; no string reversal is needed.
         A start position below 1 simply yields the whole string when it is shorter
         than the requested context. :)
      substring($before, string-length($before) - $charbefore + 1)
    }</preceedingWords><match>{
      $w/text()
    }</match><followingWords>{
      (: substring() is 1-based: a start of 0 would return one character fewer than requested. :)
      substring($after, 1, $charafter)
    }</followingWords></a>
return $h
(:
$h :=
<a>
<preceedingWords>thought and talked,
and what </preceedingWords>
<match>queer</match>
<followingWords> enterprises they sometimes e</followingWords>
</a>
<a>
<preceedingWords>t and talked,
and what queer </preceedingWords>
<match>enterprises</match>
<followingWords> they sometimes engaged in.
</followingWords>
</a>
Sorry about the length of this.
:)