Source code for chaininglib.search.treebankQueries
import re
def treebank_query(lemma=None, word=None, pos=None):
    '''
    Build a query string for finding occurrences of a lemma, word form
    and/or part-of-speech within a treebank.

    Every argument that is not None contributes one attribute test; the
    tests are combined with ``and`` inside a single ``//node[...]``
    predicate. When no argument is given, the predicate is omitted
    entirely (an empty ``[]`` predicate is not valid XPath), so the
    resulting query is ``xquery //node``.

    Args:
        lemma: a lemma to look for (tested against the ``root`` attribute)
        word: wordform to look for (tested against the ``word`` attribute)
        pos: POS tag to look for. A purely alphabetic value is lower-cased
            and tested against ``pt``; any other value (e.g. a tag with
            features) is tested verbatim against ``postag``.
    Returns:
        a treebank query string

    >>> tb = create_treebank().word("kat")
    >>> df_trees = tb.search().kwic()
    >>> display(df_trees)
    '''
    # NOTE(review): values are interpolated verbatim; a value containing a
    # double quote would produce a malformed query. Confirm whether callers
    # can pass such input before adding escaping.
    conditions = []

    if lemma is not None:
        conditions.append('@root="' + lemma + '"')

    if word is not None:
        conditions.append('@word="' + word + '"')

    # If no features are provided, we need to query for pos in 'pt', with the
    # query string in lower case; but if we do have features, we search for
    # pos in 'postag' (no need for lower case there).
    if pos is not None:
        if re.match(r"^[A-Za-z]+$", pos):
            conditions.append('@pt="' + pos.lower() + '"')
        else:
            conditions.append('@postag="' + pos + '"')

    # Without any condition, "//node[]" would be a syntax error, so drop the
    # predicate altogether.
    if not conditions:
        return 'xquery //node'

    return 'xquery //node[' + ' and '.join(conditions) + ']'