# Source code for chaininglib.search.lexiconQueries

import chaininglib.utils.stringutils as stringutils

    
def lexicon_query(word, pos, lexicon, sparql_limit=None, sparql_offset=None):
    '''
    Build the SPARQL query that retrieves the entries (paradigm, definitions, ...)
    of a given lemma/wordform from a given lexicon.
    The resulting query string is to be used in LexiconQuery.search()

    The word is matched exactly unless stringutils.containsRegex(word) is true,
    in which case it is used as a regular expression in a SPARQL regex() filter.
    (The 'duelme' lexicon always uses a regex() filter.)

    NOTE: word and pos are inserted into the query text as they are, without
    any escaping, so they must not come from an untrusted source.

    Args:
        word: a lemma/wordform to build the query with; when None, the query
            listing all lemmata of the lexicon is returned instead
        pos: a part-of-speech to build the query with (None or '' for no filter);
            'anw' and 'celex' do not support it and only print a notice
        lexicon: a lexicon to build the query for, one of
            'anw', 'diamant', 'molex', 'duelme', 'celex'
        sparql_limit: (optional) maximum number of results; no LIMIT/OFFSET
            clause is generated when None
        sparql_offset: (optional) number of results to skip; only used together
            with sparql_limit, and left out of the query when None
    Returns:
        a query string to be used as a parameter of pattern()
    Raises:
        ValueError: if the lexicon is unknown
    '''

    if word is None:
        return _lexicon_query_alllemmata(lexicon, pos, sparql_limit, sparql_offset)

    # Paging clause. OFFSET is only written when an offset was actually given,
    # otherwise the query would contain the invalid text 'OFFSET None'.
    limit_part = """"""
    if sparql_limit is not None:
        limit_part = """
        LIMIT """ + str(sparql_limit)
        if sparql_offset is not None:
            limit_part = limit_part + """
        OFFSET """ + str(sparql_offset)
        limit_part = limit_part + """
        """

    builders = {
        "anw": _build_anw_query,
        "diamant": _build_diamant_query,
        "molex": _build_molex_query,
        "duelme": _build_duelme_query,
        "celex": _build_celex_query,
    }
    builder = builders.get(lexicon)
    if builder is None:
        # str() so that a non-string lexicon (e.g. None) still gives a ValueError
        raise ValueError("Lexicon " + str(lexicon) + " unknown!")

    return builder(word, pos, limit_part)


def _report_pos_filter_unsupported(lexicon, pos):
    '''Print a notice if a part-of-speech was given for a lexicon that cannot filter on it.'''
    if pos is not None and pos != '':
        print('Filtering by part-of-speech is not (yet) supported in the \'' + lexicon + '\' lexicon')


def _build_anw_query(word, pos, limit_part):
    '''Build the 'anw' query: match the word against lemma labels and definitions.'''
    _report_pos_filter_unsupported("anw", pos)

    quoted = '"' + word + '"'
    if stringutils.containsRegex(word):
        # fuzzy search
        subpart = """FILTER ( regex(?lemma, """ + quoted + """) || regex(?definition, """ + quoted + """) ) . """
    else:
        # exact search, with and without the Dutch language tag
        subpart = """
                { { ?lemId rdfs:label ?lemma .
                values ?lemma { """ + quoted + """@nl """ + quoted + """ } }
                UNION
                { ?definitionId lemon:value ?definition .
                values ?definition { """ + quoted + """@nl """ + quoted + """ } } } .
                """

    return """PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
                  PREFIX anw: <http://rdf.ivdnt.org/lexica/anw>
                  PREFIX anwsch: <http://rdf.ivdnt.org/schema/anw/>
                  PREFIX lemon: <http://lemon-model.net/lemon#>

                  SELECT ?lemId ?lemma ?writtenForm ?definition concat('', ?definitionComplement) as ?definitionComplement
                  FROM <http://rdf.ivdnt.org/lexica/anw/>
                  WHERE {
                      ?lemId rdfs:label ?lemma .
                      ?lemId ontolex:sense ?senseId .
                      ?senseId lemon:definition ?definitionId .
                      ?definitionId lemon:value ?definition .
                      OPTIONAL { ?definitionId anwsch:definitionComplement ?definitionComplement .}
                      OPTIONAL { ?lemId ontolex:canonicalForm ?lemCFId .
                          ?lemCFId ontolex:writtenRepresentation ?writtenForm . }
                      """ + subpart + """
                      }
                      """ + limit_part


def _build_diamant_query(word, pos, limit_part):
    '''Build the 'diamant' query: a UNION of a match on the written form ("lemma") and a match on the synonym definition text ("defText").'''
    quoted = '"' + word + '"'
    if stringutils.containsRegex(word):
        # fuzzy search
        lemma_match = """?n_form ontolex:writtenRep ?n_ontolex_writtenRep .
            FILTER regex(?n_ontolex_writtenRep, """ + quoted + """) . """
        # The definition branch has to filter on the definition text; filtering
        # on the written form here would just duplicate the lemma branch.
        definition_match = """?n_syndef diamant:definitionText ?n_syndef_definitionText .
            FILTER regex(?n_syndef_definitionText, """ + quoted + """) . """
    else:
        # exact search, with and without the Dutch language tag
        lemma_match = """
                { ?n_form ontolex:writtenRep ?n_ontolex_writtenRep .
                values ?n_ontolex_writtenRep { """ + quoted + """@nl """ + quoted + """ } }
                """
        definition_match = """
                { ?n_syndef diamant:definitionText ?n_syndef_definitionText .
                values ?n_syndef_definitionText { """ + quoted + """@nl """ + quoted + """ } }
                """

    # ?lempos is always selected; it is only restricted when a pos was given
    pos_part = """{ ?n_entry rdf:type ?lempos . }"""
    if pos is not None and pos != '':
        pos_pattern = '"' + pos + '$"'
        pos_part = pos_part + """{ ?n_entry rdf:type ?lempos .  FILTER  regex(?lempos, """ + pos_pattern + """) . }"""

    return """
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix prov: <http://www.w3.org/ns/prov#>
        prefix diamant: <http://rdf.ivdnt.org/schema/diamant#>
        prefix lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#>
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix lemon: <http://lemon-model.net/lemon#>
        prefix ontolex: <http://www.w3.org/ns/lemon/ontolex#>
        prefix ud: <http://universaldependencies.org/u/pos/>
        prefix skos: <http://www.w3.org/2004/02/skos/core#>
        prefix dcterms: <http://purl.org/dc/terms/>
        prefix dc: <http://purl.org/dc/terms/>

        select ?n_entry ?n_form ?n_ontolex_writtenRep ?n_syndef ?n_sensedef ?n_sensedef_definitionText ?n_syndef_definitionText ?n_sense ?inputMode ?wy_f_show ?wy_t_show ?lempos
        FROM <http://rdf.ivdnt.org/lexica/diamant/v1.0/>
        where
        {
        {
            """ + lemma_match + """
            """ + pos_part + """
            { ?n_entry a ontolex:LexicalEntry} .
            { ?n_form a ontolex:Form} .
            { ?n_sense a ontolex:LexicalSense} .
            { ?n_syndef a diamant:SynonymDefinition} .
            { ?n_sensedef a lemon:SenseDefinition} .
            { ?n_syndef diamant:definitionText ?n_syndef_definitionText } .
            { ?n_sensedef diamant:definitionText ?n_sensedef_definitionText } .
            { ?n_entry ontolex:canonicalForm ?n_form } .
            { ?n_entry ontolex:sense ?n_sense } .
            { ?n_sense lemon:definition ?n_syndef } .
            { ?n_sense lemon:definition ?n_sensedef } .
              ?n_sense diamant:attestation ?n_attest_show .
              ?n_sense diamant:attestation ?n_attest_filter .
              ?n_attest_show diamant:text ?n_q_show .
              ?n_attest_filter diamant:text ?n_q_filter .
              ?n_attest_show a diamant:Attestation .
              ?n_attest_filter a diamant:Attestation .
              ?n_q_filter a diamant:Quotation .
              ?n_q_show a diamant:Quotation .
              ?n_q_filter diamant:witnessYearFrom ?wy_f_filter .
              ?n_q_filter diamant:witnessYearTo ?wy_t_filter .
              ?n_q_show diamant:witnessYearFrom ?wy_f_show .
              ?n_q_show diamant:witnessYearTo ?wy_t_show .
            { bind("lemma" as ?inputMode) } .
            } UNION
          {
            """ + definition_match + """
            """ + pos_part + """
            { ?n_sense a ontolex:LexicalSense} .
            { ?n_syndef a diamant:SynonymDefinition} .
            { ?n_sensedef a lemon:SenseDefinition} .
            { ?n_form a ontolex:Form} .
            { ?n_form ontolex:writtenRep ?n_ontolex_writtenRep } .  { ?n_entry a ontolex:LexicalEntry} .
            { ?n_entry ontolex:sense ?n_sense } .
            { ?n_sense lemon:definition ?n_syndef } .
            { ?n_sense lemon:definition ?n_sensedef } .
            { ?n_sensedef diamant:definitionText ?n_sensedef_definitionText } .
            { ?n_entry ontolex:canonicalForm ?n_form } .
            ?n_sense diamant:attestation ?n_attest_show .
            ?n_sense diamant:attestation ?n_attest_filter .
            ?n_attest_filter diamant:text ?n_q_filter .
            ?n_attest_show diamant:text ?n_q_show .
            ?n_q_filter diamant:witnessYearFrom ?wy_f_filter .
            ?n_q_filter diamant:witnessYearTo ?wy_t_filter .
            ?n_q_show diamant:witnessYearFrom ?wy_f_show .
            ?n_q_show diamant:witnessYearTo ?wy_t_show .
            ?n_attest_show a diamant:Attestation .
            ?n_attest_filter a diamant:Attestation .
            ?n_q_filter a diamant:Quotation .
            ?n_q_show a diamant:Quotation .
          { bind("defText" as ?inputMode) } .
            }
        }
        """ + limit_part


def _build_molex_query(word, pos, limit_part):
    '''Build the 'molex' query: match the word against lemmata and wordforms, optionally restricted by a pos such as "ADJ" or "ADJ(degree=pos)".'''
    exact_part = """"""   # inserted right after the lemma triple
    fuzzy_part = """"""   # inserted at the end, once ?wordform is bound
    if word != '':
        quoted = '"' + word + '"'
        if stringutils.containsRegex(word):
            fuzzy_part = """FILTER ( regex(?lemma, """ + quoted + """) || regex(?wordform, """ + quoted + """) ) . """
        else:
            exact_part = """
                    { ?lemCFId ontolex:writtenRep ?lemma .
                    values ?lemma { """ + quoted + """@nl """ + quoted + """ } }
                    UNION
                    { ?wordformId ontolex:writtenRep ?wordform .
                    values ?wordform { """ + quoted + """@nl """ + quoted + """ } } .
                    """

    pos_part = """"""
    if pos is not None and pos != '':
        features_start = pos.find('(')
        if features_start >= 0:
            # features are given between brackets, e.g. ADJ(degree=pos);
            # a missing closing bracket means: everything up to the end
            features_end = pos.find(')', features_start)
            if features_end < 0:
                features_end = len(pos)
            features = pos[features_start + 1:features_end].split(",")

            # the pos itself is what precedes the brackets
            bare_pos = pos[:features_start]
            pos_part = """FILTER ( regex(?lemPos, """ + '"' + bare_pos + '"' + """) ) ."""

            # only the 'degree' feature is translated into a filter
            for feature in features:
                key, separator, value = feature.partition("=")
                if key.strip() == 'degree' and separator:
                    pos_part = pos_part + """
                            { ?lemEntryId UD:Degree ?degree .
                            FILTER ( regex( lcase(str(?degree)), """ + '"' + value.strip() + '$"' + """) ) .}
                            """
        else:
            pos_part = """FILTER ( regex(?lemPos, """ + '"' + pos + '$"' + """) ) ."""

    return """
            PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
            PREFIX UD: <http://universaldependencies.org/u/>
            PREFIX diamant: <http://rdf.ivdnt.org/schema/diamant#>

            SELECT DISTINCT ?lemEntryId ?lemma ?lemPos ?wordformId ?wordform ?hyphenation ?wordformPos ?Gender ?Number
            FROM <http://rdf.ivdnt.org/lexica/molex>
            WHERE
            {
            ?lemEntryId ontolex:canonicalForm ?lemCFId .
            ?lemCFId ontolex:writtenRep ?lemma .
            """ + exact_part + """
            OPTIONAL {?lemEntryId UD:Gender ?Gender .}
            OPTIONAL {?lemEntryId UD:VerbForm ?verbform .}
            ?lemEntryId UD:pos ?lemPos .
            """ + pos_part + """
            ?lemEntryId ontolex:lexicalForm ?wordformId .
            ?wordformId UD:pos ?wordformPos .
            OPTIONAL {?wordformId UD:Number ?Number .}
            OPTIONAL {?wordformId ontolex:writtenRep ?wordform .}
            OPTIONAL {?wordformId diamant:hyphenation ?hyphenation .}
            """ + fuzzy_part + """
            }
        """ + limit_part


def _build_duelme_query(word, pos, limit_part):
    '''Build the 'duelme' query: regex match on the multiword expression, optionally restricted to one syntactic category (e.g. "NP").'''
    word_part = """
                { FILTER ( regex(?multiwordexp, """ + '"' + word + '"' + """) ) . }
                """
    pos_part = """"""
    if pos is not None and pos != '':
        pos_part = """
                    { values ?syncat { """ + '"' + pos + '"' + """ } . }
                    """

    # NOTE(review): 'olia:SubordicateClause' below looks like a typo for
    # 'SubordinateClause'; left untouched because the data may use the same
    # spelling - confirm against the DuELME graph before changing it.
    return """
            PREFIX duelme: <http://rdf.ivdnt.org/lexica/duelme>
            PREFIX intskos: <http://ivdnt.org/schema/lexica#>
            PREFIX lmf: <http://www.lexinfo.net/lmf>
            PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
            PREFIX UD: <http://rdf.ivdnt.org/vocabs/UniversalDependencies2#>
            PREFIX prov: <http://www.w3.org/ns/prov#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX olia: <http://purl.org/olia/olia.owl#>

            SELECT ?lemma  ?pos ?parts
            FROM <http://rdf.ivdnt.org/lexica/duelme>
            WHERE {
              SELECT replace(STRAFTER(str(?multiwordexp), "duelme_"), "_", " ") AS ?lemma ?mwepattern (?syncat AS ?pos) group_concat(DISTINCT ?subsubcat; separator=" + ") AS ?parts
              WHERE  {
                    {
                    SELECT DISTINCT ?multiwordexp ?mwepattern (STRAFTER(str(?trueSynCat), '#') AS ?syncat) (STRAFTER(str(?trueSubsubcat), '#') AS ?subsubcat)
                    FROM <http://rdf.ivdnt.org/lexica/duelme>
                    WHERE {
                        {
                        ?multiwordexp lmf:hasMWEPattern ?mwepattern .
                        ?mwepattern lmf:hasMWENode ?node .
                        ?node rdf:type ?syncat .
                        filter regex(str(?syncat), 'http://purl.org/olia/olia.owl') .
                          bind(
                            if(?syncat = olia:NounPhrase,
                              olia:NP,
                              if(?syncat = olia:VerbPhrase,
                                olia:VP,
                                if(?syncat = olia:Determiner,
                                  olia:DP,
                                  if(?syncat = olia:Verb,
                                    olia:V,
                                    if(?syncat = olia:PrepositionalPhrase,
                                    olia:PP,
                                      if(?syncat = olia:Preposition,
                                      olia:P,
                                        if(?syncat = olia:AdjectivePhrase,
                                        olia:AP,
                                          if(?syncat = olia:SubordicateClause,
                                          olia:SC,
                                          olia:Unknown
                                          )
                                        )
                                      )
                                    )
                                  )
                                )
                              )
                            )
                            AS ?trueSynCat )
                          ?node lmf:hasMWEEdge ?subnode .
                          ?subnode lmf:hasMWENode ?subsubnode .
                            ?subsubnode rdf:type ?subsubcat .
                          filter regex(str(?subsubcat), 'http://purl.org/olia/olia.owl') .
                          bind(
                            if(?subsubcat = olia:NounPhrase,
                              olia:NP,
                              if(?subsubcat = olia:VerbPhrase,
                                olia:VP,
                                if(?subsubcat = olia:Determiner,
                                  olia:DP,
                                  if(?subsubcat = olia:Verb,
                                    olia:V,
                                    if(?subsubcat = olia:PrepositionalPhrase,
                                    olia:PP,
                                      if(?subsubcat = olia:Preposition,
                                      olia:P,
                                        if(?subsubcat = olia:AdjectivePhrase,
                                        olia:AP,
                                          if(?subsubcat = olia:SubordicateClause,
                                          olia:SC,
                                          olia:Unknown
                                          )
                                        )
                                      )
                                    )
                                  )
                                )
                              )
                            )
                            AS ?trueSubsubcat )

                            """ + word_part + """
                            }

                    }
                    ORDER BY ?partnr
                  }
                  {
                    """ + pos_part + """
                  }
                }
              GROUP BY ?multiwordexp ?mwepattern  ?syncat

            }
        """ + limit_part  # paging applies to the outermost SELECT, as for the other lexica


def _build_celex_query(word, pos, limit_part):
    '''Build the 'celex' query: match the word against lemmata and return wordforms, number, gender and the numbered sub-lemmata.'''
    _report_pos_filter_unsupported("celex", pos)

    quoted = '"' + word + '"'
    if stringutils.containsRegex(word):
        # fuzzy search
        subpart = """FILTER ( regex(?lemma, """ + quoted + """) ) . """
    else:
        # exact search, with and without the Dutch language tag
        subpart = """
                { ?lemmaId ontolex:canonicalForm [ontolex:writtenRep ?lemma] .
                values ?lemma { """ + quoted + """@nl """ + quoted + """ } }
                """

    return """
            PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
            PREFIX celex: <http://rdf.ivdnt.org/lexica/celex/>
            PREFIX UD: <http://rdf.ivdnt.org/vocabs/UniversalDependencies2#>
            PREFIX decomp: <http://www.w3.org/ns/lemon/decomp#>
            PREFIX gold: <http://purl.org/linguistics/gold#>

            SELECT DISTINCT ?lemmaId ?lemma ?wordformId ?wordform ?number ?gender concat('', ?subLemmata) AS ?subLemmata
            FROM <http://rdf.ivdnt.org/lexica/celex/>
            WHERE  {
                ?lemmaId ontolex:canonicalForm [ontolex:writtenRep ?lemma] .
                """ + subpart + """
                BIND( ?lemmaId AS ?lemmaIdIRI ).
                ?lemmaId ontolex:lexicalForm ?wordformId .
                ?wordformId ontolex:writtenRep ?wordform .
                OPTIONAL {?wordformId UD:Number ?number} .
                OPTIONAL {
                    ?lemmaId UD:Gender ?g .
                        bind(
                            if(?g = UD:Fem_Gender,
                            UD:Com_Gender,
                                if(?g = UD:Masc_Gender,
                                    UD:Com_Gender,
                                    if(?g = UD:Com_Gender,
                                        UD:Com_Gender,
                                        if(?g = UD:Neut_Gender,
                                            UD:Neut,
                                            ?g
                                        )
                                    )
                                )
                            )
                            AS ?gender
                        )
                }
                OPTIONAL {
                    SELECT ?lemmaIdIRI (group_concat(DISTINCT concat(?partNr,":",?subLemma);separator=" + ") as ?subLemmata)
                    WHERE {
                        SELECT ?lemmaIdIRI ?celexComp ?aWordformId ?subLemma ?partNr
                        WHERE {
                                {
                                ?lemmaIdIRI ontolex:lexicalForm ?aWordformId .
                                ?lemmaIdIRI decomp:constituent ?celexComp .
                                OPTIONAL { ?celexComp gold:stem [ontolex:writtenRep ?subLemma] . }
                                OPTIONAL { ?celexComp decomp:correspondsTo [ ontolex:canonicalForm [ontolex:writtenRep ?subLemma]] . }
                                }
                                {
                                    {
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_1> ?celexComp .}
                                        UNION
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_2> ?celexComp .}
                                        UNION
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_3> ?celexComp .}
                                        UNION
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_4> ?celexComp .}
                                        UNION
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_5> ?celexComp .}
                                        UNION
                                        {?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#_6> ?celexComp .}
                                    }
                                ?lemmaIdIRI ?rdfsynt ?celexComp .
                                BIND(IF(STRSTARTS(str(?rdfsynt), "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), replace(STRAFTER(str(?rdfsynt), "#"), "_", ""), "999") AS ?partNr) .
                                MINUS {
                                    ?lemmaIdIRI <http://www.w3.org/1999/02/22-rdf-syntax-ns#0> ?celexComp .
                                    }
                                }
                            FILTER (?partNr != "999") .
                            }
                            ORDER BY ?partNr
                            }
                        GROUP BY ?aWordformId ?lemmaIdIRI
                    }
            }
        """ + limit_part



def _lexicon_query_alllemmata(lexicon, pos, sparql_limit=None, sparql_offset=None):
    '''
    Build the SPARQL query that lists all lemmata of a lexicon (as ?writtenForm),
    if needed restricted to a given part-of-speech.
    The resulting query string is to be used as a parameter of search_lexicon().

    NOTE: pos is inserted into the query text as it is, without any escaping,
    so it must not come from an untrusted source.

    Args:
        lexicon: a lexicon name, one of 'anw', 'celex', 'diamant', 'duelme', 'molex'
        pos: (optional) a part-of-speech; only 'diamant' and 'molex' filter on it,
            the other lexica print a notice and ignore it
        sparql_limit: (optional) maximum number of results; no LIMIT/OFFSET
            clause is generated when None
        sparql_offset: (optional) number of results to skip; only used together
            with sparql_limit, and left out of the query when None
    Returns:
        a lexicon query string
    Raises:
        ValueError: if the lexicon is not supported
    '''

    # Paging clause. OFFSET is only written when an offset was actually given,
    # otherwise the query would contain the invalid text 'OFFSET None'.
    limit_part = """"""
    if sparql_limit is not None:
        limit_part = """
        LIMIT """ + str(sparql_limit)
        if sparql_offset is not None:
            limit_part = limit_part + """
        OFFSET """ + str(sparql_offset)
        limit_part = limit_part + """
        """

    has_pos = pos is not None and pos != ''

    # part-of-speech filter not supported for these lexica
    if has_pos and lexicon in ("anw", "celex", "duelme"):
        print('Filtering by part-of-speech is not (yet) supported in the \'' + lexicon + '\' lexicon')

    if lexicon == "anw":
        # NOTE(review): the graph IRI has no trailing slash here, whereas the
        # anw query in lexicon_query() uses '.../lexica/anw/' - confirm which is right.
        query = """PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
                  PREFIX anw: <http://rdf.ivdnt.org/lexica/anw>
                  SELECT DISTINCT ?writtenForm
                  FROM <http://rdf.ivdnt.org/lexica/anw>
                  WHERE {
                      ?lemId rdfs:label ?lemma .
                      ?lemId ontolex:canonicalForm ?lemCFId .
                      ?lemCFId ontolex:writtenRepresentation ?writtenForm .
                      }
                      ORDER BY ?writtenForm""" + limit_part
    elif lexicon == "celex":
        query = """
            PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>

            SELECT DISTINCT ?lemma AS ?writtenForm
            WHERE  {
                ?lemmaId ontolex:canonicalForm [ontolex:writtenRep ?lemma] .
                }
            ORDER BY ?lemma""" + limit_part
    elif lexicon == "diamant":
        # part-of-speech filter: the entry type has to end with the given pos
        pos_part = """"""
        if has_pos:
            pos_part = """
            { ?n_entry ontolex:canonicalForm ?n_form } .
            { ?n_entry rdf:type ?lempos .
             FILTER  regex(?lempos, """ + '"' + pos + '$"' + """ ) } . """
        query = """
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX prov: <http://www.w3.org/ns/prov#>
        PREFIX diamant: <http://rdf.ivdnt.org/schema/diamant#>
        PREFIX lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX lemon: <http://lemon-model.net/lemon#>
        PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
        PREFIX ud: <http://universaldependencies.org/u/pos/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX dc: <http://purl.org/dc/terms/>

        SELECT DISTINCT ( ?n_ontolex_writtenRep AS ?writtenForm )
        WHERE {
            { ?n_form ontolex:writtenRep ?n_ontolex_writtenRep } .
            { ?n_form a ontolex:Form } .
            """ + pos_part + """
        }
        ORDER BY ?n_ontolex_writtenRep
        """ + limit_part
    elif lexicon == "duelme":
        query = """
            PREFIX lmf: <http://www.lexinfo.net/lmf>
            SELECT DISTINCT ?lemma AS ?writtenForm
            WHERE  {
                  ?y lmf:hasLemma ?lemma .
            }
            ORDER BY ?lemma""" + limit_part
    elif lexicon == "molex":
        # part-of-speech filter
        pos_part = """"""
        if has_pos:
            pos_part = """
            {?lemEntryId UD:pos ?lemPos .
            FILTER regex(?lemPos, """ + '"' + pos + '"' + """) } .
            """
        query = """
                PREFIX ontolex: <http://www.w3.org/ns/lemon/ontolex#>
                PREFIX UD: <http://universaldependencies.org/u/>
                SELECT DISTINCT ?lemma AS ?writtenForm
                FROM <http://rdf.ivdnt.org/lexica/molex>
                WHERE
                {
                ?lemEntryId ontolex:canonicalForm ?lemCFId .
                ?lemCFId ontolex:writtenRep ?lemma .
                """ + pos_part + """
                }
                 ORDER BY ?lemma""" + limit_part
    else:
        # str() so that a non-string lexicon (e.g. None) still gives a ValueError
        raise ValueError("Lexicon " + str(lexicon) + " not supported for querying all words.")

    return query