Source code for decomp.semantics.predpatt.utils.ud_schema

#!/usr/bin/env python
"""Universal Dependencies schema definitions for PredPatt.

This module provides POS tags and dependency relation definitions
for both UD v1.0 and v2.0, supporting version-specific processing
in the PredPatt semantic extraction system.

The dependency relation classes define core syntactic relations (subject,
object, modifiers) and relation sets used by PredPatt for pattern matching
during predicate-argument extraction.

Classes
-------
POSTag
    Universal Dependencies part-of-speech tags.
DependencyRelationsBase
    Abstract base class for dependency relations.
DependencyRelationsV1
    UD v1.0 dependency relation definitions.
DependencyRelationsV2
    UD v2.0 dependency relation definitions.

Functions
---------
get_dependency_relations
    Helper to get relations for a specific version.

Constants
---------
postag
    Alias for POSTag class.
dep_v1
    Alias for the DependencyRelationsV1 class.
dep_v2
    Alias for the DependencyRelationsV2 class.
"""

from abc import ABC, abstractmethod
from typing import ClassVar


class POSTag:
    """Universal Dependencies part-of-speech tags.

    Every attribute is a string constant whose value is identical to the
    attribute name, so ``POSTag.NOUN == "NOUN"``.

    Reference: http://universaldependencies.org/u/pos/index.html
    """

    # open class words
    ADJ: ClassVar[str] = "ADJ"
    ADV: ClassVar[str] = "ADV"
    INTJ: ClassVar[str] = "INTJ"
    NOUN: ClassVar[str] = "NOUN"
    PROPN: ClassVar[str] = "PROPN"
    VERB: ClassVar[str] = "VERB"

    # closed class words
    ADP: ClassVar[str] = "ADP"
    AUX: ClassVar[str] = "AUX"
    CCONJ: ClassVar[str] = "CCONJ"
    DET: ClassVar[str] = "DET"
    NUM: ClassVar[str] = "NUM"
    PART: ClassVar[str] = "PART"
    PRON: ClassVar[str] = "PRON"
    SCONJ: ClassVar[str] = "SCONJ"

    # other
    PUNCT: ClassVar[str] = "PUNCT"
    SYM: ClassVar[str] = "SYM"
    X: ClassVar[str] = "X"
class DependencyRelationsBase(ABC):
    """Base class for Universal Dependencies relation definitions.

    Declares, as abstract properties, the relation names (``nsubj``,
    ``nsubjpass``, ``dobj``, ``auxpass``) and relation sets (``subj``,
    ``obj``) that a concrete subclass has to supply. Because the class
    derives from ``ABC``, it cannot be instantiated until every one of
    these names has been overridden.
    """

    # version identifier; annotated here, assigned by subclasses
    VERSION: ClassVar[str]

    # core dependency relations that must be defined by subclasses
    @property
    @abstractmethod
    def nsubj(self) -> str:
        """Nominal subject relation."""

    @property
    @abstractmethod
    def nsubjpass(self) -> str:
        """Passive nominal subject relation."""

    @property
    @abstractmethod
    def dobj(self) -> str:
        """Direct object relation."""

    @property
    @abstractmethod
    def auxpass(self) -> str:
        """Passive auxiliary relation."""

    # relation sets that must be defined by subclasses
    @property
    @abstractmethod
    def subj(self) -> set[str]:
        """All subject relations."""

    @property
    @abstractmethod
    def obj(self) -> set[str]:
        """All object relations."""
class DependencyRelationsV1(DependencyRelationsBase):
    """Universal Dependencies v1.0 relation definitions.

    Relation names are plain string class attributes; the ones the base
    class declares as abstract properties (``nsubj``, ``nsubjpass``,
    ``dobj``, ``auxpass``) are overridden here by those attributes.
    The upper-case attributes are sets of relation names built from the
    individual labels defined above them.
    """

    VERSION: ClassVar[str] = "1.0"

    # subject relations
    nsubj: ClassVar[str] = "nsubj"
    nsubjpass: ClassVar[str] = "nsubjpass"
    csubj: ClassVar[str] = "csubj"
    csubjpass: ClassVar[str] = "csubjpass"

    # object relations
    dobj: ClassVar[str] = "dobj"
    iobj: ClassVar[str] = "iobj"

    # copular
    cop: ClassVar[str] = "cop"

    # auxiliary
    aux: ClassVar[str] = "aux"
    auxpass: ClassVar[str] = "auxpass"

    # negation
    neg: ClassVar[str] = "neg"

    # non-nominal modifier
    amod: ClassVar[str] = "amod"
    advmod: ClassVar[str] = "advmod"

    # nominal modifiers
    nmod: ClassVar[str] = "nmod"
    nmod_poss: ClassVar[str] = "nmod:poss"
    nmod_tmod: ClassVar[str] = "nmod:tmod"
    nmod_npmod: ClassVar[str] = "nmod:npmod"
    obl: ClassVar[str] = "nmod"  # maps to nmod in v1
    obl_npmod: ClassVar[str] = "nmod:npmod"  # same label as nmod_npmod in v1

    # appositional modifier
    appos: ClassVar[str] = "appos"

    # coordination
    cc: ClassVar[str] = "cc"
    conj: ClassVar[str] = "conj"
    cc_preconj: ClassVar[str] = "cc:preconj"

    # marker
    mark: ClassVar[str] = "mark"
    case: ClassVar[str] = "case"

    # fixed multiword expression
    # NOTE(review): UD v1 labels this relation "mwe"; "fixed" is the v2
    # name. Confirm the value is intentional before changing it.
    mwe: ClassVar[str] = "fixed"

    # parataxis
    parataxis: ClassVar[str] = "parataxis"

    # punctuation
    punct: ClassVar[str] = "punct"

    # clausal complement
    ccomp: ClassVar[str] = "ccomp"
    xcomp: ClassVar[str] = "xcomp"

    # relative clause
    advcl: ClassVar[str] = "advcl"
    acl: ClassVar[str] = "acl"
    aclrelcl: ClassVar[str] = "acl:relcl"

    # unknown dependency
    dep: ClassVar[str] = "dep"

    # relation sets for pattern matching
    SUBJ: ClassVar[set[str]] = {nsubj, csubj, nsubjpass, csubjpass}
    OBJ: ClassVar[set[str]] = {dobj, iobj}
    # obl duplicates nmod in v1, so it adds no extra member to these sets
    NMODS: ClassVar[set[str]] = {nmod, obl, nmod_npmod, nmod_tmod}
    ADJ_LIKE_MODS: ClassVar[set[str]] = {amod, appos, acl, aclrelcl}
    ARG_LIKE: ClassVar[set[str]] = {
        nmod, obl, nmod_npmod, nmod_tmod, nsubj, csubj, csubjpass, dobj, iobj
    }

    # trivial symbols to be stripped out
    TRIVIALS: ClassVar[set[str]] = {mark, cc, punct}

    # these dependents of a predicate root shouldn't be included in the
    # predicate phrase
    PRED_DEPS_TO_DROP: ClassVar[set[str]] = {
        ccomp, csubj, advcl, acl, aclrelcl, nmod_tmod, parataxis, appos, dep
    }

    # these dependents of an argument root shouldn't be included in the
    # argument phrase if the argument root is the gov of the predicate root
    SPECIAL_ARG_DEPS_TO_DROP: ClassVar[set[str]] = {
        nsubj, dobj, iobj, csubj, csubjpass, neg, aux, advcl, auxpass,
        ccomp, cop, mark, mwe, parataxis
    }

    # predicates of these relations are hard to find arguments
    HARD_TO_FIND_ARGS: ClassVar[set[str]] = {amod, dep, conj, acl, aclrelcl, advcl}

    @property
    def subj(self) -> set[str]:
        """All subject relations (the ``SUBJ`` set)."""
        return self.SUBJ

    @property
    def obj(self) -> set[str]:
        """All object relations (the ``OBJ`` set)."""
        return self.OBJ
class DependencyRelationsV2(DependencyRelationsBase):
    """Universal Dependencies v2.0 relation definitions.

    Relation names are plain string class attributes; the ones the base
    class declares as abstract properties (``nsubj``, ``nsubjpass``,
    ``dobj``, ``auxpass``) are overridden here by those attributes.
    Attribute names are kept the same as in the v1 class even where the
    v2 label differs (for example ``dobj`` holds ``"obj"``).
    """

    VERSION: ClassVar[str] = "2.0"

    # subject relations
    nsubj: ClassVar[str] = "nsubj"
    nsubjpass: ClassVar[str] = "nsubj:pass"  # changed in v2
    csubj: ClassVar[str] = "csubj"
    csubjpass: ClassVar[str] = "csubj:pass"  # changed in v2

    # object relations
    dobj: ClassVar[str] = "obj"  # changed in v2
    iobj: ClassVar[str] = "iobj"

    # auxiliary
    aux: ClassVar[str] = "aux"
    auxpass: ClassVar[str] = "aux:pass"  # changed in v2

    # negation
    neg: ClassVar[str] = "neg"

    # copular
    cop: ClassVar[str] = "cop"

    # non-nominal modifier
    amod: ClassVar[str] = "amod"
    advmod: ClassVar[str] = "advmod"

    # nominal modifiers
    nmod: ClassVar[str] = "nmod"
    nmod_poss: ClassVar[str] = "nmod:poss"
    nmod_tmod: ClassVar[str] = "nmod:tmod"
    nmod_npmod: ClassVar[str] = "nmod:npmod"
    obl: ClassVar[str] = "obl"  # separate relation in v2
    obl_npmod: ClassVar[str] = "obl:npmod"

    # appositional modifier
    appos: ClassVar[str] = "appos"

    # coordination
    cc: ClassVar[str] = "cc"
    conj: ClassVar[str] = "conj"
    cc_preconj: ClassVar[str] = "cc:preconj"

    # marker
    mark: ClassVar[str] = "mark"
    case: ClassVar[str] = "case"

    # fixed multiword expression
    mwe: ClassVar[str] = "fixed"

    # parataxis
    parataxis: ClassVar[str] = "parataxis"

    # punctuation
    punct: ClassVar[str] = "punct"

    # clausal complement
    ccomp: ClassVar[str] = "ccomp"
    xcomp: ClassVar[str] = "xcomp"

    # relative clause
    advcl: ClassVar[str] = "advcl"
    acl: ClassVar[str] = "acl"
    aclrelcl: ClassVar[str] = "acl:relcl"

    # unknown dependency
    dep: ClassVar[str] = "dep"

    # relation sets for pattern matching
    SUBJ: ClassVar[set[str]] = {nsubj, csubj, nsubjpass, csubjpass}
    OBJ: ClassVar[set[str]] = {dobj, iobj}
    NMODS: ClassVar[set[str]] = {nmod, obl, nmod_npmod, nmod_tmod}
    ADJ_LIKE_MODS: ClassVar[set[str]] = {amod, appos, acl, aclrelcl}
    ARG_LIKE: ClassVar[set[str]] = {
        nmod, obl, nmod_npmod, nmod_tmod, nsubj, csubj, csubjpass, dobj, iobj
    }

    # trivial symbols to be stripped out
    TRIVIALS: ClassVar[set[str]] = {mark, cc, punct}

    # these dependents of a predicate root shouldn't be included in the
    # predicate phrase
    PRED_DEPS_TO_DROP: ClassVar[set[str]] = {
        ccomp, csubj, advcl, acl, aclrelcl, nmod_tmod, parataxis, appos, dep
    }

    # these dependents of an argument root shouldn't be included in the
    # argument phrase if the argument root is the gov of the predicate root
    SPECIAL_ARG_DEPS_TO_DROP: ClassVar[set[str]] = {
        nsubj, dobj, iobj, csubj, csubjpass, neg, aux, advcl, auxpass,
        ccomp, cop, mark, mwe, parataxis
    }

    # predicates of these relations are hard to find arguments
    HARD_TO_FIND_ARGS: ClassVar[set[str]] = {amod, dep, conj, acl, aclrelcl, advcl}

    @property
    def subj(self) -> set[str]:
        """All subject relations (the ``SUBJ`` set)."""
        return self.SUBJ

    @property
    def obj(self) -> set[str]:
        """All object relations (the ``OBJ`` set)."""
        return self.OBJ
# convenience aliases for backwards compatibility
# (each name is bound to the class itself, not to an instance)
postag = POSTag
dep_v1 = DependencyRelationsV1
dep_v2 = DependencyRelationsV2
def get_dependency_relations(version: str = "2.0") -> type[DependencyRelationsBase]:
    """Get dependency relations for a specific UD version.

    Parameters
    ----------
    version : str, optional
        The UD version ("1.0" or "2.0"), by default "2.0". The value is
        compared by exact string equality.

    Returns
    -------
    type[DependencyRelationsBase]
        The dependency relations class (not an instance) for the
        specified version.

    Raises
    ------
    ValueError
        If an unsupported version is specified.
    """
    if version == "1.0":
        return DependencyRelationsV1
    if version == "2.0":
        return DependencyRelationsV2
    raise ValueError(f"Unsupported UD version: {version}. Use '1.0' or '2.0'.")