Source code for decomp.semantics.predpatt.core.token
"""Token representation for dependency parsing in PredPatt.

This module defines the core Token class that represents individual
tokens (words) in a dependency parse tree. Tokens store linguistic
information including text, part-of-speech tags, and dependency relations.

Classes
-------
Token
    Represents a single token with its linguistic properties and dependency
    relations. Used as the basic unit in dependency parsing for
    predicate-argument extraction.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from ..utils.ud_schema import dep_v1, postag

if TYPE_CHECKING:
    from ..parsing.udparse import DepTriple
    from ..typing import UDSchema
class Token:
    """One token (word or punctuation mark) of a dependency parse.

    Attributes
    ----------
    position : int
        Index of the token within its sentence (0-based).
    text : str
        Surface text of the token.
    tag : str
        Part-of-speech tag of the token.
    dependents : list[DepTriple] | None
        Dependency edges for which this token is the governor. Starts out
        as ``None`` and is expected to be filled in after construction.
    gov : Token | None
        The governing (parent) token in the dependency tree. Starts out as
        ``None``.
    gov_rel : str | None
        The relation linking this token to its governor. Starts out as
        ``None``.
    ud : UDSchema
        The Universal Dependencies module (dep_v1 or dep_v2) supplying the
        relation sets consulted by the predicates below.
    """

    def __init__(
        self,
        position: int,
        text: str,
        tag: str,
        ud: UDSchema = dep_v1,
    ) -> None:
        """Create a token that is not yet attached to a parse.

        Parameters
        ----------
        position : int
            Index of the token within its sentence (0-based).
        text : str
            Surface text of the token.
        tag : str
            Part-of-speech tag of the token.
        ud : UDSchema, optional
            The Universal Dependencies module to consult; defaults to
            ``dep_v1``.
        """
        # The attribute assignment order below is intentional; keep it.
        self.position: int = position
        self.text: str = text
        self.tag: str = tag
        # Parse-structure fields are left unset here and assigned later.
        self.dependents: list[DepTriple] | None = None
        self.gov: Token | None = None
        self.gov_rel: str | None = None
        self.ud: UDSchema = ud

    def __repr__(self) -> str:
        """Render the token as ``text/position``.

        Returns
        -------
        str
            The token text and its position joined by a slash.
        """
        return "{}/{}".format(self.text, self.position)

    @property
    def isword(self) -> bool:
        """Tell whether the token is something other than punctuation.

        Returns
        -------
        bool
            ``False`` when the tag equals ``postag.PUNCT``, ``True``
            otherwise.
        """
        punctuation_tag = postag.PUNCT
        return self.tag != punctuation_tag

    def argument_like(self) -> bool:
        """Tell whether the token looks like the root of an argument.

        Returns
        -------
        bool
            ``True`` when ``gov_rel`` is one of the schema's ``ARG_LIKE``
            relations.
        """
        arg_like_relations = self.ud.ARG_LIKE
        return self.gov_rel in arg_like_relations

    def hard_to_find_arguments(self) -> bool:
        """Tell whether this may be a predicate root with hard-to-find arguments.

        Intended to be called only when one of this token's dependents is
        an easy predicate. The question being answered is whether this token
        could itself be the root of a predicate that will have an argument.

        Returns
        -------
        bool
            ``False`` as soon as any dependent edge carries a relation from
            the schema's ``SUBJ`` or ``OBJ`` sets; otherwise whether
            ``gov_rel`` is one of the schema's ``HARD_TO_FIND_ARGS``
            relations.

        Raises
        ------
        TypeError
            If ``dependents`` is still ``None``.
        """
        # Example (amod):
        #   "there is nothing wrong with a negotiation, but nothing helpful
        #    about generating one that is just for show ."
        # "helpful" is an easy predicate attached by amod to "nothing",
        # and "nothing" is the token with hard-to-find arguments.
        edges = self.dependents
        if edges is None:
            raise TypeError(
                f"Cannot iterate over None dependents for token '{self.text}' "
                f"at position {self.position}. Token not properly initialized "
                f"with dependency information."
            )

        # A subject or object edge means the arguments are easy to find.
        has_subject_or_object = any(
            edge.rel in self.ud.SUBJ or edge.rel in self.ud.OBJ
            for edge in edges
        )
        if has_subject_or_object:
            return False

        return self.gov_rel in self.ud.HARD_TO_FIND_ARGS