# Source code for decomp.semantics.predpatt.parsing.udparse
"""Universal Dependencies parse representation and visualization.This module provides data structures for representing and visualizingUniversal Dependencies (UD) parse trees. It includes classes for storingdependency relations and methods for pretty-printing and visualizingparse structures.The UDParse class supports various output formats includingpretty-printed text, LaTeX diagrams, and PDF visualization.Classes-------DepTriple Named tuple representing a single dependency relation.UDParse Container for complete dependency parse with tokens and relations."""from__future__importannotationsimportosfromcollectionsimportdefaultdict,namedtuplefromhashlibimportmd5fromtypingimportTYPE_CHECKINGfromtabulateimporttabulatefromtermcolorimportcoloredifTYPE_CHECKING:from..core.tokenimportTokenfrom..typingimportUDSchema# import at runtime to avoid circular dependencydef_get_dep_v1()->UDSchema:"""Get the dep_v1 module dynamically. Returns ------- UDSchema The dep_v1 module containing UD v1 constants. """from..utils.ud_schemaimportdep_v1returndep_v1
class DepTriple(namedtuple("DepTriple", "rel gov dep")):
    """Single labelled edge of a dependency parse.

    A named tuple with three positional fields, in this order:

    Attributes
    ----------
    rel : str
        The dependency relation type (e.g., 'nsubj', 'dobj').
    gov : int | Token
        The governor (head) of the edge, as a token index or Token object.
    dep : int | Token
        The dependent of the edge, as a token index or Token object.

    Notes
    -----
    Field order is (rel, gov, dep), but ``repr`` prints the dependent
    *before* the governor: ``rel(dep,gov)``. That ordering is kept for
    compatibility.
    """

    def __repr__(self) -> str:
        """Render the triple as ``rel(dep,gov)``.

        Returns
        -------
        str
            For example ``'nsubj(0,2)'`` for relation ``nsubj``,
            dependent ``0`` and governor ``2``.
        """
        # Unpack in field order, then emit dependent first on purpose.
        relation, governor, dependent = self
        return f"{relation}({dependent},{governor})"
class UDParse:
    """Universal Dependencies parse representation.

    Container for a dependency parse including tokens, POS tags, and
    dependency relations.

    Parameters
    ----------
    tokens : list
        List of tokens (strings or Token objects) in the sentence.
    tags : list[str]
        List of POS tags corresponding to tokens.
    triples : list[DepTriple]
        List of dependency relations in the parse.
    ud : module, optional
        Universal Dependencies module (ignored - always uses dep_v1).

    Attributes
    ----------
    ud : module
        The UD module (always set to dep_v1 regardless of parameter).
    tokens : list
        List of tokens in the sentence.
    tags : list[str]
        List of POS tags.
    triples : list[DepTriple]
        List of dependency relations.
    governor : dict
        Maps dependent index/token to its governing DepTriple.
    dependents : defaultdict[list]
        Maps governor index/token to list of dependent DepTriples.

    Notes
    -----
    ``view`` relies on a ``latex()`` method on this object that returns
    the LaTeX source of the diagram.
    """

    def __init__(
        self,
        tokens: list[str | Token],
        tags: list[str],
        triples: list[DepTriple],
        ud: UDSchema | None = None,
    ) -> None:
        """Initialize UDParse with tokens, tags, and dependency triples.

        Parameters
        ----------
        tokens : list[str | Token]
            List of tokens (strings or Token objects).
        tags : list[str]
            List of POS tags.
        triples : list[DepTriple]
            List of dependency relations.
        ud : module, optional
            UD module (ignored - always uses dep_v1).
        """
        # maintain exact behavior - always set to dep_v1
        self.ud = _get_dep_v1()
        self.tokens = tokens
        self.tags = tags
        self.triples = triples

        # build governor mapping: dependent -> DepTriple
        # (if several triples share a dependent, the last one wins)
        self.governor: dict[int | Token, DepTriple] = {e.dep: e for e in triples}

        # build dependents mapping: governor -> [DepTriple]
        self.dependents: defaultdict[int | Token, list[DepTriple]] = defaultdict(list)
        for e in self.triples:
            self.dependents[e.gov].append(e)

    def pprint(self, color: bool = False, k: int = 1) -> str:
        """Pretty-print list of dependencies.

        Triples are sorted by dependent and laid out round-robin over
        ``k`` columns. ``dep`` and ``gov`` are used as list indices here,
        so this method expects integer-indexed triples.

        Parameters
        ----------
        color : bool, optional
            Whether to use colored output (default: False).
        k : int, optional
            Number of columns to use (default: 1).

        Returns
        -------
        str
            Formatted string representation of dependencies.
        """
        # "ROOT" is appended last so that an index of -1 resolves to it
        # (presumably the root's governor index is -1 -- confirm with callers).
        tokens1 = [*self.tokens, "ROOT"]
        index_fmt = colored("/%s", "magenta") if color else "/%s"
        entries = [
            f"{t.rel}({tokens1[t.dep]}{index_fmt % t.dep}, "
            f"{tokens1[t.gov]}{index_fmt % t.gov})"
            for t in sorted(self.triples, key=lambda x: x.dep)
        ]

        cols: list[list[str]] = [[] for _ in range(k)]
        for i, entry in enumerate(entries):
            cols[i % k].append(entry)

        # add padding to columns because zip stops at shortest iterator.
        for col in cols:
            col.extend("" for _ in range(len(cols[0]) - len(col)))

        return str(tabulate(zip(*cols, strict=False), tablefmt="plain"))

    def view(self, do_open: bool = True) -> str | None:
        """Open a dependency parse diagram of the sentence.

        Writes the output of ``self.latex()`` to ``/tmp/parse_<md5>.tex``
        and compiles it with ``pdflatex``. An already existing PDF for the
        same sentence is reused. The working directory is switched to
        ``/tmp`` for the duration of the call and always restored.

        Requires that pdflatex be in PATH and that Daniele Pighin's
        tikz-dependency.sty can be found by pdflatex.
        NOTE(review): pdflatex runs with ``/tmp`` as its working
        directory, so a style file that only exists in the caller's
        directory may not be picked up -- confirm.

        Parameters
        ----------
        do_open : bool, optional
            Whether to open the PDF file with ``xdg-open`` (default: True).

        Returns
        -------
        str | None
            Absolute path to the generated PDF file, or None if the PDF
            was not produced.
        """
        latex = self.latex()
        was = os.getcwd()
        try:
            os.chdir("/tmp")
            tokens_str = " ".join(
                x if isinstance(x, str) else x.text for x in self.tokens
            )
            # File name is derived from the sentence text; non-ASCII
            # characters are dropped before hashing.
            hash_str = md5(tokens_str.encode("ascii", errors="ignore")).hexdigest()
            base = f"parse_{hash_str}"
            pdf = f"{base}.pdf"
            if not os.path.exists(pdf):
                # Binary mode: latex() is expected to return bytes.
                with open(f"{base}.tex", "wb") as f:
                    f.write(latex)
                os.system(f"pdflatex -halt-on-error {base}.tex >/dev/null")
                if not os.path.exists(pdf):
                    # Compilation failed (or pdflatex is missing): honour
                    # the documented contract instead of returning a path
                    # to a file that does not exist.
                    return None
            if do_open:
                os.system(f"xdg-open {pdf}")
            return os.path.abspath(pdf)
        finally:
            os.chdir(was)

    def toimage(self) -> str | None:
        """Convert parse diagram to PNG image.

        Builds the PDF via ``view(do_open=False)`` and rasterises it with
        Ghostscript (``gs``). An existing PNG next to the PDF is reused.

        Returns
        -------
        str | None
            Path to the generated PNG file, or None if generation fails.
        """
        img = self.view(do_open=False)
        if img is None:
            return None

        out = img[:-4] + ".png"  # swap the ".pdf" suffix for ".png"
        if not os.path.exists(out):
            # The output path and the input path must be separate
            # arguments: "-o <out> <img>".
            cmd = f"gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -o {out} {img}"
            os.system(cmd)
        # Report failure as None, as documented, if gs produced nothing.
        return out if os.path.exists(out) else None