[tree.pl] -- package tree; use strict; require "./jcode.pl"; ;## ## ;## Copyright (C) 2002 by Junro YOSHINO ## ;## ## ;## Package for Handling Nested Data Structures ## ;## ## ;# ;# This package serves some subroutine for handling nested data ;# structures. ;# ;# Its soubroutine handles two types of structures, a set, and a list. ;# At this package, the word "literal value" represents a number and a ;# string. ;# A list is a Perl's array, whose 0th element ($ARRAY[0]) is its name. ;# Latter elements can be a literal value, a set, and a list. ;# A set is a Perl's hash, whose element indexed by '*NAME*' is its ;# name. Other elements is a list. ;# ;# Data are loaded with filehandle, and filedata is written in the ;# parsable ascii format. ;# The parsable ascii format following: ;# ;# a simple list: One term is written in one line. ;# [LISTNAME] ;# LITERAL DATA 1 ;# LITERAL DATA 2 ;# ... ;# [endLISTNAME] ;# a nested set: Each childelement is a list or set. ;# Each childelement is treated as one term indexed by ;# its name in the parent set. ;# Never exists the same child name. ;# [PARENTSET /] ;# .... ;# [CHILD1] ;# .... ;# [endCHILD1] ;# .... ;# [endPARENTSET] ;# a nested list: Each childelements is a list or a set. ;# Each childelements is treated as one term of the ;# parentlist. ;# Never exists the same child name. ;# [PARENTLIST : CHILD1; CHILD2; ... CHILDN] ;# .... ;# [CHILD1] ;# .... ;# [endCHILD1] ;# .... ;# [endPARENTLIST] ;# literal lists in a set: each line is a list. ;# [SETNAME /] ;# LIST_A_NAME, LITERAL_A1, LITERAL_A2, ... , LITERAL_AN ;# LIST_B_NAME, LITERAL_B1, LITERAL_B2, ... , LITERAL_BM ;# ... ;# [endSETNAME] ;# All of the following example texts gives the same data tree. ;# #--ex A-- ;# [group /] ;# Andy, male, 1980-01-02 ;# Beth, female, 1985-03-04 ;# Charlie, male, 1990-05-06 ;# [endgroup] ;# #--ex B-- ;# [group /] ;# [Andy] ;# male ;# 1980-01-02 ;# [endAndy] ;# [Beth] ;# female ;# 1985-03-04 ;# [endBeth] ;# [Charlie] ;# male ;# 1990-05-06 ;# [endCharlie] ;# [endgroup] ;# #--ex C-- ;# [group /] ;# Andy, male, 1980-01-02 ;# [Beth] ;# female ;# 1985-03-04 ;# [endBeth] ;# Charlie, male, 1990-05-06 ;# [endgroup] ;#-------------------------------------------- ;# parse_treeroot(\*FH, \$name, \$type, \@param, \$count) ;# parse_tree(\*FH, \$tree, \$count, @param) ;# ;# $count = 0; ;# open(FILE, "<" . $file) || die("Open Failed"); ;# &tree::parse_treeroot(\*FILE, \$name, \$type, \@param, \$count) ;# || die("Violation Found: line $count"); ;# $tree = ($type == 1) ? [$name] : {'*NAME*' => $name}; ;# &tree::parse_tree(\*FILE, $tree, \$count, @param) ;# || die("Violation Found: line $count"); ;# close FILE; ;#-------------------------------------------- ;# element_at($tree, $path) ;# $path should be escaped ;#-------------------------------------------- ;# index_of($list, $name) ;#-------------------------------------------- ;# name_of($tree) ;#-------------------------------------------- ;# type_of($element) ;#-------------------------------------------- ;# escape(\$str) ;#-------------------------------------------- ;# construct_tree(\*FILE, $tree) ;#------------------------------------------------------------- ;# constants my ($_LITERAL, $_LIST, $_SET) = (0, 1, 2); sub construct_tree{ my($fhndl, $treeref) = @_; my $escname = name_of($treeref); escape(\$escname); print $fhndl "[$escname"; my @subindex = (); my @work; my ($x); if(type_of($treeref) == $_LIST){ for(my $i = 1; $i <= $#{$treeref}; $i ++){ next if type_of($treeref->[$i]) == $_LITERAL; $x = name_of($treeref->[$i]); escape(\$x); push(@subindex, $x); } if($#subindex >= 0){ print $fhndl " : "; print $fhndl join("\; ", @subindex); } print $fhndl "]\n"; for(my $i = 1; $i <= $#{$treeref}; $i ++){ if(type_of($treeref->[$i]) == $_LITERAL){ $x = $treeref->[$i]; escape(\$x); print $fhndl "$x\n"; next; } construct_tree($fhndl, $treeref->[$i]) || return 0; } }elsif(type_of($treeref) == $_SET){ print $fhndl " /]\n"; while(my($key, $value) = each %{$treeref}){ next if $key =~ /\*/; my @work=(); if(type_of($value) == $_LIST){ my $test = 1; for(my $i = 1; $i <= $#{$value}; $i ++){ if(type_of($value->[$i]) != $_LITERAL){ $test = 0; last; } } if($test){ for(my $i = 0; $i <= $#{$value}; $i ++){ $x = $value->[$i]; escape(\$x); push(@work, $x); } print $fhndl join(", ", @work) . "\n"; next; } } construct_tree($fhndl, $value) || return 0; } }else{ return 0; } print $fhndl "[end$escname]\n"; return 1; } sub escape{ my($strref) = @_; $$strref =~ s{([\[\]/,:\;\\])}{\\$1}g; } sub _unescape{ my($strref) = @_; $$strref =~ s/\\(.)/$1/g; } sub _numerize{ my($strref) = @_; $$strref =~ s/(?<=\\)(.)/sprintf("%02lX", unpack("C", $1))/eg; } sub _characterize{ my($strref) = @_; $$strref =~ s/(?<=\\)([\d0-9A-Fa-f]{2})/pack("C", hex($1))/eg; } sub type_of{ my($objref) = @_; return $_LITERAL unless ref($objref); return $_LIST if ref($objref) eq "ARRAY"; return $_SET if ref($objref) eq "HASH"; return -1; } sub name_of{ ;# this subroutine assumes that all arguments have valid values my($treeref) = @_; if(type_of($treeref) == $_LIST){ return $treeref->[0]; }elsif(type_of($treeref) == $_SET){ return $treeref->{'*NAME*'}; } return; } sub index_of{ my($listref, $name) = @_; $name = 1 if $name eq ""; jcode::convert(\$name, "euc"); return $name if $name =~ /^\d+$/; for(my $i = 1; $i <= $#{$listref}; $i ++){ next if type_of($listref->[$i]) == $_LITERAL; return $i if name_of($listref->[$i]) eq $name; } return -1; } sub element_at{ my($treeref, $path_i) = @_; $path_i = 1 if $path_i eq ""; jcode::convert(\$path_i, "euc"); _numerize(\$path_i); my ($path) = ($path_i =~ /^\s*(.*?)\s*$/); if($path !~ m{/}){ _characterize(\$path); _unescape(\$path); if(type_of($treeref) == $_LIST){ return $treeref->[index_of($treeref, $path)]; }elsif(type_of($treeref) == $_SET){ return $treeref->{$path}; } }else{ # $path =~ m{/} my($entry, $subpath) = ($path =~ m{^([^/]*?)\s*/\s*(.*)$}); $entry = 1 if $entry eq ""; _characterize(\$entry); _unescape(\$entry); _characterize(\$subpath); if(type_of($treeref) == $_LIST){ return element_at( $treeref->[index_of($treeref, $entry)], $subpath ); }elsif(type_of($treeref) == $_SET){ return element_at($treeref->{$entry}, $subpath); } } return; } sub parse_treeroot{ my($fhndl, $nameref, $typeref, $paramref, $countref) = @_; my $readline = <$fhndl>; jcode::convert(\$readline, "euc"); my $endflag = 0; chomp($readline); my ($line) = ($readline =~ /^\s*(.*?)\s*$/); ${$countref} ++; return _parse_starttag($line, $nameref, $typeref, $paramref); } sub _parse_starttag{ my ($line, $nameref, $typeref, $paramref) = @_; $$nameref = ""; @$paramref = (); my($opt, @pm); _numerize(\$line); my($tag) = ($line =~ /^\[\s*(.*?)\s*\]$/); if($tag eq ""){ return 0; }elsif($tag =~ m{/}){ $$typeref = $_SET; ($$nameref) = ($tag =~ m{^(.*?)\s*/}); _characterize($nameref); _unescape($nameref); }elsif($tag =~ /:/){ $$typeref = $_LIST; ($$nameref, $opt) = ($tag =~ /^(.*?)\s*:\s*(.*)$/); _characterize($nameref); _unescape($nameref); @pm = split(/\;/, $opt); for(my $i=0; $i <= $#pm; $i ++){ ($paramref->[$i]) = ($pm[$i] =~ /^\s*(.*?)\s*$/); _characterize(\{$paramref->[$i]}); _unescape(\{$paramref->[$i]}); } }else{ $$typeref = $_LIST; $$nameref = $tag; _characterize($nameref); _unescape($nameref); } return 0 if ${$nameref} eq ""; return 0 if ${$nameref} =~ /^\d+$/; return 1; } sub parse_tree{ my($fhndl, $treeref, $countref, @param) = @_; my($type, $name); if(type_of($treeref) == $_LIST){ $type = $_LIST; $name = $treeref->[0]; return 0 if $name eq ""; splice(@{$treeref}, 1); }elsif(type_of($treeref) == $_SET){ $type = $_SET; $name = $treeref->{'*NAME*'}; return 0 if $name eq ""; while(my($key, $value) = each %{$treeref}){ delete $treeref->{$key} unless $key eq '*NAME*'; } }else{ return 0; } my $escaped_name = $name; escape(\$escaped_name); ;# parse element my $endflag = 0; while(<$fhndl>){ chomp; jcode::convert(\$_, "euc"); my ($line) = ($_ =~ /^\s*(.*?)\s*$/); ${$countref} ++; if($line eq "[end$escaped_name]"){ $endflag = 1; last; } my($subname, $subtype, @subparam, $subtreeref); my $subflag = 0; if(_parse_starttag($line, \$subname, \$subtype, \@subparam)){ if($type == $_LIST){ foreach my $subcand (@param){ next unless $subname eq $subcand; if($subtype == $_LIST){ push(@{$treeref}, [$subname]); }else{ # $subtype == $_SET push(@{$treeref}, {'*NAME*' => $subname}); } $subtreeref = $treeref->[$#{$treeref}]; last; } }else{ # $type == $_SET if($subtype == $_LIST){ $treeref->{$subname} = [$subname]; }else{ # $subtype == $_SET $treeref->{$subname} = {'*NAME*' => $subname}; } $subtreeref = $treeref->{$subname}; } parse_tree($fhndl, $subtreeref, $countref, @subparam) || return 0; $subflag = 1; } if(! $subflag){ if($type == $_SET){ my($t); _numerize(\$line); my (@memb) = split(/,/, $line); my ($key) = ($memb[0] =~ /^\s*(.*?)\s*$/); _characterize(\$key); _unescape(\$key); $treeref->{$key} = [$key]; for(my $i=1; $i<=$#memb; $i++){ ($t) = ($memb[$i] =~ /^\s*(.*)\s*$/); _characterize(\$t); _unescape(\$t); push(@{$treeref->{$key}}, $t); } }else{ # $type == $_LIST _unescape(\$line); push(@{$treeref}, $line); } } } return 0 unless $endflag; return 1; } 1; -- Copyright (C) Junro YOSHINO