#!/bin/tclsh

#
# \brief  Tool for processing human-inclined data (HID) syntax
# \author Norman Feske
# \date   2023-05-01
#

set total_parse_errors 0


proc error { msg } {
	puts stderr "\nError: $msg\n"
	exit 1
}


#
# HID nodes and attributes are represented as lists
#
# hid_padding  - space following the node type
# hid_prespace - space in front of a node
# hid_anno     - annotation following immediately the node type
# hid_anchor   - x or +
# hid_preface  - comment line preceeding the node
# attr_anno    - annotation following an attriute
# attr_align   - space between tag and value
#

proc hid_type       { hid  } { return [lindex $hid  0] }
proc hid_padding    { hid  } { return [lindex $hid  1] }
proc hid_line       { hid  } { return [lindex $hid  2] }
proc hid_prespace   { hid  } { return [lindex $hid  3] }
proc hid_anchor     { hid  } { return [lindex $hid  4] }
proc hid_preface    { hid  } { return [lindex $hid  5] }
proc hid_attributes { hid  } { return [lindex $hid  6] }
proc hid_children   { hid  } { return [lindex $hid  7] }
proc hid_quoted     { hid  } { return [lindex $hid  8] }
proc hid_anno       { hid  } { return [lindex $hid  9] }
proc attr_line      { attr } { return [lindex $attr 0] }
proc attr_prespace  { attr } { return [lindex $attr 1] }
proc attr_tag       { attr } { return [lindex $attr 2] }
proc attr_align     { attr } { return [lindex $attr 3] }
proc attr_value     { attr } { return [lindex $attr 4] }
proc attr_anno      { attr } { return [lindex $attr 5] }

proc same_trimmed     { s1 s2 } { return [expr {[string trim $s1] == [string trim $s2]}] }
proc same_attr_values { a1 a2 } { return [same_trimmed [attr_value $a1] [attr_value $a2]] }

proc hid_leaf { hid } {

	return [expr {[llength [hid_children $hid]] == 0}]
}


proc node_stack_element_as_hid { ctx_var i } {

	upvar $ctx_var ctx

	return [list $ctx($i,type)       $ctx($i,padding)  $ctx($i,line)    \
	             $ctx($i,prespace)   $ctx($i,anchor)   $ctx($i,preface) \
	             $ctx($i,attributes) $ctx($i,children) $ctx($i,quoted)  \
	             $ctx($i,annotation)]
}


proc stack_pos_for_xpos { ctx_var xpos } {

	upvar $ctx_var ctx

	set depth $ctx(depth)
	set pos 0
	for {set i 0} {$i < $depth} {incr i} {
		if {[info exists ctx($i,type)]} {
			if {$xpos > $ctx($i,xpos)} {
				set pos $i } } }

	return $pos;
}


proc pop_nodes { ctx_var xpos } {

	upvar $ctx_var ctx

	# find appropriate stack position for new node
	set new_pos [expr [stack_pos_for_xpos ctx $xpos] + 1]
	set depth $ctx(depth)

	for {set i [expr $depth - 1]} { $i >= $new_pos } { incr i -1 } {
		lappend ctx([expr $i - 1],children) \
		        [node_stack_element_as_hid ctx $i]
		incr ctx(depth) -1
	}
}


proc drop_misaligned_preface { ctx_var xpos } {

	upvar $ctx_var ctx

	if {$xpos != $ctx(preface,xpos)} {
		foreach line $ctx(preface,lines) {
			set n    [lindex $line 0]
			set text [lindex $line 2]
			puts stderr "Dropping commented-out line $n: $text"
			set ctx(prespace) "\n"
		}
		set ctx(preface,xpos)  -1
		set ctx(preface,lines) {}
	}
}


proc add_node { ctx_var n xpos type anchor padding } {

	upvar $ctx_var ctx

	pop_nodes ctx $xpos

	# validate consistency of preface xpos with node xpos
	set anchor_xpos [expr max(0, $xpos - 2)]
	drop_misaligned_preface ctx $anchor_xpos

	# push new node
	set pos $ctx(depth)
	set ctx($pos,type)       $type
	set ctx($pos,padding)    $padding
	set ctx($pos,line)       $n
	set ctx($pos,prespace)   $ctx(prespace)
	set ctx($pos,xpos)       $xpos
	set ctx($pos,anchor)     $anchor
	set ctx($pos,preface)    $ctx(preface,lines)
	set ctx($pos,attributes) {}
	set ctx($pos,children)   {}
	set ctx($pos,quoted)     {}
	set ctx($pos,annotation) {}
	set ctx(preface,lines)   {}
	set ctx(prespace)        ""
	incr ctx(depth)
}


proc add_attribute { ctx_var xpos attr } {

	upvar $ctx_var ctx

	set pos [stack_pos_for_xpos ctx $xpos]
	lappend ctx($pos,attributes) $attr

	set ctx(prespace) ""
}


proc assign_quoted { ctx_var xpos content } {

	upvar $ctx_var ctx

	set pos [stack_pos_for_xpos ctx $xpos]
	set ctx($pos,quoted) $content
}


proc assign_annotation { ctx_var xpos anno } {

	upvar $ctx_var ctx

	set pos [stack_pos_for_xpos ctx $xpos]
	set ctx($pos,annotation) $anno
}


proc capture_preface { ctx_var n xpos text } {

	upvar $ctx_var ctx

	drop_misaligned_preface ctx $xpos

	set ctx(preface,xpos) $xpos
	lappend ctx(preface,lines) [list $n $ctx(prespace) $text]

	set ctx(prespace) ""
}


##
## Grammar for parsing an individual line
##

proc indent_pattern  { } { return {[| ]( |$)}        }
proc id_pattern      { } { return {[a-z][a-z0-9_-]*} }
proc space_pattern   { } { return { +}               }
proc tag_pattern     { } { return " *[id_pattern]:"  }
proc value_pattern   { } { return { *([^| ]| [^|])+} }
proc anchor_pattern  { } { return { *[+x] }          }
proc comment_pattern { } { return { *[.]( |$)}       }
proc anno_pattern    { } { return { *[|] [.] .*$}    }
proc quoted_pattern  { } { return { *[:]( |$)}       }
proc delim_pattern   { } { return { *[|] }           }
proc eof_pattern     { } { return {-\s*$}            }
proc empty_pattern   { } { return {\s*$}             }


proc parse { pattern chars_var xpos_var match_var match_body } {

	upvar $chars_var chars
	upvar $xpos_var  xpos
	upvar $match_var match

	while {[regexp "^($pattern)" $chars match]} {
		incr xpos [string length $match]
		regsub "^($pattern)" $chars "" chars
		uplevel 1 $match_body
	}
}


proc parse_value { chars_var xpos_var align_var value_var body } {

	upvar $chars_var chars
	upvar $xpos_var  xpos
	upvar $align_var align
	upvar $value_var value

	parse [value_pattern] chars xpos aligned_value {
		set align ""
		parse [space_pattern] aligned_value xpos align { }
		parse [value_pattern] aligned_value xpos value {
			uplevel 1 $body } }
}


proc parse_annotation { chars_var xpos_var } {

	upvar $chars_var chars
	upvar $xpos_var  xpos

	set result ""
	parse [anno_pattern] chars xpos match {
		set result $match
		set chars ""
	}
	return $result
}


proc parse_attr { prespace_var chars_var xpos_var tag_name_var align_var value_var anno_var body } {

	upvar $prespace_var prespace
	upvar $chars_var    chars
	upvar $xpos_var     xpos
	upvar $tag_name_var tag_name
	upvar $align_var    align
	upvar $value_var    value
	upvar $anno_var     anno

	parse [tag_pattern] chars xpos tag {
		parse [space_pattern] tag xpos space { append prespace $space }
		parse [id_pattern]    tag xpos tag_name {
			set value ""
			set align ""
			parse_value chars xpos align value { }
			set anno [parse_annotation chars xpos]

			uplevel 1 $body } }
}


proc parse_property { n chars_var xpos_var ctx_var } {

	upvar $chars_var chars
	upvar $xpos_var  xpos
	upvar $ctx_var   ctx

	set property_xpos $xpos
	parse_attr ctx(prespace) chars xpos tag align value anno {
		add_attribute ctx $property_xpos [list $n $ctx(prespace) $tag $align $value $anno] }

	parse [anchor_pattern] chars xpos match {
		parse_node $n chars xpos ctx $match }

	parse [quoted_pattern] chars xpos ignored {
		add_node ctx $n [expr $property_xpos + 2] ":" "" ""
		parse {.+$} chars xpos text {
			assign_quoted ctx $xpos $text } }

	parse_details $n chars xpos ctx
}


proc parse_details { n chars_var xpos_var ctx_var } {

	upvar $chars_var chars
	upvar $xpos_var  xpos
	upvar $ctx_var   ctx

	parse [delim_pattern] chars xpos match {
		append ctx(prespace) $match
		parse_property $n chars xpos ctx }
}


proc parse_node { n chars_var xpos_var ctx_var anchor } {

	upvar $chars_var chars
	upvar $xpos_var  xpos
	upvar $ctx_var   ctx

	set node_xpos $xpos
	parse [id_pattern] chars xpos type {

		set attr_xpos $xpos

		# capture padding at the right of the node type
		regexp {^ *} $chars padding
		regsub {^ *} $chars "" chars
		incr xpos [string length $padding]

		add_node ctx $n $node_xpos $type $anchor $padding

		parse_attr ctx(prespace) chars xpos tag align value anno {
			add_attribute ctx $attr_xpos [list $n $ctx(prespace) $tag $align $value $anno] }

		# optional name attribute
		parse_value chars xpos align name {
			set anno [parse_annotation chars xpos]
			add_attribute ctx $attr_xpos [list $n $ctx(prespace) name $align $name $anno] }

		# annotation at node with no attributes
		assign_annotation ctx $attr_xpos [parse_annotation chars xpos]

		parse_details $n chars xpos ctx
	}
}


##
## Parse line sequence and capture data in node stack
##

proc parse_line { n chars ctx_var } {

	upvar $ctx_var ctx

	set orig_chars $chars
	set xpos 0

	set eof false
	parse [eof_pattern] chars xpos ignored {
		set eof true }

	if {$eof} {
		pop_nodes ctx 0
		return false }

	parse [empty_pattern] chars xpos space {
		append ctx(prespace) $space
		return true }

	parse [indent_pattern] chars xpos space { append ctx(prespace) $space }

	# capture preface comment or drop commented-out line
	parse [comment_pattern] chars ignored_xpos comment {
		capture_preface ctx $n $xpos $chars
		set chars "" }

	parse_property $n chars xpos ctx

	# top-level node
	parse_node $n chars xpos ctx ""

	# diagnostic feedback
	if {[string length $chars] > 0} {
		set at ""
		if {$n} { set at " at line $n" }
		if {[regexp {\s+$} $chars bad]} {
			set bad_line "[string trim $orig_chars]$bad"
			puts stderr "Error: trailing space$at: '$bad_line'"
		} elseif {[regexp {^[| +x]*\t} $chars]} {
			regsub {\t} $orig_chars "<TAB>" bad_line
			set bad_line "[string trim $bad_line]"
			puts stderr "Error: tab misused to indent$at: '$bad_line'"
		} else {
			puts stderr "Unable to parse$at: '$chars'"
		}
		global total_parse_errors
		incr total_parse_errors
	}
	return true
}


proc import_hid { data { multiple_lines true } } {

	global total_parse_errors
	set orig_total_parse_errors $total_parse_errors

	# set up node stack in context 'ctx'
	set ctx(depth)         0
	set ctx(0,type)        ""
	set ctx(0,padding)     ""
	set ctx(0,line)        0
	set ctx(0,prespace)    ""
	set ctx(0,anchor)      ""
	set ctx(0,preface)     {}
	set ctx(0,attributes)  {}
	set ctx(0,xpos)        0
	set ctx(0,children)    {}
	set ctx(0,quoted)      {}
	set ctx(0,annotation)  {}
	set ctx(preface,xpos)  -1
	set ctx(preface,lines) {}
	set ctx(prespace)      ""

	set lines [split $data "\n"]
	set eof 0
	set n [expr $multiple_lines ? 1 : 0]
	foreach line $lines {
		if {![parse_line $n $line ctx]} {
			set eof true
			break; }
		append ctx(prespace) "\n"
		incr n
	}

	pop_nodes ctx 0

	set parse_errors [expr $total_parse_errors - $orig_total_parse_errors]
	if {$parse_errors > 0} {
		set suffix ""
		if {$parse_errors > 1} { set suffix "s" }
		error "$parse_errors line$suffix could not be parsed completely"
		exit 1
	}

	if {$multiple_lines && !$eof} {
		error "missing end-of-data mark '-'" }

	return [node_stack_element_as_hid ctx 0]
}


##
## Query
##

proc hid_query_nodes { query_hid data_hid } {

	if {[hid_type $query_hid] != [hid_type $data_hid]} {
		return {} }

	# check for conflicting attributes
	foreach query_attr [hid_attributes $query_hid] {
		set attr_exists_in_data 0
		foreach data_attr [hid_attributes $data_hid] {
			if {[attr_tag $data_attr] == [attr_tag $query_attr]} {
				set attr_exists_in_data 1
				if {![same_attr_values $data_attr $query_attr]} {
					return {} } } }

		if {!$attr_exists_in_data} {
			return {} }
	}

	# stop searching when reaching the leaf of the query
	if {[hid_leaf $query_hid]} {
		return [list $data_hid] }

	# traverse into next level
	set result_list {}
	foreach sub_query_hid [hid_children $query_hid] {
		foreach sub_data_hid [hid_children $data_hid] {
			foreach hid [hid_query_nodes $sub_query_hid $sub_data_hid] {
				lappend result_list $hid } } }

	return $result_list
}


##
## Filter function used for remove/set/enable/disable commands
##

proc hid_filter { ctx_var query_hid data_hid filtered_hid_var match_body } {

	upvar $filtered_hid_var filtered_hid
	upvar $ctx_var ctx

	if {[hid_type $query_hid] != [hid_type $data_hid]} {
		return $data_hid }

	# check for conflicting attributes
	foreach query_attr [hid_attributes $query_hid] {
		set attr_exists_in_data 0
		foreach data_attr [hid_attributes $data_hid] {
			if {[attr_tag $data_attr] == [attr_tag $query_attr]} {
				set attr_exists_in_data 1
				if {![same_attr_values $data_attr $query_attr]} {
					return $data_hid } } } }

	# apply filter when matching the leaf of the query
	if {[hid_leaf $query_hid]} {
		set filtered_hid $data_hid
		uplevel 1 $match_body
		return $filtered_hid
	}

	# traverse into next level
	set filtered_children {}
	foreach sub_query_hid [hid_children $query_hid] {
		foreach sub_data_hid [hid_children $data_hid] {

			set filter_result [hid_filter ctx $sub_query_hid $sub_data_hid \
			                              $filtered_hid_var $match_body]

			if {[llength $filter_result] > 0} {
				lappend filtered_children $filter_result }
		}
	}
	return [list [hid_type       $data_hid] [hid_padding  $data_hid] \
	             [hid_line       $data_hid] [hid_prespace $data_hid] \
	             [hid_anchor     $data_hid] [hid_preface  $data_hid] \
	             [hid_attributes $data_hid] $filtered_children       \
	             [hid_quoted     $data_hid] [hid_anno     $data_hid] ]
}


##
## Output in HID syntax
##

proc hid_attr_value { hid tag } {
	foreach attr [hid_attributes $hid] {
		if {[attr_tag $attr] == $tag} {
			return [attr_value $attr] } }
	return ""
}


proc hid_foreach_non_name_attr { tag_var value_var hid body } {

	upvar $tag_var   tag
	upvar $value_var value

	foreach attr [hid_attributes $hid] {
		if {[attr_tag $attr] != "name"} {
			set tag   [attr_tag   $attr]
			set value [attr_value $attr]
			set anno  [attr_anno  $attr]
			uplevel 1 $body } }
}


proc hid_first_attr { hid } {
	set first ""
	foreach attr [hid_attributes $hid] {
		if {$first == ""} {
			set first $attr }
		if {[attr_tag $attr] == "name"} {
			set first $attr }
	}
	return $first
}


proc hid_foreach_non_first_attr { tag_var value_var anno_var hid body } {

	upvar $tag_var   tag
	upvar $value_var value
	upvar $anno_var  anno

	set first_tag [attr_tag [hid_first_attr $hid]]

	foreach attr [hid_attributes $hid] {
		set tag  [attr_tag  $attr]
		set anno [attr_anno $attr]
		if {$tag != $first_tag} {
			set value [attr_value $attr]
			uplevel 1 $body
		}
	}
}


proc hid_num_quoted_lines { hid } {
	set count 0
	foreach child $hid {
		if {[hid_type $child] == ":"} { incr count } else { return 0 } }
	return $count
}


proc print_hid_as_tree_recursively { out_fd indent hid { last 0 } } {

	# print comment above node
	foreach preface_line [hid_preface $hid] {
		regsub {. $|^$} $indent ". " comment_prefix
		puts $out_fd "$comment_prefix[lindex $preface_line 2]" }

	set type [string trim [hid_type $hid]]

	set node_branch_char "|"
	if {$last} {
		set node_branch_char " " }

	if {[llength [hid_attributes $hid]] == 0} {
		set anno [hid_anno $hid]
		if {$anno != ""} { set anno " $anno" }
		puts $out_fd "$indent$type$anno"
		regsub {[+x] $} $indent "$node_branch_char " indent
		regsub {^[|]} $indent " " indent
	} else {

		# max tag length used for tabular output of attribute values
		set max_tag_len 0
		hid_foreach_non_name_attr tag value $hid {
			set max_tag_len [expr max($max_tag_len, [string length $tag])] }
		set tag_column_size [expr $max_tag_len + 1]

		set first_attr [hid_first_attr $hid]
		set anno [string trim [attr_anno $first_attr]]
		if {$anno != ""} { set anno " $anno" }
		if {[attr_tag $first_attr] == "name"} {
			puts $out_fd "$indent$type [attr_value $first_attr]$anno"
		} else {
			set padded_tag [format "%-${tag_column_size}s" "[attr_tag $first_attr]:"]
			set value [string trim [attr_value $first_attr]]
			if {$value != ""} { set value " $value" }
			puts $out_fd "$indent$type $padded_tag$value$anno"
		}

		# node branch to next sibling
		regsub {[+x] $} $indent "$node_branch_char " indent
		regsub {^[|]} $indent " " indent
		regsub -all {.} "$type" " " node_column

		# branch besides the attributes to the first child node
		if {![hid_leaf $hid] && $indent != ""} {
			regsub {^ } $node_column "|" node_column }

		hid_foreach_non_first_attr tag value anno $hid {
			set padded_tag [format "%-${tag_column_size}s" "$tag:"]
			if {$value != ""} { set value " $value" }
			puts $out_fd "$indent$node_column $padded_tag$value$anno"
		}
	}

	set num_children [llength [hid_children $hid]]
	foreach child_hid [hid_children $hid] {
		incr num_children -1
		set last [expr {$num_children < 1}]

		# quoted content
		if {[hid_type $child_hid] == ":"} {
			if {[string length [hid_quoted $child_hid]] == 0} {
				puts $out_fd "$indent:"
			} else {
				puts $out_fd "$indent: [hid_quoted $child_hid]"
			}
		} else {
			set sub_node_indent "$indent[string trim [hid_anchor $child_hid]] "
			print_hid_as_tree_recursively $out_fd $sub_node_indent $child_hid $last
		}
	}
}


proc print_hid_as_tree { out_fd hid } {
	print_hid_as_tree_recursively $out_fd "" $hid
	puts $out_fd "-"
}


proc print_hid_as_is_recursively { out_fd hid } {

	foreach preface_line [hid_preface $hid] {
		puts -nonewline $out_fd "[lindex $preface_line 1]. [lindex $preface_line 2]" }

	puts -nonewline $out_fd "[hid_prespace $hid][hid_anchor $hid][hid_type $hid][hid_padding $hid][hid_anno $hid]"

	set first_attr 1
	foreach attr [hid_attributes $hid] {
		puts -nonewline $out_fd "[attr_prespace $attr]"
		if {!$first_attr || [attr_tag $attr] != "name"} {
			puts -nonewline $out_fd "[attr_tag $attr]:" }
		puts -nonewline $out_fd "[attr_align $attr][attr_value $attr][attr_anno $attr]"
		set first_attr 0
	}

	foreach child_hid [hid_children $hid] {

		# quoted content
		if {[hid_type $child_hid] == ":"} {
			puts -nonewline $out_fd "[hid_prespace $child_hid]:"
			if {[string length [hid_quoted $child_hid]] > 0} {
				puts -nonewline $out_fd " [hid_quoted $child_hid]" }
		} else {
			print_hid_as_is_recursively $out_fd $child_hid
		}
	}
}


proc print_hid_as_is { out_fd hid } {
	print_hid_as_is_recursively $out_fd $hid
	puts $out_fd "\n-"
}


##
## XML support
##

proc _parse_xml { type pattern replace xml_var id_var info_var } {

	upvar $xml_var  xml
	upvar $id_var   id
	upvar $info_var info

	while {[regexp $pattern $xml dummy match]} {
		set info($id,type) $type
		set info($id,text) $match
		regsub $pattern $xml [eval "string trim $replace"] xml
		incr id
	}
}


proc _xml_subst { } { return { { {\&} {\&amp;}  }
                               { {>}  {\&gt;}   }
                               { {<}  {\&lt;}   }
                               { "\"" {\&quot;} }
                               { {'}  {\&apos;} } } }


proc _xml_decoded { text } {

	foreach subst [_xml_subst] {
		regsub -all [lindex $subst 1] $text [lindex $subst 0] text }

	return $text
}


proc _xml_encoded { text } {

	foreach subst [_xml_subst] {
		regsub -all [lindex $subst 0] $text [lindex $subst 1] text }

	return $text
}


proc _hid_from_xml_rec { text info_var } {

	upvar $info_var info

	set result { }

	while {1} {

		set pattern {^<(\d+)>}
		if {![regexp $pattern $text match id]} {
			break }

		regsub $pattern $text "" text

		set child_type $info($id,type)
		set child_text $info($id,text)

		set hid_type     ""
		set hid_attrs    {}
		set hid_children {}
		set hid_quoted   {}

		if {$child_type == "quoted"} {

			set trimmed [string trim $child_text]
			if {[string length $trimmed] > 0} {
				foreach line [split $child_text "\n"] {
					set decoded [string trim [_xml_decoded $line]]
					lappend result [list ":" 1 "" "" "" "" "" "" $decoded] } }
		}

		if {$child_type == "compound"} {

			set start_pattern {^<(\w[^\s>]*)((\s+([0-9\s]*))?)>}
			if {![regexp $start_pattern $child_text match hid_type xml_attrs]} {
				regsub {><.*$} $child_text ">" err_text
				error "unable to interpret XML start tag: $err_text" }

			regsub $start_pattern $child_text "" child_text

			set end_pattern {</(\w[^\s]*)\s*>}
			if {![regexp $end_pattern $child_text match end_tag]} {
				error "XML node <$hid_type> lacks end tag" }

			if {$hid_type != $end_tag} {
				error "XML start tag <$hid_type> mismatches end tag <$end_tag>" }

			regsub $end_pattern $child_text "" child_text

			set hid_children [_hid_from_xml_rec $child_text info]
		}

		if {$child_type == "leaf"} {

			set leaf_pattern {<([^\s>]+)((\s+([0-9\s]*))?)/>}
			if {![regexp $leaf_pattern $child_text match hid_type xml_attrs]} {
				regsub {><.*$} $child_text ">" err_text
				error "unable to interpret XML tag: $err_text" }

			regsub $leaf_pattern $child_text "" child_text
		}

		if {$child_type == "compound" || $child_type == "leaf"} {

			foreach attr $xml_attrs {
				if {![info exists info($attr,type)] || $info($attr,type) != "attr"} {
					error "unable to parse XML attribute $attr" }

				set attr_pattern {([\w0-9\-_]+) *= *"([^"]*)"}
				if {[regexp $attr_pattern $info($attr,text) match tag value]} {
					lappend hid_attrs [list 0 "" $tag "" $value] }
			}

			lappend result [list $hid_type "" 1 "" "+ " "" $hid_attrs $hid_children ""]
		}
	}

	if {[string length $text]} {
		regsub -all {<[^>]*>} $text "" text
		error "unable to parse input as XML: $text" }

	return $result
}


proc import_hid_from_xml { xml } {

	set id 0

	_parse_xml comment  {<\!--[^\n]*-->\n}            {""}      xml id info
	_parse_xml attr     {([\w0-9\-_]+ *= *"[^"]*")}   {$id}     xml id info
	_parse_xml leaf     {(<\w[^>]*/>)}                {<$id>}   xml id info
	_parse_xml quoted   {>([^<]+)<}                   {><$id><} xml id info
	_parse_xml compound {(<\w[^>]*>[<>0-9]*</[^>]*>)} {<$id>}   xml id info

	return [lindex [_hid_from_xml_rec [string trim $xml] info] 0]
}


proc print_hid_as_xml { out_fd hid { indent 0 } } {

	if {[string trim [hid_anchor $hid]] == "x"} {
		return }

	set indent_tabs ""
	for { set i 0 } { $i < $indent } { incr i } {
		append indent_tabs "\t" }

	set type     [hid_type     $hid]
	set children [hid_children $hid]
	set preface  [hid_preface  $hid]

	# node-preface comments
	if {[llength $preface] > 1} {
		set texts {}
		foreach line $preface {
			lappend texts "  [lindex $line 2]" }
		puts $out_fd "$indent_tabs<!--  [join $texts "\n$indent_tabs"]-->"
	} elseif {[llength $preface] == 1} {
		puts $out_fd "$indent_tabs<!-- [lindex [lindex $preface 0] 2] -->"
	}

	# quoted content
	if {$type == ":"} {
		puts $out_fd "$indent_tabs[hid_quoted $hid]"
		return
	}

	set xml_tag "<$type"
	foreach attr [hid_attributes $hid] {
		lappend xml_tag "[attr_tag $attr]=\"[string trim [attr_value $attr]]\"" }
	puts -nonewline $out_fd "$indent_tabs[join $xml_tag { }]"

	if {[hid_leaf $hid]} {
		puts $out_fd "/>"
	} else {
		# distinguish quoted content from sub-structured content
		if {[hid_num_quoted_lines $children] > 0} {
			if {[hid_num_quoted_lines $children] == 1} {
				puts $out_fd ">[_xml_encoded [hid_quoted [lindex $children 0]]]</$type>"
			} else {
				puts $out_fd ">"
				foreach child $children {
					puts $out_fd "$indent_tabs\t[_xml_encoded [hid_quoted $child]]" }
				puts $out_fd "$indent_tabs</$type>"
			}
		} else {
			puts $out_fd ">"
			incr indent
			foreach child $children {
				print_hid_as_xml $out_fd $child $indent }
			incr indent -1
			puts $out_fd "$indent_tabs</$type>"
		}
	}
}


##
## User interface
##

proc file_content { path } {

	if {$path == "-"} {
		return [read stdin] }

	set fh [open $path "RDONLY"]
	set content [read $fh]
	close $fh
	return $content
}

set short_usage_msg {
  usage: hid [-i] [--import-xml] <command> [<options>] <hid-file>}

append help_hint_msg $short_usage_msg "\n" {
  Use '--help' to get more information. }

append help_msg $short_usage_msg {

  With the option '-i' specfied, the <hid-file> is modified in place.

  available commands:

    hid [-i] format <hid-file>

      Print formatted HID data. Alternatively to the HID syntax used
      by default, the output format can be defined via:

      --output-tcl  Print HID structure as Tcl list with each HID node
                    represented as a list of the following structure:

                    { type line enabled preface attributes children }

                    The 'children' element is a list of HID nodes.
                    The 'attributes' element is a list of attributes
                    each represented as a list as follow:

                    { line tag value }

      --output-xml  Print HID structure as XML

    hid subnodes <node-path> <hid-file>

      Query nodes from HID structure. The <node-path> describes the
      targeted sub trees within the hierarchy as a sequence of HID
      nodes separated by '| +'.

      Each HID node can be followed by optional attribute filters
      in the form of '| <tag>: <value>', which are interpreted as
      conditions for the match. If multiple attributes are specified,
      each conditions must apply.

      Example:

      $ hid subnodes 'config | + start terminal | + route' <hid-file>

      Prints the 'route' subnode of the 'start' node named 'terminal'.

    hid get <attr-path> <hid-file>

      Query attributes value from HID structure. The <attr-path>
      consists of a <node-path> followed by '| : <tag>' denoting the
      tag to retrieve.

      Example:

      $ hid get 'config | + start | : name' <hid-file>

      Prints the name of each 'start' node found in 'config'.

    hid [-i] set <attr-def> <hid-file>

      Sets the attribute values specified by <attr-def>, which is
      a <node-path> followed by the delimiter '| :' followed by
      one or more attribute definitions. Each definition has the
      form '<tag>: <value>' and separated from the next one by '|'.

      Example:

      $ hid set 'config | + start vfs | : caps: 100 | ram: 16M' <hid-file>

      Sets the attributes 'caps' and 'ram' of the 'start' node named
      'vfs' to the values 100 and 16M respectively.

    hid [-i] remove <node-path>

      Removes the node(s) specified by the <node-path>.

      Example:

      $ hid remove 'config | + start | + route' <hid-file>

      Removes all 'route' sub nodes from each 'start' node hosted
      at the 'config'.

    hid [-i] disable <node-path>

      Marks nodes specified by <node-path> as disabled, which turns the
      node anchor from '+' to 'x'.

      Example:

      $ hid disable 'config | + start terminal' <hid-file>

      Turns the node anchor of the terminal's 'start' node to an 'x'.

    hid [-i] enable <node-path>

      Reverts the enabled state of nodes addressed by the <node-path>.
}


##
## Main program: process arguments and execute commands
##

##
# Consume flag from global 'argv'
#
proc consume_flag_arg { flag body } {

	global argv

	set remaining_argv {}
	foreach arg $argv {
		if {$arg == $flag} {
			uplevel 1 $body
		} else {
			lappend remaining_argv $arg }
	}
	set argv $remaining_argv
}

# define input and output format
set import_hid_fn import_hid
set print_hid_fn  print_hid_as_is
set in_place 0

consume_flag_arg --import-xml {
	set import_hid_fn import_hid_from_xml
	set print_hid_fn  print_hid_as_tree
}

consume_flag_arg --output-xml {
	set print_hid_fn print_hid_as_xml }

consume_flag_arg --output-tcl {
	set print_hid_fn puts }

consume_flag_arg -i {
	set in_place 1 }

consume_flag_arg --help {
	puts "$help_msg"
	exit
}


if {[llength $argv] < 1} {
	error "missing sub-command argument\n$help_hint_msg" }

set command [lindex $argv 0]
set path    [lindex $argv end]

set command_info(min_args,format)   2
set command_info(min_args,subnodes) 3
set command_info(min_args,get)      3
set command_info(min_args,remove)   3
set command_info(min_args,set)      3
set command_info(min_args,enable)   3
set command_info(min_args,disable)  3

if {![info exists command_info(min_args,$command)]} {
	error "unknown command '$command'\n$help_hint_msg" }

if {[llength $argv] < $command_info(min_args,$command)} {
	error "missing $command argument\n$help_hint_msg" }

set input_hid [$import_hid_fn [file_content $path]]

set out_fd stdout
if {$in_place} {
	set out_fd [open $path w]
}


#
# Commands
#

proc with_command { expected body } {

	global command

	if {$command == $expected} {
		uplevel 1 $body }
}


proc with_node_query { node_query_hid_var body } {

	upvar $node_query_hid_var node_query_hid

	global argv
	set node_query_hid [import_hid [lindex $argv 1] false]
	uplevel 1 $body
}


##
# Split query definition info node path and args
#
# The query uses HID syntax with both parts separated by ' | : '.
# Technically, the args are represented as quoted content of the
# leaf node of the query.
#
proc with_node_query_and_args { node_query_hid_var args_var body } {

	upvar $node_query_hid_var node_query_hid
	upvar $args_var           args

	global argv
	set query [lindex $argv 1]

	set split_pattern "^(.*) [delim_pattern][quoted_pattern](.*)"
	if {[regexp $split_pattern $query dummy node_query dummy args]} {

		set node_query_hid [import_hid $node_query false]
		uplevel 1 $body
	}
}


with_command format {

	if {$print_hid_fn == "print_hid_as_is"} { set print_hid_fn print_hid_as_tree }

	$print_hid_fn $out_fd $input_hid
}


with_command subnodes {

	if {$print_hid_fn == "print_hid_as_is"} { set print_hid_fn print_hid_as_tree }

	with_node_query query_hid {
		foreach hid [hid_query_nodes $query_hid $input_hid] {
			$print_hid_fn stdout $hid } }
}


with_command remove {

	with_node_query query_hid {
		set filter_fn { set empty {} }
		$print_hid_fn $out_fd [hid_filter unused $query_hid $input_hid empty $filter_fn]
	}
}


with_command enable {

	with_node_query query_hid {
		$print_hid_fn $out_fd [hid_filter unused $query_hid $input_hid enable_hid {
			if {[hid_anchor $enable_hid] != ""} {
				set enable_hid [lreplace $enable_hid 4 4 "+ "] } }] }
}


with_command disable {

	with_node_query query_hid {
		$print_hid_fn $out_fd [hid_filter unused $query_hid $input_hid disable_hid {
			if {[hid_anchor $disable_hid] != ""} {
				set disable_hid [lreplace $disable_hid 4 4 "x "] } }] }
}


with_command get {

	with_node_query_and_args query_hid queried_tag {
		foreach hid [hid_query_nodes $query_hid $input_hid ] {
			foreach attr [hid_attributes $hid] {
				if {[attr_tag $attr] == $queried_tag} {
					puts stdout [attr_value $attr] } } } }
}


with_command set {

	with_node_query_and_args query_hid new_attrs {

		# satisfy invariant that each attribute is preceeded by space (prespace)
		set new_attrs " $new_attrs"

		set ctx(new_attrs) [hid_attributes [import_hid $new_attrs false]]

		$print_hid_fn $out_fd [hid_filter ctx $query_hid $input_hid filtered_hid {

			set orig_attrs [hid_attributes $filtered_hid]
			set new_attrs  $ctx(new_attrs)
			set result_attrs {}

			# apply changes to existing attributes
			foreach attr $orig_attrs {
				foreach new_attr $new_attrs {
					if {[attr_tag $attr] == [attr_tag $new_attr]} {
						set value [string trim [attr_value $new_attr]]
						set align [attr_align $attr]
						if {$value == ""} { set align "" } else { set align " " }
						set attr [lreplace $attr 3 3 $align]
						set attr [lreplace $attr 4 4 $value]
					}
				}
				lappend result_attrs $attr
			}

			# supplement newly added attributes
			foreach new_attr $new_attrs {
				set present 0
				foreach attr $result_attrs {
					if {[attr_tag $attr] == [attr_tag $new_attr]} {
						set present 1 } }
				if {!$present} {
					set prespace " "
					if {[llength $result_attrs] > 0} { set prespace " | " }
					lappend result_attrs [list "?" $prespace [attr_tag   $new_attr] \
					                                     " " [attr_value $new_attr]] }
			}
			set filtered_hid [lreplace $filtered_hid 6 6 $result_attrs]
		}]
	}
}
