2010-10-30

Forward proxy of HTTPS by Apache HTTPD

I wanted to set up a forward proxy server for a web site that uses
both the HTTPS and HTTP protocols.
The basics are well documented in
<http://httpd.apache.org/docs/2.2/en/mod/mod_proxy.html>.

One thing I had to find out by trial and error is that an HTTPS connection is
implemented with the CONNECT method of HTTP and is not represented directly
in the <Proxy> directive.
Instead, we have to match a URI of the form "proxy:host:443".

# for HTTP
# Plain HTTP requests are matched by the URL of the origin server.
# With "Order deny,allow" a client that matches neither Deny nor Allow is
# permitted, so "Deny from all" is required to restrict access to the
# listed client only.
<Proxy http://{ORIGIN-SERVER}/*>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>

# for HTTPS
# HTTPS is tunnelled through the HTTP CONNECT method, so the section must
# match the "proxy:host:443" form rather than an https:// URL.
<Proxy proxy:{ORIGIN-SERVER}:443>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>

2010-10-21

ctags for XML Schema

require 'uri'
require 'rubygems'
require 'xml'

# Generates a vi "tags" file from XML Schema (XSD) files.
#
# Every schema named on the command line is parsed with libxml-ruby. The
# value of each name attribute found on a direct child of <xs:schema> becomes
# a tag pointing at the file that declares it, and xs:import / xs:include
# references are followed recursively.
class App

  # Namespace the root element of every processed document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments; leading options are
  #   consumed by #getopts, the remaining entries are schema file names.
  def initialize(argv)
    @argv = argv.dup   # work on a copy so the caller's array is not modified
    @cache = {}        # URI string => true for documents already parsed
    @names = {}        # tag name => path of the file that declares it
    @outfnam = 'tags'
    getopts
  end

  # Prints +str+ to standard error only when Ruby runs in verbose mode.
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints +str+ to standard error unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading arguments that start with "-".
  #
  # -oFILE or -o FILE selects the output file name. Any other option is
  # discarded silently.
  def getopts
    while /^-/ === @argv.first
      case @argv.shift
      when /^-o(.*)/
        name = $1
        # A bare "-o" takes its value from the following argument.
        name = @argv.shift.to_s if name.empty?
        @outfnam = name unless name.empty?
      end
    end
  end

  # Prints a short usage message to standard output.
  def help
    puts <<EOF
vi tags generator
usage: ruby #{$0} file.xsd ...
EOF
  end

  # Parses the schema at +uri+, records its top-level names and then
  # processes every imported or included schema.
  #
  # @param uri [URI] location of the schema; only its path is opened.
  # @param disp [String, nil] text shown instead of the URI in the
  #   "skipping" message.
  # @raise [RuntimeError] when the root element is not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp or uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)

    children = []
    doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS).each do |node|
      children.push node['schemaLocation'].to_s
    end

    doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS).each do |attr|
      name = attr.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path   # the most recently seen declaration wins
    end

    # Drop the document reference before recursing into referenced schemas.
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file given on the command line.
  #
  # NOTE(review): prefixing "file:///" turns a relative file name into a
  # root-relative path; presumably absolute (or drive-letter) paths are
  # expected here. Confirm.
  def run1(filename)
    uri = URI('file:///' + filename)
    get1(uri)
  end

  # Processes every remaining command-line argument.
  #
  # @return [App] self, so that calls can be chained.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to the output file in tags format:
  # name, file and search pattern separated by tabs.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # Pattern looks for the name between quote characters; non-word
        # characters in the name are replaced by ".".
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file.
  #
  # @return [App] self
  def close
    output
    self
  end

end

# Entry point: parse the command line, scan every schema file given, then
# write the tags file.
App.new(ARGV).run.close
This is really powerful when editing XML files in vi.
====
require 'uri'
require 'rubygems'
require 'xml'

# Generates a vi "tags" file from XML Schema (XSD) files.
#
# Every schema named on the command line is parsed with libxml-ruby. The
# value of each name attribute found on a direct child of <xs:schema> becomes
# a tag pointing at the file that declares it, and xs:import / xs:include
# references are followed recursively.
class App

  # Namespace the root element of every processed document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments; leading options are
  #   consumed by #getopts, the remaining entries are schema file names.
  def initialize(argv)
    @argv = argv.dup   # work on a copy so the caller's array is not modified
    @cache = {}        # URI string => true for documents already parsed
    @names = {}        # tag name => path of the file that declares it
    @outfnam = 'tags'
    getopts
  end

  # Prints +str+ to standard error only when Ruby runs in verbose mode.
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints +str+ to standard error unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading arguments that start with "-".
  #
  # -oFILE or -o FILE selects the output file name. Any other option is
  # discarded silently.
  def getopts
    while /^-/ === @argv.first
      case @argv.shift
      when /^-o(.*)/
        name = $1
        # A bare "-o" takes its value from the following argument.
        name = @argv.shift.to_s if name.empty?
        @outfnam = name unless name.empty?
      end
    end
  end

  # Prints a short usage message to standard output.
  def help
    puts <<EOF
vi tags generator
usage: ruby #{$0} file.xsd ...
EOF
  end

  # Parses the schema at +uri+, records its top-level names and then
  # processes every imported or included schema.
  #
  # @param uri [URI] location of the schema; only its path is opened.
  # @param disp [String, nil] text shown instead of the URI in the
  #   "skipping" message.
  # @raise [RuntimeError] when the root element is not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp or uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)

    children = []
    doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS).each do |node|
      children.push node['schemaLocation'].to_s
    end

    doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS).each do |attr|
      name = attr.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path   # the most recently seen declaration wins
    end

    # Drop the document reference before recursing into referenced schemas.
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file given on the command line.
  #
  # NOTE(review): prefixing "file:///" turns a relative file name into a
  # root-relative path; presumably absolute (or drive-letter) paths are
  # expected here. Confirm.
  def run1(filename)
    uri = URI('file:///' + filename)
    get1(uri)
  end

  # Processes every remaining command-line argument.
  #
  # @return [App] self, so that calls can be chained.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to the output file in tags format:
  # name, file and search pattern separated by tabs.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # Pattern looks for the name between quote characters; non-word
        # characters in the name are replaced by ".".
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file.
  #
  # @return [App] self
  def close
    output
    self
  end

end

# Entry point: parse the command line, scan every schema file given, then
# write the tags file.
App.new(ARGV).run.close

Caveat - xlink:href in ISO 19139 Geographic Metadata

I wanted to reuse the same instance of gmd:authority/gmd:CI_Citation
many times, so I came up with the idea of using xlink:href.

[definition]
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
</gmd:CI_Citation>
</gmd:authority>

[quotation]
<gmd:authority>
<gmd:CI_Citation xlink:href="#url.authority">
</gmd:authority>

But this causes a validation error saying that xlink:href is not allowed in CI_Citation.
It took a while, but I finally found the reason by reading the XML schema.

The xlink:href attribute must be attached to the parent of the omitted
element, i.e. the element that would be present if xlink:href were not used.
So the following validates.

<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...

<gmd:authority xlink:href="#url.authority"/>

2010-10-20

A libxml-ruby script to download XSD recursively

require 'uri'
require 'net/http'
require 'rubygems'
require 'xml'

# Downloads XML Schema documents over HTTP(S) and, recursively, every schema
# they reference through xs:import / xs:include. Each document is saved under
# a local path built from the host name and the URL path.
class App

  # Namespace the root element of every downloaded document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments: an optional -pNUM
  #   option plus the URIs to download.
  def initialize(argv)
    @strip = 0      # leading path components to drop from saved file names
    @argv = []
    argv.each do |arg|
      if /\A-p(\d+)\z/ =~ arg
        @strip = $1.to_i
      else
        @argv.push arg
      end
    end
    @cache = {}     # URI string => true for documents already downloaded
    @htconn = {}    # :shp => [scheme, host, port], :conn => open Net::HTTP
  end

  # Prints a short usage message to standard output.
  def help
    puts <<EOF
XSD downloader following includes and imports
usage: ruby #{$0} [-pNUM] uri ...
-pNUM number of directory structure (including hostname) to be stripped
EOF
  end

  # Maps +uri+ to the relative path the document is saved under.
  #
  # The path is the host name followed by the URL path components, minus the
  # number of leading components requested with -pNUM. The final component is
  # always kept. Empty, "." and ".." components are removed so that a
  # server-supplied location cannot point outside the output tree.
  #
  # @param uri [URI]
  # @return [String]
  def outfnam(uri)
    parts = [uri.host, *uri.path.to_s.split('/')].compact
    parts = parts.reject { |part| part.empty? || part == '.' || part == '..' }
    drop = [@strip, parts.size - 1].min
    drop = 0 if drop < 0
    File.join(*parts.drop(drop))
  end

  # Finishes the cached HTTP connection, if one is open.
  def close
    conn = @htconn[:conn]
    conn.finish if conn && conn.started?
  end

  # Yields an open Net::HTTP connection for +uri+, reusing the cached one
  # when scheme, host and port are unchanged.
  def getconn(uri)
    shp = [uri.scheme, uri.host, uri.port]
    if @htconn[:shp] == shp && @htconn[:conn]
      puts 'reusing connection'
    else
      close
      @htconn.clear
      puts "connecting #{shp.join(' ')}"
      conn = Net::HTTP.new(uri.host, uri.port)
      conn.use_ssl = true if uri.scheme == 'https'
      conn.start
      # Record the connection only once it has started, so a failed start
      # never leaves an unusable connection in the cache.
      @htconn[:shp] = shp
      @htconn[:conn] = conn
    end
    yield @htconn[:conn]
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Creates +dirname+ and any missing parent directories.
  #
  # @return [nil] when the directory already exists.
  # @raise [Errno::ENOTDIR] when +dirname+ exists but is not a directory.
  def mkdir_p(dirname)
    return nil if File.directory?(dirname)
    if File.exist?(dirname)
      raise Errno::ENOTDIR, "not a directory: (#{dirname})"
    end
    puts "mkdir #{dirname}"
    mkdir_p(File.dirname(dirname))
    Dir.mkdir(dirname)
  end

  # Writes +content+ to +filename+ in binary mode, creating parent
  # directories as needed.
  def savefile(filename, content)
    puts "saving #{filename}"
    mkdir_p File.dirname(filename)
    File.open(filename, 'wb') { |fp| fp.write(content) }
  end

  # Downloads and saves the schema at +uri+, then does the same for every
  # schema it imports or includes.
  #
  # @param uri [URI::HTTP] location of the schema.
  # @param disp [String, nil] text shown instead of the URI in the
  #   "skipping" message.
  # @raise [RuntimeError] on a non-200 response or when the root element is
  #   not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      puts "skipping #{disp or uri}"
      return
    end
    ofn = outfnam(uri)
    buf = nil
    getconn(uri) do |conn|
      # request_uri keeps the query string and is "/" for an empty path.
      resp = conn.get(uri.request_uri)
      raise "#{resp.code} #{resp.message}" unless /^200/ === resp.code
      buf = resp.body
    end
    savefile(ofn, buf)
    @cache[uri.to_s] = true

    doc = XML::Document.string(buf)
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    children = []
    doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS).each do |node|
      children.push node['schemaLocation'].to_s
    end

    # Drop the document reference before recursing into referenced schemas.
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Downloads the schema named by one command-line argument.
  def run1(arg)
    uri = URI(arg)
    get1(uri)
  end

  # Downloads every URI given on the command line.
  #
  # @return [App] self, so that calls can be chained.
  def run
    @argv.each { |arg| run1 arg }
    self
  end

end

# Entry point: download every URI given on the command line, then finish the
# HTTP connection that is still open.
App.new(ARGV).run.close

2010-10-13

How to create processing instruction when writing XML using libxml-ruby

Actually, I couldn't find a way. So I had to use libxslt-ruby and
apply a stylesheet that simply inserts the PI.

require 'libxslt'
# Post-process the document only when @xslt is set.
if @xslt
# Stylesheet text: for the document root it emits an xml-stylesheet
# processing instruction whose href is @xslt, then copies the input unchanged.
# NOTE(review): @xslt is interpolated into the XML text without escaping;
# presumably it never contains characters such as "<", "&" or quotes. Confirm.
filter = <<-END_OF_XSLT
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<xsl:processing-instruction name="xml-stylesheet"
>type="text/xsl" href="#{@xslt}"</xsl:processing-instruction>
<xsl:copy-of select="."/>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT
# Parse the stylesheet text and replace @xdoc with the transformed document.
stylesheet = LibXSLT::XSLT::Stylesheet.new(XML::Document.string(filter))
@xdoc = stylesheet.apply(@xdoc)
end