#
# Sample links dictionary file for Seth Golub's txt2html
# http://www.aigeek.com/txt2html/
#
# This dictionary contains some patterns for converting obvious URLs,
# ftp sites, hostnames, email addresses and the like to hrefs.
#
# Adapted shamelessly from the html.pl package by Oscar Nierstrasz in
# the Software Archive of the Software Composition Group
# http://iamwww.unibe.ch/~scg/Src/
#
# Layout: one entry per line, written as
#     delimited-pattern  -options->  replacement
# A line that begins with '#' is a comment. Keep every entry on its own
# line: an entry that shares a line with a leading '#' becomes part of
# that comment and is never loaded.
#
# NOTE(review): every entry below whose arrow carries the 'h' option
# replaces the match with the captured text only ($1, $2, ...), so it adds
# no link markup even though the header above talks about hrefs. The
# anchor markup looks to have been stripped from the replacements at some
# point -- confirm against the upstream sample dictionary before relying
# on these entries.
#

# Some people like to mark URLs explicitly, as in <URL:...>
/<URL:\s*(\S+?)\s*>/ -hi-> <URL:$1>

# URLs with an explicit scheme (scheme:rest-of-url).
# The whole match ($&) is used as the link target.
|snews:[\w\.]+| -> $&
|news:[\w\.]+| -> $&
|nntp:[\w/\.:+\-]+| -> $&
# An http URL wrapped in angle brackets; must precede the plain http entry.
|<(http:[\w/\.:\@+\-~\%#?=&;,]+[\w/])>| -hi-> <$1>
# The trailing [\w/] keeps sentence punctuation out of the match.
|http:[\w/\.:\@+\-~\%#?=&;,]+[\w/]| -> $&
# NOTE(review): unlike the http entry, shttp/https accept neither '@' nor
# require a final [\w/], so a trailing '.' or ',' is swallowed into the
# link -- confirm whether that difference is intended.
|shttp:[\w/\.:+\-~\%#?=&;,]+| -> $&
|https:[\w/\.:+\-~\%#?=&;,]+| -> $&
|file:[\w/\.:+\-]+| -> $&
|ftp:[\w/\.:+\-]+| -> $&
|wais:[\w/\.:+\-]+| -> $&
|gopher:[\w/\.:+\-]+| -> $&
|telnet:[\w/\@\.:+\-]+| -> $&

# catch some newsgroups to avoid confusion with sites:
# ($1 is the single character before the group name, $2 the group name)
|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2
|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2

# FTP locations (with directory):
# anonymous@site:path
# ($2 is the site, $3 any whitespace after the colon, $4 the path)
|(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1$2:$4$3

# ftp@site:path
# ($2 is the site, $3 any whitespace after the colon, $4 the path)
|(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1$2:$4$3

# Host, mail and address entries. One entry per line; a line that begins
# with '#' is a comment, so an entry must never share a line with one.
#
# NOTE(review): the entries here whose arrow carries the 'h' option emit
# only captured text plus a few literal fragments (for example
# "$1ftp $2$4" inserts the word "ftp" and "$1$2/" appends a slash); none
# of them produces link markup. The anchor markup looks to have been
# stripped from the replacements -- confirm against the upstream sample
# dictionary.

# Email address
|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z0-9][\w\.+\-]+\.[a-zA-Z]{2,})| -> mailto:$&

# site:path
|([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1$2:$4$3

# NB: don't confuse an http server with a port number for
# an FTP location!
# internet number version: address:path
|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)| -h-> $1$2:$3

# telnet command followed by a site and a 2-4 digit number
# (presumably the port -- confirm)
|telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})| -h-> telnet $1 $3

# ftp command followed by a site
|ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})| -h-> ftp $1

# host with "ftp" in the machine name
|(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?ftp[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])| -h-> $1ftp $2$4

# ftp.foo.net/blah/
|ftp(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> ftp://$&

# www.thehouse.org/txt2html/
|www(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> http://$&

# host with "www" in the machine name
|(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?www[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!\@])| -h-> $1$2$4

# site followed by whitespace and a 2-4 digit number
# (presumably a port -- confirm)
|([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})| -h-> $1 $2

# just the site name:
|([^\w\-/\.:\@>])([a-zA-Z][\w+\-]+(\.[\w+\-]+)+\.[a-zA-Z]{2,})| -h-> $1$2/

# just internet numbers with port:
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})| -h-> $1$2 $3

# just internet numbers:
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})| -h-> $1$2

# (see "relative path") as used by Tom Fine
# Disabled; remove the leading '#' to enable it.
# /\(see \"([^\"]+)\"\)/ -> $1.html

# RFCs
# NOTE(review): this mirror host may no longer be reachable -- confirm,
# and consider pointing at https://www.rfc-editor.org/rfc/rfc$1.txt.
/RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu/rfc/rfc$1.txt

# This would turn "f^H_o^H_o^H_" into "foo". Gross, isn't it?
# Thanks to Mark O'Dell for fixing this.
#
# Disabled entries for backspace-overstrike underlining: each one matches
# runs of "character, backspace, underscore" (or the reverse order) and
# evaluates the replacement as code to delete the backspace/underscore
# pairs. The first two match the backspace as octal \010, the last two as
# the literal two-character sequence ^H. Remove the leading '#' to enable.
# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"$tmp"
# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"$tmp"
# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"$tmp"
# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"$tmp"

# NOTE(review): the underline and emphasis entries below replace the match
# with the captured text only, i.e. they drop the surrounding '_' or '*'
# without adding any markup. The markup looks to have been stripped from
# the replacements -- confirm against the upstream sample dictionary.
# One entry per line; a line that begins with '#' is a comment, so an
# active entry must never share a line with one.

# Mark _underline stuff_ as underlined stuff
# Conservative version (letters and spaces only), disabled:
#/\B_([a-z][a-z ]*[a-z])_\B/ -hi-> $1
# Use this one instead if you want it to match more aggressively.
/\B_(\w(\w|\s|\!|\?|,|;|\.|\-)*(\w|\.|\!|\?))_\B/ -hi-> $1
# Need special case for _x_
# (the entry above requires at least two characters between the underscores)
/\B_([a-z])_\B/ -hi-> $1

# Mark *emphasized stuff* as emphasized stuff
# Conservative version (letters, spaces and '-' only), disabled:
#/\B\*([a-z][a-z -]*[a-z])\*\B/ -hi-> $1
# Use this one instead if you want it to match more aggressively.
/\B\*(\w(\w|\s|\!|\?|,|;|\.)*(\w|\.|\!|\?))\*\B/ -hi-> $1
# We also need a special case for *x*
# (the entry above requires at least two characters between the asterisks)
/\B\*([a-z])\*\B/ -hi-> $1

# Seth and his amazing conversion program :-)
"Seth Golub" -io-> http://www.aigeek.com/
"txt2html" -io-> http://www.aigeek.com/txt2html/

# End of sample dictionary