create-mime.conf.pl 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. #!/usr/bin/perl -w
  2. # Based on create-mime.assign.pl in debian lighttpd (1.4.x) package
  3. # Creates an example mime.conf from /etc/mime.types
  4. use strict;
  5. # future: might use Getopt::Std, but this is simple enough for now
  6. my $verbose = 0;
  7. foreach (@ARGV) { $verbose = 1 if ($_ eq "-v"); }
  8. # text/* subtypes to serve as "text/...; charset=utf-8"
  9. # text/html IS NOT INCLUDED: html has its own method for defining charset
  10. # (<meta>), but the standards specify that content-type in HTTP wins over
  11. # the setting in the html document.
  12. # text/markdown doesn't have an official default charset, but requires
  13. # one being specified - it seems reasonable to hardcode it to UTF-8
  14. my %text_utf8 = map { $_ => 1 } qw(
  15. css
  16. csv
  17. markdown
  18. plain
  19. x-bibtex
  20. x-boo
  21. x-c++hdr
  22. x-c++src
  23. x-chdr
  24. x-csh
  25. x-csrc
  26. x-dsrc
  27. x-diff
  28. x-haskell
  29. x-java
  30. x-lilypond
  31. x-literate-haskell
  32. x-makefile
  33. x-moc
  34. x-pascal
  35. x-perl
  36. x-python
  37. x-scala
  38. x-sh
  39. x-tcl
  40. x-tex
  41. );
  42. # map extension to hash which maps types to the type they should be replaced with
  43. my %manual_conflicts_resolve = (
  44. '.ra' => {
  45. 'audio/x-pn-realaudio' => 'audio/x-realaudio',
  46. },
  47. # use font media types from iana registry
  48. '.otf' => {
  49. 'application/font-sfnt' => 'font/ttf',
  50. 'font/sfnt' => 'font/ttf',
  51. 'font/ttf' => 'font/ttf',
  52. },
  53. '.ttf' => {
  54. 'application/font-sfnt' => 'font/ttf',
  55. 'font/otf' => 'font/ttf',
  56. 'font/sfnt' => 'font/ttf',
  57. },
  58. '.woff' => {
  59. 'application/font-woff' => 'font/woff',
  60. },
  61. # end of fonts
  62. '.asn' => {
  63. 'chemical/x-ncbi-asn1-spec' => 'application/octet-stream',
  64. },
  65. '.ent' => {
  66. 'chemical/x-ncbi-asn1-ascii' => 'application/octet-stream',
  67. },
  68. );
  69. open MIMETYPES, "/etc/mime.types" or die "Can't open /etc/mime.types: $!";
  70. my %extensions;
  71. my %lcext;
  72. sub set {
  73. my ($extension, $mimetype) = @_;
  74. $extensions{$extension} = $mimetype;
  75. $lcext{lc($extension)} = $extension;
  76. }
  77. sub add {
  78. my ($extension, $mimetype) = @_;
  79. # lighttpd uses case-insensitive extension mapping to mime type. Still,
  80. # preserve case of first ext seen if case-insensitive duplicates exist.
  81. my $seen = $lcext{lc($extension)};
  82. if (defined($seen) && $seen ne $extension) {
  83. # update @_ too for calls to set
  84. $_[0] = $extension = $seen;
  85. }
  86. my $have = $extensions{$extension};
  87. my $r = $manual_conflicts_resolve{$extension};
  88. # update @_ too for calls to set
  89. $_[1] = $mimetype = $r->{$mimetype} if $r && $r->{$mimetype};
  90. # mime.types can have same extension for different mime types
  91. if ($have) {
  92. # application/octet-stream means we couldn't resolve another conflict
  93. return if $have eq $mimetype || $have eq 'application/octet-stream';
  94. my ($have_type, $have_subtype) = split /\//, $have, 2;
  95. my ($type, $subtype) = split /\//, $mimetype, 2;
  96. my $have_x = ($have_type =~ /^x-/ || $have_subtype =~ /^x-/);
  97. my $x = ($type =~ /^x-/ || $subtype =~ /^x-/);
  98. # entries without x- prefix in type/subtype win:
  99. if ($have_x && !$x) {
  100. return set @_; # overwrite
  101. } elsif ($x && !$have_x) {
  102. return; # ignore
  103. }
  104. # text/ wins over application/ for same subtype
  105. if ($subtype eq $have_subtype) {
  106. if ($type eq "text" && $have_type eq "application") {
  107. return set @_; # overwrite
  108. } elsif ($have_type eq "text" && $type eq "application") {
  109. return; # ignore
  110. }
  111. }
  112. # non-vnd.* subtype wins over vnd.* subtype
  113. my $have_vnd = ($have_subtype =~ /^vnd\./);
  114. my $vnd = ($subtype =~ /^vnd\./);
  115. if ($vnd ^ $have_vnd) {
  116. if ($have_vnd) {
  117. return set @_; # overwrite
  118. }
  119. else {
  120. return; # ignore
  121. }
  122. }
  123. if ($verbose && !$vnd) {
  124. print STDERR "Duplicate mimetype: '${extension}' => '${mimetype}' (already have '${have}'), merging to 'application/octet-stream'\n"
  125. }
  126. set ($extension, 'application/octet-stream');
  127. } else {
  128. set @_;
  129. }
  130. }
  131. sub print_type {
  132. my ($extension, $mimetype) = @_;
  133. if ($mimetype =~ /^text\/(.*)$/) {
  134. $mimetype .= "; charset=utf-8" if $text_utf8{$1};
  135. }
  136. print "\t\"${extension}\" => \"${mimetype}\",\n";
  137. }
  138. while (<MIMETYPES>) {
  139. chomp;
  140. s/\#.*//;
  141. next if /^\w*$/;
  142. if (/^([a-z0-9\/+.-]+)\s+((?:[a-z0-9+.-]+[ ]?)+)$/i) {
  143. my $mimetype = $1;
  144. my @extensions = split / /, $2;
  145. foreach my $ext (@extensions) {
  146. add(".${ext}", $mimetype);
  147. }
  148. }
  149. }
  150. # missing in /etc/mime.types;
  151. # from http://www.iana.org/assignments/media-types/media-types.xhtml
  152. add(".dtd", "application/xml-dtd");
  153. # other useful mappings
  154. my %useful = (
  155. ".tar.gz" => "application/x-gtar-compressed",
  156. ".gz" => "application/x-gzip",
  157. ".tbz" => "application/x-gtar-compressed",
  158. ".tar.bz2" => "application/x-gtar-compressed",
  159. ".bz2" => "application/x-bzip",
  160. ".log" => "text/plain",
  161. ".conf" => "text/plain",
  162. ".spec" => "text/plain",
  163. "README" => "text/plain",
  164. "Makefile" => "text/x-makefile",
  165. );
  166. while (my ($ext, $mimetype) = each %useful) {
  167. add($ext, $mimetype) unless $extensions{$ext};
  168. }
# Emit the fixed head of the generated configuration on STDOUT: explanatory
# comments, the commented-out xattr settings, and the opening of the
# mimetype.assign list that the entries printed afterwards fill in.
# The heredoc is an interpolating one, but its text contains no variables.
print <<EOF;
# created by create-mime.conf.pl
#######################################################################
##
## MimeType handling
## -------------------
##
## https://redmine.lighttpd.net/projects/lighttpd/wiki/Mimetype_assignDetails
##
## mimetype.xattr-name
## Set the extended file attribute name used to obtain mime type
## (must also set mimetype.use-xattr = "enable")
##
## Default value is "Content-Type"
##
## freedesktop.org Shared MIME-info Database specification suggests
## user-defined value ("user.mime_type") as name for extended file attribute
#mimetype.xattr-name = "user.mime_type"
##
## Use extended attribute named in mimetype.xattr-name (default "Content-Type")
## to obtain mime type if possible
##
## Disabled by default
##
#mimetype.use-xattr = "enable"
##
## mimetype ("Content-Type" HTTP header) mapping for static file handling
##
## The first matching suffix is used. If no mapping is found
## 'application/octet-stream' is used, and caching (etag/last-modified handling)
## is disabled to prevent clients from caching "unknown" mime types.
##
## Therefore the last mapping is:
## "" => "application/octet-stream"
## This matches all extensions and acts as default mime type, and enables
## caching for those.
mimetype.assign = (
EOF
  207. # sort "x-" and "vnd." prefixed names after everything else
  208. sub mimecmpvalue {
  209. my ($mimetype) = @_;
  210. $mimetype =~ s/(^|\/)(x-|vnd\.)/~$1$2/g;
  211. return $mimetype;
  212. }
  213. sub countdots {
  214. my ($s) = @_;
  215. return scalar(() = $s =~ /\./g);
  216. }
  217. # the first matching suffix wins, so we have to sort by "length"
  218. # as all extensions start with "." we use the number of "."s as length
  219. # the exceptions are "README" and "Makefile" which are assumed not to conflict
  220. # (i.e. are not a suffix of any other extension)
  221. for my $ext (sort { countdots($b) <=> countdots($a) || mimecmpvalue($extensions{$a}) cmp mimecmpvalue($extensions{$b}) || $a cmp $b } keys(%extensions)) {
  222. print_type($ext, $extensions{$ext});
  223. }
# Finish the mimetype.assign list: the empty suffix "" is the catch-all entry
# that gives otherwise unknown files 'application/octet-stream', followed by
# the closing parenthesis of the list.
print <<EOF;
# enable caching for unknown mime types:
"" => "application/octet-stream"
)
EOF