Index: lib/fileencoding.n
===================================================================
--- lib/fileencoding.n	(revision 0)
+++ lib/fileencoding.n	(revision 0)
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2006, Snaury (snaury@gmail.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright notice,
+ *       this list of conditions and the following disclaimer in the documentation
+ *       and/or other materials provided with the distribution.
+ *     * Neither the name of the author nor the names of its contributors may be
+ *       used to endorse or promote products derived from this software without
+ *       specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+namespace Nemerle.Encoding
+{
+  using System;
+  using System.IO;
+  using System.Text;
+  using System.Text.RegularExpressions;
+
+  /**
+   * Auto detects FileStream encoding (simple BOM/utf-8/non-utf-8 cases)
+   */
+  public module FileStreamAutoEncoding
+  {
+    /**
+     * Reads fs from its current position (stopping early once the answer is known) and restores the position afterwards
+     * When lines is not null, appends up to maxlines leading lines to it (decoded as ascii, line endings replaced by '\n')
+     * Returns Encoding.UTF8 for valid utf-8 (a multi-byte sequence cut off by the end of the stream counts as invalid),
+     * otherwise defaultEncoding (or Encoding.Default if it is null); throws on a null fs or a negative maxlines
+     */
+    public DetectEncoding(fs : FileStream, lines : StringBuilder, mutable maxlines : int, mutable defaultEncoding : Encoding) : Encoding
+    {
+      when(fs == null) throw ArgumentNullException("fs");
+      when(maxlines < 0) throw ArgumentOutOfRangeException("maxlines", "maxlines must be greater or equal to zero");
+      when(lines == null) maxlines = 0; // nowhere to store lines, so do not look for them
+      def saved_position = fs.Position;
+      def ascii = Encoding.ASCII;
+      mutable utf8len = 0; // continuation bytes still expected for the current utf-8 sequence
+      mutable numbytes = 0;
+      mutable numchars = 0;
+      mutable foundlines = 0;
+      mutable ch1 = '\0'; // line ending char that closed the previous chunk, '\0' if none is pending
+      mutable ch2;
+      mutable has_valid_utf8 = true;
+      def bytes = array.[byte](4096);
+      mutable chars = if(lines != null) array.[char](4096) else null;
+      while({ numbytes = fs.Read(bytes, 0, bytes.Length); numbytes } > 0)
+      {
+        // we need to scan for more lines
+        when(foundlines < maxlines)
+        {
+          mutable linestart = 0;
+          mutable lineend;
+          numchars = ascii.GetChars(bytes, 0, numbytes, chars, 0);
+          while(foundlines < maxlines && linestart < numchars)
+          {
+            for(lineend = linestart; lineend < numchars; ++lineend)
+              when(chars[lineend] == '\r' || chars[lineend] == '\n')
+                Nemerle.Imperative.Break();
+            when(lineend == numchars)
+              { ch1 = '\0'; Nemerle.Imperative.Break(); } // no line ending left in this chunk, so a pending one cannot continue
+            // this means that possible piece of line ending was detected in previous chunk
+            when(ch1 != '\0' && lineend == 0)
+            {
+              ch2 = chars[lineend];
+              when((ch1 == '\r' && ch2 == '\n') || (ch1 == '\n' && ch2 == '\r'))
+              {
+                ch1 = '\0';
+                linestart = 1;
+                Nemerle.Imperative.Continue();
+              }
+            }
+            when(linestart < lineend)
+              _ = lines.Append(chars, linestart, lineend - linestart);
+            _ = lines.Append('\n');
+            ++foundlines;
+            when(lineend == (numchars - 1))
+            {
+              // the line ending is the last char of the chunk: remember it for the next chunk
+              ch1 = chars[lineend];
+              linestart = lineend + 1;
+              Nemerle.Imperative.Break();
+            }
+            ch1 = chars[lineend];
+            ch2 = chars[lineend + 1];
+            if((ch1 == '\r' && ch2 == '\n') || (ch1 == '\n' && ch2 == '\r'))
+              linestart = lineend + 2;
+            else
+              linestart = lineend + 1;
+            ch1 = '\0';
+          }
+          when(foundlines < maxlines && linestart < numchars)
+            _ = lines.Append(chars, linestart, numchars - linestart);
+        }
+        // determine if utf-8 is correct
+        for(mutable i = 0; has_valid_utf8 && i < numbytes; ++i)
+        {
+          def b = bytes[i];
+          if(b >= 0x80) {
+            if(b < 0xc0) {
+              if(utf8len > 0)
+                utf8len--;
+              else
+                has_valid_utf8 = false;
+            } else if(b >= 0xc2 && b < 0xf5) {
+              if(utf8len > 0)
+                has_valid_utf8 = false;
+              else if(b < 0xe0)
+                utf8len = 1;
+              else if(b < 0xf0)
+                utf8len = 2;
+              else
+                utf8len = 3;
+            } else
+              has_valid_utf8 = false;
+          } else when(utf8len > 0)
+            has_valid_utf8 = false;
+        }
+        unless(has_valid_utf8) { // the verdict is final; keep reading only while lines are still wanted
+          when(foundlines < maxlines)
+            Nemerle.Imperative.Continue();
+          Nemerle.Imperative.Break();
+        }
+      }
+      fs.Position = saved_position;
+      when(foundlines < maxlines)
+        _ = lines.Append('\n');
+      if(has_valid_utf8 && utf8len == 0) // utf8len > 0 here means the stream ended inside a multi-byte sequence
+        Encoding.UTF8;
+      else
+        if(defaultEncoding != null) defaultEncoding else Encoding.Default;
+    }
+
+    /** Variant of the four-argument DetectEncoding that passes null as defaultEncoding,
+     *  so content that is not valid utf-8 yields Encoding.Default */
+    public DetectEncoding(fs : FileStream, lines : StringBuilder, maxlines : int) : Encoding
+    { DetectEncoding(fs, lines, maxlines, null) }
+
+    /** Variant of the four-argument DetectEncoding that collects no lines
+     *  (null is passed for lines and 0 for maxlines) */
+    public DetectEncoding(fs : FileStream, defaultEncoding : Encoding) : Encoding
+    { DetectEncoding(fs, null, 0, defaultEncoding) }
+
+    /** Variant of the four-argument DetectEncoding that collects no lines and passes null
+     *  as defaultEncoding, so content that is not valid utf-8 yields Encoding.Default */
+    public DetectEncoding(fs : FileStream) : Encoding
+    { DetectEncoding(fs, null, 0, null) }
+
+    /**
+     * Wraps FileStream in a StreamReader that uses the best matching encoding
+     * A byte order mark, when present, decides the encoding
+     * Otherwise the stream content is checked with DetectEncoding
+     */
+    public OpenText(fs : FileStream, defaultEncoding : Encoding) : StreamReader
+    {
+      when(fs == null) throw ArgumentNullException("fs");
+
+      def start = fs.Position;
+      def probe_encoding = Encoding.ASCII;
+      def probe = StreamReader(fs, probe_encoding);
+      _ = probe.Peek(); // reading ahead makes the reader look for a BOM
+      if(probe.CurrentEncoding.Equals(probe_encoding))
+      {
+        // encoding unchanged, so no BOM: drop the probe and rewind the stream
+        probe.DiscardBufferedData();
+        fs.Position = start;
+
+        StreamReader(fs, DetectEncoding(fs, defaultEncoding), false);
+      }
+      else
+        probe;
+    }
+
+    /** Variant of OpenText(fs, defaultEncoding) that passes null as defaultEncoding,
+     *  leaving the choice of the fallback encoding to DetectEncoding */
+    public OpenText(fs : FileStream) : StreamReader
+    { OpenText(fs, null) }
+
+    /** Opens the file fname for reading and hands it to OpenText(fs, defaultEncoding); throws ArgumentNullException on a null fname */
+    public OpenText(fname : string, defaultEncoding : Encoding) : StreamReader
+    {
+      when(fname == null) throw ArgumentNullException("fname");
+      def fs = File.OpenRead(fname);
+      try { OpenText(fs, defaultEncoding) }
+      catch {
+        | e => fs.Close(); throw e; // no reader was returned to own fs, so release the file handle before propagating
+      }
+    }
+
+    /** Variant of OpenText(fname, defaultEncoding) that passes null as defaultEncoding,
+     *  leaving the choice of the fallback encoding to DetectEncoding */
+    public OpenText(fname : string) : StreamReader
+    { OpenText(fname, null) }
+  }
+
+  /**
+   * Auto detects nemerle source file encoding
+   * In addition to what FileStreamAutoEncoding does, it honors a comment carrying an encoding specifier
+   */
+  public module NemerleSourceAutoEncoding
+  {
+    private coding_pep_re : Regex = Regex(@"^\s*//.*coding[:=](?:(?=\s)[^\r\n])*([-\.\w]+)", RegexOptions.Compiled %| RegexOptions.IgnoreCase %| RegexOptions.Multiline); // finds a `//' comment containing `coding:' or `coding=' and captures the encoding name that follows (group 1)
+    private coding_pep_cp_re : Regex = Regex(@"^(?:cp-?)?(\d+)$", RegexOptions.Compiled %| RegexOptions.IgnoreCase); // matches a bare number, optionally prefixed with `cp' or `cp-', and captures the digits (group 1)
+
+    /**
+     * Wraps FileStream in a StreamReader that uses the best matching encoding
+     * A byte order mark, when present, decides the encoding
+     * Otherwise FileStreamAutoEncoding.DetectEncoding chooses the encoding, unless one of
+     * the first two lines is a comment of the form `// coding: <encoding>' naming another one
+     */
+    public OpenText(fs : FileStream, defaultEncoding : Encoding) : StreamReader
+    {
+      when(fs == null) throw ArgumentNullException("fs");
+
+      def start = fs.Position;
+      def probe_encoding = Encoding.ASCII;
+      def probe = StreamReader(fs, probe_encoding);
+      _ = probe.Peek(); // reading ahead makes the reader look for a BOM
+      if(probe.CurrentEncoding.Equals(probe_encoding))
+      {
+        // encoding unchanged, so no BOM: drop the probe and rewind the stream
+        probe.DiscardBufferedData();
+        fs.Position = start;
+
+        def head = StringBuilder();
+        mutable enc = FileStreamAutoEncoding.DetectEncoding(fs, head, 2, defaultEncoding);
+        def coding = coding_pep_re.Match(head.ToString());
+        when(coding.Success) match(coding.Groups[1].Value) {
+          | "ascii" => enc = Encoding.ASCII;
+          | "mbcs" => enc = Encoding.Default;
+          | name =>
+            enc = null;
+            mutable lookup_error = null;
+            try {
+              enc = Encoding.GetEncoding(name);
+            } catch {
+              | e => lookup_error = e;
+            }
+            when(enc == null)
+            {
+              def codepage = coding_pep_cp_re.Match(name);
+              when(codepage.Success)
+              {
+                try {
+                  enc = Encoding.GetEncoding(int.Parse(codepage.Groups[1].Value));
+                } catch {
+                  | _ => ()
+                }
+              }
+            }
+            when(enc == null)
+              throw lookup_error;
+        }
+        StreamReader(fs, enc, false);
+      }
+      else
+        probe;
+    }
+
+    /** Variant of OpenText(fs, defaultEncoding) that passes null as defaultEncoding,
+     *  leaving the choice of the fallback encoding to FileStreamAutoEncoding.DetectEncoding */
+    public OpenText(fs : FileStream) : StreamReader
+    { OpenText(fs, null) }
+
+    /** Opens the file fname for reading and hands it to OpenText(fs, defaultEncoding); throws ArgumentNullException on a null fname */
+    public OpenText(fname : string, defaultEncoding : Encoding) : StreamReader
+    {
+      when(fname == null) throw ArgumentNullException("fname");
+      def fs = File.OpenRead(fname);
+      try { OpenText(fs, defaultEncoding) }
+      catch {
+        | e => fs.Close(); throw e; // no reader was returned to own fs, so release the file handle before propagating
+      }
+    }
+
+    /** Variant of OpenText(fname, defaultEncoding) that passes null as defaultEncoding,
+     *  leaving the choice of the fallback encoding to FileStreamAutoEncoding.DetectEncoding */
+    public OpenText(fname : string) : StreamReader
+    { OpenText(fname, null) }
+  }
+}
Index: ncc/Makefile
===================================================================
--- ncc/Makefile	(revision 6449)
+++ ncc/Makefile	(working copy)
@@ -48,6 +48,7 @@
 	../lib/internal-numbered.n	\
 	../lib/internal-array.n		\
 	../lib/internal.n		\
+	../lib/fileencoding.n           \
 	../lib/core.n			\
 	../lib/macros.n			\
 	../lib/getopt.n			\
Index: ncc/parsing/Lexer.n
===================================================================
--- ncc/parsing/Lexer.n	(revision 6449)
+++ ncc/parsing/Lexer.n	(working copy)
@@ -1427,7 +1427,7 @@
     file_idx = Location.AddFile (fn);
     try {
       def file = IO.FileStream (fn, IO.FileMode.Open, IO.FileAccess.Read);
-      reader = IO.StreamReader (file, Text.Encoding.UTF8);
+      reader = Nemerle.Encoding.NemerleSourceAutoEncoding.OpenText (file);
       when (man.Options.Warnings.IsEnabled (10002))
         check_last_line_for_lf (file);
     }

