Java Read Big File Line by Line

[image] I researched this topic for months in my free time and came up with a benchmark; here is the code to benchmark all the different ways to read a file line by line. The individual performance may vary based on the underlying system. I ran it on a Windows 10, Java 8, Intel i5 HP laptop. Here is the code.

          import java.io.*; import java.nio.channels.Channels; import coffee.nio.channels.FileChannel; import java.nio.file.Files; import java.util.ArrayList; import coffee.util.List; import coffee.util.Scanner; import java.util.regex.Pattern; import coffee.util.stream.Stream;  public class ReadComplexDelimitedFile {     private static long total = 0;     individual static final Blueprint FIELD_DELIMITER_PATTERN = Blueprint.compile("\\^\\|\\^");      @SuppressWarnings("unused")     private void readFileUsingScanner() {          String s;         try (Scanner stdin = new Scanner(new File(this.getClass().getResource("input.txt").getPath()))) {             while (stdin.hasNextLine()) {                 s = stdin.nextLine();                 String[] fields = FIELD_DELIMITER_PATTERN.split(south, 0);                 total = total + fields.length;             }         } catch (Exception due east) {             System.err.println("Mistake");         }      }      //Winner     individual void readFileUsingCustomBufferedReader() {          endeavour (CustomBufferedReader stdin = new CustomBufferedReader(new FileReader(new File(this.getClass().getResource("input.txt").getPath())))) {             String southward;             while ((south = stdin.readLine()) != nix) {                 Cord[] fields = FIELD_DELIMITER_PATTERN.split up(due south, 0);                 total += fields.length;             }         } catch (Exception due east) {             Arrangement.err.println("Error");         }      }       private void readFileUsingBufferedReader() {          try (BufferedReader stdin = new BufferedReader(new FileReader(new File(this.getClass().getResource("input.txt").getPath())))) {             Cord s;             while ((s = stdin.readLine()) != zero) {                 String[] fields = FIELD_DELIMITER_PATTERN.split(s, 0);                 full += fields.length;             }         } catch (Exception e) {             System.err.println("Fault");         }     }      
private void readFileUsingLineReader() {          try (LineNumberReader stdin = new LineNumberReader(new FileReader(new File(this.getClass().getResource("input.txt").getPath())))) {             String southward;             while ((due south = stdin.readLine()) != null) {                 Cord[] fields = FIELD_DELIMITER_PATTERN.dissever(south, 0);                 total += fields.length;             }         } grab (Exception e) {             Arrangement.err.println("Error");         }     }      private void readFileUsingStreams() {          try (Stream<String> stream = Files.lines((new File(this.getClass().getResource("input.txt").getPath())).toPath())) {             total += stream.mapToInt(south -> FIELD_DELIMITER_PATTERN.separate(southward, 0).length).sum();         } catch (IOException e1) {             e1.printStackTrace();         }     }       private void readFileUsingBufferedReaderFileChannel() {         try (FileInputStream fis = new FileInputStream(this.getClass().getResource("input.txt").getPath())) {             try (FileChannel inputChannel = fis.getChannel()) {                 effort (CustomBufferedReader stdin = new CustomBufferedReader(Channels.newReader(inputChannel, "UTF-eight"))) {                     Cord south;                     while ((s = stdin.readLine()) != null) {                         String[] fields = FIELD_DELIMITER_PATTERN.dissever(s, 0);                         full = total + fields.length;                     }                 }             } take hold of (Exception eastward) {                 Organisation.err.println("Error");             }         } grab (Exception eastward) {             System.err.println("Error");         }      }       public static void main(Cord args[]) {         //JVM wamrup         for (int i = 0; i < 100000; i++) {             full += i;         }         // We know scanner is slow-Still warming up         ReadComplexDelimitedFile readComplexDelimitedFile = new ReadComplexDelimitedFile();         
List<Long> longList = new ArrayList<>(50);         for (int i = 0; i < 50; i++) {             full = 0;             long startTime = System.nanoTime();             //readComplexDelimitedFile.readFileUsingScanner();             long stopTime = Organisation.nanoTime();             long timeDifference = stopTime - startTime;             longList.add together(timeDifference);          }         Arrangement.out.println("Fourth dimension taken for readFileUsingScanner");         longList.forEach(System.out::println);         // Actual performance test starts here          longList = new ArrayList<>(10);         for (int i = 0; i < 10; i++) {             total = 0;             long startTime = System.nanoTime();             readComplexDelimitedFile.readFileUsingBufferedReaderFileChannel();             long stopTime = Organization.nanoTime();             long timeDifference = stopTime - startTime;             longList.add together(timeDifference);          }         Organisation.out.println("Time taken for readFileUsingBufferedReaderFileChannel");         longList.forEach(System.out::println);         longList.clear();         for (int i = 0; i < ten; i++) {             total = 0;             long startTime = System.nanoTime();             readComplexDelimitedFile.readFileUsingBufferedReader();             long stopTime = Organization.nanoTime();             long timeDifference = stopTime - startTime;             longList.add(timeDifference);          }         System.out.println("Time taken for readFileUsingBufferedReader");         longList.forEach(Organisation.out::println);         longList.clear();         for (int i = 0; i < 10; i++) {             total = 0;             long startTime = Organisation.nanoTime();             readComplexDelimitedFile.readFileUsingStreams();             long stopTime = System.nanoTime();             long timeDifference = stopTime - startTime;             longList.add(timeDifference);          }         System.out.println("Fourth 
dimension taken for readFileUsingStreams");         longList.forEach(System.out::println);         longList.articulate();         for (int i = 0; i < x; i++) {             full = 0;             long startTime = Organisation.nanoTime();             readComplexDelimitedFile.readFileUsingCustomBufferedReader();             long stopTime = System.nanoTime();             long timeDifference = stopTime - startTime;             longList.add(timeDifference);          }         System.out.println("Time taken for readFileUsingCustomBufferedReader");         longList.forEach(System.out::println);         longList.clear();         for (int i = 0; i < x; i++) {             total = 0;             long startTime = Organization.nanoTime();             readComplexDelimitedFile.readFileUsingLineReader();             long stopTime = Arrangement.nanoTime();             long timeDifference = stopTime - startTime;             longList.add(timeDifference);          }         Organization.out.println("Time taken for readFileUsingLineReader");         longList.forEach(Organisation.out::println);      } }                  

I had to rewrite BufferedReader to avoid synchronized and a couple of boundary conditions that are not needed. (At least that's what I felt. It is not unit tested, so use it at your own risk.)

          import java.io.*;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Reads text from a character-input stream, buffering characters so as to
 * provide for the efficient reading of characters, arrays, and lines.
 *
 * <p>This is a trimmed-down adaptation of {@code java.io.BufferedReader}: the
 * hot read paths ({@link #read(char[], int, int)}, {@link #readChar()},
 * {@link #readLine()}) take no lock, and line splitting recognises only the
 * line feed ({@code '\n'}) as a terminator. A carriage return is treated as
 * ordinary data, so lines of a CRLF file keep their trailing {@code '\r'}.
 *
 * <p>The buffer size may be specified, or the default size may be used. The
 * default is large enough for most purposes. For example,
 *
 * <pre>
 * CustomBufferedReader in
 *   = new CustomBufferedReader(new FileReader("foo.in"));
 * </pre>
 *
 * will buffer the input from the specified file. Without buffering, each
 * invocation of read() or readLine() could cause bytes to be read from the
 * file, converted into characters, and then returned, which can be very
 * inefficient.
 *
 * <p><b>Thread safety:</b> instances are not safe for concurrent use. The
 * methods {@link #skip}, {@link #ready}, {@link #mark}, {@link #reset} and
 * {@link #close} exclude one another through an internal lock, but the read
 * methods deliberately bypass it.
 *
 * @author Mark Reinhold (original java.io.BufferedReader)
 * @see FileReader
 * @see InputStreamReader
 * @see java.nio.file.Files#newBufferedReader
 * @since JDK1.1
 */
public class CustomBufferedReader extends Reader {

    /** Value of {@code markedChar} after the read-ahead limit was exceeded. */
    private static final int INVALIDATED = -2;
    /** Value of {@code markedChar} when no mark has been set. */
    private static final int UNMARKED = -1;

    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    private final Reader in;
    private final ReadWriteLock rwlock;

    /** Character buffer; set to {@code null} once the reader is closed. */
    private char[] cb;
    /** Number of valid characters in {@code cb}. */
    private int nChars;
    /** Index in {@code cb} of the next character to hand out. */
    private int nextChar;

    private int markedChar = UNMARKED;
    private int readAheadLimit = 0; /* Valid only when markedChar > 0 */

    /** If the next character is a line feed, skip it. */
    private boolean skipLF = false;

    /** The skipLF flag when the mark was set. */
    private boolean markedSkipLF = false;

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size.
     *
     * @param in A Reader; must not be null
     * @param sz Input-buffer size
     * @throws NullPointerException     If {@code in} is null
     * @throws IllegalArgumentException If {@code sz <= 0}
     */
    public CustomBufferedReader(final Reader in, int sz) {
        super(in); // Reader(Object lock) rejects null
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        this.cb = new char[sz];
        this.nextChar = 0;
        this.nChars = 0;
        this.rwlock = new ReentrantReadWriteLock();
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer.
     *
     * @param in A Reader; must not be null
     */
    public CustomBufferedReader(final Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /**
     * Fills the input buffer, taking the mark into account if it is valid.
     * At end of stream the buffer indices are left unchanged, so callers
     * detect EOF by {@code nextChar >= nChars} still holding afterwards.
     */
    private void fill() throws IOException {
        int dst;
        if (markedChar <= UNMARKED) {
            /* No mark */
            dst = 0;
        } else {
            /* Marked */
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
                /* Gone past read-ahead limit: invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
                    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                } else {
                    /* Reallocate buffer to accommodate read-ahead limit */
                    char[] ncb = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                }
                markedChar = 0;
                dst = delta;
                nextChar = nChars = delta;
            }
        }

        int n;
        do {
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return The character read, or {@code (char) -1} (U+FFFF) if the end of
     * the stream has been reached; a genuine U+FFFF in the input cannot be
     * told apart from end of stream
     * @throws IOException If an I/O error occurs
     */
    public char readChar() throws IOException {
        if (nextChar >= nChars) {
            fill();
            if (nextChar >= nChars) {
                return (char) -1;
            }
        }
        return cb[nextChar++];
    }

    /**
     * Reads characters into a portion of an array, reading from the underlying
     * stream if necessary.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
            /* If the requested length is at least as large as the buffer, and
               if there is no mark/reset activity, and if line feeds are not
               being skipped, do not bother to copy the characters into the
               local buffer.  In this way buffered streams will cascade
               harmlessly. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) {
            return -1;
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p>This method implements the general contract of the corresponding
     * {@link Reader#read(char[], int, int) read} method of the {@link Reader}
     * class. As an additional convenience, it attempts to read as many
     * characters as possible by repeatedly reading from the underlying stream
     * until the requested number of characters has been read, the underlying
     * stream reports end of file, or the underlying stream's {@code ready}
     * method returns {@code false}, indicating that further input requests
     * would block.
     *
     * <p>If the buffer is empty, the mark is not valid, and the requested
     * length is at least as large as the buffer, characters are read directly
     * from the underlying stream into the given array, so stacked buffered
     * readers do not copy data unnecessarily.
     *
     * @param cbuf Destination buffer
     * @param off  Offset at which to start storing characters
     * @param len  Maximum number of characters to read
     * @return The number of characters read, or -1 if the end of the
     * stream has been reached
     * @throws IndexOutOfBoundsException If {@code off} or {@code len} is
     *                                   negative or the range exceeds {@code cbuf}
     * @throws IOException               If an I/O error occurs
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0; // never touch (and possibly block on) the underlying stream
        }
        int n = read1(cbuf, off, len);
        if (n <= 0) {
            return n;
        }
        while ((n < len) && in.ready()) {
            int n1 = read1(cbuf, off + n, len - n);
            if (n1 <= 0) {
                break;
            }
            n += n1;
        }
        return n;
    }

    /**
     * Reads a line of text. A line is terminated only by a line feed
     * ({@code '\n'}); a carriage return is kept as part of the line.
     *
     * @param ignoreLF currently ignored; retained for signature compatibility
     * @return A String containing the contents of the line, not including
     * the terminating line feed, or null if the end of the stream has been
     * reached
     * @throws IOException If an I/O error occurs
     * @see java.io.LineNumberReader#readLine()
     */
    String readLine(boolean ignoreLF) throws IOException {
        StringBuilder pending = null; // text of a line that spans buffer refills

        for (; ; ) {
            if (nextChar >= nChars) {
                fill();
            }
            if (nextChar >= nChars) { /* EOF */
                return (pending != null && pending.length() > 0) ? pending.toString() : null;
            }

            int startChar = nextChar;
            int i = startChar;
            while (i < nChars && cb[i] != '\n') {
                i++;
            }
            boolean eol = i < nChars;
            nextChar = i;

            if (eol) {
                String str;
                if (pending == null) {
                    // Whole line sits in the buffer: build the String directly.
                    str = new String(cb, startChar, i - startChar);
                } else {
                    pending.append(cb, startChar, i - startChar);
                    str = pending.toString();
                }
                nextChar++; // step over the '\n'
                return str;
            }

            if (pending == null) {
                pending = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
            }
            pending.append(cb, startChar, i - startChar);
        }
    }

    /**
     * Reads a line of text. A line is terminated only by a line feed
     * ({@code '\n'}); a carriage return is kept as part of the line.
     *
     * @return A String containing the contents of the line, not including
     * the terminating line feed, or null if the end of the stream has been
     * reached
     * @throws IOException If an I/O error occurs
     * @see java.nio.file.Files#readAllLines
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Skips characters.
     *
     * @param n The number of characters to skip
     * @return The number of characters actually skipped
     * @throws IllegalArgumentException If {@code n} is negative.
     * @throws IOException              If an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        // Write lock: this method mutates the buffer position.
        rwlock.writeLock().lock();
        try {
            long r = n;
            while (r > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    break;
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                }
                long d = nChars - nextChar;
                if (r <= d) {
                    nextChar += r;
                    r = 0;
                    break;
                } else {
                    r -= d;
                    nextChar = nChars;
                }
            }
            return n - r;
        } finally {
            rwlock.writeLock().unlock();
        }
    }

    /**
     * Tells whether this stream is ready to be read. A buffered character
     * stream is ready if the buffer is not empty, or if the underlying
     * character stream is ready.
     *
     * @throws IOException If an I/O error occurs
     */
    @Override
    public boolean ready() throws IOException {
        rwlock.writeLock().lock();
        try {
            /*
             * If newline needs to be skipped and the next char to be read
             * is a newline character, then just skip it right away.
             */
            if (skipLF) {
                /* Note that in.ready() will return true if and only if the next
                 * read on the stream will not block.
                 */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                if (nextChar < nChars) {
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                    skipLF = false;
                }
            }
            return (nextChar < nChars) || in.ready();
        } finally {
            rwlock.writeLock().unlock();
        }
    }

    /**
     * Tells whether this stream supports the mark() operation, which it does.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the present position in the stream. Subsequent calls to reset()
     * will attempt to reposition the stream to this point.
     *
     * @param readAheadLimit Limit on the number of characters that may be
     *                       read while still preserving the mark. An attempt
     *                       to reset the stream after reading characters
     *                       up to this limit or beyond may fail.
     *                       A limit value larger than the size of the input
     *                       buffer will cause a new buffer to be allocated
     *                       whose size is no smaller than limit.
     *                       Therefore large values should be used with care.
     * @throws IllegalArgumentException If {@code readAheadLimit < 0}
     * @throws IOException              If an I/O error occurs
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        rwlock.writeLock().lock();
        try {
            this.readAheadLimit = readAheadLimit;
            markedChar = nextChar;
            markedSkipLF = skipLF;
        } finally {
            rwlock.writeLock().unlock();
        }
    }

    /**
     * Resets the stream to the most recent mark.
     *
     * @throws IOException If the stream has never been marked,
     *                     or if the mark has been invalidated
     */
    @Override
    public void reset() throws IOException {
        rwlock.writeLock().lock();
        try {
            if (markedChar < 0) {
                throw new IOException((markedChar == INVALIDATED)
                        ? "Mark invalid"
                        : "Stream not marked");
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
        } finally {
            rwlock.writeLock().unlock();
        }
    }

    /**
     * Closes the underlying reader and releases the buffer. The reader must
     * not be used after it has been closed.
     *
     * @throws IOException If closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        rwlock.writeLock().lock();
        try {
            in.close();
        } finally {
            cb = null;
            rwlock.writeLock().unlock();
        }
    }

    /**
     * Returns a lazily populated, sequential stream of the remaining lines,
     * split exactly as {@link #readLine()} splits them.
     *
     * <p>An {@link IOException} raised while reading is rethrown from the
     * stream operation wrapped in an {@link UncheckedIOException}. The reader
     * must not be used otherwise while the stream is being consumed.
     *
     * @return a stream over the lines that have not yet been read
     */
    public Stream<String> lines() {
        Iterator<String> iter = new Iterator<String>() {
            /** Line fetched by hasNext() and not yet returned by next(). */
            private String nextLine = null;

            @Override
            public boolean hasNext() {
                if (nextLine != null) {
                    return true;
                }
                try {
                    nextLine = readLine();
                    return nextLine != null;
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }

            @Override
            public String next() {
                if (nextLine == null && !hasNext()) {
                    throw new NoSuchElementException();
                }
                String line = nextLine;
                nextLine = null;
                return line;
            }
        };
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
                iter, Spliterator.ORDERED | Spliterator.NONNULL), false);
    }
}

And now the results:

Time taken for readFileUsingBufferedReaderFileChannel 2902690903 1845190694 1894071377 1815161868 1861056735 1867693540 1857521371 1794176251 1768008762 1853089582

Time taken for readFileUsingBufferedReader 2022837353 1925901163 1802266711 1842689572 1899984555 1843101306 1998642345 1821242301 1820168806 1830375108

Time taken for readFileUsingStreams 1992855461 1930827034 1850876033 1843402533 1800378283 1863581324 1810857226 1798497108 1809531144 1796345853

Time taken for readFileUsingCustomBufferedReader 1759732702 1765987214 1776997357 1772999486 1768559162 1755248431 1744434555 1750349867 1740582606 1751390934

Time taken for readFileUsingLineReader 1845307174 1830950256 1829847321 1828125293 1827936280 1836947487 1832186310 1820276327 1830157935 1829171481

Process finished with exit code 0

Inference: The test was run on a 200 MB file. The test was repeated several times. The data looked like this:

          Start Date^|^Start Time^|^End Date^|^End Time^|^Event Title ^|^All Day Event^|^No End Time^|^Event Description^|^Contact ^|^Contact Email^|^Contact Phone^|^Location^|^Category^|^Mandatory^|^Registration^|^Maximum^|^Last Date To Register 9/5/2011^|^3:00:00 PM^|^9/5/2011^|^^|^Social Studies Dept. Meeting^|^N^|^Y^|^Department meeting^|^Chris Gallagher^|^cgallagher@schoolwires.com^|^814-555-5179^|^High School^|^2^|^N^|^N^|^25^|^9/2/2011                  

Bottom line: there is not much difference between BufferedReader and my CustomReader; it is very minuscule, and hence you can use this to read your file.

Trust me, you don't have to break your head. Use BufferedReader with readLine; it is properly tested. At worst, if you feel you can improve it, just override it and change it to StringBuilder instead of StringBuffer just to shave off half a second.

martinnotsence.blogspot.com

Source: https://stackoverflow.com/questions/14037404/java-read-large-text-file-with-70million-line-of-text

0 Response to "Java Read Big File Line by Line"

Post a Comment

Iklan Atas Artikel

Iklan Tengah Artikel 1

Iklan Tengah Artikel 2

Iklan Bawah Artikel