Pages

Friday, 26 July 2013

Java Multi-Threaded Recursive File & Folder Crawler

This is a multi-threaded java program that takes a filepath and recursively goes through each folder and displays the contents of each 

package FileName;
import java.util.*;
import java.io.*;
 
public class fileCrawler {
 
  private WorkQueue workQ;
  static int i = 0;
 
 private class Worker implements Runnable {
 
  private WorkQueue queue;
 
  public Worker(WorkQueue q) {
   queue = q;
  }
 
//  since main thread has placed all directories into the workQ, we
//  know that all of them are legal directories; therefore, do not need
//  to try ... catch in the while loop below
 
  public void run() {
   String name;
   while ((name = queue.remove()) != null) {
    File file = new File(name);
    String entries[] = file.list();
    if (entries == null)
     continue;
    for (String entry : entries) {
     if (entry.compareTo(".") == 0)
      continue;
     if (entry.compareTo("..") == 0)
      continue;
     String fn = name + "\\" + entry;
     System.out.println(fn);
    }
   }
  }
 }
 
 public fileCrawler() {
  workQ = new WorkQueue();
 }
 
 public Worker createWorker() {
  return new Worker(workQ);
 }
 
 
// need try ... catch below in case the directory is not legal
 
 public void processDirectory(String dir) {
   try{
   File file = new File(dir);
   if (file.isDirectory()) {
    String entries[] = file.list();
    if (entries != null)
     workQ.add(dir);
 
    for (String entry : entries) {
     String subdir;
     if (entry.compareTo(".") == 0)
      continue;
     if (entry.compareTo("..") == 0)
      continue;
     if (dir.endsWith("\\"))
      subdir = dir+entry;
     else
      subdir = dir+"\\"+entry;
     processDirectory(subdir);
    }
   }}catch(Exception e){}
 }
 
 public static void main(String Args[]) {
 
  fileCrawler fc = new fileCrawler();
 
//  now start all of the worker threads
 
  int N = 5;
  ArrayList<Thread> thread = new ArrayList<Thread>(N);
  for (int i = 0; i < N; i++) {
   Thread t = new Thread(fc.createWorker());
   thread.add(t);
   t.start();
  }
 
//  now place each directory into the workQ
 
  fc.processDirectory(Args[0]);
 
//  indicate that there are no more directories to add
 
  fc.workQ.finish();
 
  for (int i = 0; i < N; i++){
   try {
    thread.get(i).join();
   } catch (Exception e) {};
  }
 }
}



package FileName;
import java.util.*;
 
public class WorkQueue {
 
//
// since we are providing the concurrency control, can use non-thread-safe
// linked list
//
  private LinkedList<String> workQ;
 private boolean done;  // no more directories to be added
 private int size;  // number of directories in the queue
 
 public WorkQueue() {
  workQ = new LinkedList<String>();
  done = false;
  size = 0;
 }
 
 public synchronized void add(String s) {
  workQ.add(s);
  size++;
  notifyAll();
 }
 
 public synchronized String remove() {
  String s;
  while (!done && size == 0) {
   try {
    wait();
   } catch (Exception e) {};
  }
  if (size > 0) {
   s = workQ.remove();
   size--;
   notifyAll();
  } else
   s = null;
  return s;
 }
 
 public synchronized void finish() {
  done = true;
  notifyAll();
 }
}

12 comments:

  1. I have a question : What is the use of the WorkQueue class ? I have seen a file crawler in less than 30 lines of code. So why to complicate ?
    Jenik

    ReplyDelete
    Replies
    1. WorkQueue is the concurrent class where all data manipulation is done

      Delete
    2. Hi Jenik, can you post your 30 lines of code here? thanks!

      Delete
  2. Java SE & Java EE article is practical oriented and real time examples. How Java EE address the enterprise development is very important. for that you need a practical orieneted Java Training Courses you need.

    Great Article

    Online Java Training
    Online Java Training
    Java Training Institutes in Chennai
    J2EE training
    Java Training in Chennai
    Java Interview Questions
    Best Recommended books for Spring framework

    ReplyDelete
    Replies
    1. Java Training Institutes Java Training Institutes Java EE Training in Chennai Java EE Training in Chennai Java Spring Hibernate Training Institutes in Chennai J2EE Training Institutes in Chennai J2EE Training Institutes in Chennai Core Java Training Institutes in Chennai Core Java Training Institutes in Chennai

      Hibernate Online Training Hibernate Online Training Hibernate Training in Chennai Hibernate Training in Chennai Java Online Training Java Online Training

      Delete
  3. hey ..im getting this error..Array index out of bound...what should i do

    ReplyDelete
  4. Thanks for sharing this helpful code. but there is no different in execution runtime if you use it with 1 thread or more. test it!

    ReplyDelete
  5. It is amazing and wonderful to visit your site.Thanks for sharing this information,this is useful to me...

    Selenium training in Chennai

    Selenium training in Bangalore

    ReplyDelete
  6. After reading this web site I am very satisfied simply because this site is providing comprehensive knowledge for you to audience.
    Thank you to the perform as well as discuss anything incredibly important in my opinion. We loose time waiting for your next article writing in addition to I beg one to get back to pay a visit to our website in




    Selenium training in bangalore
    Selenium training in Chennai
    Selenium training in Bangalore
    Selenium training in Pune
    Selenium Online training

    ReplyDelete
  7. Thanks for sharing The Information The Information shared is very valuable Please keep Updating us Time Just went on redaing the article Python Online Training AWS Online Training Devops Online Training Data Science Online Training

    ReplyDelete