Pages

Friday, 26 July 2013

Java Multi-Threaded Recursive File & Folder Crawler

This is a multi-threaded Java program that takes a file path, recursively walks through every folder beneath it, and displays the contents of each folder.

package FileName;
import java.util.*;
import java.io.*;
 
public class fileCrawler {
 
  private WorkQueue workQ;
  static int i = 0;
 
 private class Worker implements Runnable {
 
  private WorkQueue queue;
 
  public Worker(WorkQueue q) {
   queue = q;
  }
 
//  since main thread has placed all directories into the workQ, we
//  know that all of them are legal directories; therefore, do not need
//  to try ... catch in the while loop below
 
  public void run() {
   String name;
   while ((name = queue.remove()) != null) {
    File file = new File(name);
    String entries[] = file.list();
    if (entries == null)
     continue;
    for (String entry : entries) {
     if (entry.compareTo(".") == 0)
      continue;
     if (entry.compareTo("..") == 0)
      continue;
     String fn = name + "\\" + entry;
     System.out.println(fn);
    }
   }
  }
 }
 
 public fileCrawler() {
  workQ = new WorkQueue();
 }
 
 public Worker createWorker() {
  return new Worker(workQ);
 }
 
 
// need try ... catch below in case the directory is not legal
 
 public void processDirectory(String dir) {
   try{
   File file = new File(dir);
   if (file.isDirectory()) {
    String entries[] = file.list();
    if (entries != null)
     workQ.add(dir);
 
    for (String entry : entries) {
     String subdir;
     if (entry.compareTo(".") == 0)
      continue;
     if (entry.compareTo("..") == 0)
      continue;
     if (dir.endsWith("\\"))
      subdir = dir+entry;
     else
      subdir = dir+"\\"+entry;
     processDirectory(subdir);
    }
   }}catch(Exception e){}
 }
 
 public static void main(String Args[]) {
 
  fileCrawler fc = new fileCrawler();
 
//  now start all of the worker threads
 
  int N = 5;
  ArrayList<Thread> thread = new ArrayList<Thread>(N);
  for (int i = 0; i < N; i++) {
   Thread t = new Thread(fc.createWorker());
   thread.add(t);
   t.start();
  }
 
//  now place each directory into the workQ
 
  fc.processDirectory(Args[0]);
 
//  indicate that there are no more directories to add
 
  fc.workQ.finish();
 
  for (int i = 0; i < N; i++){
   try {
    thread.get(i).join();
   } catch (Exception e) {};
  }
 }
}



package FileName;
import java.util.*;
 
/**
 * A blocking FIFO queue of strings shared between one or more producers and
 * one or more consumers.
 *
 * <p>Every public method is {@code synchronized} on this instance, so the
 * underlying list does not need to be thread-safe itself. Consumers block in
 * {@link #remove()} while the queue is empty, until either an element is
 * added or {@link #finish()} is called.
 */
public class WorkQueue {

  /** Pending items, oldest first. Guarded by this object's monitor. */
  private final LinkedList<String> workQ;

  /** Set once {@link #finish()} has been called: no more items will be added. */
  private boolean done;

  /** Creates an empty queue that is still open for additions. */
  public WorkQueue() {
    workQ = new LinkedList<String>();
    done = false;
  }

  /**
   * Appends an item to the tail of the queue and wakes any waiting consumers.
   *
   * @param s the item to enqueue
   */
  public synchronized void add(String s) {
    workQ.add(s);
    notifyAll();
  }

  /**
   * Removes and returns the item at the head of the queue, blocking while the
   * queue is empty and not yet finished.
   *
   * <p>If the calling thread is interrupted while waiting, it keeps waiting
   * (the return contract is unchanged), but its interrupt status is restored
   * before this method returns so the caller can still observe it.
   *
   * @return the oldest queued item, or {@code null} if the queue is empty and
   *     {@link #finish()} has been called
   */
  public synchronized String remove() {
    boolean interrupted = false;
    try {
      // Loop rather than test once: wait() may wake spuriously, and another
      // consumer may have taken the item that triggered the notification.
      while (!done && workQ.isEmpty()) {
        try {
          wait();
        } catch (InterruptedException e) {
          // wait() cleared the interrupt flag; remember it for the finally block.
          interrupted = true;
        }
      }
      return workQ.isEmpty() ? null : workQ.removeFirst();
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Declares that no more items will be added and wakes all waiting
   * consumers. Items already queued can still be removed; after that,
   * {@link #remove()} returns {@code null}.
   */
  public synchronized void finish() {
    done = true;
    notifyAll();
  }
}

8 comments:

  1. I have a question: what is the use of the WorkQueue class? I have seen a file crawler written in fewer than 30 lines of code, so why complicate it?
    Jenik

    ReplyDelete
    Replies
    1. WorkQueue is the concurrent class where all data manipulation is done

      Delete
    2. Hi Jenik, can you post your 30 lines of code here? thanks!

      Delete
  2. This Java SE & Java EE article is practically oriented, with real-time examples. How Java EE addresses enterprise development is very important; for that, you need a practically oriented Java training course.

    Great Article

    Online Java Training
    Online Java Training
    Java Training Institutes in Chennai
    J2EE training
    Java Training in Chennai
    Java Interview Questions
    Best Recommended books for Spring framework

    ReplyDelete
    Replies
    1. Java Training Institutes Java Training Institutes Java EE Training in Chennai Java EE Training in Chennai Java Spring Hibernate Training Institutes in Chennai J2EE Training Institutes in Chennai J2EE Training Institutes in Chennai Core Java Training Institutes in Chennai Core Java Training Institutes in Chennai

      Hibernate Online Training Hibernate Online Training Hibernate Training in Chennai Hibernate Training in Chennai Java Online Training Java Online Training

      Delete
  3. Hey, I'm getting this error: "Array index out of bounds". What should I do?

    ReplyDelete
  4. It is really a great work and the way in which u r sharing the knowledge is excellent.
    Thanks for helping me to understand basic concepts. As a beginner in java programming your post help me a lot.Thanks for your informative article.java training in chennai | chennai's no.1 java training in chennai

    ReplyDelete