Skip to content Skip to sidebar Skip to footer

How To Download All The Urls One By One And Keep In Different Folders

I have one HTML file in which I have kept all the URLs (download links for CSV files). I want a tool/program that goes through each URL one by one and downloads the file, then keep t…

Solution 1:

I used a different method: Jsoup to parse the HTML file, and a small helper class to download each linked file.

import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 *//**
 *
 * @author nudanesh
 */publicclassURLDownload {

    private Document doc;
    Stringurl="", folder, file;
    privatefinal File sourceFile;
    inti=1;
    intr=1, c = 1;
    intanchorCol=3;
    Library lib;

    URLDownload() {
        lib = newLibrary();
        sourceFile = newFile("Download.html");
        try {

            doc = Jsoup.parse(sourceFile, "UTF-8");
        } catch (IOException ex) {
            Logger.getLogger(URLDownload.class.getName()).log(Level.SEVERE, null, ex);
        }
        //Elements links = doc.select("a[href]");Elementsrows= doc.select("tr");
        System.out.println("Size=" + rows.size());
        for (Element row : rows) {


                Elementscols= row.getElementsByTag("td");
                c = 1;
                for (Element col : cols) {
                    System.out.println("Row"+r);
                    if (c == 1) {
                        file = col.text();//System.out.println("File in main"+file);
                    } elseif (c == 2) {
                        folder = col.text();//System.out.println("Folder in main"+folder);
                    } else {
                        try {
                            url = col.getElementsByTag("a").attr("href");
                        } catch (Exception e) {
                            System.out.print("-");
                        }
                    }

                    c++;
                }
                if (!url.equals("")) {
                    lib.setLocation(file,folder);
                    lib.downloadFile(url);
                }
                url = "";

            i++;
            r++;
        }
    }

    publicstaticvoidmain(String arg[]) {

        newURLDownload();
    }
}

The following is the Library class file:

import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 *//**
 *
 * @author nudanesh
 */publicclassLibrary  {

    booleandownloaded=false;
    Thread t;
    intwaitTime=0;
    StringbaseLoc="";
    intsize=1024, ByteWritten = 0;
    URL url;
    URLConnectionuCon=null;
    StringfolderLoc="", file = "firstFile.csv";
    File loc;
    private OutputStream outStream;
    private InputStream is=null;
    privatebyte[] buf;
    privateint ByteRead;
    privateintFolderInUrl=4;
    privatebooleanrootFolder=true;
    private File resultFile;
    private FileOutputStream fileResult;
    private XSSFWorkbook workbookResult;
    private XSSFSheet sheetResult;
    privateintupdateExcelRowNum= -1;
    privateintupdateExcelColNum= -1;
    String date;
    privateintwaitLimit=900000;

    Library() {
        /*System.out.print(Calendar.getInstance().toString());
         Date d=new Date();
         String date=d.toString();
         System.out.println(date);*///t = new Thread(this);// t.start();

        date = newSimpleDateFormat("yyyy_MM_dd_HH_mm_ss").format(Calendar.getInstance().getTime());
        System.out.print(date);
        baseLoc = date + "/";
        WriteDataToExcel();
        baseLoc += "Business Reports/";
        createRowExcel(updateExcelRowNum);
        updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Report Name");
        updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Path");
        updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Status");
        updateExcel();
    }

    publicvoidsetLocation(String a, String b) {
        file = a;
        file += ".csv";
        folderLoc = baseLoc + getFolderPath(b);

   // System.out.println("File Name: "+file);// System.out.println("Folder loc: "+folderLoc);
    }

    public String getFolderPath(String b) {
        Stringpath="";
        try {
            System.out.println("path" + b);
            path = b;
            // path = java.net.URLDecoder.decode(b, "UTF-8");
            String p[] = path.split("/");
            path = "";
            for (inti= FolderInUrl; i < p.length - 1; i++) {
                rootFolder = false;
                p[i] = removeSpacesAtEnd(p[i]);

                path = path + p[i] + "/";
            }

        } catch (Exception ex) {
            Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
        }
        return path;
    }

    publicvoiddownloadFile(String urlString) {
        // System.out.println("Started");try {
            url = newURL(urlString);
        } catch (MalformedURLException ex) {
            Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
        }
        try {
            loc = newFile(folderLoc);
            if (!loc.exists()) {
                loc.mkdirs();
            }


            outStream = newBufferedOutputStream(newFileOutputStream(folderLoc + file));
            uCon = url.openConnection();
        uCon.setReadTimeout(waitLimit);
                is = uCon.getInputStream();
               downloaded=true;
            buf = newbyte[size];

            while ((ByteRead = is.read(buf)) != -1) {
                System.out.println("while executing" + ByteRead);
                outStream.write(buf, 0, ByteRead);
                ByteWritten += ByteRead;
            }

            //System.out.println("Downloaded" + ByteWritten);
            resetCounters();
            createRowExcel(updateExcelRowNum);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
            if (ByteWritten < 1000) {
                updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
            } else {
                updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
            }
            updateExcel();
        } catch (Exception e) {
            System.out.println("error catch" + e);
            resetCounters();
            createRowExcel(updateExcelRowNum);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Rejected the Download after waiting " + (waitLimit / 60000) + " minutes");
            updateExcel();
            waitTime = 0;
        } finally {
            try {
                System.out.println("Error in streams");
                if(downloaded)
                is.close();
                outStream.close();
                downloaded= false;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    publicvoidmoveToFolder(String reportName, String path) {

        try {
            Filerepo=newFile(folderLoc + "/" + reportName + ".csv");
            path = folderLoc + "/" + path;
            FilepathFolder=newFile(path);
            if (!pathFolder.exists()) {
                pathFolder.mkdirs();
            }
            pathFolder = newFile(path + reportName + ".csv");
            System.out.println("Path=" + pathFolder.getAbsolutePath() + "\nReport path=" + repo.getAbsolutePath());
            System.out.println("Source" + repo.getAbsolutePath());

            //System.out.println("Status" + repo.renameTo(new File(pathFolder.getAbsolutePath())));
            System.out.println("Status" + Files.move(repo.toPath(), newFile(pathFolder.getAbsolutePath()).toPath(), REPLACE_EXISTING));
//Files.

        } catch (Exception e) {
            System.out.println("error while moving" + e);
        }

    }

    public String changeSpecialCharacters(String report) {

        report = report.replaceAll(":", "_");
        return report;
    }

    public String removeSpacesAtEnd(String inputPath) {

        for (inti= inputPath.length() - 1; i >= 0; i--) {
            if (inputPath.charAt(i) != ' ') {
                break;
            } else {
                System.out.println("Before string is" + inputPath);
                inputPath = inputPath.substring(0, i);
                System.out.println("AFter string is" + inputPath);
            }
        }

        return inputPath;
    }

    publicvoidWriteDataToExcel() {

        try {
            // file = new FileInputStream(new File("config.xlsx"));//   File resultFolder = new File("Results");//   if (resultFolder.exists()) {//       deleteDirectory(resultFolder);//   }// resultFolder.mkdirs();if (!newFile(baseLoc).exists()) {
                newFile(baseLoc).mkdirs();
            }
            resultFile = newFile(baseLoc + "Reports info " + date + ".xlsx");
            System.out.println("Path" + resultFile.getAbsolutePath());
            resultFile.createNewFile();
            // rFilePath = resultFile.getAbsolutePath();

            fileResult = newFileOutputStream(resultFile);
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        //Get the workbook instance for XLS file//      System.out.println("file success");XSSFWorkbookworkbook=null;

        try {

            workbookResult = newXSSFWorkbook();
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        System.out.println("Opening the browser");
        //Get first sheet from the workbook

        sheetResult = workbookResult.createSheet();
        //sheetResult.set//Get iterator to all the rows in current sheet//Get iterator to all cells of current row//ar.add(folderLocation);//  ar.add(firefoxProfileLocation);
    }

    publicvoidupdateExcel() {
        try {
            //fileResult.close();

            fileResult = newFileOutputStream(resultFile);
            workbookResult.write(fileResult);
            fileResult.close();
        } catch (Exception e) {
            System.out.println(e);
        }

    }

    publicvoidcreateRowExcel(int num) {
        updateExcelRowNum++;
        num = updateExcelRowNum;
        sheetResult.createRow(num);

    }

    publicvoidupdateRowColExcel(int rnum, int cnum, String value) {
        updateExcelColNum++;
        cnum = updateExcelColNum;
        sheetResult.getRow(rnum).createCell(cnum);
        XSSFCellcell= sheetResult.getRow(rnum).getCell(cnum);
        cell.setCellValue(value);

    }

    publicvoidupdateColumn(int rnum, int cnum, String value) {
        XSSFCellcell= sheetResult.getRow(rnum).getCell(cnum);
        cell.setCellValue(value);

    }

    publicvoidresetCounters() {
        updateExcelColNum = -1;

    }

 /*   @Override
    public void run() {
        while (true) {
            if (true) {
                waitTime += 1000;
System.out.println(waitTime);
                if (waitTime > waitLimit) {
                    try {
                        is.close();
                        outStream.close();
                    //downloaded=false;
                        // cancelDownload=true;
                    } catch (Exception ex) {
                        Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
                    }

                }
            }
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
            }

        }
    }*/

}

Post a Comment for "How To Download All The Urls One By One And Keep In Different Folders"