How To Download All The Urls One By One And Keep In Different Folders
I have one HTML file in which I have kept all the URLs (download links for CSV files). I want a tool/program that goes through each URL one by one, downloads the file, and then keeps the downloaded files in different folders.
Solution 1:
I used a different approach: Jsoup parses the HTML file, and a separate helper class downloads each linked file.
import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*//**
*
* @author nudanesh
*/publicclassURLDownload {
private Document doc;
Stringurl="", folder, file;
privatefinal File sourceFile;
inti=1;
intr=1, c = 1;
intanchorCol=3;
Library lib;
URLDownload() {
lib = newLibrary();
sourceFile = newFile("Download.html");
try {
doc = Jsoup.parse(sourceFile, "UTF-8");
} catch (IOException ex) {
Logger.getLogger(URLDownload.class.getName()).log(Level.SEVERE, null, ex);
}
//Elements links = doc.select("a[href]");Elementsrows= doc.select("tr");
System.out.println("Size=" + rows.size());
for (Element row : rows) {
Elementscols= row.getElementsByTag("td");
c = 1;
for (Element col : cols) {
System.out.println("Row"+r);
if (c == 1) {
file = col.text();//System.out.println("File in main"+file);
} elseif (c == 2) {
folder = col.text();//System.out.println("Folder in main"+folder);
} else {
try {
url = col.getElementsByTag("a").attr("href");
} catch (Exception e) {
System.out.print("-");
}
}
c++;
}
if (!url.equals("")) {
lib.setLocation(file,folder);
lib.downloadFile(url);
}
url = "";
i++;
r++;
}
}
publicstaticvoidmain(String arg[]) {
newURLDownload();
}
}
The following is the Library class file:
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*//**
*
* @author nudanesh
*/publicclassLibrary {
booleandownloaded=false;
Thread t;
intwaitTime=0;
StringbaseLoc="";
intsize=1024, ByteWritten = 0;
URL url;
URLConnectionuCon=null;
StringfolderLoc="", file = "firstFile.csv";
File loc;
private OutputStream outStream;
private InputStream is=null;
privatebyte[] buf;
privateint ByteRead;
privateintFolderInUrl=4;
privatebooleanrootFolder=true;
private File resultFile;
private FileOutputStream fileResult;
private XSSFWorkbook workbookResult;
private XSSFSheet sheetResult;
privateintupdateExcelRowNum= -1;
privateintupdateExcelColNum= -1;
String date;
privateintwaitLimit=900000;
Library() {
/*System.out.print(Calendar.getInstance().toString());
Date d=new Date();
String date=d.toString();
System.out.println(date);*///t = new Thread(this);// t.start();
date = newSimpleDateFormat("yyyy_MM_dd_HH_mm_ss").format(Calendar.getInstance().getTime());
System.out.print(date);
baseLoc = date + "/";
WriteDataToExcel();
baseLoc += "Business Reports/";
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Report Name");
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Path");
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Status");
updateExcel();
}
publicvoidsetLocation(String a, String b) {
file = a;
file += ".csv";
folderLoc = baseLoc + getFolderPath(b);
// System.out.println("File Name: "+file);// System.out.println("Folder loc: "+folderLoc);
}
public String getFolderPath(String b) {
Stringpath="";
try {
System.out.println("path" + b);
path = b;
// path = java.net.URLDecoder.decode(b, "UTF-8");
String p[] = path.split("/");
path = "";
for (inti= FolderInUrl; i < p.length - 1; i++) {
rootFolder = false;
p[i] = removeSpacesAtEnd(p[i]);
path = path + p[i] + "/";
}
} catch (Exception ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
return path;
}
publicvoiddownloadFile(String urlString) {
// System.out.println("Started");try {
url = newURL(urlString);
} catch (MalformedURLException ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
try {
loc = newFile(folderLoc);
if (!loc.exists()) {
loc.mkdirs();
}
outStream = newBufferedOutputStream(newFileOutputStream(folderLoc + file));
uCon = url.openConnection();
uCon.setReadTimeout(waitLimit);
is = uCon.getInputStream();
downloaded=true;
buf = newbyte[size];
while ((ByteRead = is.read(buf)) != -1) {
System.out.println("while executing" + ByteRead);
outStream.write(buf, 0, ByteRead);
ByteWritten += ByteRead;
}
//System.out.println("Downloaded" + ByteWritten);
resetCounters();
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
if (ByteWritten < 1000) {
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
} else {
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Downloaded ");
}
updateExcel();
} catch (Exception e) {
System.out.println("error catch" + e);
resetCounters();
createRowExcel(updateExcelRowNum);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Rejected the Download after waiting " + (waitLimit / 60000) + " minutes");
updateExcel();
waitTime = 0;
} finally {
try {
System.out.println("Error in streams");
if(downloaded)
is.close();
outStream.close();
downloaded= false;
} catch (IOException e) {
e.printStackTrace();
}
}
}
publicvoidmoveToFolder(String reportName, String path) {
try {
Filerepo=newFile(folderLoc + "/" + reportName + ".csv");
path = folderLoc + "/" + path;
FilepathFolder=newFile(path);
if (!pathFolder.exists()) {
pathFolder.mkdirs();
}
pathFolder = newFile(path + reportName + ".csv");
System.out.println("Path=" + pathFolder.getAbsolutePath() + "\nReport path=" + repo.getAbsolutePath());
System.out.println("Source" + repo.getAbsolutePath());
//System.out.println("Status" + repo.renameTo(new File(pathFolder.getAbsolutePath())));
System.out.println("Status" + Files.move(repo.toPath(), newFile(pathFolder.getAbsolutePath()).toPath(), REPLACE_EXISTING));
//Files.
} catch (Exception e) {
System.out.println("error while moving" + e);
}
}
public String changeSpecialCharacters(String report) {
report = report.replaceAll(":", "_");
return report;
}
public String removeSpacesAtEnd(String inputPath) {
for (inti= inputPath.length() - 1; i >= 0; i--) {
if (inputPath.charAt(i) != ' ') {
break;
} else {
System.out.println("Before string is" + inputPath);
inputPath = inputPath.substring(0, i);
System.out.println("AFter string is" + inputPath);
}
}
return inputPath;
}
publicvoidWriteDataToExcel() {
try {
// file = new FileInputStream(new File("config.xlsx"));// File resultFolder = new File("Results");// if (resultFolder.exists()) {// deleteDirectory(resultFolder);// }// resultFolder.mkdirs();if (!newFile(baseLoc).exists()) {
newFile(baseLoc).mkdirs();
}
resultFile = newFile(baseLoc + "Reports info " + date + ".xlsx");
System.out.println("Path" + resultFile.getAbsolutePath());
resultFile.createNewFile();
// rFilePath = resultFile.getAbsolutePath();
fileResult = newFileOutputStream(resultFile);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
//Get the workbook instance for XLS file// System.out.println("file success");XSSFWorkbookworkbook=null;
try {
workbookResult = newXSSFWorkbook();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("Opening the browser");
//Get first sheet from the workbook
sheetResult = workbookResult.createSheet();
//sheetResult.set//Get iterator to all the rows in current sheet//Get iterator to all cells of current row//ar.add(folderLocation);// ar.add(firefoxProfileLocation);
}
publicvoidupdateExcel() {
try {
//fileResult.close();
fileResult = newFileOutputStream(resultFile);
workbookResult.write(fileResult);
fileResult.close();
} catch (Exception e) {
System.out.println(e);
}
}
publicvoidcreateRowExcel(int num) {
updateExcelRowNum++;
num = updateExcelRowNum;
sheetResult.createRow(num);
}
publicvoidupdateRowColExcel(int rnum, int cnum, String value) {
updateExcelColNum++;
cnum = updateExcelColNum;
sheetResult.getRow(rnum).createCell(cnum);
XSSFCellcell= sheetResult.getRow(rnum).getCell(cnum);
cell.setCellValue(value);
}
publicvoidupdateColumn(int rnum, int cnum, String value) {
XSSFCellcell= sheetResult.getRow(rnum).getCell(cnum);
cell.setCellValue(value);
}
publicvoidresetCounters() {
updateExcelColNum = -1;
}
/* @Override
public void run() {
while (true) {
if (true) {
waitTime += 1000;
System.out.println(waitTime);
if (waitTime > waitLimit) {
try {
is.close();
outStream.close();
//downloaded=false;
// cancelDownload=true;
} catch (Exception ex) {
Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
try {
Thread.sleep(1000);
} catch (Exception e) {
}
}
}*/
}
Post a Comment for "How To Download All The Urls One By One And Keep In Different Folders"