How Can We Help?

< All Topics

Introduction to Web Crawlers

Web crawlers are automated scripts designed to browse and index web content. In the context of automatic test creation, web crawlers are used to systematically explore web applications, creating comprehensive test cases that serve as baselines for testing.

Benefits of Automatic Test Creation with Web Crawlers

  • Efficiency: Saves significant time and effort, especially for large projects.
  • Accuracy: Reduces the likelihood of human error in manual testing.
  • Comprehensiveness: Ensures thorough testing by covering all reachable parts of the application.

Imagium, when combined with web crawlers, can do real wonders. For larger projects, it can save coding effort worth a few weeks.

Web crawlers can crawl through all the public pages of your website; those pages can serve as a baseline, and subsequent runs can be compared against this baseline.

package Crawlers.copy;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.codec.binary.Base64;
import org.json.simple.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import io.restassured.RestAssured;
import io.restassured.response.Response;
import io.restassured.specification.RequestSpecification;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;


/**
 * Crawls a web site starting from {@link #urlSeed}, takes a browser screenshot of every
 * page that could be fetched, and posts each screenshot to {@link #validateEndPoint}
 * under the test run id obtained from {@link #getUIDEndPoint}.
 *
 * <p>All configuration lives in the static fields below. The class keeps its crawl state
 * in static fields as well, so it is meant to be run once per JVM via {@link #main}.
 */
public class Crwalers {

	static WebDriver driver;

	//Number of pages fetched successfully so far
	static int linkcount = 0;

	//To scan maximum number of pages set maxCount = -1;
	static int maxCount = 20;

	//Maximum link depth to follow (the seed page is level 1); -1 means no depth limit
	static int maxDepth = -1;

	//Set to true once maxCount pages have been fetched; stops further crawling
	static boolean exitLoop = false;
	static String uid = "";

	//Set project Key
	static String projectKey = "de8a3afc-9f58-440c-a424-3251bd05d9d9";

	//Set the Seed URL
	static String urlSeed ="https://en.wikipedia.org/wiki/Main_Page";

	//Set filter for the child pages, else keep it blank
	static String filter = "wikipedia.org";

	//Set Test Name
	static String testName = "Codeless Automation";

	//Set End points
	static String getUIDEndPoint ="http://192.168.10.13:80/api/GetUID";
	static String validateEndPoint = "http://192.168.10.13:80/api/Validate";

	/**
	 * Launches Chrome, requests a test run id, crawls from the seed URL and prints the
	 * number of pages fetched.
	 *
	 * @param args unused
	 * @throws Exception if the test run id cannot be obtained or the crawl fails
	 */
	public static void main(String args[]) throws Exception
	{
		//Using selenium Launch browser and navigate to URL
		System.setProperty("webdriver.chrome.driver", "C:\\drivers\\chromedriver.exe");
		driver = new ChromeDriver();
		try {
			driver.manage().window().maximize();

			uid = getUID(testName, projectKey);

			crawl(1, urlSeed, new ArrayList<String>());
			System.out.println("TotalCount: " + linkcount);
		} finally {
			// Always close the browser, even when the crawl aborts with an exception.
			driver.quit();
		}
	}

	/**
	 * Processes {@code url} and then recursively every not-yet-visited link on it whose
	 * absolute URL contains {@link #filter}.
	 *
	 * <p>Recursion stops when {@link #exitLoop} becomes true (page budget reached) or when
	 * {@code level} exceeds {@link #maxDepth} (if a depth limit is set).
	 *
	 * @param level   depth of {@code url}; the seed page is level 1
	 * @param url     absolute URL of the page to process
	 * @param visited URLs already handed to {@code request}; updated in place
	 * @throws IOException declared for callers; fetch failures are handled per page and
	 *                     do not propagate
	 */
	public static void crawl(int level, String url, ArrayList<String> visited) throws IOException
	{
		if (maxDepth >= 0 && level > maxDepth) {
			return;
		}

		Document doc = request(url, visited);
		if (doc == null) {
			return;
		}

		for (Element link : doc.select("a[href]"))
		{
			if (exitLoop) {
				// Page budget used up: no point in looking at the remaining links.
				return;
			}

			String nextLink = link.absUrl("href");
			// absUrl returns "" when the href cannot be resolved to an absolute URL.
			if (nextLink.isEmpty() || !nextLink.contains(filter) || visited.contains(nextLink)) {
				continue;
			}

			// level + 1, not level++: the post-increment passed the old value to the child
			// and bumped the local, so only the first link of each page was ever followed.
			crawl(level + 1, nextLink, visited);
		}
	}

	/**
	 * Fetches {@code url} with jsoup and, on HTTP 200, loads it in the browser, takes a
	 * screenshot and posts it via {@link #postRequest}.
	 *
	 * @param url absolute URL to fetch
	 * @param v   visited list; {@code url} is added to it whether or not the fetch succeeds
	 * @return the parsed document, or {@code null} if the page could not be processed
	 */
	private static Document request(String url, ArrayList<String> v)
	{
		// Record the URL up front so a page that fails is not fetched again each time
		// another page links to it.
		v.add(url);

		try {
			Connection con = Jsoup.connect(url);
			Document doc = con.get();

			if (con.response().statusCode() != 200) {
				return null;
			}

			linkcount += 1;
			if (maxCount > 0 && linkcount >= maxCount) {
				exitLoop = true;
			}

			driver.get(url);
			//driver.manage().window().setSize(new Dimension(1024, 768));
			JavascriptExecutor js = (JavascriptExecutor) driver;
			// Sets overflow:hidden on the body (presumably to keep scrollbars out of the screenshot).
			js.executeScript("return document.body.style.overflow = 'hidden';");

			// Fixed wait before the screenshot (presumably to let the page finish rendering).
			Thread.sleep(3000);
			String scrBase64 = ((TakesScreenshot) driver).getScreenshotAs(OutputType.BASE64);
			// '#' in the URL is replaced with '_' before it is used as the step name.
			postRequest(url.replace("#", "_"), uid, scrBase64);

			System.out.println("Link: " + url);
			System.out.println(doc.title());
			return doc;
		}
		catch (InterruptedException e) {
			// Preserve the interrupt for the caller and stop crawling further pages.
			Thread.currentThread().interrupt();
			exitLoop = true;
			System.err.println("Interrupted while processing " + url);
			return null;
		}
		catch (Exception e) {
			// Best effort per page: report the failure and let the crawl continue.
			System.err.println("Skipping " + url + ": " + e);
			return null;
		}
	}

	/**
	 * Reads {@code file} completely and returns its content as a Base64 string.
	 *
	 * @param file file to encode
	 * @return the Base64 text, or {@code null} if the file could not be read
	 */
	private static String encodeFileToBase64Binary(File file){
		String encodedfile = null;
		// try-with-resources closes the stream on every path.
		try (FileInputStream in = new FileInputStream(file)) {
			byte[] bytes = new byte[(int) file.length()];
			int offset = 0;
			// A single read() may return fewer bytes than requested, so loop until full.
			while (offset < bytes.length) {
				int n = in.read(bytes, offset, bytes.length - offset);
				if (n < 0) {
					throw new IOException("Unexpected end of file: " + file);
				}
				offset += n;
			}
			encodedfile = new String(Base64.encodeBase64(bytes), "UTF-8");
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is an IOException.
			e.printStackTrace();
		}
		return encodedfile;
	}

	/**
	 * Get unique Test ID for a specific project using Rest Assured.
	 *
	 * <p>Posts {@code TestName}, {@code ProjectKey} and {@code Mode=Default} as JSON to
	 * {@link #getUIDEndPoint} and returns the response body unchanged.
	 *
	 * @param testName   value sent as {@code TestName}
	 * @param projectKey value sent as {@code ProjectKey}
	 * @return the response body of the GetUID call
	 * @throws IllegalStateException if the request fails or the server answers with an
	 *                               HTTP status of 400 or above
	 */
	@SuppressWarnings("unchecked") // json-simple's JSONObject is a raw Map
	public static String getUID(String testName, String projectKey) {
		JSONObject json = new JSONObject();
		json.put("TestName", testName);
		json.put("ProjectKey", projectKey);
		json.put("Mode", "Default");

		Response response;
		try {
			RequestSpecification request = RestAssured.given();
			request.header("content-type", "application/json");
			request.body(json.toJSONString());
			response = request.when().post(getUIDEndPoint);
		} catch (Exception ex) {
			// Fail fast: returning the exception text would be used as a test run id later.
			throw new IllegalStateException("Could not obtain a test run id from " + getUIDEndPoint, ex);
		}

		int code = response.getStatusCode();
		if (code >= 400) {
			throw new IllegalStateException(
					"GetUID request to " + getUIDEndPoint + " failed with HTTP status " + code);
		}

		String response_id = response.getBody().asString();
		System.out.println("TestID: " + response_id);
		return response_id;
	}

	/**
	 * Posts one screenshot as JSON to {@link #validateEndPoint} using Rest Assured and
	 * prints the response body.
	 *
	 * @param stepName    value sent as {@code StepName}
	 * @param uid         test run id; double quotes are stripped before it is sent as
	 *                    {@code TestRunID}
	 * @param imagebase64 Base64 screenshot sent as {@code ImageBase64}
	 * @throws IOException declared for callers; not thrown by this method's own code
	 */
	@SuppressWarnings("unchecked") // json-simple's JSONObject is a raw Map
	public static void postRequest(String stepName, String uid, String imagebase64) throws IOException {
		RequestSpecification request1 = RestAssured.given();
		request1.header("content-type", "application/json");
		JSONObject jo = new JSONObject();
		jo.put("TestRunID", uid.replace("\"", ""));
		jo.put("StepName", stepName);
		jo.put("ImageBase64", imagebase64);
		// Print only the size: dumping the whole screenshot floods the console on every page.
		System.out.println("imagebase64 length: " + (imagebase64 == null ? 0 : imagebase64.length()));
		request1.body(jo.toJSONString());
		Response response1 = request1.when().post(validateEndPoint);
		String response_id1 = response1.getBody().asString();
		System.out.println("Response: " + response_id1);
	}


}

Additional Resources