Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions Problem-1/Problem-1unittesting.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
Test case 1-Valid URL and words file:
Input:
A valid URL text file that contains one URL per line.
A valid words text file that contains one word per line.
Output:
The top three words with their frequency for each URL in the URL file.
The total frequency of each word from all URLs combined.

https://github.com/hotwax/training-assignment/wiki/Module---2-Implementation
is 4
the 4
cricket 0

https://www.espncricinfo.com/

is 4
cricket 25
the 2

https://www.google.com/search?q=football&oq=football&aqs=chrome..69i57j0i67i433l4j46i67i433j0i67i433l3j0i67.2762j0j4&sourceid=chrome&ie=UTF-8

is 0
the 12
cricket 0


cricket 25
is 8
the 18
=====================================================================================================================================================================================


Test case 2-Invalid URL file:
Input:
An invalid URL text file that does not exist or is empty.
A valid words text file that contains one word per line.
Output:
The file cannot be read or is empty.


Test case 3-Invalid words file:
Input:
A valid URL text file that contains one URL per line.
An invalid words text file that does not exist or is empty.
Output:
the file cannot be read or is empty.


Test case 4-Invalid URL:
Input:
A valid URL text file that contains one URL per line.
A valid words text file that contains one word per line.
One or more invalid URLs in the URL file.
Output:
The top three words with their frequency for each valid URL in the URL file.
An error message for each invalid URL, indicating that it cannot be parsed.


Test case 5-Invalid word:
Input:
A valid URL text file that contains one URL per line.
A valid words text file that contains one or more invalid words.
Output:
The top three words with their frequency for each URL in the URL file, excluding the invalid words.
The total frequency of each valid word from all URLs combined.


Test case 6-Invalid input type:
Input:
A valid URL text file that contains one URL per line.
A valid words text file that contains one word per line.
One or both of the input files are not in the correct format (e.g. a PDF file instead of a text file).
Output:
the file format is incorrect.


Test case 7-Edge case: Empty URL file:
Input:
An empty URL text file.
A valid words text file that contains one word per line.
Output:
the URL file is empty.


Test case 8-Edge case: Empty words file:
Input:
A valid URL text file that contains one URL per line.
An empty words text file.
Output:
the words file is empty.
13 changes: 13 additions & 0 deletions Problem-1/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"name": "employees",
"version": "1.0.0",
"type": "module",
"dependencies": {
"axios": "^1.3.5",
"cheerio": "^1.0.0-rc.3",
"fs": "^0.0.1-security",
"request": "^2.88.2",
"readline": "1.3.0",
"request-promise-native": "^1.0.9"
}
}
Binary file added Problem-1/problem-1snapshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions Problem-1/urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://github.com/hotwax/training-assignment/wiki/Module---2-Implementation
https://www.espncricinfo.com/
https://www.google.com/search?q=football&oq=football&aqs=chrome..69i57j0i67i433l4j46i67i433j0i67i433l3j0i67.2762j0j4&sourceid=chrome&ie=UTF-8
85 changes: 85 additions & 0 deletions Problem-1/webscrapper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Import necessary packages
import fs from "fs";
import cheerio from "cheerio";
import axios from "axios";

// Define file paths for URLs and words
const urlsPath = 'urls.txt';
const wordsPath = 'words.txt';

/**
 * Split raw file content into trimmed, non-empty lines.
 *
 * Handles both LF and CRLF line endings so that entries read from files
 * saved on Windows do not carry a trailing carriage return.
 *
 * @param {string} content - Full text of a file.
 * @returns {string[]} The non-blank lines, each with surrounding whitespace removed.
 */
function parseLines(content) {
  return content
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== '');
}

/**
 * Read a text file and return its non-blank lines.
 *
 * A file that cannot be read is reported on stderr and treated as empty,
 * so the caller can handle "missing" and "empty" the same way.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string[]} The file's non-blank lines, or an empty array on failure.
 */
function readLines(filePath) {
  try {
    return parseLines(fs.readFileSync(filePath, 'utf-8'));
  } catch (error) {
    console.error(`Unable to read "${filePath}": ${error.message}`);
    return [];
  }
}

// Load the URLs to scrape and the words to count (one entry per line)
const urls = readLines(urlsPath);
const words = readLines(wordsPath);

// Accumulates the total count of each word across all URLs.
// Null-prototype object: a word such as "constructor" must not collide
// with properties inherited from Object.prototype.
const wordCounts = Object.create(null);

/**
 * Count the case-insensitive, non-overlapping occurrences of a word in a string.
 *
 * The word is matched literally: regular-expression metacharacters in it
 * (for example "." or "+") are escaped before the pattern is built, so
 * "a.b" does not match "axb" and "c++" does not raise a SyntaxError.
 *
 * @param {string} string - Text to search.
 * @param {string} word - Literal text to look for.
 * @returns {number} Number of matches; 0 when `word` is empty or missing.
 */
function countOccurrences(string, word) {
  const needle = String(word ?? '');
  // An empty pattern would match at every position of the text.
  if (needle === '') {
    return 0;
  }
  const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const matches = string.match(new RegExp(escaped, 'gi'));
  return matches === null ? 0 : matches.length;
}

/**
 * Scrape every URL, count the occurrences of every word in each page's body
 * text, print the top three words per URL, and accumulate the totals.
 *
 * A URL that cannot be fetched or parsed is reported on stderr and skipped,
 * so the remaining URLs are still processed.
 *
 * @returns {Promise<Object<string, number>>} The module-level `wordCounts`
 *   object, mapping each word to its total count across all processed URLs.
 * @throws {Error} "urls file is empty" when there are no URLs to process.
 * @throws {Error} "words file is empty" when there are no words to count.
 */
const main = async () => {
  // Check that there are URLs and words to process
  if (urls.length === 0) {
    throw new Error('urls file is empty');
  }
  if (words.length === 0) {
    throw new Error('words file is empty');
  }

  // Strip stray carriage returns once, instead of once per URL
  const cleanWords = words.map((word) => word.replace(/\r/g, ''));

  for (const url of urls) {
    let text;
    try {
      const response = await axios.get(url);
      const $ = cheerio.load(response.data);
      text = $('body').text().toLowerCase();
    } catch (error) {
      console.error(`Skipping invalid or unreachable URL "${url}": ${error.message}`);
      continue;
    }

    // Null-prototype object so a word like "__proto__" is an ordinary key
    const counts = Object.create(null);
    for (const word of cleanWords) {
      const count = countOccurrences(text, word.toLowerCase());
      counts[word] = count;
      // Own-property check: an inherited key must not be mistaken for a previous count
      wordCounts[word] = Object.hasOwn(wordCounts, word) ? wordCounts[word] + count : count;
    }

    // Sort the word counts in descending order and print the top three words for the URL
    const topWords = Object.entries(counts)
      .sort(([, countA], [, countB]) => countB - countA)
      .slice(0, 3)
      .map(([word, count]) => `${word} - ${count}`);
    console.log(`${url}\n${topWords.join('\n')}\n`);
  }

  return wordCounts;
};

// Run the scraper and print the total word counts in descending order.
// A failure (for example an empty URLs or words file) is reported as a
// short message on stderr with a non-zero exit code, instead of surfacing
// as an uncaught exception.
try {
  const data = await main();
  const sortedWordCounts = Object.entries(data).sort(
    ([, countA], [, countB]) => countB - countA,
  );
  console.log('Total word counts:\n');
  for (const [word, count] of sortedWordCounts) {
    console.log(`${word} - ${count}`);
  }
} catch (error) {
  // main may reject with an Error or with a plain string message
  console.error(error instanceof Error ? error.message : error);
  process.exitCode = 1;
}
3 changes: 3 additions & 0 deletions Problem-1/words.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
is
the
cricket
92 changes: 92 additions & 0 deletions Problem-4/Problem-4unittesting.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
-Test Case 1: Serialization
Input:
Option 1 (for Serialize file)
test.json (file name)
Output:
Serialization done successfully.

The 'test.json' file should contain the serialized data of the student objects in JSON format.



-Test Case 2: Deserialization
Input:
Option 2 (for Deserialize file)
test.json (file name)
Output:
Deserialization done successfully.
[ Student {
firstName: 'Rishabh',
dateOfBirth: 1990-01-01T00:00:00.000Z,
address: Address {
city: 'Indore',
state: 'MP',
pinCode: 452001,
country: 'India'
}
},
Student {
firstName: 'Sarthak',
dateOfBirth: 1992-02-02T00:00:00.000Z,
address: Address {
city: 'Bhopal',
state: 'MP',
pinCode: 452045,
country: 'India'
}
},
Student {
firstName: 'Vaibhav',
dateOfBirth: 2001-01-01T00:00:00.000Z,
address: Address {
city: 'Ujjain',
state: 'MP',
pinCode: 452076,
country: 'India'
}
},
Student {
firstName: 'Nayan',
dateOfBirth: 2001-01-01T00:00:00.000Z,
address: Address {
city: 'Dewas',
state: 'MP',
pinCode: 452087,
country: 'India'
}
} ]

The output should contain the deserialized student objects with their properties.



-Test Case 3: File not found
Input:
Option 2 (for Deserialize file)
test1.json (non-existent file name)
Output:
File not found.

The code should handle the error if the file specified for deserialization does not exist.



-Test Case 4: Invalid choice
Input:
Option 4 (an invalid choice)
Output:
Invalid choice.

The code should handle the error if the user enters an invalid choice.



-Test Case 5: Invalid date format
Input:
Option 1 (for Serialize file)
test.json (file name)
1990-01-01T00:00:00.000Z (invalid date format)
Output:
Serialization done successfully.

The 'test.json' file should contain the serialized data of the student objects in JSON format, with the invalid date format handled by the code.
Binary file added Problem-4/Screenshot (33).png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Problem-4/Screenshot (34).png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Problem-4/Screenshot (35).png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions Problem-4/output1.ser
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"firstName":"Rishabh","dateOfBirth":"1990-01-01T00:00:00.000Z","address":{"city":"Indore","state":"MP","pinCode":452001,"country":"India"}},{"firstName":"Sarthak","dateOfBirth":"1992-02-02T00:00:00.000Z","address":{"city":"Bhopal","state":"MP","pinCode":452045,"country":"India"}},{"firstName":"Vaibhav","dateOfBirth":"2001-01-01T00:00:00.000Z","address":{"city":"Ujjain","state":"MP","pinCode":452076,"country":"India"}},{"firstName":"Nayan","dateOfBirth":"2001-01-01T00:00:00.000Z","address":{"city":"Dewas","state":"MP","pinCode":452087,"country":"India"}}]
1 change: 1 addition & 0 deletions Problem-4/output2.ser
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"firstName":"Rishabh","dateOfBirth":"1990-01-01T00:00:00.000Z","address":{"city":"Indore","state":"MP","pinCode":452001,"country":"India"}},{"firstName":"Sarthak","dateOfBirth":"1992-02-02T00:00:00.000Z","address":{"city":"Bhopal","state":"MP","pinCode":452045,"country":"India"}},{"firstName":"Vaibhav","dateOfBirth":"2001-01-01T00:00:00.000Z","address":{"city":"Ujjain","state":"MP","pinCode":452076,"country":"India"}},{"firstName":"Nayan","dateOfBirth":"2001-01-01T00:00:00.000Z","address":{"city":"Dewas","state":"MP","pinCode":452087,"country":"India"}}]
19 changes: 19 additions & 0 deletions Problem-4/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "nodejs",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"@types/node": "^18.0.6",
"axios": "^1.3.4",
"cheerio": "^1.0.0-rc.12",
"node-fetch": "^3.2.6",
"request": "^2.88.2"
}
}
Loading