Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions include/huffman/huffman_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,31 @@
#include <vector>
#include <map>

// struct HNode {
// uint8_t symbol;
// uint64_t freq;
// std::shared_ptr<HNode> left, right;
// bool isLeaf() const { return !left && !right; }
// };

// Node of the Huffman tree.
//
// Symbols are strings rather than single bytes, so one node can stand for a
// multi-character token as well as for a single character.
struct HNode {
    // Text of the symbol held by this node; empty when the node has no symbol.
    std::string symbol;
    // Weight of the node (name suggests an occurrence count; how it is filled
    // in is decided by code outside this header). Zero-initialised so a
    // default-constructed node never carries an indeterminate value.
    uint64_t freq = 0;
    // Child links; both are null when the node has no children.
    std::shared_ptr<HNode> left, right;

    // A node only counts as a leaf when it holds a symbol AND has no children.
    bool isLeaf() const { return !symbol.empty() && !left && !right; }
};

// Huffman tree whose symbols are strings.
//
// Only the declarations live in this header; the notes below describe what the
// signatures show. See the definitions for the exact behaviour.
class HuffmanTree {
public:
    HuffmanTree();

    // Takes a table mapping each symbol string to a 64-bit count. The table is
    // a map, so it may hold any number of entries.
    void build(const std::map<std::string, uint64_t> &freqs);

    // Returns one bit sequence per symbol string (presumably the Huffman code
    // of that symbol; confirm against the definition).
    std::map<std::string, std::vector<bool>> getCodes() const;

    // Stream-based persistence hooks; the on-disk format is defined by the
    // implementation, not by this header.
    void serialize(std::ostream &out) const;
    void deserialize(std::istream &in);

private:
    // Top node of the tree.
    std::shared_ptr<HNode> root_;

    // Helper behind getCodes(): receives a node, the bit path accumulated so
    // far and the output table to fill.
    void buildCodes(std::shared_ptr<HNode> node, std::vector<bool> &path, std::map<std::string, std::vector<bool>> &out) const;
};

#endif

128 changes: 111 additions & 17 deletions src/freq_counter/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,122 @@
#include <string>
#include <cstring>
#include "../../include/huffman/huffman_tree.h"
#include <set>

// Simple freq_counter stub: count bytes and print summary
int main(int argc, char** argv){
if(argc < 2){
std::cerr << "Usage: freq_counter <file> [-o out.txt]\n";

// Wraps a single character in a one-character std::string, so individual
// characters can be used as keys alongside whole-word symbols.
std::string charToString(char c) {
    std::string single;
    single.push_back(c);
    return single;
}

// Scans `in` one character at a time and adds what it finds to `freqs`.
//
// - A run of letters, digits and '_' that starts with a letter or '_' is read
//   as one word. If the word is in `keyWords` it is counted once as a whole
//   symbol; otherwise each of its characters is counted separately.
// - Every other character (whitespace, punctuation, digits outside a word, ...)
//   is counted as a single-character symbol.
static void countSymbols(std::istream& in,
                         const std::set<std::string>& keyWords,
                         std::map<std::string, uint64_t>& freqs) {
    using Traits = std::istream::traits_type;

    char c;
    while (in.get(c)) {
        // <cctype> classifiers require a value representable as unsigned char;
        // passing a negative plain char (bytes >= 0x80) is undefined behaviour.
        const unsigned char uc = static_cast<unsigned char>(c);
        const bool startsWord = std::isalpha(uc) != 0 || c == '_';
        if (!startsWord) {
            ++freqs[charToString(c)];
            continue;
        }

        std::string word(1, c);
        // peek() inspects the next character without consuming it; it yields
        // either eof() or a value already in unsigned-char range.
        for (int next = in.peek();
             next != Traits::eof() && (std::isalnum(next) != 0 || next == '_');
             next = in.peek()) {
            word.push_back(static_cast<char>(in.get()));
        }

        if (keyWords.count(word) != 0) {
            ++freqs[word];
        } else {
            for (const char ch : word) {
                ++freqs[charToString(ch)];
            }
        }
    }
}

// freq_counter entry point.
//
// Usage: freq_counter <file1> [file2...] [-o out.freq]
//
// Counts symbol frequencies over all readable input files and writes the table
// as "symbol:frequency" lines, either to the file given after -o or to stdout.
// Returns 1 on a usage error or when the output file cannot be created, and 0
// otherwise. Input files that cannot be opened are reported and skipped.
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: freq_counter <file1> [file2...] [-o out.freq]\n";
        return 1;
    }

    // Words that are counted as one symbol instead of character by character.
    // TODO: extend this list, since the compressor is meant to handle several
    // programming languages.
    const std::set<std::string> keyWords = {
        "int", "double", "while", "return", "include", "new", "class", "public",
        "private", "protected", "if", "else", "for", "switch", "case", "break",
        "continue", "struct", "enum", "namespace", "using", "const", "char",
        "void", "unsigned", "long", "short", "bool", "true", "false", "nullptr", "Node", "Interface"
    };

    // Split the command line into input files and the optional -o target.
    std::vector<std::string> inputFiles;
    std::string outputFile;
    for (int i = 1; i < argc; ++i) {
        const std::string arg = argv[i];
        if (arg != "-o") {
            inputFiles.push_back(arg);
            continue;
        }
        if (i + 1 >= argc) {
            std::cerr << "Erro: must have a file after -o \n";
            return 1;
        }
        outputFile = argv[++i];
    }

    if (inputFiles.empty()) {
        std::cerr << "Error: Input file not found \n";
        return 1;
    }

    std::map<std::string, uint64_t> freqs;
    for (const std::string& fname : inputFiles) {
        std::ifstream in(fname);
        if (!in) {
            std::cerr << "Error: File didnt open: " << fname << "\n";
            continue;
        }
        countSymbols(in, keyWords, freqs);
    }

    // Write to the -o file when one was requested, otherwise to stdout.
    std::ofstream out_file;
    if (!outputFile.empty()) {
        out_file.open(outputFile);
        if (!out_file) {
            std::cerr << "Could not create output file: " << outputFile << "\n";
            return 1;
        }
    }
    std::ostream& out_stream =
        out_file.is_open() ? static_cast<std::ostream&>(out_file) : std::cout;

    // One "symbol:frequency" entry per line.
    // NOTE(review): symbols are written raw, so entries for ':' or '\n' are
    // ambiguous for a line-based reader; confirm how the consumer parses this.
    for (const auto& entry : freqs) {
        out_stream << entry.first << ":" << entry.second << "\n";
    }

    if (out_file.is_open()) {
        out_file.close();
        std::cout << "Frequency table saved in " << outputFile << "\n";
    }

    return 0;
}
Loading