// REVIEW NOTES -- commentary only; the patch text below is reproduced unchanged.
//
// This is a `git diff` touching two files. It has been flattened onto a few long
// lines, and text that sat between '<' and '>' appears to have been stripped during
// extraction (bare `#include`, `std::shared_ptr left, right`, `static_cast(c)`,
// `for(int i=1;i freqs;`). Treat it as a damaged record of the change, not as a
// patch that can be applied.
//
// include/huffman/huffman_tree.h
//   * Adds the previous HNode definition as a commented-out block.
//   * HNode::symbol changes from uint8_t to std::string.
//   * HNode::isLeaf() additionally requires a non-empty symbol, so a childless node
//     whose symbol is empty is no longer reported as a leaf.
//   * HuffmanTree::build() takes a std::map instead of a std::vector; getCodes() and
//     buildCodes() are re-declared as well. Their template arguments are not visible
//     in this text.
//   * NOTE(review): the trailing "freqs size 256" comment on build() describes the
//     old vector parameter and looks stale for a map -- confirm and update.
//   * NOTE(review): the header now uses std::string; the include targets are not
//     visible here, so confirm that <string> is included.
//
// src/freq_counter/main.cpp
//   * Replaces the byte-counting stub. Command-line arguments are input files;
//     "-o" followed by a path selects an output file, otherwise std::cout is used.
//     "-o" with nothing after it, or no input files at all, prints an error and
//     returns 1.
//   * Each input is read one char at a time. A run that starts with a letter or '_'
//     and continues with alphanumerics or '_' is collected as a word. A word found
//     in keyWords is counted as a single symbol; any other word is counted
//     character by character. Every other character is counted on its own.
//   * The table is written as "symbol:frequency", one entry per line.
//   * NOTE(review): std::isalpha / std::isalnum / std::isspace are called with a
//     plain `char` (and with in.peek() results). Where char is signed, bytes above
//     0x7F become negative, which these functions do not accept -- cast to
//     unsigned char first. Confirm against the <cctype> documentation.
//   * NOTE(review): the `std::isspace(c)` branch and the final `else` branch run the
//     same statement, so the distinction currently has no effect.
//   * NOTE(review): inputs are opened with `std::ifstream in(fname)` and no open-mode
//     argument; presumably byte-exact counts are wanted for a compressor -- confirm
//     whether std::ios::binary is needed.
//   * NOTE(review): symbols are printed raw, so a symbol that is '\n' or ':' makes
//     the "symbol:frequency" lines ambiguous for any reader of the table.
//   * NOTE(review): three messages stream a path directly after the text with no
//     separator ("Error: File didnt open", "Could not create output file",
//     "Frequency table saved in").
//   * NOTE(review): an input that cannot be opened is skipped with `continue` and
//     the program still returns 0.
//   * NOTE(review): the include targets are not visible here; confirm the headers
//     for std::set, std::map and the <cctype> functions are present.
//   * The new main.cpp has no newline at end of file.
//
diff --git a/include/huffman/huffman_tree.h b/include/huffman/huffman_tree.h index d60bdc5..160cc4c 100644 --- a/include/huffman/huffman_tree.h +++ b/include/huffman/huffman_tree.h @@ -6,23 +6,31 @@ #include #include +// struct HNode { +// uint8_t symbol; +// uint64_t freq; +// std::shared_ptr left, right; +// bool isLeaf() const { return !left && !right; } +// }; + struct HNode { - uint8_t symbol; + std::string symbol; uint64_t freq; std::shared_ptr left, right; - bool isLeaf() const { return !left && !right; } + // só é "folha" se tiver um símbolo && não tiver filhos. + bool isLeaf() const { return !symbol.empty() && !left && !right; } }; class HuffmanTree { public: HuffmanTree(); - void build(const std::vector &freqs); // freqs size 256 - std::map> getCodes() const; + void build(const std::map &freqs); // freqs size 256 + std::map> getCodes() const; void serialize(std::ostream &out) const; void deserialize(std::istream &in); private: std::shared_ptr root_; - void buildCodes(std::shared_ptr node, std::vector &path, std::map> &out) const; + void buildCodes(std::shared_ptr node, std::vector &path, std::map> &out) const; }; - #endif + diff --git a/src/freq_counter/main.cpp b/src/freq_counter/main.cpp index c7d69c7..bca384c 100644 --- a/src/freq_counter/main.cpp +++ b/src/freq_counter/main.cpp @@ -5,28 +5,122 @@ #include #include #include "../../include/huffman/huffman_tree.h" +#include -// Simple freq_counter stub: count bytes and print summary -int main(int argc, char** argv){ - if(argc < 2){ - std::cerr << "Usage: freq_counter [-o out.txt]\n"; + +//Function to help in case the loop find a char in the file +std::string charToString(char c) { + return std::string(1, c); +} + +int main(int argc, char** argv) { + + if (argc < 2) { + std::cerr << "Usage: freq_counter [file2...] 
[-o out.freq]\n"; return 1; } - std::vector freqs(256,0); - for(int i=1;i freqs; + std::set keyWords = { + "int", "double", "while", "return", "include", "new", "class", "public", + "private", "protected", "if", "else", "for", "switch", "case", "break", + "continue", "struct", "enum", "namespace", "using", "const", "char", + "void", "unsigned", "long", "short", "bool", "true", "false", "nullptr", "Node", "Interface" + //Adicioanr mais, tendo em vista que o professor pediu que seja compressor de várias linguagens + }; + + std::vector inputFiles; + std::string outputFile = ""; + + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "-o") { + + if (i + 1 < argc) { + outputFile = argv[i + 1]; + i++; + } else { + std::cerr << "Erro: must have a file after -o \n"; + return 1; + } + } + else { + inputFiles.push_back(arg); + } + } + + if (inputFiles.empty()) { + std::cerr << "Error: Input file not found \n"; + return 1; + } + + for (const std::string& fname : inputFiles) { + std::ifstream in(fname); + if (!in) { + std::cerr << "Error: File didnt open" << fname << "\n"; + continue; + } char c; - while(in.get(c)){ - unsigned char uc = static_cast(c); - freqs[uc]++; + + // O loop principal lê um caractere de cada vez + while (in.get(c)) { + + //3 condições diferentes, para identificar as possibilidades existentes de simbolos + + // Identificar o inicio de alguma palavra reservada ou variável + if (std::isalpha(c) || c == '_') { + std::string word; + word.push_back(c); + + //in.peek() identifica o proximo char sem consumir + while (in.peek() != EOF && (std::isalnum(in.peek()) || in.peek() == '_')) { + word.push_back(in.get()); + } + + if (keyWords.count(word)) { + + freqs[word]++; + } else { + + for (char ch : word) { + freqs[charToString(ch)]++; + } + } + } + + //Identificar o inicio de espaços, como \n, espaço em branco + + else if (std::isspace(c)) { + freqs[charToString(c)]++; + } + else { + freqs[charToString(c)]++; + } } } - // print non-zero 
frequencies - for(size_t i=0;i " << freqs[i] << "\n"; + + std::ostream* out_stream = &std::cout; + std::ofstream out_file; + + if (!outputFile.empty()) { + out_file.open(outputFile); + if (!out_file) { + std::cerr << "Could not create output file" << outputFile << "\n"; + return 1; + } + out_stream = &out_file; + } + + // Formato do map simbolo:fequencia + for (const auto& pair : freqs) { + (*out_stream) << pair.first << ":" << pair.second << "\n"; } + + if (out_file.is_open()) { + out_file.close(); + std::cout << "Frequency table saved in" << outputFile << "\n"; + } + return 0; -} +} \ No newline at end of file