Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/huffman/huffman_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <memory>
#include <vector>
#include <map>
#include "../../include/huffman/bit_stream.h"

// struct HNode {
// uint8_t symbol;
Expand All @@ -26,11 +27,15 @@ class HuffmanTree {
HuffmanTree();
void build(const std::map<std::string, uint64_t> &freqs); // freqs size 256
std::map<std::string, std::vector<bool>> getCodes() const;
std::shared_ptr<HNode> getRoot() const;
void serialize(std::ostream &out) const;
void deserialize(std::istream &in);

private:
std::shared_ptr<HNode> root_;
void buildCodes(std::shared_ptr<HNode> node, std::vector<bool> &path, std::map<std::string, std::vector<bool>> &out) const;
void serialize_helper(std::shared_ptr<HNode> node, BitOutputStream& bit_out) const;
std::shared_ptr<HNode> deserialize_helper(BitInputStream& bit_in);
};
#endif

30 changes: 24 additions & 6 deletions src/freq_counter/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,30 @@ int main(int argc, char** argv) {

std::map<std::string, uint64_t> freqs;
std::set<std::string> keyWords = {
"int", "double", "while", "return", "include", "new", "class", "public",
"private", "protected", "if", "else", "for", "switch", "case", "break",
"continue", "struct", "enum", "namespace", "using", "const", "char",
"void", "unsigned", "long", "short", "bool", "true", "false", "nullptr", "Node", "Interface"
//Adicioanr mais, tendo em vista que o professor pediu que seja compressor de várias linguagens
};
"alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor",
"bool", "break", "case", "catch", "char", "char8_t", "char16_t", "char32_t",
"class", "compl", "concept", "const", "consteval", "constexpr", "constinit",
"const_cast", "continue", "co_await", "co_return", "co_yield", "decltype",
"default", "delete", "do", "double", "dynamic_cast", "else", "enum",
"explicit", "export", "extern", "false", "float", "for", "friend", "goto",
"if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept",
"not", "not_eq", "nullptr", "operator", "or", "or_eq", "private",
"protected", "public", "register", "reinterpret_cast", "requires", "return",
"short", "signed", "sizeof", "static", "static_assert", "static_cast",
"struct", "switch", "template", "this", "thread_local", "throw", "true",
"try", "typedef", "typeid", "typename", "union", "unsigned", "using",
"virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq",
// Identificadores com significado especial
"final", "override", "import", "module",
// Diretivas de pré-processador (muito comuns em .cpp/.h)
"define", "elif", "elifdef", "elifndef", "embed", "endif", "error",
"ifdef", "ifndef", "include", "line", "pragma", "undef", "warning",
// Tipos e classes comuns da (STL)
"std", "string", "vector", "map", "set", "iostream", "fstream", "sstream",
"memory", "shared_ptr", "unique_ptr", "make_shared", "make_unique",
"cout", "cin", "cerr", "endl"
}; //Fonte das palavras reservadas: https://en.cppreference.com/w/cpp/keywords.html e arquivos .h de antigos projetos


std::vector<std::string> inputFiles;
std::string outputFile = "";
Expand Down
113 changes: 94 additions & 19 deletions src/lib/huffman_tree.cpp
Original file line number Diff line number Diff line change
@@ -1,31 +1,95 @@
#include "../../include/huffman/huffman_tree.h"
#include "../../include/huffman/bit_stream.h"

#include <iostream>
#include <queue>
#include <stdexcept>

/**
 * Writes the subtree rooted at `node` to `bit_out` in pre-order.
 *
 * Encoding, per node:
 *   - leaf:     one 1 bit, the symbol length in 8 bits, then every symbol
 *               byte in 8 bits each;
 *   - internal: one 0 bit, then the left subtree, then the right subtree.
 * A null `node` writes nothing.
 *
 * NOTE(review): huffman_tree.h also declares a private member
 * HuffmanTree::serialize_helper with these parameters. This definition is a
 * free function and does not define that member -- confirm which of the two
 * HuffmanTree::serialize is expected to reach.
 *
 * @param node    Root of the subtree to write; may be null.
 * @param bit_out Bit stream that receives the encoding.
 * @throws std::length_error if a leaf symbol is longer than 255 bytes. The
 *         length field is a single byte, so a longer symbol would be written
 *         with a wrapped-around length and corrupt everything after it.
 */
void serialize_helper(std::shared_ptr<HNode> node, BitOutputStream& bit_out) {
    if (node == nullptr) {
        return;
    }

    // Internal node: marker bit, then both children (pre-order).
    if (!node->isLeaf()) {
        bit_out.writeBit(false);
        serialize_helper(node->left, bit_out);
        serialize_helper(node->right, bit_out);
        return;
    }

    const auto& symbol = node->symbol;
    // Validate before emitting anything for this leaf so a failure does not
    // leave a dangling leaf marker in the stream.
    if (symbol.length() > 0xFFu) {
        throw std::length_error(
            "serialize_helper: symbol longer than 255 bytes cannot be encoded");
    }

    bit_out.writeBit(true);
    const uint8_t len = static_cast<uint8_t>(symbol.length());
    bit_out.writeBits(len, 8);
    for (char c : symbol) {
        bit_out.writeBits(static_cast<uint8_t>(c), 8);
    }
}

/**
 * Reads one pre-order-encoded subtree from `bit_in`.
 *
 * Expected encoding, per node:
 *   - bit 1: leaf, followed by an 8-bit symbol length and that many 8-bit
 *            symbol bytes (most significant bit first);
 *   - any other bit value: internal node, followed by the left and then the
 *            right subtree.
 *
 * NOTE(review): huffman_tree.h also declares a private member
 * HuffmanTree::deserialize_helper with this parameter. This definition is a
 * free function and does not define that member -- confirm which of the two
 * HuffmanTree::deserialize is expected to reach.
 *
 * @param bit_in Bit stream to read from; `readBit()` is treated as returning
 *               -1 once the stream is exhausted.
 * @return The reconstructed subtree, or nullptr when the stream ends before
 *         the subtree is complete (including an already-empty stream).
 */
std::shared_ptr<HNode> deserialize_helper(BitInputStream& bit_in) {
    // Reads 8 bits, most significant first. Returns -1 as soon as the stream
    // ends so the end-of-stream marker is never folded into the value.
    const auto read_byte = [&bit_in]() -> int {
        int value = 0;
        for (int i = 0; i < 8; ++i) {
            const int next = bit_in.readBit();
            if (next == -1) {
                return -1;
            }
            value = (value << 1) | next;
        }
        return value;
    };

    const int tag = bit_in.readBit();
    if (tag == -1) {
        return nullptr;
    }

    // Leaf node: length-prefixed symbol.
    if (tag == 1) {
        const int len = read_byte();
        if (len == -1) {
            return nullptr;
        }

        std::string symbol;
        for (int i = 0; i < len; ++i) {
            const int ch = read_byte();
            if (ch == -1) {
                return nullptr;  // symbol cut short by end of stream
            }
            symbol += static_cast<char>(static_cast<uint8_t>(ch));
        }

        auto leaf = std::make_shared<HNode>();
        leaf->symbol = symbol;
        return leaf;
    }

    // Internal node: carries no symbol, only two children.
    auto inner = std::make_shared<HNode>();
    inner->symbol = "";
    inner->left = deserialize_helper(bit_in);
    inner->right = deserialize_helper(bit_in);

    // The format cannot encode a missing child (a null node writes no bits),
    // so a null child here means the stream was truncated.
    if (!inner->left || !inner->right) {
        return nullptr;
    }
    return inner;
}

HuffmanTree::HuffmanTree(): root_(nullptr) {}

void HuffmanTree::build(const std::vector<uint64_t> &freqs){
void HuffmanTree::build(const std::map<std::string, uint64_t> &freqs){
struct QNode {
std::shared_ptr<HNode> node;
uint64_t freq;
bool operator<(QNode const& other) const { return freq > other.freq; } // for min-heap
bool operator<(const QNode& other) const { return freq > other.freq; }
};

std::priority_queue<QNode> pq;
for(int i=0;i<256;i++){
if(freqs[i]>0){
for(const auto& pair : freqs){
if(pair.second > 0){
auto n = std::make_shared<HNode>();
n->symbol = static_cast<uint8_t>(i);
n->freq = freqs[i];
pq.push(QNode{n, freqs[i]});
n->symbol = pair.first;
n->freq = pair.second;
pq.push(QNode{n, n->freq});
}
}
if(pq.empty()){
return;
}

if(pq.empty()){ return; }

while(pq.size() > 1){
auto a = pq.top(); pq.pop();
auto b = pq.top(); pq.pop();
auto parent = std::make_shared<HNode>();
parent->symbol = ""; // Nós internos são vazios para servir apenas como conectores de estruturas
parent->left = a.node;
parent->right = b.node;
parent->freq = a.freq + b.freq;
Expand All @@ -34,33 +98,44 @@ void HuffmanTree::build(const std::vector<uint64_t> &freqs){
root_ = pq.top().node;
}

std::map<uint8_t, std::vector<bool>> HuffmanTree::getCodes() const {
std::map<uint8_t, std::vector<bool>> out;
std::map<std::string, std::vector<bool>> HuffmanTree::getCodes() const {
std::map<std::string, std::vector<bool>> out;
if(root_) {
std::vector<bool> path;
const_cast<HuffmanTree*>(this)->buildCodes(root_, path, out);
buildCodes(root_, path, out);
}
return out;
}

void HuffmanTree::buildCodes(std::shared_ptr<HNode> node, std::vector<bool> &path, std::map<uint8_t, std::vector<bool>> &out) const {

void HuffmanTree::buildCodes(std::shared_ptr<HNode> node, std::vector<bool> &path, std::map<std::string, std::vector<bool>> &out) const {
if(!node) return;
if(node->isLeaf()){
out[node->symbol] = path;
return;
}
path.push_back(false);
buildCodes(node->left, path, out);
path.back() = true;
path.pop_back(); // Desfaz a mudança para o caminho da direita

path.push_back(true);
buildCodes(node->right, path, out);
path.pop_back();
path.pop_back(); // Limpa o caminho ao retornar da recursão
}


/**
 * Writes the tree to `out` as a bit stream.
 *
 * Wraps `out` in a BitOutputStream, hands the root to serialize_helper and
 * then flushes so that any bits still buffered reach `out`.
 *
 * NOTE(review): huffman_tree.h declares serialize_helper as a private member,
 * while the definition earlier in this file is a free function -- confirm
 * this call resolves to a defined function at link time.
 *
 * @param out Destination stream. Nothing is written to it directly; all
 *            output goes through the bit stream wrapper.
 */
void HuffmanTree::serialize(std::ostream &out) const {
    BitOutputStream bit_out(out);
    serialize_helper(root_, bit_out);
    bit_out.flush();  // push out any bits still held in the buffer
}

void HuffmanTree::deserialize(std::istream &in) {
// stub: implement de-serialization
BitInputStream bit_in(in);
root_ = deserialize_helper(bit_in);
}

/**
 * Gives access to the root node; intended for use by the decompressor.
 *
 * @return Shared pointer to the root, or null when the tree is empty.
 */
std::shared_ptr<HNode> HuffmanTree::getRoot() const
{
    return root_;
}
Loading