Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(freq_counter src/freq_counter/main.cpp src/lib/utils.cpp)
add_executable(compressor src/compressor/main.cpp src/lib/huffman_tree.cpp src/lib/bit_stream.cpp src/lib/utils.cpp)
# Compressor command-line tool: its entry point (main.cpp), the
# compress/decompress implementation (compressor.cpp) and the sources it
# uses from src/lib.
add_executable(compressor
src/compressor/main.cpp
src/compressor/compressor.cpp
src/lib/huffman_tree.cpp
src/lib/bit_stream.cpp
src/lib/utils.cpp
)

target_include_directories(freq_counter PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(compressor PRIVATE ${CMAKE_SOURCE_DIR}/include)
15 changes: 15 additions & 0 deletions code_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <iostream>
#include <vector>

// A simple loop used to exercise keyword handling.
int main() {
    constexpr int kCount = 10;

    // Fill the vector with 0..kCount-1.
    std::vector<int> values;
    for (int n = 0; n != kCount; ++n) {
        values.push_back(n);
    }

    // Nothing to report when the vector is empty.
    if (values.empty()) {
        return 0;
    }

    std::cout << "O vetor nao esta vazio!" << std::endl;
    return 0;
}
Binary file added codigo_comprimido.huff
Binary file not shown.
15 changes: 15 additions & 0 deletions codigo_restaurado.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <iostream>
#include <vector>

// A simple loop used to exercise keyword handling.
int main() {
    constexpr int kCount = 10;

    // Fill the vector with 0..kCount-1.
    std::vector<int> values;
    for (int n = 0; n != kCount; ++n) {
        values.push_back(n);
    }

    // Nothing to report when the vector is empty.
    if (values.empty()) {
        return 0;
    }

    std::cout << "O vetor nao esta vazio!" << std::endl;
    return 0;
}
Binary file added compressor
Binary file not shown.
Binary file added contador_frequencia
Binary file not shown.
1 change: 0 additions & 1 deletion examples/small_texts/README.md

This file was deleted.

18 changes: 18 additions & 0 deletions include/compressor/compressor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once
#include <string>
#include <map>
#include <cstdint>

// Public entry points of the compressor.
namespace compressor {

// Compresses the file at inputPath and writes the result to outputPath.
// If freq_table_path is empty, symbol frequencies are obtained by counting
// the bytes of the input. If it is not empty, the function tries to use the
// frequency table stored at that path, which allows multi-character tokens.
void compress_file(const std::string &inputPath,
const std::string &outputPath,
const std::string &freq_table_path = "");

// Reads the compressed file at inputPath and writes the restored data to
// outputPath.
void decompress_file(const std::string &inputPath,
const std::string &outputPath);

} // namespace compressor
209 changes: 209 additions & 0 deletions src/compressor/compressor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#include "../../include/compressor/compressor.h"

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <vector>
#include <algorithm>
#include <map>
#include <iostream>
#include <cstdint>
#include <cstring>

#include "../../include/huffman/huffman_tree.h"
#include "../../include/huffman/bit_stream.h"

using namespace std;

namespace compressor {

// =================================================================================
// HELPER FUNCTIONS
// =================================================================================

// Reads the entire file at `path` (opened in binary mode) into a string.
// Throws runtime_error when the file cannot be opened.
static string readFileToString(const string &path) {
    ifstream input(path, ios::binary);
    if (input.fail()) {
        throw runtime_error("Nao foi possivel abrir o arquivo de entrada: " + path);
    }
    // Copy every byte from the stream buffer straight into the result.
    return string(istreambuf_iterator<char>(input), istreambuf_iterator<char>());
}

// Counts how many times each individual byte occurs in `data`.
// Every key of the returned map is a one-character string holding that byte;
// bytes that never occur have no entry.
static map<string, uint64_t> countByteFreqs(const string &data) {
    // Tally occurrences per byte value first.
    uint64_t histogram[256] = {};
    for (const char ch : data) {
        ++histogram[static_cast<unsigned char>(ch)];
    }

    // Then publish only the byte values that were actually seen.
    map<string, uint64_t> freqs;
    for (int byteValue = 0; byteValue < 256; ++byteValue) {
        if (histogram[byteValue] != 0) {
            freqs.emplace(string(1, static_cast<char>(byteValue)), histogram[byteValue]);
        }
    }
    return freqs;
}

// Converts escape sequences back into the characters they stand for:
// "\\n" -> '\n', "\\r" -> '\r', "\\t" -> '\t' and "\\\\" -> '\\'.
//
// A backslash at the very end of the input is kept as-is. A backslash
// followed by any other character is not a recognised escape; both characters
// are copied through unchanged instead of being dropped, so a hand-written
// table entry never loses data silently.
static string unescape_chars(const std::string& s) {
    std::string unescaped;
    unescaped.reserve(s.size());

    for (size_t i = 0; i < s.length(); ++i) {
        const char current = s[i];

        // Ordinary character, or a lone backslash with nothing after it.
        if (current != '\\' || i + 1 >= s.length()) {
            unescaped += current;
            continue;
        }

        const char next = s[++i]; // consume the character after the backslash
        switch (next) {
            case 'n':  unescaped += '\n'; break;
            case 'r':  unescaped += '\r'; break;
            case 't':  unescaped += '\t'; break;
            case '\\': unescaped += '\\'; break;
            default:
                // Unknown escape: preserve it verbatim.
                unescaped += '\\';
                unescaped += next;
                break;
        }
    }
    return unescaped;
}

// Reads a frequency table file with one "symbol:count" entry per line.
//
// Returns an empty map when `path` is empty. Throws runtime_error when the
// file cannot be opened. Lines that cannot be parsed are skipped:
//   - lines without a ':' separator,
//   - lines whose symbol is empty (an empty symbol can never match any input),
//   - lines whose count is negative or not a number.
static map<string, uint64_t> readFreqTable(const string &path) {
    map<string, uint64_t> freqs;
    if (path.empty()) return freqs;

    ifstream in(path, ios::binary);
    if (!in) throw runtime_error("Nao foi possivel abrir o arquivo de tabela de frequencia: " + path);

    string line;
    while (getline(in, line)) {
        // Split on the LAST ':' so that a symbol may itself contain ':'.
        const size_t pos = line.rfind(':');
        if (pos == string::npos) continue;

        const string val = line.substr(pos + 1);
        // stoull accepts a leading '-' and wraps the value around to a huge
        // unsigned number; a negative count is never valid, so reject it.
        if (val.find('-') != string::npos) continue;

        const string key = unescape_chars(line.substr(0, pos));
        if (key.empty()) continue;

        try {
            // Parse first so a failed conversion never leaves a stray entry.
            const uint64_t count = stoull(val);
            freqs[key] = count;
        } catch (const exception &) {
            // Not a number, or out of range: ignore the malformed line.
        }
    }
    return freqs;
}

// Splits `text` into tokens greedily: at each position the first entry of
// `symbols_sorted` that matches the upcoming bytes becomes the next token
// (empty symbols are ignored). When no symbol matches, the single byte at
// that position is emitted as its own token.
static vector<string> tokenize_greedy(const string &text, const vector<string> &symbols_sorted) {
    vector<string> tokens;
    const size_t length = text.size();
    size_t cursor = 0;

    while (cursor < length) {
        const size_t remaining = length - cursor;

        // First non-empty symbol that fits in the remaining text and matches it.
        const auto hit = find_if(
            symbols_sorted.begin(), symbols_sorted.end(),
            [&](const string &candidate) {
                return !candidate.empty()
                    && candidate.size() <= remaining
                    && text.compare(cursor, candidate.size(), candidate) == 0;
            });

        if (hit != symbols_sorted.end()) {
            tokens.push_back(*hit);
            cursor += hit->size();
        } else {
            tokens.push_back(string(1, text[cursor]));
            ++cursor;
        }
    }
    return tokens;
}


// =================================================================================
// MAIN FUNCTIONS
// =================================================================================

// Compresses the file at `inputPath` into `outputPath`.
//
// When `freq_table_path` names a table with at least one entry, its symbols
// (which may be multi-character) drive a greedy, longest-symbol-first
// tokenization and its counts are used to build the Huffman tree. Otherwise
// the input is treated as a sequence of single bytes whose frequencies are
// counted from the input itself.
//
// The output consists of the serialized tree followed by the bit-packed codes.
//
// Throws runtime_error when a file cannot be opened, when a token has no code
// in the tree (e.g. the table does not cover the input), or when writing the
// output fails.
//
// This function does not print a status message; reporting success is left to
// the caller (the command-line front end already prints "Compressed ...").
void compress_file(const string &inputPath, const string &outputPath, const string &freq_table_path) {
    const string data = readFileToString(inputPath);
    map<string, uint64_t> freqs = readFreqTable(freq_table_path);
    vector<string> tokens;

    if (!freqs.empty()) {
        // A frequency table was supplied: tokenize with its symbols, trying
        // longer symbols first (ties broken lexicographically).
        vector<string> symbols;
        symbols.reserve(freqs.size());
        for (const auto &entry : freqs) {
            symbols.push_back(entry.first);
        }
        sort(symbols.begin(), symbols.end(), [](const string &a, const string &b) {
            if (a.size() != b.size()) return a.size() > b.size();
            return a < b;
        });

        tokens = tokenize_greedy(data, symbols);
    } else {
        // No table: plain per-byte compression.
        freqs = countByteFreqs(data);
        tokens.reserve(data.size());
        for (const char c : data) {
            tokens.push_back(string(1, c));
        }
    }

    // Build the Huffman tree from the symbol frequencies.
    HuffmanTree tree;
    tree.build(freqs);

    ofstream out(outputPath, ios::binary);
    if (!out) throw runtime_error("Nao foi possivel abrir o arquivo de saida: " + outputPath);

    // The tree goes first so the decompressor can rebuild the codes.
    tree.serialize(out);

    // Then the payload, written bit by bit.
    // NOTE(review): no token count or end marker is written here; if
    // BitOutputStream::flush pads the last byte, the decompressor may decode
    // the padding bits as extra symbols. Confirm against bit_stream.
    BitOutputStream bout(out);
    auto codes = tree.getCodes();

    for (const auto &tok : tokens) {
        auto it = codes.find(tok);
        if (it == codes.end()) {
            throw runtime_error("Nao foi encontrado codigo para o token durante a compressao. Token: '" + tok + "'");
        }
        for (bool b : it->second) {
            bout.writeBit(b);
        }
    }
    bout.flush();
    out.close();

    // Surface write errors (disk full, I/O error) instead of reporting success.
    if (!out) throw runtime_error("Falha ao escrever o arquivo de saida: " + outputPath);
}

// Decompresses the file at `inputPath` into `outputPath`.
//
// Reads the serialized Huffman tree from the start of the input, then walks
// the tree one bit at a time (0 = left child, 1 = right child), writing the
// symbol of every leaf reached and restarting from the root.
//
// The output file is always created, even when the stored tree is empty, so a
// successful call always leaves a file behind.
//
// Throws runtime_error when a file cannot be opened, when the bit stream
// leads to a missing tree node (corrupt or incompatible data), or when
// writing the output fails.
void decompress_file(const string &inputPath, const string &outputPath) {
    ifstream in(inputPath, ios::binary);
    if (!in) throw runtime_error("Nao foi possivel abrir o arquivo comprimido: " + inputPath);

    // The tree is stored at the beginning of the file.
    HuffmanTree tree;
    tree.deserialize(in);

    // Open the output before inspecting the tree so that an empty archive
    // still yields an (empty) output file.
    ofstream out(outputPath, ios::binary);
    if (!out) throw runtime_error("Nao foi possivel abrir o arquivo de saida: " + outputPath);

    auto root = tree.getRoot();
    if (!root) { return; } // empty tree: nothing to decode

    // NOTE(review): decoding runs until readBit() reports end of input; if the
    // last byte carries padding bits they may decode as extra symbols.
    // Confirm how BitOutputStream/BitInputStream handle padding.
    BitInputStream bin(in);
    auto node = root;
    while (true) {
        const int b = bin.readBit();
        if (b == -1) break;

        node = (b == 0) ? node->left : node->right;

        // A missing child means the bits do not describe a path in this tree
        // (this also covers a root that is itself a leaf). Fail cleanly
        // instead of dereferencing a null node.
        if (!node) {
            throw runtime_error("Arquivo comprimido corrompido: caminho invalido na arvore de Huffman");
        }

        if (node->isLeaf()) {
            out << node->symbol;
            node = root;
        }
    }

    out.close();
    // Surface write errors (disk full, I/O error) instead of reporting success.
    if (!out) throw runtime_error("Falha ao escrever o arquivo de saida: " + outputPath);
}

} // namespace compressor
67 changes: 52 additions & 15 deletions src/compressor/main.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,52 @@
#include <iostream>
#include <fstream>
#include <string>
#include "../../include/huffman/bit_stream.h"
#include "../../include/huffman/huffman_tree.h"

// Stub entry point: prints a usage hint when no mode argument is given,
// otherwise reports that the tool is not implemented yet.
int main(int argc, char** argv) {
    const bool hasModeArgument = argc >= 2;
    if (!hasModeArgument) {
        std::cerr << "Usage: compressor -c|-d ...\n";
        return 1;
    }

    // Placeholder: parsing of -c (compress) / -d (decompress) and the calls
    // into the real logic still have to be written.
    std::cout << "Compressor stub. Implement command-line parsing and logic.\n";
    return 0;
}
#include <iostream>
#include "../../include/compressor/compressor.h"

using namespace std;

// Writes the command-line synopsis for both modes to stderr.
static void printUsage() {
    static const char kUsageText[] =
        "Usage:\n"
        " compressor -c [-f freq_table] -i <input> -o <output>\n"
        " compressor -d -i <input> -o <output>\n";
    cerr << kUsageText;
}

// Command-line front end of the compressor.
//
//   -c              compress
//   -d              decompress
//   -f <freq_table> optional frequency table (used when compressing)
//   -i <input>      input file
//   -o <output>     output file
//
// Returns 0 on success, 1 for usage errors (unknown argument, option without
// its value, missing or conflicting mode, missing input/output) and 2 when
// compression or decompression throws.
int main(int argc, char** argv) {
    if (argc < 2) { printUsage(); return 1; }

    bool doCompress = false, doDecompress = false;
    string freqFile, inputFile, outputFile;

    for (int i = 1; i < argc; i++) {
        const string a = argv[i];
        if (a == "-c") { doCompress = true; continue; }
        if (a == "-d") { doDecompress = true; continue; }

        // Options that take a value: pick where the value will be stored.
        string *target = nullptr;
        if (a == "-f") target = &freqFile;
        else if (a == "-i") target = &inputFile;
        else if (a == "-o") target = &outputFile;

        if (!target) {
            cerr << "Unknown arg: " << a << "\n";
            printUsage();
            return 1;
        }
        // A known option given as the last argument is missing its value;
        // say so instead of calling the option itself unknown.
        if (i + 1 >= argc) {
            cerr << "Missing value for option: " << a << "\n";
            printUsage();
            return 1;
        }
        *target = argv[++i];
    }

    try {
        if (doCompress && doDecompress) { cerr << "Cannot both compress and decompress.\n"; return 1; }
        if (doCompress) {
            if (inputFile.empty() || outputFile.empty()) { printUsage(); return 1; }
            compressor::compress_file(inputFile, outputFile, freqFile);
            cout << "Compressed " << inputFile << " -> " << outputFile << endl;
        } else if (doDecompress) {
            if (inputFile.empty() || outputFile.empty()) { printUsage(); return 1; }
            compressor::decompress_file(inputFile, outputFile);
            cout << "Decompressed " << inputFile << " -> " << outputFile << endl;
        } else {
            // Neither -c nor -d was given.
            printUsage();
            return 1;
        }
    } catch (const exception &ex) {
        cerr << "Error: " << ex.what() << "\n";
        return 2;
    }

    return 0;
}
25 changes: 22 additions & 3 deletions src/freq_counter/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,26 @@
#include <cstring>
#include "../../include/huffman/huffman_tree.h"
#include <set>
#include <string>

// Escapes special characters so a symbol fits on a single text line:
// newline, carriage return, tab and backslash become the two-character
// sequences "\\n", "\\r", "\\t" and "\\\\"; every other character is copied
// unchanged.
std::string escape_chars(const std::string& s) {
    std::string escaped;
    for (std::string::size_type idx = 0; idx < s.size(); ++idx) {
        switch (s[idx]) {
            case '\n': escaped.append("\\n");  break;
            case '\r': escaped.append("\\r");  break;
            case '\t': escaped.append("\\t");  break;
            case '\\': escaped.append("\\\\"); break;
            default:   escaped.push_back(s[idx]); break;
        }
    }
    return escaped;
}

// Helper function for when the loop finds a single char in the file
std::string charToString(char c) {
Expand Down Expand Up @@ -131,9 +150,9 @@ int main(int argc, char** argv) {
}

// Output format of the map, one entry per line: symbol:frequency
for (const auto& pair : freqs) {
(*out_stream) << pair.first << ":" << pair.second << "\n";
}
for (const auto& pair : freqs) {
(*out_stream) << escape_chars(pair.first) << ":" << pair.second << "\n";
}

if (out_file.is_open()) {
out_file.close();
Expand Down
Loading