-
Notifications
You must be signed in to change notification settings - Fork 0
feat(compressor/decompressor): Implementando o compressor/descompress… #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
5196201
feat(compressor/decompressor): Implementando o compressor/descompress…
DevlTz d105e8e
Update src/freq_counter/main.cpp
DevlTz f0b38dc
Atualizar o main.cpp
DevlTz 99d629f
Delete code_test.txt
DevlTz 4468b51
Delete examples/small_texts/README.md
DevlTz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| #include <iostream> | ||
| #include <vector> | ||
|
|
||
| // Sample program: a simple loop used to exercise keyword detection; it fills a vector with 0..9 and prints a message when the vector is not empty. | ||
| int main() { | ||
| std::vector<int> vec; | ||
| for (int i = 0; i < 10; ++i) { | ||
| vec.push_back(i); | ||
| } | ||
|
|
||
| if (!vec.empty()) { | ||
| std::cout << "O vetor nao esta vazio!" << std::endl; | ||
| } | ||
| return 0; | ||
| } |
Binary file not shown.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| #include <iostream> | ||
| #include <vector> | ||
|
|
||
| // Sample program: a simple loop used to exercise keyword detection; it fills a vector with 0..9 and prints a message when the vector is not empty. | ||
| int main() { | ||
| std::vector<int> vec; | ||
| for (int i = 0; i < 10; ++i) { | ||
| vec.push_back(i); | ||
| } | ||
|
|
||
| if (!vec.empty()) { | ||
| std::cout << "O vetor nao esta vazio!" << std::endl; | ||
| } | ||
| return 0; | ||
| } |
Binary file not shown.
Binary file not shown.
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| #pragma once | ||
| #include <string> | ||
| #include <map> | ||
| #include <cstdint> | ||
|
|
||
| namespace compressor { | ||
|
|
||
| // Compress: reads inputPath and writes the compressed result to outputPath. If freq_table_path is empty, the bytes of the input are counted. | ||
| // If freq_table_path is not empty, tries to use that table instead (supports multi-char tokens). | ||
| void compress_file(const std::string &inputPath, | ||
| const std::string &outputPath, | ||
| const std::string &freq_table_path = ""); | ||
|
|
||
| // Decompress: reads the compressed file at inputPath and writes the decoded output to outputPath. | ||
| void decompress_file(const std::string &inputPath, | ||
| const std::string &outputPath); | ||
|
|
||
| } // namespace compressor |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| #include "../../include/compressor/compressor.h" | ||
|
|
||
| #include <fstream> | ||
| #include <sstream> | ||
| #include <stdexcept> | ||
| #include <vector> | ||
| #include <algorithm> | ||
| #include <map> | ||
| #include <iostream> | ||
| #include <cstdint> | ||
| #include <cstring> | ||
|
|
||
| #include "../../include/huffman/huffman_tree.h" | ||
| #include "../../include/huffman/bit_stream.h" | ||
|
|
||
| using namespace std; | ||
|
|
||
| namespace compressor { | ||
|
|
||
| // ================================================================================= | ||
| // FUNÇÕES AUXILIARES | ||
| // ================================================================================= | ||
|
|
||
// Loads the complete contents of a file into a string.
//
// The file is opened in binary mode, so the bytes are returned as stored.
//
// path: location of the file to read.
// Returns a string holding every byte of the file (empty for an empty file).
// Throws std::runtime_error when the file cannot be opened.
static std::string readFileToString(const std::string &path) {
    std::ifstream source(path, std::ios::binary);
    if (source.fail()) {
        throw std::runtime_error("Nao foi possivel abrir o arquivo de entrada: " + path);
    }

    // Streaming the underlying buffer copies the whole file in one expression.
    std::ostringstream buffer;
    buffer << source.rdbuf();
    return buffer.str();
}
|
|
||
// Builds a histogram with one entry per distinct byte found in `data`.
//
// data: raw bytes to analyse.
// Returns a map whose keys are one-character strings and whose values are the
// number of times that byte occurs; the map is empty when `data` is empty.
static std::map<std::string, std::uint64_t> countByteFreqs(const std::string &data) {
    std::map<std::string, std::uint64_t> histogram;
    for (std::size_t pos = 0; pos != data.size(); ++pos) {
        // operator[] value-initialises a missing counter to zero before the increment.
        histogram[std::string(1, data[pos])] += 1;
    }
    return histogram;
}
|
|
||
// Converts textual escape sequences into the characters they stand for.
//
// Recognised sequences are "\\n" (line feed), "\\r" (carriage return),
// "\\t" (tab) and "\\\\" (a single backslash). Any other backslash sequence is
// copied through unchanged (backslash plus the following character) instead of
// being dropped, so unknown input is never silently lost. A lone backslash at
// the very end of the string is also kept as-is.
//
// s: text that may contain escape sequences.
// Returns the text with the recognised escapes replaced.
static std::string unescape_chars(const std::string& s) {
    std::string unescaped;
    unescaped.reserve(s.size());  // the result is never longer than the input

    for (std::size_t i = 0; i < s.size(); ++i) {
        const bool starts_escape = (s[i] == '\\') && (i + 1 < s.size());
        if (!starts_escape) {
            unescaped += s[i];
            continue;
        }

        const char escaped = s[++i];  // character right after the backslash
        switch (escaped) {
            case 'n':
                unescaped += '\n';
                break;
            case 'r':
                unescaped += '\r';
                break;
            case 't':
                unescaped += '\t';
                break;
            case '\\':
                unescaped += '\\';
                break;
            default:
                // Unknown escape: keep both characters rather than discarding them.
                unescaped += '\\';
                unescaped += escaped;
                break;
        }
    }
    return unescaped;
}
|
|
||
| // Lê o arquivo de tabela de frequência ("simbolo:contagem"). | ||
| static map<string, uint64_t> readFreqTable(const string &path) { | ||
| map<string, uint64_t> freqs; | ||
| if (path.empty()) return freqs; | ||
| ifstream in(path, ios::binary); | ||
| if (!in) throw runtime_error("Nao foi possivel abrir o arquivo de tabela de frequencia: " + path); | ||
DevlTz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| string line; | ||
| while (getline(in, line)) { | ||
| if (line.empty()) continue; | ||
| size_t pos = line.rfind(':'); | ||
| if (pos == string::npos) continue; | ||
|
|
||
| string key = line.substr(0, pos); | ||
| string val = line.substr(pos + 1); | ||
|
|
||
| try { | ||
| uint64_t v = stoull(val); | ||
| freqs[unescape_chars(key)] = v; | ||
| } catch (...) { | ||
| // Ignora linhas com formato inválido | ||
| } | ||
| } | ||
| return freqs; | ||
| } | ||
|
|
||
// Splits `text` into tokens by repeatedly taking the first symbol of
// `symbols_sorted` that matches at the current position.
//
// Because candidates are tried in the order given, the caller decides the
// priority (compress_file passes them longest first). Empty symbols are never
// matched. When no symbol matches, the single byte at the current position
// becomes a token of its own and scanning continues after it.
//
// text:           input to split.
// symbols_sorted: candidate symbols in priority order.
// Returns the tokens in input order; concatenating them reproduces `text`.
static std::vector<std::string> tokenize_greedy(const std::string &text,
                                                const std::vector<std::string> &symbols_sorted) {
    std::vector<std::string> tokens;
    const std::size_t total = text.size();
    std::size_t cursor = 0;

    while (cursor < total) {
        const std::size_t remaining = total - cursor;
        const auto hit = std::find_if(
            symbols_sorted.begin(), symbols_sorted.end(),
            [&](const std::string &candidate) {
                return !candidate.empty() &&
                       candidate.size() <= remaining &&
                       text.compare(cursor, candidate.size(), candidate) == 0;
            });

        if (hit != symbols_sorted.end()) {
            tokens.push_back(*hit);
            cursor += hit->size();
        } else {
            // No known symbol starts here: fall back to a one-byte token.
            tokens.push_back(std::string(1, text[cursor]));
            cursor += 1;
        }
    }
    return tokens;
}
|
|
||
|
|
||
| // ================================================================================= | ||
| // FUNÇÕES PRINCIPAIS | ||
| // ================================================================================= | ||
|
|
||
| // Função principal para comprimir um arquivo. | ||
| void compress_file(const string &inputPath, const string &outputPath, const string &freq_table_path) { | ||
| string data = readFileToString(inputPath); | ||
| map<string, uint64_t> freqs = readFreqTable(freq_table_path); | ||
| vector<string> tokens; | ||
|
|
||
| // Se uma tabela de frequência foi fornecida, usa a tokenização inteligente. | ||
| if (!freqs.empty()) { | ||
| vector<string> symbols; | ||
| for (const auto &p : freqs) { | ||
| symbols.push_back(p.first); | ||
| } | ||
| sort(symbols.begin(), symbols.end(), [](const string &a, const string &b) { | ||
| if (a.size() != b.size()) return a.size() > b.size(); | ||
| return a < b; | ||
| }); | ||
|
|
||
| tokens = tokenize_greedy(data, symbols); | ||
| } else { | ||
| // Senão, faz a compressão simples por byte. | ||
| freqs = countByteFreqs(data); | ||
| for (unsigned char c : data) { | ||
| tokens.emplace_back(string(1, static_cast<char>(c))); | ||
| } | ||
| } | ||
|
|
||
| // Constrói a Árvore de Huffman. | ||
| HuffmanTree tree; | ||
| tree.build(freqs); | ||
|
|
||
| ofstream out(outputPath, ios::binary); | ||
| if (!out) throw runtime_error("Nao foi possivel abrir o arquivo de saida: " + outputPath); | ||
DevlTz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Salva a árvore no início do arquivo. | ||
| tree.serialize(out); | ||
|
|
||
| // Escreve os dados comprimidos bit a bit. | ||
| BitOutputStream bout(out); | ||
| auto codes = tree.getCodes(); | ||
|
|
||
| for (const auto &tok : tokens) { | ||
| auto it = codes.find(tok); | ||
| if (it == codes.end()) { | ||
| throw runtime_error("Nao foi encontrado codigo para o token durante a compressao. Token: '" + tok + "'"); | ||
ctrindadedev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| for (bool b : it->second) { | ||
| bout.writeBit(b); | ||
| } | ||
| } | ||
| bout.flush(); | ||
| out.close(); | ||
|
|
||
| cout << "Compressed " << inputPath << " -> " << outputPath << endl; | ||
| } | ||
|
|
||
| // Função principal para descomprimir um arquivo. | ||
| void decompress_file(const string &inputPath, const string &outputPath) { | ||
| ifstream in(inputPath, ios::binary); | ||
| if (!in) throw runtime_error("Nao foi possivel abrir o arquivo comprimido: " + inputPath); | ||
ctrindadedev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Lê a árvore do início do arquivo. | ||
| HuffmanTree tree; | ||
| tree.deserialize(in); | ||
|
|
||
| auto root = tree.getRoot(); | ||
| if (!root) { return; } | ||
|
|
||
| ofstream out(outputPath, ios::binary); | ||
| if (!out) throw runtime_error("Nao foi possivel abrir o arquivo de saida: " + outputPath); | ||
ctrindadedev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Lê os bits e percorre a árvore para decodificar. | ||
| BitInputStream bin(in); | ||
| auto node = root; | ||
| while (true) { | ||
| int b = bin.readBit(); | ||
| if (b == -1) break; | ||
|
|
||
| node = (b == 0) ? node->left : node->right; | ||
|
|
||
| if (node->isLeaf()) { | ||
| out << node->symbol; | ||
| node = root; | ||
| } | ||
| } | ||
|
|
||
| out.close(); | ||
| in.close(); | ||
| } | ||
|
|
||
| } // namespace compressor | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,52 @@ | ||
| #include <iostream> | ||
| #include <fstream> | ||
| #include <string> | ||
| #include "../../include/huffman/bit_stream.h" | ||
| #include "../../include/huffman/huffman_tree.h" | ||
|
|
||
// Placeholder entry point: it only checks that at least one argument was
// supplied and then reports that the real logic is not implemented yet.
// Returns 1 (after printing a usage hint) when no argument is given, 0 otherwise.
int main(int argc, char** argv) {
    const bool hasArguments = argc >= 2;
    if (!hasArguments) {
        std::cerr << "Usage: compressor -c|-d ...\n";
        return 1;
    }

    // This is a stub. Implement parsing -c (compress) and -d (decompress) and call appropriate functions.
    std::cout << "Compressor stub. Implement command-line parsing and logic.\n";
    return 0;
}
| #include <iostream> | ||
| #include "../../include/compressor/compressor.h" | ||
|
|
||
| using namespace std; | ||
|
|
||
// Writes the command-line synopsis for both modes to standard error.
static void printUsage() {
    // Adjacent literals are concatenated by the compiler into one message.
    static const char kUsage[] =
        "Usage:\n"
        "  compressor -c [-f freq_table] -i <input> -o <output>\n"
        "  compressor -d -i <input> -o <output>\n";
    std::cerr << kUsage;
}
|
|
||
| int main(int argc, char** argv) { | ||
| if(argc < 2) { printUsage(); return 1; } | ||
|
|
||
| bool doCompress = false, doDecompress = false; | ||
| string freqFile, inputFile, outputFile; | ||
|
|
||
| for(int i=1;i<argc;i++){ | ||
| string a = argv[i]; | ||
| if(a == "-c") doCompress = true; | ||
| else if(a == "-d") doDecompress = true; | ||
| else if(a == "-f" && i+1<argc) { freqFile = argv[++i]; } | ||
| else if(a == "-i" && i+1<argc) { inputFile = argv[++i]; } | ||
| else if(a == "-o" && i+1<argc) { outputFile = argv[++i]; } | ||
| else { | ||
| cerr << "Unknown arg: " << a << "\n"; | ||
| printUsage(); | ||
| return 1; | ||
| } | ||
| } | ||
|
|
||
| try { | ||
| if(doCompress && doDecompress) { cerr << "Cannot both compress and decompress.\n"; return 1; } | ||
| if(doCompress) { | ||
| if(inputFile.empty() || outputFile.empty()) { printUsage(); return 1; } | ||
| compressor::compress_file(inputFile, outputFile, freqFile); | ||
| cout << "Compressed " << inputFile << " -> " << outputFile << endl; | ||
| } else if(doDecompress) { | ||
| if(inputFile.empty() || outputFile.empty()) { printUsage(); return 1; } | ||
| compressor::decompress_file(inputFile, outputFile); | ||
| cout << "Decompressed " << inputFile << " -> " << outputFile << endl; | ||
| } else { | ||
| printUsage(); | ||
| return 1; | ||
| } | ||
| } catch(const exception &ex) { | ||
| cerr << "Error: " << ex.what() << "\n"; | ||
| return 2; | ||
| } | ||
|
|
||
| return 0; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.