-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhdfscpp.cpp
More file actions
243 lines (190 loc) · 5.84 KB
/
hdfscpp.cpp
File metadata and controls
243 lines (190 loc) · 5.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#include "hdfscpp.h"
#include <string>
#include <assert.h>
/**********************************************************************/
namespace tmacam {
namespace hdfs {
/**********************************************************************/
/**
 * Builds an empty list: the entry count is zero and no hdfsFileInfo array
 * is held.
 */
FileInfoList::FileInfoList()
    : n_(0), files_info_(NULL)
{
}
/**
 * Hands cleanup over to reset(), called with whatever default arguments its
 * declaration (outside this file) provides.
 */
FileInfoList::~FileInfoList()
{
    this->reset();
}
/**
 * Replaces the hdfsFileInfo array held by this list.
 *
 * Whatever array is currently held is released with hdfsFreeFileInfo()
 * before the new one is stored.
 *
 * @param entries      array to store, or NULL to leave the list empty.
 * @param num_entries  element count of entries; must be 0 exactly when
 *                     entries is NULL.
 * @throws std::runtime_error if the two arguments disagree, i.e. a NULL
 *         array with a non-zero count or a non-NULL array with a zero count.
 */
void FileInfoList::reset(hdfsFileInfo* entries, int num_entries)
{
    // The arguments must be either both "empty" or both "non-empty".
    const bool has_array = (entries != NULL);
    const bool has_count = (num_entries != 0);
    if (has_array != has_count) {
        throw std::runtime_error("Invalid parameters for FileInfoList::reset");
    }

    // Let go of the array stored by a previous call, if there is one.
    if (files_info_ != NULL) {
        hdfsFreeFileInfo(files_info_, n_);
        files_info_ = NULL;
        n_ = 0;
    }

    if (has_array) {
        n_ = num_entries;
        files_info_ = entries;
    }
}
/**********************************************************************/
/**
 * Opens path on the given filesystem through hdfsOpenFile(), forwarding
 * flags, bufferSize, replication and blocksize unchanged.
 *
 * @throws HDFSError if hdfsOpenFile() does not return a usable handle.
 */
File::File(hdfsFS fs, const char* path, int flags, int bufferSize,
        short replication, tSize blocksize)
    : fs_(fs)
{
    fh_ = hdfsOpenFile(fs_, path, flags, bufferSize, replication, blocksize);
    if (fh_) {
        return;
    }

    // Opening failed: report which path was involved.
    std::string msg = "Failed open path '";
    msg.append(path);
    msg.append("'.");
    throw HDFSError(msg);
}
/**
 * Reads up to length bytes into buffer with hdfsRead().
 *
 * @return the non-negative value reported by hdfsRead().
 * @throws HDFSError if hdfsRead() reports a negative value.
 */
tSize File::Read(void* buffer, tSize length)
{
    const tSize bytes = hdfsRead(fs_, fh_, buffer, length);
    if (bytes >= 0) {
        return bytes;
    }
    throw HDFSError("An error ocurred reading the file (hdfsRead()).");
}
/**
 * Positional read: reads up to length bytes into buffer starting at the
 * given position, using hdfsPread().
 *
 * @param position  offset handed to hdfsPread().
 * @param buffer    destination for the data.
 * @param length    maximum number of bytes to read.
 * @return the non-negative value reported by hdfsPread().
 * @throws HDFSError if hdfsPread() reports a negative value.
 */
tSize File::Pread(tOffset position, void* buffer, tSize length)
{
    tSize result = hdfsPread(fs_, fh_, position, buffer, length);
    if (result < 0) {
        // Name the call that actually failed; the message used to blame
        // hdfsRead(), which made the two read paths indistinguishable.
        throw HDFSError("An error ocurred reading the file (hdfsPread()).");
    }
    return result;
}
/**********************************************************************/
/**
 * Connects to the filesystem at host:port with hdfsConnect() and remembers
 * both values so the connection can be re-established later.
 *
 * @param host  host name passed to hdfsConnect(); may be NULL.
 * @param port  port passed to hdfsConnect().
 * @throws HDFSError if hdfsConnect() returns a null handle.
 */
FileSystem::FileSystem(const char* host, tPort port)
    : host_(NULL),
      host_str_copy_(""),
      port_(port),
      fs_(hdfsConnect(host, port))
{
    // A std::string cannot be built from a NULL pointer, so the private copy
    // is only filled in for a real host name. In that case host_ refers to
    // the copy's storage; otherwise it keeps the NULL it was initialized with.
    if (host != NULL) {
        host_str_copy_.assign(host);
        host_ = host_str_copy_.c_str();
    }

    // Without a handle the object is unusable.
    if (!fs_) {
        throw HDFSError("Error during HDFS connect");
    }
}
/**
 * Disconnects from the filesystem, if a connection is held.
 *
 * The result of hdfsDisconnect() is deliberately ignored. A destructor must
 * not throw: it is implicitly noexcept since C++11, and an exception escaping
 * it during stack unwinding terminates the program in any standard.
 * Reconnect() treats a failed disconnect the same way.
 */
FileSystem::~FileSystem() {
    if (fs_) {
        (void) hdfsDisconnect(fs_);  // best effort; nothing useful to do on failure
        fs_ = NULL;
    }
}
/**
 * Drops the current connection and opens a new one with the host and port
 * remembered at construction time.
 *
 * @throws HDFSError if hdfsConnect() returns a null handle.
 */
void FileSystem::Reconnect() {
    // The outcome of the disconnect is irrelevant: the handle is replaced
    // right away regardless.
    (void) hdfsDisconnect(fs_);

    fs_ = hdfsConnect(host_, port_);
    if (fs_) {
        return;
    }
    throw HDFSError("Error during HDFS reconnect (@ FileSystem::Reconnect())");
}
void FileSystem::GetPathInfo(const char* path, FileInfoList* info)
{
assert(info);
// Path is a directory, right?
hdfsFileInfo* path_info = hdfsGetPathInfo(fs_, path);
if (!path_info) {
std::string msg("Failed to get information about path '");
msg += path;
msg += "'.";
throw HDFSError(msg);
}
info->reset(path_info, 1);
}
/**
 * Lists the entries of path with hdfsListDirectory() and stores them in
 * info.
 *
 * A NULL result from hdfsListDirectory() is only treated as a failure when
 * errno is non-zero afterwards; a NULL result with errno still zero is
 * passed on to info->reset() as is.
 *
 * @param path  directory to list.
 * @param info  receives the entries; must not be NULL (asserted).
 * @throws HDFSError if hdfsListDirectory() returns NULL with errno set.
 */
void FileSystem::ListDirectory(const char* path, FileInfoList* info)
{
    assert(info);
    int n_entries = 0;

    // errno is the only way to tell a failed listing from a NULL result
    // that is not an error, so clear it first: a stale value left by some
    // earlier, unrelated call must not be mistaken for an error here.
    errno = 0;
    hdfsFileInfo* entries = hdfsListDirectory(fs_, path, &n_entries);
    if (!entries && errno) {
        std::string msg("Error getting information about files in path '");
        msg += path;
        msg += "'.";
        throw HDFSError(msg);
    }
    info->reset(entries, n_entries);
}
/**
 * Checks path with hdfsExists().
 *
 * @return true when hdfsExists() returns 0, false for any other value.
 */
bool FileSystem::Exists(const char* path)
{
    const int status = hdfsExists(fs_, path);
    return status == 0;
}
/**********************************************************************/
/**
 * Retrieves, via hdfsGetHosts(), the host names associated with each block
 * of path in the byte range described by start and size.
 *
 * The NULL-terminated array of NULL-terminated host-name arrays returned by
 * hdfsGetHosts() is copied into a BlockLocationList (one HostList per block)
 * and then released with hdfsFreeHosts().
 *
 * @param fs               filesystem handle.
 * @param path             file to inspect.
 * @param start            range start, forwarded to hdfsGetHosts().
 * @param size             range length, forwarded to hdfsGetHosts().
 * @param blocks_location  receives the result; must not be NULL (asserted).
 *                         It is only modified on success.
 * @throws HDFSError if hdfsGetHosts() returns NULL.
 */
void GetFileBlockLocations(hdfsFS fs, const char* path, tOffset start,
        tOffset size, BlockLocationList* blocks_location)
{
    assert(blocks_location);

    char*** blocks_info = hdfsGetHosts(fs, path, start, size);
    if (!blocks_info) {
        throw HDFSError("hdfsGetHosts failed.");
    }

    BlockLocationList result;
    try {
        for (size_t b = 0; blocks_info[b]; ++b) {
            HostList hosts;
            for (size_t h = 0; blocks_info[b][h]; ++h) {
                hosts.push_back(std::string(blocks_info[b][h]));
            }
            result.push_back(hosts);
        }
    } catch (...) {
        // Copying allocates and may throw; the array from hdfsGetHosts()
        // must still be released before the exception propagates.
        hdfsFreeHosts(blocks_info);
        throw;
    }
    hdfsFreeHosts(blocks_info);

    // Publish the result by swapping it into the caller's blocks_location.
    blocks_location->swap(result);
}
/**
 * Fills files with the mName of every entry that fs->ListDirectory() reports
 * for path, sorted with std::sort.
 *
 * files is always cleared first, so it ends up empty when the listing is
 * empty.
 *
 * @param fs     filesystem wrapper used for the listing.
 * @param path   directory to list.
 * @param files  receives the sorted names; must not be NULL (asserted).
 */
void ListDirectoryEntries(tmacam::hdfs::FileSystem* fs, const char* path,
        std::vector<std::string>* files)
{
    assert(files);
    files->clear();

    FileInfoList listing;
    fs->ListDirectory(path, &listing);
    if (listing.empty()) {
        return;
    }

    files->reserve(listing.size());
    for (int idx = 0; idx < listing.size(); ++idx) {
        files->push_back(listing[idx].mName);
    }
    std::sort(files->begin(), files->end());
}
/**
 * Reads the whole content of path into file_data.
 *
 * The expected length comes from the mSize field that fs->GetPathInfo()
 * reports. The file is then read with File::Read() until that many bytes
 * have arrived or Read() returns 0. file_data is only modified on success;
 * its previous content is discarded.
 *
 * @param fs         filesystem wrapper; must not be NULL (asserted).
 * @param path       file to read.
 * @param file_data  receives the bytes read; must not be NULL (asserted).
 * @throws HDFSError propagated from GetPathInfo(), the File constructor or
 *         File::Read().
 */
void ReadFullHDFSFile(FileSystem* fs, const char* path,
        std::vector<char>* file_data)
{
    assert(fs);
    assert(file_data);

    FileInfoList file_info;
    fs->GetPathInfo(path, &file_info);

    // Size the buffer straight from the reported length instead of first
    // narrowing it to tSize (presumably 32-bit, per libhdfs' hdfs.h -- confirm),
    // which would corrupt the size of very large files.
    std::vector<char> buffer(static_cast<size_t>(file_info->mSize));

    File file(*fs, path, O_RDONLY);

    // Upper bound for a single Read() request so it always fits in tSize.
    const size_t kMaxChunk = static_cast<size_t>(1) << 30;

    size_t bytes_read = 0;
    while (bytes_read < buffer.size()) {
        const size_t remaining = buffer.size() - bytes_read;
        const tSize request = static_cast<tSize>(
                remaining < kMaxChunk ? remaining : kMaxChunk);
        const tSize read_length = file.Read(&buffer[bytes_read], request);
        if (read_length == 0) {
            // No more data although the reported size has not been reached
            // (e.g. the file shrank after it was stat'ed). Stop here; looping
            // on would never make progress.
            break;
        }
        // Read() throws on negative results, so read_length is positive here.
        bytes_read += static_cast<size_t>(read_length);
    }

    // Keep only what was really read, then hand the bytes to the caller.
    buffer.resize(bytes_read);
    file_data->swap(buffer);
}
}; // namespace hdfs
}; // namespace tmacam
// vim: et ai sts=4 ts=4 sw=4