-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdata.h
More file actions
75 lines (61 loc) · 1.8 KB
/
data.h
File metadata and controls
75 lines (61 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef _DATA_H_
#define _DATA_H_
#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>
// NOTE: this using-directive is at header scope, so every file that includes
// this header also gets all of namespace std visible unqualified.
using namespace std;
// A word distribution: a list of (word hash, count) pairs.
typedef vector< pair<int,int> > WordsDistrib;
// Readable aliases for the pair members: p.Word is p.first, p.Count is p.second.
// These are plain macros, so they rewrite every later `Word` / `Count` token in
// any file that includes this header.
#define Word first
#define Count second
// Base (BS) and modulus (mod) of the polynomial hash computed by mapping().
#define BS 13131
#define mod 1000000007
// Default inclusive bounds on the word length accepted by read().
#define wordRangeL 0
#define wordRangeR 255
/**
 * Hash a NUL-terminated word to an integer in [0, mod).
 *
 * Polynomial rolling hash: starting from 0, each character c updates the
 * accumulator to (acc * BS + (c + 128)) % mod. The empty string hashes to 0.
 *
 * @param _word NUL-terminated string to hash; must not be null.
 * @return the hash value, which always fits in an int because it is < mod.
 */
inline int mapping( const char* _word ){
    long long acc = 0;
    for( const char* cur = _word ; *cur != '\0' ; ++cur ){
        // On platforms where plain char is signed, the +128 offset moves
        // every character value into 0..255.
        const int shifted = *cur + 128;
        acc = ( acc * BS + shifted ) % mod;
    }
    return static_cast<int>( acc );
}
/**
 * Collapse a distribution so that each distinct Word appears exactly once.
 *
 * The input is sorted, then every run of entries sharing the same Word is
 * replaced by a single (Word, run length) entry. The Count values of the
 * incoming entries are not summed; only the number of entries per Word
 * matters.
 *
 * @param _dist entries to collapse (taken by value, so the caller's copy is
 *              left untouched).
 * @return a new distribution in ascending Word order, one entry per Word.
 */
inline WordsDistrib normalize( WordsDistrib _dist ){
    sort( _dist.begin() , _dist.end() );

    WordsDistrib merged;
    size_t runStart = 0;
    while( runStart < _dist.size() ){
        // Advance runEnd to one past the last entry with the same Word.
        size_t runEnd = runStart;
        do{
            ++ runEnd;
        }while( runEnd < _dist.size() &&
                _dist[ runEnd ].Word == _dist[ runStart ].Word );

        merged.push_back( make_pair( _dist[ runStart ].Word , runEnd - runStart ) );
        runStart = runEnd;
    }
    return merged;
}
// Shared scratch buffers. `input` holds the token currently being processed
// (read() fills it, standardize() rewrites it in place); `tmp` is the staging
// area standardize() copies through.
// NOTE(review): these are ordinary (non-inline, non-static) definitions in a
// header, so including this header from more than one translation unit will
// define them twice.
char input[ 256 ] , tmp[ 256 ];

/**
 * Trim the NUL-terminated string in `input`, in place, so that it starts and
 * ends with a letter or digit.
 *
 * Leading characters that are neither letters nor digits are dropped first,
 * then trailing ones; characters in the middle are left alone. A string with
 * no letter or digit at all becomes the empty string. On return `tmp` holds
 * the same trimmed string as `input`.
 */
inline void standardize(){
    int first = 0;
    int last = static_cast<int>( strlen( input ) ) - 1;

    // The <ctype.h> classifiers require an argument representable as
    // unsigned char (or EOF); passing a plain char with a negative value
    // (any non-ASCII byte where char is signed) is undefined behaviour, so
    // convert before classifying.
    while( first <= last &&
           !isalnum( static_cast<unsigned char>( input[ first ] ) ) ) first ++;
    while( first <= last &&
           !isalnum( static_cast<unsigned char>( input[ last ] ) ) ) last --;

    // first <= last + 1 always holds here, so kept is never negative.
    const int kept = last - first + 1;
    memcpy( tmp , input + first , static_cast<size_t>( kept ) );
    tmp[ kept ] = '\0';
    // Copy back including the terminator.
    memcpy( input , tmp , static_cast<size_t>( kept ) + 1 );
}
/**
 * Build the word distribution of a text file.
 *
 * The file is split into whitespace-delimited tokens. Each token is trimmed
 * with standardize(), dropped if its trimmed length falls outside
 * [rangel, ranger], and otherwise hashed with mapping(). The collected hashes
 * are collapsed with normalize() into one (hash, occurrences) entry per
 * distinct hash.
 *
 * @param path   file to read.
 * @param rangel minimum accepted word length (inclusive).
 * @param ranger maximum accepted word length (inclusive).
 * @return the normalized distribution; empty if the file cannot be opened.
 */
inline WordsDistrib read( const char* path , const int rangel = wordRangeL ,
                          const int ranger = wordRangeR ){
    WordsDistrib _dist;
    FILE *fin = fopen( path , "r" );
    // fopen returns NULL on failure; reading from or closing a NULL stream
    // would crash, so report "no words" instead.
    if( fin == NULL ) return _dist;

    // The width 255 leaves room for the terminator in the 256-byte `input`
    // buffer; an unbounded "%s" would overflow it on a longer token.
    while( fscanf( fin , "%255s" , input ) == 1 ){
        // If the character right after the token is not whitespace, the
        // token did not fit in the buffer. Consume the rest of it and skip
        // it so that its pieces are not counted as separate words.
        int next = fgetc( fin );
        const bool truncated = ( next != EOF && !isspace( next ) );
        while( next != EOF && !isspace( next ) ) next = fgetc( fin );
        if( truncated ) continue;

        standardize();
        const int len = static_cast<int>( strlen( input ) );
        if( len < rangel || len > ranger ) continue;
        _dist.push_back( make_pair( mapping( input ) , 1 ) );
    }
    fclose( fin );
    return normalize( _dist );
}
#endif