-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTMLparser.cs
More file actions
104 lines (102 loc) · 3.07 KB
/
HTMLparser.cs
File metadata and controls
104 lines (102 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace NBrowser
{
/// <summary>
/// Minimal HTML-like markup parser that turns a string into a tree of <see cref="HTMLOBJ"/> nodes.
/// </summary>
/// <remarks>
/// Limitations of this implementation: while looking for an element's closing tag, every '&lt;'
/// that is not followed by '/' is counted as opening a nested element, so void elements, comments
/// and doctype declarations are not treated specially. Text that sits beside tags at the same
/// nesting level is skipped; text is only returned when a fragment contains no '&lt;' at all.
/// </remarks>
internal class HTMLParser
{
    public HTMLParser()
    {
    }

    /// <summary>Parses <paramref name="t"/> and returns the nodes found at its top level.</summary>
    /// <param name="t">Markup to parse.</param>
    /// <returns>
    /// A single "text" node carrying <paramref name="t"/> when it contains no '&lt;'; otherwise the
    /// elements found at the top level, each with its parsed content in <c>child</c>.
    /// </returns>
    public List<HTMLOBJ> parse(string t)
    {
        return block(t);
    }

    /// <summary>
    /// Parses one nesting level of markup and recurses into the content of each element found.
    /// </summary>
    /// <param name="t">The markup fragment for this level.</param>
    /// <returns>
    /// The elements found in <paramref name="t"/> ("script" and "style" elements are dropped), or a
    /// single "text" node holding <paramref name="t"/> when the fragment contains no tag.
    /// </returns>
    List<HTMLOBJ> block(string t)
    {
        int i = 0;
        List<HTMLOBJ> cblk = new List<HTMLOBJ>();
        bool sawTag = false;

        while (i < t.Length)
        {
            if (t[i] == '<')
            {
                sawTag = true;
                i++;

                // Tag name: everything up to the first space or '>'.
                int tagStart = i;
                while (i < t.Length && t[i] != '>' && t[i] != ' ')
                {
                    i++;
                }
                string tag = t.Substring(tagStart, i - tagStart);

                if (i < t.Length && t[i] == ' ')
                {
                    i++;
                }

                // Raw attribute text: everything up to the '>' that ends the opening tag.
                int attStart = i;
                while (i < t.Length && t[i] != '>')
                {
                    i++;
                }
                string att = t.Substring(attStart, i - attStart);
                i++; // step past '>' (may move beyond the end if the tag was never closed)

                // Collect the element's content up to its matching closing tag.
                // depth counts nested opening tags that still await their own "</".
                StringBuilder child = new StringBuilder();
                int depth = 0;
                while (i < t.Length)
                {
                    if (t[i] == '<')
                    {
                        // Bounds check: a '<' as the very last character cannot start "</".
                        if (i + 1 < t.Length && t[i + 1] == '/')
                        {
                            if (depth == 0)
                            {
                                i++; // leave i on the '/' of the matching closing tag
                                break;
                            }
                            // Closing tag of a nested element: keep it as part of the content.
                            child.Append('<');
                            i++;
                            depth--;
                        }
                        else
                        {
                            depth++;
                        }
                    }
                    child.Append(t[i]);
                    i++;
                }

                // Skip the rest of the closing tag; the i++ at the bottom of the loop steps over '>'.
                while (i < t.Length && t[i] != '>')
                {
                    i++;
                }

                // Script and style elements are dropped, so their content is never parsed as markup.
                if (tag != "script" && tag != "style")
                {
                    cblk.Add(new HTMLOBJ(tag, att, "", block(child.ToString())));
                }
            }
            i++;
        }

        if (sawTag)
        {
            return cblk;
        }
        return new List<HTMLOBJ> { new HTMLOBJ("text", "", t, null) };
    }
}
/// <summary>
/// One node of a parsed markup tree: a tag name, the raw attribute text of that tag,
/// a literal text payload, and an optional list of child nodes.
/// </summary>
class HTMLOBJ
{
    /// <summary>Tag name of the node.</summary>
    public string tag { get; set; }

    /// <summary>Unparsed attribute text of the node.</summary>
    public string att { get; set; }

    /// <summary>Text payload of the node.</summary>
    public string textchild { get; set; }

    /// <summary>Child nodes, or <c>null</c> when none were supplied.</summary>
    public List<HTMLOBJ>? child { get; set; }

    /// <summary>Creates a node from its four components.</summary>
    /// <param name="tag">Tag name.</param>
    /// <param name="att">Raw attribute text.</param>
    /// <param name="textchild">Text payload.</param>
    /// <param name="child">Child nodes; defaults to <c>null</c>.</param>
    public HTMLOBJ(string tag, string att, string textchild, List<HTMLOBJ>? child = null)
    {
        (this.tag, this.att, this.textchild, this.child) = (tag, att, textchild, child);
    }
}
}