怎么用c语言编一个词法分析器
发布网友
发布时间:2022-04-26 19:38
我来回答
共1个回答
热心网友
时间:2022-07-13 00:31
简而言之就是先画一个状态图,然后根据图来编码就行
一个简单的xml的词法分析器供参考
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
typedef
struct
{
char
*p;
int
len;
}
xml_Text;
typedef
enum
{
xml_tt_U,
/*
Unknow
*/
xml_tt_H,
/*
Head
<?xxx?>*/
xml_tt_E,
/*
End
</xxx>
*/
xml_tt_B,
/*
Begin
<xxx>
*/
xml_tt_BE,
/*
Begin
End
<xxx/>
*/
xml_tt_T
/*
Text
xxx
*/
}
xml_TokenType;
typedef
struct
{
xml_Text
text;
xml_TokenType
type;
}
xml_Token;
int
xml_initText(xml_Text
*pText,
char
*s)
{
pText->p
=
s;
pText->len
=
strlen(s);
return
0;
}
int
xml_initToken(xml_Token
*pToken,
xml_Text
*pText)
{
pToken->text.p
=
pText->p;
pToken->text.len
=
0;
pToken->type
=
xml_tt_U;
return
0;
}
int
xml_print(xml_Text
*pText)
{
int
i;
for
(i
=
0;
i
<
pText->len;
i++)
{
putchar(pText->p[i]);
}
return
0;
}
int
xml_println(xml_Text
*pText)
{
xml_print(pText);
putchar('\n');
return
0;
}
int
xml_getToken(xml_Text
*pText,
xml_Token
*pToken)
{
char
*start
=
pToken->text.p
+
pToken->text.len;
char
*p
=
start;
char
*end
=
pText->p
+
pText->len;
int
state
=
0;
pToken->text.p
=
p;
pToken->type
=
xml_tt_U;
for
(;
p
<
end;
p++)
{
switch(state)
{
case
0:
switch(*p)
{
case
'<':
state
=
1;
break;
default:
state
=
7;
break;
}
break;
case
1:
switch(*p)
{
case
'?':
state
=
2;
break;
case
'/':
state
=
4;
break;
default:
state
=
5;
break;
}
break;
case
2:
switch(*p)
{
case
'?':
state
=
3;
break;
default:
state
=
2;
break;
}
break;
case
3:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_H;
return
1;
default:
state
=
-1;
break;
}
break;
case
4:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_E;
return
1;
default:
state
=
4;
break;
}
break;
case
5:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_B;
return
1;
case
'/':
state
=
6;
break;
default:
state
=
5;
break;
}
break;
case
6:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_BE;
return
1;
default:
state
=
-1;
break;
}
break;
case
7:
switch(*p)
{
case
'<':
p--;
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_T;
return
1;
default:
state
=
7;
break;
}
break;
default:
pToken->text.len
=
p
-
start
+
1;
pToken->type
=
xml_tt_T;
return
1;
}
}
return
0;
}
int
main()
{
int
ret
=
0;
xml_Text
xml;
xml_initText(&xml,
"<?xml?><root>
ss
<haha>hoho</haha></root>");
xml_Token
token;
xml_initToken(&token,
&xml);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
ret
=
xml_getToken(&xml,
&token);
printf("ret=%d;text=",ret);
xml_print(&token.text);
printf(";type=%d;\n\n",
token.type);
return
0;
}