请把课堂上的词法分析器写成一个cpp程序。
另外,程序的输入由从文件输入改为从控制台输入。
主程序 (不能修改)
#include "source.cpp"
参考答案(由WZB提供)
#ifndef __UTIL__
#define __UTIL__
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
using namespace std;
#define I(i,n) for (int i = 0; i < (n); ++ i)
#ifndef __LIST__
#define __LIST__
template <typename E>
struct _Node
{
E elem;
_Node * next;
_Node * prev;
};
class NotSuchElementException {};
template <typename E>
class LinkedList
{
int _size;
_Node<E> * head;
_Node<E> * tail;
public:
LinkedList() : _size(0) {}
int size() const { return _size; }
E & getFirst() const {
if (_size == 0) throw NotSuchElementException();
return head->elem;
}
E & getLast() const {
if (_size == 0) throw NotSuchElementException();
return tail->elem;
}
E removeFirst() {
if (_size == 0) throw NotSuchElementException();
_Node<E> * tmp = head;
head = head->next;
-- _size;
E e = tmp->elem;
delete tmp;
return e;
}
E removeLast() {
if (_size == 0) throw NotSuchElementException();
_Node<E> * tmp = tail;
tail = tail->prev;
-- _size;
E e = tmp->elem;
delete tmp;
return e;
}
void addFirst(const E & elem) {
_Node<E> * tmp = new _Node<E>;
tmp->elem = elem;
tmp->next = head;
if (_size > 0) head->prev = tmp;
head = tmp;
if (_size == 0) tail = tmp;
++ _size;
}
void addLast(const E & elem) {
_Node<E> * tmp = new _Node<E>;
tmp->elem = elem;
tmp->prev = tail;
if (_size > 0) tail->next = tmp;
tail = tmp;
if (_size == 0) head = tmp;
++ _size;
}
void clear() {
while (_size > 0) removeFirst();
}
~LinkedList() {
clear();
}
operator vector<E>() const {
vector<E> vec;
_Node<E> * tmp = head;
I(i, _size) {
vec.push_back(tmp->elem);
tmp = tmp->next;
}
return vec;
}
void copy_list(const vector<E> & vec) {
clear();
I(i, vec.size()) {
addLast(vec[i]);
}
}
LinkedList(const vector<E> & vec) : _size(0) {
copy_list(vec);
}
LinkedList(const LinkedList & list) : _size(0) {
copy_list(vector<E>(list));
}
LinkedList & operator = (const LinkedList & list) {
copy_list(vector<E>(list));
return *this;
}
};
#endif
#ifndef __HASH__
#define __HASH__
#ifndef ___HASH__
#define ___HASH__
class NoSuchKeyException {};
template <typename I>
I circular_shift(I bits, int shift_bits) {
I num_bits = 8 * sizeof(I);
shift_bits = shift_bits % num_bits;
return bits << shift_bits | bits >> (num_bits - shift_bits);
}
int operator%(const string & key, int mod) {
size_t code = 0;
I(i, key.size())
code ^= circular_shift(key[i], i * 5);
return code % mod;
}
// 如果需要把类型 K 作为键,需要有:
// 1) int operater%(K, int)
// 2) bool K::operater==(K)
template <typename K, typename T>
class _Hash
{
vector<T> tuples;
size_t _size;
size_t canonical_index(const K & key) const {
return key % tuples.size();
}
// the returned tuple is either empty or with the same key
size_t indexOfKey(const K & key) const {
size_t index = canonical_index(key);
while (true) { // will enter a dead loop if there is not empty tuple
if (! tuples[index].in_use) return index;
if (tuples[index].key == key) return index;
index = (index + 1) % tuples.size();
}
}
public:
_Hash() : _size(0) {
tuples.resize(2);
}
bool hasKey(const K & key) const {
size_t index = indexOfKey(key);
return tuples[index].in_use;
}
T & get(const K & key) {
size_t index = indexOfKey(key);
if (! tuples[index].in_use) throw NoSuchKeyException();
return tuples[index];
}
const T & get(const K & key) const {
size_t index = indexOfKey(key);
if (! tuples[index].in_use) throw NoSuchKeyException();
return tuples[index];
}
void put(const T & tuple) {
size_t index = indexOfKey(tuple.key);
bool in_use = tuples[index].in_use;
tuples[index] = tuple;
tuples[index].in_use = true;
if (in_use) return;
++ _size;
// make sure empty > size / 2
if (2 * _size > tuples.size())
_double_tuples();
}
void _double_tuples() {
vector<T> non_empty_tuples;
I(i, tuples.size())
if (tuples[i].in_use)
non_empty_tuples.push_back(tuples[i]);
tuples.resize(tuples.size() * 2);
clear();
I(i, non_empty_tuples.size())
put(non_empty_tuples[i]);
}
static bool _between(size_t hole, size_t canonical, size_t tuple_index) {
if (hole < tuple_index)
return canonical > hole && canonical <= tuple_index;
else
return canonical > hole || canonical <= tuple_index;
}
void remove(const K & key) {
size_t index = indexOfKey(key);
if (! tuples[index].in_use)
throw NoSuchKeyException();
tuples[index].in_use = false;
-- _size;
// if there is a hole between a tuple and its canonical position
// move the tuple to the hole
size_t hole = index;
size_t tuple_index = hole;
while (true) {
tuple_index = (tuple_index + 1) % tuples.size();
if (! tuples[tuple_index].in_use) return;
size_t canonical = canonical_index(tuples[tuple_index].key);
if (_between(hole, canonical, tuple_index)) continue;
tuples[hole] = tuples[tuple_index];
hole = tuple_index;
tuples[hole].in_use = false;
}
}
size_t size() const { return _size; }
void clear() {
I(i, tuples.size())
tuples[i].in_use = false;
_size = 0;
}
vector<K> keys() const {
vector<K> ks;
I(i, tuples.size())
if (tuples[i].in_use)
ks.push_back(tuples[i].key);
return ks;
}
};
#endif
template <typename K, typename V>
class HashTable
{
class _Tuple
{
public:
K key;
V val;
bool in_use;
_Tuple() : in_use(false) {}
_Tuple(const K & k, const V & v) : in_use(false), key(k), val(v) {}
};
_Hash<K,_Tuple> _hash;
public:
bool hasKey(const K & key) const {
return _hash.hasKey(key);
}
size_t size() const { return _hash.size(); }
void clear() {
_hash.clear();
}
const V & get(const K & k) const {
return _hash.get(k).val;
}
V & get(const K & k) {
return _hash.get(k).val;
}
void put(const K & k, const V & v) {
_hash.put(_Tuple(k, v));
}
void remove(const K & k) {
_hash.remove(k);
}
vector<K> keys() const {
return _hash.keys();
}
};
template <typename E>
class HashSet
{
class _Tuple
{
public:
E key;
bool in_use;
_Tuple() : in_use(false) {}
_Tuple(const E & e) : in_use(false), key(e) {}
};
_Hash<E,_Tuple> _hash;
public:
bool has(const E & e) const {
return _hash.hasKey(e);
}
size_t size() const { return _hash.size(); }
void clear() {
_hash.clear();
}
void add(const E & e) {
_hash.put(_Tuple(e));
}
void remove(const E & e) {
_hash.remove(e);
}
operator vector<E>() const {
return _hash.keys();
}
};
#endif
#endif
class CompilationError : public runtime_error
{
public:
CompilationError(const char err_msg[]) : runtime_error(err_msg) {}
};
string _normalize_braket(string text) {
string text2 = text;
I(i, text2.size())
switch (text[i]) {
case '(': text2[i] = '['; break;
case ')': text2[i] = ']'; break;
}
return text2;
}
class Span
{
public:
int line_no;
int prev_end;
int char_start;
int char_end;
string type;
string text;
string key;
vector<Span *> children;
Span() {}
Span(int line_no, int prev_end, int char_start, int char_end,
const string & type, const string & text) {
this->line_no = line_no;
this->prev_end = prev_end;
this->char_start = char_start;
this->char_end = char_end;
this->type = type;
this->text = text;
stringstream ss;
ss<<type<<"("<<char_start<<","<<char_end<<")";
this->key = ss.str();
}
string linearize() const {
stringstream ss;
ss<<"("<<_normalize_braket(type);
if (children.size() == 0)
ss<<" "<<_normalize_braket(text);
else {
I(i, children.size())
ss<<" "<<children[i]->linearize();
}
ss<<")";
return ss.str();
}
};
typedef int (* matcher)(const string & line, int start);
class _Matcher
{
public:
string type;
virtual int match(const string & line, int start) const = 0;
};
class TextMatcher : public _Matcher
{
string text;
public:
TextMatcher(const string & type, const string & text) {
this->type = type;
this->text = text;
}
int match(const string & line, int start) const {
string word = line.substr(start, this->text.size());
return (word == this->text) ? this->text.size() : 0;
}
};
class SymbolMatcher : public _Matcher
{
matcher func;
public:
SymbolMatcher(const string & type, matcher func) {
this->type = type;
this->func = func;
}
int match(const string & line, int start) const {
return this->func(line, start);
}
};
class LineLex
{
public:
vector<_Matcher *> matchers;
void match(const string & line, int line_no, vector<Span> & spans) {
int prev_end = 0;
for (int i = 0; i < line.size(); ++ i) {
if (line[i] == ' ' || line[i] == '\t') continue;
int max_match_size = -1;
string match_type;
for (int j = 0; j < this->matchers.size(); ++ j) {
int match_size = this->matchers[j]->match(line, i);
if (match_size > 0 && match_size > max_match_size) {
max_match_size = match_size;
match_type = this->matchers[j]->type;
}
}
if (max_match_size == -1) {
stringstream ss;
ss<<"Lexical error: "<<line.substr(i)<<" line# "<<(line_no+1)<<" char#"<<(i+1);
throw CompilationError(ss.str().c_str());
}
spans.push_back(Span(line_no, prev_end, i, i+max_match_size,
match_type, line.substr(i, max_match_size)));
prev_end = i+max_match_size;
i += max_match_size - 1;
}
}
};
//+ $int
int match_lex_int(const string & line, int start) {
for (int i = start; i < line.size(); ++ i) {
if (line[i] >= '0' && line[i] <= '9') continue;
return i - start;
}
return line.size() - start;
}
//+ $float
int match_lex_float(const string & line, int start) {
int num_dot = 0;
for (int i = start; i < line.size(); ++ i) {
if (line[i] == '.') {
num_dot += 1;
continue;
}
if (line[i] >= '0' && line[i] <= '9') continue;
return i - start;
}
if (num_dot != 1) return 0;
return line.size() - start;
}
const char * op[] = {"and", "not", "or", "xor", "+", "-", "*", "/", "//",
"~", "%", "^", "&", "|", ">", "<", ">=", "<=", "==", "!="};
bool op_has(const string & text) {
I(i, sizeof(op)/sizeof(char *))
if (text == op[i]) return true;
return false;
}
//+ $op
int match_lex_op(const string & line, int start) {
if (op_has(line.substr(start, 3))) return 3;
if (op_has(line.substr(start, 2))) return 2;
if (op_has(line.substr(start, 1))) return 1;
return 0;
}
//+ $id
int match_lex_id(const string & line, int start) {
for (int i = start; i < line.size(); ++ i) {
if (i != start && line[i] >= '0' && line[i] <= '9') continue;
if (line[i] == '_') continue;
if (line[i] >= 'a' && line[i] <= 'z') continue;
if (line[i] >= 'A' && line[i] <= 'Z') continue;
return i - start;
}
return line.size() - start;
}
//+ $text
int match_lex_text(const string & line, int start) {
char delimitor = ' ';
for (int i = start; i < line.size(); ++ i) {
if (i == start) {
if (line[i] == '"' || line[i] == '\'') {
delimitor = line[i];
continue;
}
}
else {
if (line[i] == delimitor) {
return i + 1 - start;
}
if (line[i] == '\\') {
if (i + 1 == line.size()) return 0;
if (line[i + 1] == '\\' || line[i + 1] == delimitor ||
line[i + 1] == 'n' || line[i + 1] == 'r' || line[i + 1] == 't') {
++ i;
continue;
}
else return 0;
}
continue;
}
return 0;
}
return 0;
}
//+ $comment
int match_lex_comment(const string & line, int start) {
if (line[start] != '#') return 0;
return line.size() - start;
}
LineLex line_lex;
TextMatcher t_m_0("'let'", "let");
TextMatcher t_m_1("'='", "=");
TextMatcher t_m_2("'def'", "def");
TextMatcher t_m_3("'('", "(");
TextMatcher t_m_4("')'", ")");
TextMatcher t_m_5("'if'", "if");
TextMatcher t_m_6("'else'", "else");
TextMatcher t_m_7("'for'", "for");
TextMatcher t_m_8("'in'", "in");
TextMatcher t_m_9("'while'", "while");
TextMatcher t_m_10("'return'", "return");
TextMatcher t_m_11("','", ",");
SymbolMatcher s_m_0("$int", match_lex_int);
SymbolMatcher s_m_1("$float", match_lex_float);
SymbolMatcher s_m_2("$op", match_lex_op);
SymbolMatcher s_m_3("$id", match_lex_id);
SymbolMatcher s_m_4("$text", match_lex_text);
SymbolMatcher s_m_5("$comment", match_lex_comment);
void init_rules() {
line_lex.matchers.push_back(&t_m_0);
line_lex.matchers.push_back(&t_m_1);
line_lex.matchers.push_back(&t_m_2);
line_lex.matchers.push_back(&t_m_3);
line_lex.matchers.push_back(&t_m_4);
line_lex.matchers.push_back(&t_m_5);
line_lex.matchers.push_back(&t_m_6);
line_lex.matchers.push_back(&t_m_7);
line_lex.matchers.push_back(&t_m_8);
line_lex.matchers.push_back(&t_m_9);
line_lex.matchers.push_back(&t_m_10);
line_lex.matchers.push_back(&t_m_11);
line_lex.matchers.push_back(&s_m_0);
line_lex.matchers.push_back(&s_m_1);
line_lex.matchers.push_back(&s_m_2);
line_lex.matchers.push_back(&s_m_3);
line_lex.matchers.push_back(&s_m_4);
line_lex.matchers.push_back(&s_m_5);
}
int main(int argc, char * argv[]) {
/*ifstream infile(filename);
if (infile.fail()) {
cout << "Cannot open file " << filename << endl;
return;
}*/
init_rules();
string line;
vector<vector<Span> > spans;
int i = 0;
while (getline(cin, line)) {
spans.push_back(vector<Span>());
line_lex.match(line, i, spans[i]);
I(j, spans[i].size())
cout << spans[i][j].type << " " << spans[i][j].text << endl;
++ i;
}
}