C.1(C++)

请把课堂上的词法分析器写成一个cpp程序。
另外,程序的输入由从文件输入改为从控制台输入。
主程序 (不能修改)

#include "source.cpp"

参考答案(由WZB提供)

#ifndef __UTIL__
#define __UTIL__

#include <string>
#include <vector>
#include <iostream>
#include <sstream>
using namespace std;

#define I(i,n) for (int i = 0; i < (n); ++ i)

#ifndef __LIST__
#define __LIST__

template <typename E>
struct _Node
{
    E elem;
    _Node * next;
    _Node * prev;
};

class NotSuchElementException {};

template <typename E>
class LinkedList
{
    int _size;
    _Node<E> * head;
    _Node<E> * tail;

public:
    LinkedList() : _size(0) {}

    int size() const { return _size; }

    E & getFirst() const {
        if (_size == 0) throw NotSuchElementException();
        return head->elem;
    }

    E & getLast() const {
        if (_size == 0) throw NotSuchElementException();
        return tail->elem;
    }

    E removeFirst() {
        if (_size == 0) throw NotSuchElementException();
        _Node<E> * tmp = head;
        head = head->next;
        -- _size;
        E e = tmp->elem;
        delete tmp;
        return e;
    }

    E removeLast() {
        if (_size == 0) throw NotSuchElementException();
        _Node<E> * tmp = tail;
        tail = tail->prev;
        -- _size;
        E e = tmp->elem;
        delete tmp;
        return e;
    }

    void addFirst(const E & elem) {
        _Node<E> * tmp = new _Node<E>;
        tmp->elem = elem;
        tmp->next = head;
        if (_size > 0) head->prev = tmp;
        head = tmp;
        if (_size == 0) tail = tmp;
        ++ _size;
    }

    void addLast(const E & elem) {
        _Node<E> * tmp = new _Node<E>;
        tmp->elem = elem;
        tmp->prev = tail;
        if (_size > 0) tail->next = tmp;
        tail = tmp;
        if (_size == 0) head = tmp;
        ++ _size;
    }

    void clear() {
        while (_size > 0) removeFirst();
    }

    ~LinkedList() {
        clear();
    }

    operator vector<E>() const {
        vector<E> vec;
        _Node<E> * tmp = head;
        I(i, _size) {
            vec.push_back(tmp->elem);
            tmp = tmp->next;
        }
        return vec;
    }

    void copy_list(const vector<E> & vec) {
        clear();
        I(i, vec.size()) {
            addLast(vec[i]);
        }
    }

    LinkedList(const vector<E> & vec) : _size(0) {
        copy_list(vec);
    }

    LinkedList(const LinkedList & list) : _size(0) {
        copy_list(vector<E>(list));
    }

    LinkedList & operator = (const LinkedList & list) {
        copy_list(vector<E>(list));
        return *this;
    }

};

#endif

#ifndef __HASH__
#define __HASH__


#ifndef ___HASH__
#define ___HASH__

class NoSuchKeyException {};

template <typename I>
I circular_shift(I bits, int shift_bits) {
    I num_bits = 8 * sizeof(I);
    shift_bits = shift_bits % num_bits;
    return bits << shift_bits | bits >> (num_bits - shift_bits);
}

int operator%(const string & key, int mod) {
    size_t code = 0;
    I(i, key.size())
        code ^= circular_shift(key[i], i * 5);
    return code % mod;
}

// 如果需要把类型 K 作为键,需要有:
//    1) int operater%(K, int)
//    2) bool K::operater==(K)

template <typename K, typename T>
class _Hash
{
    vector<T> tuples;
    size_t _size;

    size_t canonical_index(const K & key) const {
        return key % tuples.size();
    }

    // the returned tuple is either empty or with the same key
    size_t indexOfKey(const K & key) const {
        size_t index = canonical_index(key);
        while (true) { // will enter a dead loop if there is not empty tuple
            if (! tuples[index].in_use) return index;
            if (tuples[index].key == key) return index;
            index = (index + 1) % tuples.size();
        }
    }

public:
    _Hash() : _size(0) {
        tuples.resize(2);
    }

    bool hasKey(const K & key) const {
        size_t index = indexOfKey(key);
        return tuples[index].in_use;
    }

    T & get(const K & key) {
        size_t index = indexOfKey(key);
        if (! tuples[index].in_use) throw NoSuchKeyException();
        return tuples[index];
    }

    const T & get(const K & key) const {
        size_t index = indexOfKey(key);
        if (! tuples[index].in_use) throw NoSuchKeyException();
        return tuples[index];
    }

    void put(const T & tuple) {
        size_t index = indexOfKey(tuple.key);
        bool in_use = tuples[index].in_use;
        tuples[index] = tuple;
        tuples[index].in_use = true;
        if (in_use) return;
        ++ _size;
        // make sure empty > size / 2
        if (2 * _size > tuples.size())
            _double_tuples();
    }

    void _double_tuples() {
        vector<T> non_empty_tuples;
        I(i, tuples.size())
            if (tuples[i].in_use)
                non_empty_tuples.push_back(tuples[i]);
        tuples.resize(tuples.size() * 2);
        clear();
        I(i, non_empty_tuples.size())
            put(non_empty_tuples[i]);
    }

    static bool _between(size_t hole, size_t canonical, size_t tuple_index) {
        if (hole < tuple_index)
            return canonical > hole && canonical <= tuple_index;
        else
            return canonical > hole || canonical <= tuple_index;
    }

    void remove(const K & key) { 
        size_t index = indexOfKey(key);
        if (! tuples[index].in_use) 
            throw NoSuchKeyException();
        tuples[index].in_use = false;
        -- _size;
        // if there is a hole between a tuple and its canonical position
        // move the tuple to the hole
        size_t hole = index;
        size_t tuple_index = hole;
        while (true) {
            tuple_index = (tuple_index + 1) % tuples.size();
            if (! tuples[tuple_index].in_use) return;
            size_t canonical = canonical_index(tuples[tuple_index].key);
            if (_between(hole, canonical, tuple_index)) continue;
            tuples[hole] = tuples[tuple_index];
            hole = tuple_index;
            tuples[hole].in_use = false;
        }
    }

    size_t size() const { return _size; }

    void clear() {
        I(i, tuples.size())
            tuples[i].in_use = false;
        _size = 0;
    }

    vector<K> keys() const {
        vector<K> ks;
        I(i, tuples.size())
            if (tuples[i].in_use)
                ks.push_back(tuples[i].key);
        return ks;
    }

};

#endif


template <typename K, typename V>
class HashTable
{
    class _Tuple
    {
    public:
        K key;
        V val;
        bool in_use;

        _Tuple() : in_use(false) {}
        _Tuple(const K & k, const V & v) : in_use(false), key(k), val(v) {}
    };

    _Hash<K,_Tuple> _hash;

public:
    bool hasKey(const K & key) const {
        return _hash.hasKey(key);
    }

    size_t size() const { return _hash.size(); }

    void clear() {
        _hash.clear();
    }

    const V & get(const K & k) const {
        return _hash.get(k).val;
    }

    V & get(const K & k) {
        return _hash.get(k).val;
    }

    void put(const K & k, const V & v) {
        _hash.put(_Tuple(k, v));
    }

    void remove(const K & k) {
        _hash.remove(k);
    }

    vector<K> keys() const {
        return _hash.keys();
    }

};


template <typename E>
class HashSet
{
    class _Tuple
    {
    public:
        E key;
        bool in_use;

        _Tuple() : in_use(false) {}
        _Tuple(const E & e) : in_use(false), key(e) {}
    };

    _Hash<E,_Tuple> _hash;

public:
    bool has(const E & e) const {
        return _hash.hasKey(e);
    }

    size_t size() const { return _hash.size(); }

    void clear() {
        _hash.clear();
    }

    void add(const E & e) {
        _hash.put(_Tuple(e));
    }

    void remove(const E & e) {
        _hash.remove(e);
    }

    operator vector<E>() const {
        return _hash.keys();
    }

};



#endif

#endif

class CompilationError : public runtime_error
{
public:
    CompilationError(const char err_msg[]) : runtime_error(err_msg) {}
};

string _normalize_braket(string text) {
    string text2 = text;
    I(i, text2.size())
        switch (text[i]) {
        case '(': text2[i] = '['; break;
        case ')': text2[i] = ']'; break;
        }
    return text2;
}

class Span
{
public:
    int line_no;
    int prev_end;
    int char_start;
    int char_end;
    string type;
    string text;
    string key;
    vector<Span *> children;

    Span() {}

    Span(int line_no, int prev_end, int char_start, int char_end, 
            const string & type, const string & text) {
        this->line_no = line_no;
        this->prev_end = prev_end;
        this->char_start = char_start;
        this->char_end = char_end;
        this->type = type;
        this->text = text;
        stringstream ss;
        ss<<type<<"("<<char_start<<","<<char_end<<")";
        this->key = ss.str();
    }

    string linearize() const {
        stringstream ss;
        ss<<"("<<_normalize_braket(type);
        if (children.size() == 0)
            ss<<" "<<_normalize_braket(text);
        else {
            I(i, children.size())
                ss<<" "<<children[i]->linearize();
        }
        ss<<")";
        return ss.str();
    }
};

typedef int (* matcher)(const string & line, int start);

class _Matcher
{
public:
    string type;
    virtual int match(const string & line, int start) const = 0;
};

class TextMatcher : public _Matcher
{
    string text;
public:
    TextMatcher(const string & type, const string & text) {
        this->type = type;
        this->text = text;
    }

    int match(const string & line, int start) const {
        string word = line.substr(start, this->text.size());
        return (word == this->text) ? this->text.size() : 0;
    }
};

class SymbolMatcher : public _Matcher
{
    matcher func;
public:
    SymbolMatcher(const string & type, matcher func) {
        this->type = type;
        this->func = func;    
    }

    int match(const string & line, int start) const {
        return this->func(line, start);
    }
};

class LineLex
{
public:
    vector<_Matcher *> matchers;

    void match(const string & line, int line_no, vector<Span> & spans) {
        int prev_end = 0;
        for (int i = 0; i < line.size(); ++ i) {
            if (line[i] == ' ' || line[i] == '\t') continue;
            int max_match_size = -1;
            string match_type;
            for (int j = 0; j < this->matchers.size(); ++ j) {
                int match_size = this->matchers[j]->match(line, i);
                if (match_size > 0 && match_size > max_match_size) {
                    max_match_size = match_size;
                    match_type = this->matchers[j]->type;
                }
            }
            if (max_match_size == -1) {
                stringstream ss;
                ss<<"Lexical error: "<<line.substr(i)<<" line# "<<(line_no+1)<<" char#"<<(i+1);
                throw CompilationError(ss.str().c_str());
            }
            spans.push_back(Span(line_no, prev_end, i, i+max_match_size,
                                match_type, line.substr(i, max_match_size)));
            prev_end = i+max_match_size;
            i += max_match_size - 1;
        }
    }
};

//+ $int
int match_lex_int(const string & line, int start) {
    for (int i = start; i < line.size(); ++ i) {
        if (line[i] >= '0' && line[i] <= '9') continue;
        return i - start;
    }
    return line.size() - start;
}

//+ $float
int match_lex_float(const string & line, int start) {
    int num_dot = 0;
    for (int i = start; i < line.size(); ++ i) {
        if (line[i] == '.') {
            num_dot += 1;
            continue;
        }
        if (line[i] >= '0' && line[i] <= '9') continue;
        return i - start;
    }
    if (num_dot != 1) return 0;
    return line.size() - start;
}

const char * op[] = {"and", "not", "or", "xor", "+", "-", "*", "/", "//", 
            "~", "%", "^", "&", "|", ">", "<", ">=", "<=", "==", "!="};

bool op_has(const string & text) {
    I(i, sizeof(op)/sizeof(char *))
        if (text == op[i]) return true;
    return false;
}
//+ $op
int match_lex_op(const string & line, int start) {
    if (op_has(line.substr(start, 3))) return 3;
    if (op_has(line.substr(start, 2))) return 2;
    if (op_has(line.substr(start, 1))) return 1;
    return 0;
}

//+ $id
int match_lex_id(const string & line, int start) {
    for (int i = start; i < line.size(); ++ i) {
        if (i != start && line[i] >= '0' && line[i] <= '9') continue;
        if (line[i] == '_') continue;
        if (line[i] >= 'a' && line[i] <= 'z') continue;
        if (line[i] >= 'A' && line[i] <= 'Z') continue;
        return i - start;
    }
    return line.size() - start;
}

//+ $text
int match_lex_text(const string & line, int start) {
    char delimitor = ' ';
    for (int i = start; i < line.size(); ++ i) {
        if (i == start) {
            if (line[i] == '"' || line[i] == '\'') {
                delimitor = line[i];
                continue;
            }
        }
        else {
            if (line[i] == delimitor) {
                return i + 1 - start;
            }
            if (line[i] == '\\') {
                if (i + 1 == line.size()) return 0;
                if (line[i + 1] == '\\' || line[i + 1] == delimitor || 
                    line[i + 1] == 'n' || line[i + 1] == 'r' || line[i + 1] == 't') {
                    ++ i;
                    continue;
                }
                else return 0;
            }
            continue;
        }
        return 0;
    }
    return 0;
}

//+ $comment
int match_lex_comment(const string & line, int start) {
    if (line[start] != '#') return 0;
    return line.size() - start;
}

LineLex line_lex;

TextMatcher t_m_0("'let'", "let");
TextMatcher t_m_1("'='", "=");
TextMatcher t_m_2("'def'", "def");
TextMatcher t_m_3("'('", "(");
TextMatcher t_m_4("')'", ")");
TextMatcher t_m_5("'if'", "if");
TextMatcher t_m_6("'else'", "else");
TextMatcher t_m_7("'for'", "for");
TextMatcher t_m_8("'in'", "in");
TextMatcher t_m_9("'while'", "while");
TextMatcher t_m_10("'return'", "return");
TextMatcher t_m_11("','", ",");

SymbolMatcher s_m_0("$int", match_lex_int);
SymbolMatcher s_m_1("$float", match_lex_float);
SymbolMatcher s_m_2("$op", match_lex_op);
SymbolMatcher s_m_3("$id", match_lex_id);
SymbolMatcher s_m_4("$text", match_lex_text);
SymbolMatcher s_m_5("$comment", match_lex_comment);

void init_rules() {
    line_lex.matchers.push_back(&t_m_0);
    line_lex.matchers.push_back(&t_m_1);
    line_lex.matchers.push_back(&t_m_2);
    line_lex.matchers.push_back(&t_m_3);
    line_lex.matchers.push_back(&t_m_4);
    line_lex.matchers.push_back(&t_m_5);
    line_lex.matchers.push_back(&t_m_6);
    line_lex.matchers.push_back(&t_m_7);
    line_lex.matchers.push_back(&t_m_8);
    line_lex.matchers.push_back(&t_m_9);
    line_lex.matchers.push_back(&t_m_10);
    line_lex.matchers.push_back(&t_m_11);

    line_lex.matchers.push_back(&s_m_0);
    line_lex.matchers.push_back(&s_m_1);
    line_lex.matchers.push_back(&s_m_2);
    line_lex.matchers.push_back(&s_m_3);
    line_lex.matchers.push_back(&s_m_4);
    line_lex.matchers.push_back(&s_m_5);
}

int main(int argc, char * argv[]) {
    /*ifstream infile(filename);
    if (infile.fail()) {
        cout << "Cannot open file " << filename << endl;
        return;
    }*/
    init_rules();
    string line;
    vector<vector<Span> > spans;
    int i = 0;
    while (getline(cin, line)) {
        spans.push_back(vector<Span>());
        line_lex.match(line, i, spans[i]);
        I(j, spans[i].size())
            cout << spans[i][j].type << " " << spans[i][j].text << endl;
        ++ i;
    }
}

上一篇: 2021/06/11 下一篇: A.4(C++)
支持 makedown语法