"); g_output.add_string(m_name.c_str()); g_output.add

/* * Copyright (c) 2017-2024 OARC, Inc. * Copyright (c) 2011-2017, IIS - The Internet Foundation in Sweden * All rights reserved. * * This file is part of PacketQ. * * PacketQ is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PacketQ is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with PacketQ. If not, see . */ #include "config.h" #include "sql.h" #include "output.h" #include "packet_handler.h" #include "packetq.h" #include "reader.h" #include #include #include #ifdef WIN32 #include #endif #ifdef HAVE_LIBMAXMINDDB #include #include #include #include static MMDB_s* __cc_mmdb = 0; static MMDB_s* __asn_mmdb = 0; #endif namespace packetq { bool verbose = false; int g_allocs = 0; Column* Table::add_column(const char* name, const char* type, int id, bool hidden) { if (!type) return add_column(name, Coltype::_text, id, hidden); else if (strcmp(type, "bool") == 0) return add_column(name, Coltype::_bool, id, hidden); else if (strcmp(type, "int") == 0) return add_column(name, Coltype::_int, id, hidden); else if (strcmp(type, "float") == 0) return add_column(name, Coltype::_float, id, hidden); else return add_column(name, Coltype::_text, id, hidden); } Column* Table::add_column(const char* name, Coltype::Type type, int id, bool hidden) { Column* col = new Column(name, type, id, hidden); col->m_offset = Table::align(m_curpos, col->m_def.m_align); m_curpos = col->m_offset + col->m_def.m_size; if (type == Coltype::_text) m_text_column_offsets.push_back(col->m_offset); m_cols.push_back(col); return col; } void Table::delete_row(Row* row) { row->decref_text_columns(m_text_column_offsets); m_row_allocator->deallocate(row); } Row* Table::create_row() { if (!m_row_allocator) { m_rsize = sizeof(Row) - ROW_DUMMY_SIZE; // exclude the dummy m_dsize = m_curpos; m_row_allocator = new Allocator(m_rsize + m_dsize, 10000); } Row* r = m_row_allocator->allocate(); r->zero_text_columns(m_text_column_offsets); return r; } void Table::add_row(Row* row) { m_rows.push_back(row); } int g_comp = 0; void Ordering_terms::compile(const std::vector& tables, const std::vector& search_order, Query& q) { for (auto it = m_terms.begin(); it != m_terms.end(); it++) { OP* op = it->m_op; it->m_op = op->compile(tables, search_order, q); } } class Sorter { public: Sorter(Ordering_terms& order) : m_order(order) { } bool operator()(Row* ia, Row* ib) { // this works under the assumption that the ordering terms have // been compiled with only one table so the row index i is 0 Row** ia_rows = &ia; Row** ib_rows = &ib; for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { g_comp++; OP* op = it->m_op; op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); int res = m_a.cmp(m_b); if (res < 0) return true; if (res > 0) return false; } return false; } bool eq(Row* ia, Row* ib) { Row** ia_rows = &ia; Row** ib_rows = &ib; for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { g_comp++; OP* op = it->m_op; op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); int res = m_a.cmp(m_b); if (res != 0) return false; } return true; } int cmp(Row* ia, Row* ib) { Row** ia_rows = &ia; Row** ib_rows = &ib; for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { g_comp++; OP* op = it->m_op; op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); int res = m_a.cmp(m_b); if (res != 0) return res; } return 0; } Ordering_terms& m_order; Variant m_a, m_b; }; struct Stki { int s; int l; int b; }; struct Spkt { Variant cache; int row; }; class Per_sort { public: struct Tlink { Tlink() : m_next(0) , row(0) , m_eq(0) { } Tlink* get_eq() { return m_eq; } void reset() { m_next = 0; row = 0; m_eq = 0; } void add_eq(Tlink* o) { if (!o->m_eq) { // add single if (!m_eq) { // as first m_eq = o; o->m_eq_last = o; return; } // to list m_eq->m_eq_last->m_eq = o; m_eq->m_eq_last = o; return; } else { // add list if (!m_eq) { // as first m_eq = o; o->m_eq_last = o->m_eq->m_eq_last; return; } // to list m_eq->m_eq_last->m_eq = o; m_eq->m_eq_last = o->m_eq->m_eq_last; } } union { Tlink* m_next; Tlink* m_eq_last; }; Row* row; Variant cache; private: Tlink* m_eq; }; struct List { public: void reset() { m_size = 0; m_fl[0] = 0; m_fl[1] = 0; } int m_size; Tlink* m_fl[2]; }; Per_sort(Table& table, Ordering_terms& order) : m_sorter(order) , m_table(table) { m_escalate_sort = order.m_terms.size() > 1; m_asc = order.m_terms.begin()->m_asc ? 1 : -1; m_op = order.m_terms.begin()->m_op; memset(m_groups, 0, sizeof(m_groups)); m_current.reset(); } inline bool add_to_list(List& list, Tlink* t) { if (list.m_fl[0] == 0) { list.m_fl[0] = list.m_fl[1] = t; list.m_size = 1; return true; } list.m_fl[1]->m_next = t; list.m_fl[1] = t; list.m_size++; return true; } inline bool insert_into_list(List& list, Tlink* t) { if (list.m_fl[0] == 0) { list.m_fl[0] = list.m_fl[1] = t; list.m_size = 1; return true; } int cmp0 = cmp(t, list.m_fl[0]); if (cmp0 == 0) { list.m_fl[0]->add_eq(t); return true; } if (cmp0 < 0) { t->m_next = list.m_fl[0]; list.m_fl[0] = t; list.m_size++; return true; } int cmp1 = cmp(t, list.m_fl[1]); if (cmp1 == 0) { list.m_fl[1]->add_eq(t); return true; } if (cmp1 > 0) { list.m_fl[1]->m_next = t; list.m_fl[1] = t; list.m_size++; return true; } return false; } int cmp(Tlink* a, Tlink* b) { int cmp = a->cache.cmp(b->cache) * m_asc; if (cmp != 0 || !m_escalate_sort) return cmp; return m_sorter.cmp(a->row, b->row); } inline void add(Tlink* t) { if (!insert_into_list(m_current, t)) { insert_list(m_current); m_current.reset(); insert_into_list(m_current, t); } } inline void insert_list(List& l) { unsigned int size = l.m_size; int offs = 0; size >>= 1; while (size != 0) { offs++; size >>= 1; } if (m_groups[offs].m_size != 0) { List m = merge(m_groups[offs], l); m_groups[offs].reset(); insert_list(m); } else m_groups[offs] = l; } List merge(List& l1, List& l2) { List r; r.reset(); Tlink* a = l1.m_fl[0]; Tlink* b = l2.m_fl[0]; if (!a) return l2; if (!b) return l1; int size = l1.m_size + l2.m_size; while (a && b) { int c = cmp(a, b); if (c == 0) { Tlink* a2 = a; Tlink* b2 = b; a = a->m_next; b = b->m_next; a2->m_next = 0; b2->m_next = 0; a2->add_eq(b2); add_to_list(r, a2); size--; } else if (c < 0) { Tlink* a2 = a; a = a->m_next; a2->m_next = 0; add_to_list(r, a2); } else { Tlink* b2 = b; b = b->m_next; b2->m_next = 0; add_to_list(r, b2); } } if (a) { r.m_fl[1]->m_next = a; r.m_fl[1] = l1.m_fl[1]; } if (b) { r.m_fl[1]->m_next = b; r.m_fl[1] = l2.m_fl[1]; } l1.reset(); l2.reset(); r.m_size = size; return r; } void sort() { int table_size = (int)m_table.m_rows.size(); if (table_size <= 1) return; auto it = m_table.m_rows.begin(); Tlink* links = new Tlink[table_size]; int i; for (i = 0; i < table_size; i++) { Tlink& r = links[i]; r.reset(); r.row = *it; // &row works under the assumption that m_op has been compiled with // this table only so row index is 0 m_op->evaluate(&r.row, r.cache); it++; add(&r); } if (m_current.m_size) insert_list(m_current); List result; result.reset(); for (i = 0; i < sizeof(m_groups) / sizeof(List); i++) { if (m_groups[i].m_size > 0) { result = merge(result, m_groups[i]); m_groups[i].reset(); } } Tlink* p = result.m_fl[0]; it = m_table.m_rows.begin(); while (p) { *it++ = p->row; Tlink* e = p->get_eq(); while (e) { *it++ = e->row; e = e->get_eq(); } p = p->m_next; }; delete[] links; } OP* m_op; Sorter m_sorter; bool m_escalate_sort; int m_asc; Table& m_table; List m_groups[32]; List m_current; }; void Table::per_sort(Ordering_terms& order) { Per_sort sort(*this, order); sort.sort(); return; } void Table::merge_sort(Ordering_terms& order) { Sorter sorter(order); bool escalate_sort = order.m_terms.size() > 1; int asc = order.m_terms.begin()->m_asc ? 1 : -1; OP* op = order.m_terms.begin()->m_op; int table_size = (int)m_rows.size(); if (table_size <= 1) return; Row** row_ptrs = new Row*[table_size]; Spkt* spktpool = new Spkt[table_size * 2]; Spkt* rows[2]; rows[0] = spktpool; rows[1] = &spktpool[table_size]; auto it = m_rows.begin(); int i = 0; Spkt* r = rows[0]; for (i = 0; i < table_size; i += 2) { row_ptrs[i] = *it++; r[i].row = i; // &row works under the assumption that m_op has been compiled with // this table only so row index is 0 op->evaluate(&row_ptrs[r[i].row], r[i].cache); if (i + 1 < table_size) { row_ptrs[i + 1] = *it++; r[i + 1].row = i + 1; op->evaluate(&row_ptrs[r[i + 1].row], r[i + 1].cache); } } int swap = 0; Stki stack[64]; Stki* sp = stack; sp->s = 0; sp->l = 2; sp->b = 1; rows[1][0] = rows[0][1]; rows[1][1] = rows[0][0]; sp--; int npos = 0; while (true) { int start, len; if (sp > stack && sp->l == sp[-1].l) { // two equal size -> merge len = sp->l <<= 1; start = sp[-1].s; swap = sp[-1].b; sp--; sp--; } else { start = npos; npos += 2; swap = 0; len = 2; } int cnt = start + len > table_size ? table_size - start : len; Spkt* s = rows[swap]; Spkt* d = rows[1 - swap]; if (cnt > 0) { int p1 = start; int p2 = start + (len >> 1); int l1 = len >> 1; int l2 = l1; if (p1 + l1 > table_size) l1 = table_size - p1; if (p2 + l2 > table_size) l2 = table_size - p2; i = start; while (cnt-- > 0) { if (l1 <= 0) { d[i++] = s[p2++]; } else if (l2 <= 0) { d[i++] = s[p1++]; } else { int cmp = s[p1].cache.cmp(s[p2].cache) * asc; if (cmp < 0 || (cmp == 0 && escalate_sort && sorter(row_ptrs[s[p1].row], row_ptrs[s[p2].row]))) { l1--; d[i++] = s[p1++]; } else { l2--; d[i++] = s[p2++]; } } } } ++sp; sp->l = len; sp->s = start; sp->b = 1 - swap; if (len > table_size) { for (it = m_rows.begin(); it != m_rows.end(); it++) { *it = row_ptrs[(d++)->row]; } break; } } delete[] row_ptrs; delete[] spktpool; return; } void Table::limit(int limit, int offset) { int count = 0; auto e = m_rows.end(); for (auto it = m_rows.begin(); it != m_rows.end(); it++) { if (e != m_rows.end()) { delete_row(*e); m_rows.erase(e); e = m_rows.end(); } int l = count++; if (l < offset || l >= offset + limit) { e = it; } } if (e != m_rows.end()) { delete_row(*e); m_rows.erase(e); } } void printrep(int n, char c) { if (n >= 3000) return; char buf[3000]; int i; for (i = 0; i < n; i++) buf[i] = c; buf[i] = 0; printf("%s", buf); } void Table::xml() { g_output.reset(); int cols = (int)m_cols.size(); g_output.add_string("\n"); g_output.add_string("\n"); g_output.add_string("\n\n "); g_output.add_string(m_name.c_str()); g_output.add_string("\n"); g_output.add_string("\n"); g_output.add_string("\n"); g_output.add_string("\n"); g_output.add_string("\n"); g_output.add_string(""); for (int i = 0; i < cols; i++) { if (m_cols[i]->m_hidden) continue; const char* t = ""; switch (m_cols[i]->m_type) { case (Coltype::_float): t = "float"; break; case (Coltype::_int): t = "int"; break; case (Coltype::_text): t = "text"; break; case (Coltype::_bool): t = "bool"; break; } g_output.add_string(""); } g_output.add_string("\n"); for (auto it = m_rows.begin(); it != m_rows.end(); it++) { g_output.add_string(""); Row* r = *it; Variant v; for (int i = 0; i < cols; i++) { Column* c = m_cols[i]; if (c->m_hidden) continue; int offset = c->m_offset; static const int bufsize = 100; char buf[bufsize]; g_output.add_string(" "); } g_output.add_string("\n"); } g_output.add_string("

"); g_output.add_string(m_cols[i]->m_name.c_str()); g_output.add_string("
"); switch (c->m_type) { case Coltype::_bool: g_output.add_string(r->access_column(offset) ? "1" : "0"); break; case Coltype::_int: snprintf(buf, bufsize, "%i", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_float: snprintf(buf, bufsize, "%g", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_text: g_output.add_string(r->access_column(offset)->data); break; } g_output.add_string("

"); g_output.add_string(m_cols[i]->m_name.c_str()); g_output.add_string("

"); switch (c->m_type) { case Coltype::_bool: g_output.add_string(r->access_column(offset) ? "1" : "0"); break; case Coltype::_int: snprintf(buf, bufsize, "%i", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_float: snprintf(buf, bufsize, "%g", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_text: g_output.add_string(r->access_column(offset)->data); break; } g_output.add_string("

\n"); g_output.add_string("\n"); g_output.add_string("\n"); g_output.print(); } void Table::json(bool trailing_comma) { g_output.reset(); int cols = (int)m_cols.size(); g_output.add_string(" {\n "); g_output.add_q_string("table_name"); g_output.add_string(": "); g_output.add_q_string(m_name.c_str()); g_output.add_string(",\n "); g_output.add_q_string("query"); g_output.add_string(": "); g_output.add_q_string(m_qstring.c_str()); g_output.add_string(",\n "); g_output.add_q_string("head"); g_output.add_string(": ["); bool append_comma = false; for (int i = 0; i < cols; i++) { if (m_cols[i]->m_hidden) continue; if (append_comma) g_output.add_string(",\n"); else g_output.add_string("\n"); append_comma = true; g_output.add_string(" { "); g_output.add_q_string("name"); g_output.add_string(": "); g_output.add_q_string(m_cols[i]->m_name.c_str()); g_output.add_string(","); g_output.add_q_string("type"); g_output.add_string(": "); const char* t = ""; switch (m_cols[i]->m_type) { case (Coltype::_float): t = "float"; break; case (Coltype::_int): t = "int"; break; case (Coltype::_text): t = "text"; break; case (Coltype::_bool): t = "bool"; break; } g_output.add_q_string(t); g_output.add_string(" }"); } g_output.add_string("\n ],\n "); g_output.add_q_string("data"); g_output.add_string(": ["); bool outer_comma = false; for (auto it = m_rows.begin(); it != m_rows.end(); it++) { if (outer_comma) g_output.add_string(",\n ["); else g_output.add_string("\n ["); outer_comma = true; bool comma = false; Row* r = *it; for (int i = 0; i < cols; i++) { Column* c = m_cols[i]; if (c->m_hidden) continue; if (comma) g_output.add_string(","); comma = true; int offset = c->m_offset; static const int bufsize = 100; char buf[bufsize]; switch (c->m_type) { case Coltype::_bool: g_output.add_string(r->access_column(offset) ? "1" : "0"); break; case Coltype::_int: snprintf(buf, bufsize, "%i", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_float: snprintf(buf, bufsize, "%g", r->access_column(offset)); g_output.add_string(buf); break; case Coltype::_text: g_output.add_q_string(r->access_column(offset)->data); break; } } g_output.add_string("]"); } g_output.add_string("\n ]\n"); if (trailing_comma) { g_output.add_string(" },\n"); } else { g_output.add_string(" }\n"); } g_output.print(); } std::string csv_qoute_string(const std::string& s) { std::string r = "\""; int len = s.length(); for (int i = 0; i < len; i++) { if (s[i] == '"') { r += '"'; } r += s[i]; } r += '"'; return r; } void Table::csv(bool format) { int cols = (int)m_cols.size(); std::vector col_len(cols); for (int i = 0; i < cols; i++) col_len[i] = 0; int max = 0; char* tmp = 0; if (format) { for (auto it = m_rows.begin(); it != m_rows.end(); it++) { Row* r = *it; for (int i = 0; i < cols; i++) { Column* c = m_cols[i]; if (c->m_hidden) continue; int len = 0; int offset = c->m_offset; static const int bufsize = 100; char buf[bufsize]; switch (c->m_type) { case Coltype::_bool: len = 1; break; case Coltype::_int: snprintf(buf, bufsize, "%i", r->access_column(offset)); len = strlen(buf); break; case Coltype::_float: snprintf(buf, bufsize, "%g", r->access_column(offset)); len = strlen(buf); break; case Coltype::_text: len = csv_qoute_string(r->access_column(offset)->data).length(); break; } len++; if (len > col_len[i]) col_len[i] = len; if (len > max) max = len; } } for (int i = 0; i < cols; i++) { if (m_cols[i]->m_hidden) continue; int l = csv_qoute_string(m_cols[i]->m_name).length(); l++; if (l > col_len[i]) col_len[i] = l; if (l > max) max = l; } tmp = new char[max + 1]; for (int i = 0; i < max; i++) tmp[i] = 32; tmp[max] = 0; } for (int i = 0; i < cols; i++) { if (m_cols[i]->m_hidden) continue; printf("%s", csv_qoute_string(m_cols[i]->m_name).c_str()); if (i < cols - 1) { size_t pos = csv_qoute_string(m_cols[i]->m_name).length() + max - col_len[i] + 1; if (format && pos < max) { printf("%s,", &tmp[pos]); } else { printf(","); } } } printf("\n"); for (auto it = m_rows.begin(); it != m_rows.end(); it++) { Row* r = *it; for (int i = 0; i < cols; i++) { Column* c = m_cols[i]; if (c->m_hidden) continue; int offset = c->m_offset; static const int bufsize = 100; char buf[bufsize]; std::string out; switch (c->m_type) { case Coltype::_bool: out = r->access_column(offset) ? "1" : "0"; break; case Coltype::_int: snprintf(buf, bufsize, "%i", r->access_column(offset)); out = buf; break; case Coltype::_float: snprintf(buf, bufsize, "%g", r->access_column(offset)); out = buf; break; case Coltype::_text: out = csv_qoute_string(r->access_column(offset)->data); break; } fputs(out.c_str(), stdout); if (i < cols - 1) { if (format) { printf("%s,", &tmp[out.length() + max - col_len[i] + 1]); } else { printf(","); } } } printf("\n"); } delete[] tmp; } void Table::dump() { int cols = (int)m_cols.size(); int width = 25; char fmti[40]; snprintf(fmti, sizeof(fmti) - 1, "%%%dd |", width); fmti[39] = 0; char fmtd[40]; snprintf(fmtd, sizeof(fmtd) - 1, "%%%dg |", width); fmtd[39] = 0; char fmts[40]; snprintf(fmts, sizeof(fmts) - 1, "%%%ds |", width); fmts[39] = 0; printf("Table::dump() table:%s cols:%d\n", m_name.c_str(), cols); printrep((width + 2) * cols + 1, '-'); printf("\n"); printf("|"); for (int i = 0; i < cols; i++) printf(fmti, m_cols[i]->m_type); printf("\n"); printf("|"); for (int i = 0; i < cols; i++) printf(fmts, m_cols[i]->m_name.c_str()); printf("\n"); printrep((width + 2) * cols + 1, '*'); printf("\n"); for (auto it = m_rows.begin(); it != m_rows.end(); it++) { printf("|"); Row* r = *it; for (int i = 0; i < cols; i++) { Column* c = m_cols[i]; int offset = c->m_offset; switch (c->m_type) { case Coltype::_bool: printf(fmts, r->access_column(offset) ? "1" : "0"); break; case Coltype::_int: printf(fmti, r->access_column(offset)); break; case Coltype::_float: printf(fmtd, r->access_column(offset)); break; case Coltype::_text: printf(fmts, r->access_column(offset)); break; } } printf("\n"); } printrep((width + 2) * cols + 1, '-'); printf("\n"); } class Parser { private: Token::Type m_last; public: std::list m_tokens; typedef std::list::iterator Lit; Parser() { m_last = Token::_invalid; } void push(Token::Type type, const char* string) { if (!(type == Token::_semicolon && m_last == Token::_semicolon)) m_tokens.push_back(Token(type, string)); m_last = type; } void dump() { for (auto it = m_tokens.begin(); it != m_tokens.end(); it++) { printf("Type %d: %s\n", it->get_type(), it->get_token()); } } bool analyze(Query& q) { auto it = m_tokens.begin(); bool ok = true; while (ok) { ok = false; if (get_sample_stmt(q, it)) ok = true; if (get_select_stmt(q, it)) ok = true; } return true; } bool get_sample_stmt(Query& q, Lit& i_iter) { Lit it = i_iter; if (!is(it, Token::_label, "sample")) return false; it++; if (!is(it, Token::_number)) return false; int sample = atoi(it->get_token()); it++; if (!is(it, Token::_semicolon)) return false; it++; q.m_sample = sample; i_iter = it; return true; } bool get_select_stmt(Query& q, Lit& i_iter) { Lit it = i_iter; if (!get_select_core(q, it)) { return false; } get_from(q, it); get_where(q, it); get_group_by(q, it); get_order_by(q, it); get_limit(q, it); get_as(q, it); if (!is(it, Token::_semicolon)) { throw Error("Expected ';' but found '%s' !", it->get_token()); } it++; i_iter = it; return true; } bool is(Lit& it, Token::Type type, const char* str = 0) { if (!str) return (it->get_type() == type); return (it->get_type() == type && cmpi(it->get_token(), str)); } OP* get_result_column(std::list::iterator& it) { OP* res = 0; Lit save = it; if (is(it, Token::_op, "*")) { it++; return new OP(Token(Token::_column, "*")); } if (it->get_type() == Token::_label) { std::string table = it->get_token(); it++; if (is(it, Token::_op, ".")) { it++; if (is(it, Token::_op, "*")) { it++; std::string c = table + ".*"; return new OP(Token(Token::_column, c.c_str())); } it = save; return 0; } } it = save; if ((res = get_expr(it, 0))) { save = it; if (is(it, Token::_label, "as")) { it++; if (is(it, Token::_label)) { res->m_name = it->get_token(); it++; } else it = save; } } return res; // check for valid table } bool get_select_core(Query& q, Lit& it) { Lit save = it; if (!is(it, Token::_label, "select")) return false; it++; bool again = true; bool success = true; while (again) { OP* op; if ((op = get_result_column(it))) { q.m_select.push_back(op); } else { success = false; break; } if (is(it, Token::_op, ",")) it++; else again = false; } if (success) return true; it = save; return false; } bool get_ordering_terms(Ordering_terms& ordering, std::list::iterator& it) { OP* op; while ((op = get_expr(it, 0))) { bool asc = true; if (it->get_type() == Token::_label) { if (cmpi(it->get_token(), "asc")) { // default } else if (cmpi(it->get_token(), "desc")) { asc = false; } else if (cmpi(it->get_token(), "collate")) { throw Error("unhandled option:collate"); } else { ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc)); break; } it++; } ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc)); if (!is(it, Token::_op, ",")) break; it++; } return true; } bool get_group_by(Query& q, Lit& it) { if (!is(it, Token::_label, "group")) { return true; } it++; if (!is(it, Token::_label, "by")) return false; it++; bool res = get_ordering_terms(q.m_group_by, it); get_having(q, it); return res; } bool get_as(Query& q, Lit& it) { if (!is(it, Token::_label, "as")) { return true; } it++; if (!is(it, Token::_label)) return false; q.m_result->m_name = it->get_token(); it++; return true; } bool get_order_by(Query& q, Lit& it) { if (!is(it, Token::_label, "order")) { return true; } it++; if (!is(it, Token::_label, "by")) return false; it++; return get_ordering_terms(q.m_order_by, it); } bool get_limit(Query& q, Lit& it) { Lit save = it; if (!is(it, Token::_label, "limit")) { return true; } it++; if (!is(it, Token::_number)) { it = save; throw Error("non numeric operand to limit"); } q.m_limit = atoi(it->get_token()); it++; save = it; if (!is(it, Token::_label, "offset")) { return true; } it++; if (!is(it, Token::_number)) { it = save; throw Error("non numeric operand to offset"); } q.m_offset = atoi(it->get_token()); it++; return true; } bool get_having(Query& q, Lit& it) { Lit save = it; if (!is(it, Token::_label, "having")) return true; it++; OP* res = 0; if ((res = get_expr(it, 0))) { q.m_having = res; return true; } it = save; return false; } bool get_where(Query& q, Lit& it) { Lit save = it; if (!is(it, Token::_label, "where")) return true; it++; OP* res = 0; if ((res = get_expr(it, 0))) { q.m_where = res; return true; } it = save; return false; } bool get_from(Query& q, Lit& it) { if (!is(it, Token::_label, "from")) return false; it++; if (it->get_type() == Token::_label) { const char* name = it->get_token(); if (get_packet_handler(name)) { q.m_from_name = name; it++; return true; } else throw Error("Error in from statement, table '%s' not found", name); } throw Error("Error in from statement"); } int get_stack_precedence(std::stack& operator_stack) { int pre = 0; if (!operator_stack.empty()) pre = operator_stack.top()->m_precedence; return pre; } // using The shunting yard algorithm OP* get_expr(Lit& it, int rec) { std::stack operator_stack; std::stack operand_stack; bool success = true; bool expect_expr = true; while (success) { success = false; Lit save = it; Lit next = it; next++; // match literal if (expect_expr && is_literal(it)) { OP* op = new OP(*it); it++; operand_stack.push(op); success = true; expect_expr = false; continue; } // match literal if (expect_expr && is_unary_op(it)) { OP* op = new OP(*it); op->set_type(Token::_uop); it++; if (!(op->m_right = get_expr(it, rec + 1))) throw Error("Got unary '%s' but could not parse following expression", op->get_token()); operand_stack.push(op); success = true; expect_expr = false; continue; } // match function-name ( if (expect_expr && is(it, Token::_label) && is(next, Token::_paren, "(")) { OP* func = new OP(*it); func->set_type(Token::_function); it++; it++; if (is(it, Token::_op, "*")) { it->set_type(Token::_number); it->set_token("1"); } func->m_param[0] = get_expr(it, rec + 1); if (!func->m_param[0]) throw Error("Missing operand to function"); operand_stack.push(func); int n = 1; while (n < OP::max_param() && is(it, Token::_op, ",")) { it++; func->m_param[n++] = get_expr(it, rec + 1); } if (!is(it, Token::_paren, ")")) throw Error("Expected ) after %s", func->get_token()); it++; expect_expr = false; success = true; continue; } // match [[databasename .] table-name . ] column name if (expect_expr && is(it, Token::_label)) { OP* op = new OP(*it); it++; success = true; op->set_type(Token::_column); operand_stack.push(op); expect_expr = false; continue; } // match ( expr ) if (!expect_expr && is(it, Token::_paren, ",")) { break; } if (!expect_expr && is(it, Token::_paren, ")")) { break; } if (expect_expr && is(it, Token::_paren, "(")) { it++; OP* op = 0; if ((op = get_expr(it, rec + 1))) { if (is(it, Token::_paren, ")")) { it++; operand_stack.push(op); expect_expr = false; success = true; continue; } throw Error("Error in expression no )"); } it = save; throw Error("Error in expression"); } // bin op if (!expect_expr && is(it, Token::_op, "is") && is(next, Token::_op, "not")) { it++; it->set_token("is not"); } // bin op if (!expect_expr && is(it, Token::_op, "not") && is(next, Token::_op, "like")) { it++; it->set_token("not like"); } if (!expect_expr && is(it, Token::_op) && OP::is_binary(it->get_token())) { OP* bop = new OP(*it); while (bop->m_precedence <= get_stack_precedence(operator_stack)) { OP* stack_op = operator_stack.top(); operator_stack.pop(); if (operand_stack.size() >= 2) { OP* stk1 = operand_stack.top(); stack_op->m_right = stk1; operand_stack.pop(); OP* stk2 = operand_stack.top(); stack_op->m_left = stk2; operand_stack.pop(); operand_stack.push(stack_op); } } operator_stack.push(bop); it++; success = true; expect_expr = true; continue; } } while (!operator_stack.empty()) { OP* bop = operator_stack.top(); operator_stack.pop(); if (bop) { if (operand_stack.size() >= 2) { OP* stk = operand_stack.top(); bop->m_right = stk; operand_stack.pop(); OP* stk2 = operand_stack.top(); bop->m_left = stk2; operand_stack.pop(); operand_stack.push(bop); } } } if (operand_stack.size() == 0) return 0; return operand_stack.top(); } bool is_unary_op(Lit& it) { if (is(it, Token::_op, "-")) return true; if (is(it, Token::_op, "+")) return true; if (is(it, Token::_op, "~")) return true; if (is(it, Token::_op, "not")) return true; return false; } bool is_literal(Lit& it) { if (it->get_type() == Token::_number || it->get_type() == Token::_string) return true; if (it->get_type() == Token::_label && cmpi(it->get_token(), "null")) return true; return false; } }; class Lexer { public: Lexer(Parser& p) : m_parser(p) , num_state(_nan) { } Parser& m_parser; enum State { _unknown, _white, _label, _number, _op, _paren, _string }; bool is_white(const char c) { return (c == ' ' || c == 9 || c == 10 || c == 13); } bool is_char(const char c) { return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); } bool is_num(const char c) { return ((c >= '0' && c <= '9')); } bool is_op(const char c) { return (c == '*' || c == ',' || c == '=' || c == '<' || c == '>' || c == '/' || c == '|' || c == '%' || c == '+' || c == '-' || c == '&' || c == '~' || c == '!'); } bool is_paren(const char c) { return (c == '(' || c == ')'); } bool is_termination(const char c) { return (c == 0 || c == ';'); } bool is_quote(const char c) { return (c == '\''); } bool is_label_start(const char c) { return (is_char(c)); } bool is_label_part(const char c) { return (is_num(c) || is_char(c) || c == '_'); } bool lex(const char* i_str) { const char* p = i_str; State state = _white; const char* strstart = 0; bool is_escaped = false; std::string str = ""; while (true) { switch (state) { case (_unknown): { if (is_white(*p)) state = _white; else if (is_label_start(*p)) state = _label; else if (is_num(*p) || (*p == '.' && is_num(p[1]))) state = _number; else if (is_quote(*p)) state = _string; else if (is_paren(*p)) state = _paren; else if (is_op(*p)) state = _op; else if (is_termination(*p)) { m_parser.push(Token::_semicolon, ";"); if (*p++ == 0) { m_parser.push(Token::_end, "END"); return true; } } else { m_parser.push(Token::_end, "END"); printf("Unknown char %c (%d) at %d! in statement %s\n", *p, *p, int(p - i_str), i_str); return false; } } break; case (_white): { if (is_white(*p)) p++; else { state = _unknown; } } break; case (_string): { if ((!is_escaped) && is_quote(*p)) { if (!strstart) { strstart = p; str = ""; } else { m_parser.push(Token::_string, str.c_str()); str = ""; strstart = 0; state = _unknown; is_escaped = false; } p++; } else { if (is_escaped) { if (!is_quote(*p)) str += '\\'; is_escaped = false; } if (*p == 0) { printf("Unexpected end of string in statement %s\n", strstart); m_parser.push(Token::_end, "END"); return false; } if (*p == '\\') is_escaped = true; else str += (*p); p++; } } break; case (_number): { if (parse_num(*p)) { if (!strstart) strstart = p; p++; } else { if (!strstart) throw Error("Numeric problem"); std::string label = strstart; label = label.substr(0, p - strstart); m_parser.push(Token::_number, label.c_str()); strstart = 0; state = _unknown; } } break; case (_label): { if (!strstart) { strstart = p++; } else if (is_label_part(*p)) { p++; } else { std::string label = strstart; label = label.substr(0, p - strstart); Token::Type type = Token::_label; if (cmpi(label, "is")) type = Token::_op; if (cmpi(label, "not")) type = Token::_op; if (cmpi(label, "in")) type = Token::_op; if (cmpi(label, "like")) type = Token::_op; if (cmpi(label, "glob")) type = Token::_op; if (cmpi(label, "match")) type = Token::_op; if (cmpi(label, "regexp")) type = Token::_op; if (cmpi(label, "and")) type = Token::_op; if (cmpi(label, "or")) type = Token::_op; m_parser.push(type, label.c_str()); strstart = 0; state = _unknown; } } break; case (_paren): { std::string s; s = *p; m_parser.push(Token::_paren, s.c_str()); p++; state = _unknown; } break; case (_op): { std::string s; s = *p; char n = p[1]; switch (*p) { case ('|'): if (n == '|') { p++; s += *p; } break; case ('>'): if (n == '=') { p++; s += *p; } if (n == '>') { p++; s += *p; } break; case ('<'): if (n == '<') { p++; s += *p; } if (n == '=') { p++; s += *p; } if (n == '>') { p++; s += *p; } break; case ('='): if (n == '=') { p++; s += *p; } break; case ('!'): if (n == '=') { p++; s += *p; } break; } m_parser.push(Token::_op, s.c_str()); p++; state = _unknown; } break; default: printf("Missing impl char %c at %d! in statement %s\n", *p, int(p - i_str), i_str); m_parser.push(Token::_end, "END"); return false; } } } bool parse_num(const char p) { switch (num_state) { case (_nan): num_state = _int; if (p == '.') num_state = _dot; break; case (_int): if (p == '.') num_state = _dot; else if (!is_num(p)) { num_state = _nan; return false; } break; case (_dot): if (p == 'E' || p == 'e') num_state = _e; else if (!is_num(p)) { num_state = _nan; return false; } break; case (_dec): if (p == 'E' || p == 'e') num_state = _e; else if (!is_num(p)) { num_state = _nan; return false; } break; case (_e): if (p == '+' || p == '-') num_state = _sign; else if (is_num(p)) num_state = _exp; else { throw Error("expected number digit after E"); } break; case (_sign): if (!is_num(p)) throw Error("expected number digit after E"); num_state = _exp; break; case (_exp): if (!is_num(p)) { num_state = _nan; return false; } break; } return true; } enum Num_state { _nan, _int, _dot, _dec, _e, _sign, _exp, }; Num_state num_state; }; void Query::parse() { Parser p; Lexer l(p); l.lex(m_sql.c_str()); // p.dump(); if (!p.analyze(*this)) throw Error("error parsing select statement"); } // return column and index in tables, or 0 for column if column isn't found std::pair lookup_column_in_tables(const std::vector& tables, const std::vector& search_order, const char* name) { if (strcmp(name, "*") == 0) return std::pair((Column*)0, 0); for (auto i = search_order.begin(); i != search_order.end(); ++i) { Table* table = tables[*i]; int col_index = table->get_col_index(name); if (col_index >= 0) return std::pair(table->m_cols[col_index], *i); } return std::pair((Column*)0, 0); } OP* OP::compile(const std::vector& tables, const std::vector& search_order, Query& q) { OP* ret = 0; for (int i = 0; i < max_param(); i++) { if (m_param[i]) { m_param[i] = m_param[i]->compile(tables, search_order, q); if (m_param[i]->m_has_aggregate_function) m_has_aggregate_function = true; } } if (m_left) { m_left = m_left->compile(tables, search_order, q); if (m_left->m_has_aggregate_function) m_has_aggregate_function = true; } if (m_right) { m_right = m_right->compile(tables, search_order, q); if (m_right->m_has_aggregate_function) m_has_aggregate_function = true; } // default to destination row m_row_index = tables.size() - 1; if (get_type() == _column) { auto lookup = lookup_column_in_tables(tables, search_order, get_token()); Column* column = lookup.first; m_row_index = lookup.second; if (!column) throw Error("Column '%s' not found", get_token()); int offset = column->m_offset; m_t = column->m_type; switch (m_t) { case Coltype::_int: ret = new Column_access_int(*this, offset); break; case Coltype::_bool: ret = new Column_access_bool(*this, offset); break; case Coltype::_float: ret = new Column_access_float(*this, offset); break; case Coltype::_text: ret = new Column_access_text(*this, offset); break; } } else if (get_type() == _number) { const char* p = get_token(); bool integer = true; while (*p != 0) { if (*p < '0' || *p > '9') integer = false; p++; } if (integer) { m_t = Coltype::_int; ret = new Static_int(*this); } else { m_t = Coltype::_float; ret = new Static_float(*this); } } else if (get_type() == _string) { m_t = Coltype::_text; ret = new Static_text(*this); } else if ((get_type() == _function) && m_param[0]) { Table* dest_table = tables[m_row_index]; m_t = Coltype::_int; if (cmpi(get_token(), "if") && m_param[1] && m_param[2]) { m_t = m_param[1]->m_t; if (m_param[2]->m_t > m_t) m_t = m_param[2]->m_t; ret = new If_func(*this); } else if (cmpi(get_token(), "name") && m_param[1]) { m_t = Coltype::_text; ret = new Name_func(*this); } else if (cmpi(get_token(), "trim")) { m_t = Coltype::_text; ret = new Trim_func(*this); } else if (cmpi(get_token(), "rsplit") && m_param[1]) { m_t = Coltype::_text; ret = new Rsplit_func(*this); } else if (cmpi(get_token(), "netmask")) { m_t = Coltype::_text; ret = new Netmask_func(*this); } else if (cmpi(get_token(), "cc")) { m_t = Coltype::_text; ret = new Cc_func(*this); } else if (cmpi(get_token(), "asn")) { m_t = Coltype::_int; ret = new Asn_func(*this); } else if (cmpi(get_token(), "count")) { m_t = Coltype::_int; ret = new Count_func(*this, dest_table); } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "min")) { m_t = Coltype::_float; ret = new Min_func_float(*this, dest_table); } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "max")) { m_t = Coltype::_float; ret = new Max_func_float(*this, dest_table); } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "sum")) { m_t = Coltype::_float; ret = new Sum_func_float(*this, dest_table); } else if (cmpi(get_token(), "min")) { m_t = Coltype::_int; ret = new Min_func_int(*this, dest_table); } else if (cmpi(get_token(), "max")) { m_t = Coltype::_int; ret = new Max_func_int(*this, dest_table); } else if (cmpi(get_token(), "sum")) { m_t = Coltype::_int; ret = new Sum_func_int(*this, dest_table); } else if (cmpi(get_token(), "lower")) { m_t = Coltype::_text; ret = new Lower_func(*this); } else if (cmpi(get_token(), "len")) { m_t = Coltype::_int; ret = new Len_func(*this); } else if (cmpi(get_token(), "truncate")) { m_t = Coltype::_int; ret = new Truncate_func(*this); } else if (cmpi(get_token(), "stdev")) { m_t = Coltype::_float; ret = new Stdev_func(*this, dest_table); } else if (cmpi(get_token(), "avg")) { m_t = Coltype::_float; ret = new Avg_func(*this, dest_table); } } else if ((get_type() == _op) && m_left && m_right) { if (cmpi(get_token(), "||")) { m_t = Coltype::_text; ret = new Bin_op_concatenate(*this); } else if (cmpi(get_token(), "*")) { if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { m_t = Coltype::_float; ret = new Bin_op_mul_float(*this); } else { m_t = Coltype::_int; ret = new Bin_op_mul(*this); } } else if (cmpi(get_token(), "/")) { m_t = Coltype::_float; ret = new Bin_op_div(*this); } else if (cmpi(get_token(), "%")) { m_t = Coltype::_float; ret = new Bin_op_modulo(*this); } else if (cmpi(get_token(), "+")) { if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { m_t = Coltype::_float; ret = new Bin_op_add_float(*this); } else { m_t = Coltype::_int; ret = new Bin_op_add(*this); } } else if (cmpi(get_token(), "-")) { if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { m_t = Coltype::_float; ret = new Bin_op_sub_float(*this); } else { m_t = Coltype::_int; ret = new Bin_op_sub(*this); } } else if (cmpi(get_token(), "<<")) { m_t = Coltype::_int; ret = new Bin_op_arithmetic_shift_left(*this); } else if (cmpi(get_token(), ">>")) { m_t = Coltype::_int; ret = new Bin_op_arithmetic_shift_right(*this); } else if (cmpi(get_token(), "&")) { m_t = Coltype::_int; ret = new Bin_op_bitwise_and(*this); } else if (cmpi(get_token(), "|")) { m_t = Coltype::_int; ret = new Bin_op_bitwise_or(*this); } else if (cmpi(get_token(), "<")) { m_t = Coltype::_bool; ret = new Bin_op_lt(*this); } else if (cmpi(get_token(), "<=")) { m_t = Coltype::_bool; ret = new Bin_op_lteq(*this); } else if (cmpi(get_token(), ">")) { m_t = Coltype::_bool; ret = new Bin_op_gt(*this); } else if (cmpi(get_token(), ">=")) { m_t = Coltype::_bool; ret = new Bin_op_gteq(*this); } else if (cmpi(get_token(), "=")) { m_t = Coltype::_bool; ret = new Bin_op_eq(*this); } else if (cmpi(get_token(), "==")) { m_t = Coltype::_bool; ret = new Bin_op_eq(*this); } else if (cmpi(get_token(), "like")) { m_t = Coltype::_bool; ret = new Bin_op_like(*this); } else if (cmpi(get_token(), "not like")) { m_t = Coltype::_bool; ret = new Bin_op_not_like(*this); } else if (cmpi(get_token(), "!=")) { m_t = Coltype::_bool; ret = new Bin_op_not_eq(*this); } else if (cmpi(get_token(), "<>")) { m_t = Coltype::_bool; ret = new Bin_op_not_eq(*this); } else if (cmpi(get_token(), "is")) { m_t = Coltype::_bool; ret = new Bin_op_eq(*this); } else if (cmpi(get_token(), "is not")) { m_t = Coltype::_bool; ret = new Bin_op_not_eq(*this); } else if (cmpi(get_token(), "and")) { m_t = Coltype::_bool; ret = new Bin_op_and(*this); } else if (cmpi(get_token(), "or")) { m_t = Coltype::_bool; ret = new Bin_op_or(*this); } } else if ((get_type() == _uop) && m_right) { if (cmpi(get_token(), "not")) { m_t = Coltype::_bool; ret = new Un_op_not(*this); } else if (cmpi(get_token(), "+")) { ret = m_right; } else if (cmpi(get_token(), "-")) { if (m_right->ret_type() == Coltype::_float) { m_t = Coltype::_float; ret = new Un_op_neg_float(*this); } else { m_t = Coltype::_int; ret = new Un_op_neg(*this); } } else if (cmpi(get_token(), "~")) { m_t = Coltype::_int; ret = new Un_op_ones_complement(*this); } } clear_ptr(); if (!ret) throw Error("Unknown operator error '%s' !", get_token()); delete this; return ret; } void OP::evaluate_aggregate_operands(Row** rows) { if (m_left) m_left->evaluate_aggregate_operands(rows); if (m_right) m_right->evaluate_aggregate_operands(rows); for (int i = 0; i < max_param(); ++i) if (m_param[i]) m_param[i]->evaluate_aggregate_operands(rows); } void OP::combine_aggregate(Row* base_row, Row* other_row) { if (m_left) m_left->combine_aggregate(base_row, other_row); if (m_right) m_right->combine_aggregate(base_row, other_row); for (int i = 0; i < max_param(); ++i) if (m_param[i]) m_param[i]->combine_aggregate(base_row, other_row); } // return any column access ops found in given list of op trees - they don't // have to be compiled beforehand; duplicate column tokens are skipped std::vector find_unique_column_ops(std::vector ops) { std::vector res; while (!ops.empty()) { OP* op = ops.back(); ops.pop_back(); if (op->m_left) ops.push_back(op->m_left); if (op->m_right) ops.push_back(op->m_right); for (int i = 0; i < op->max_param(); ++i) if (op->m_param[i]) ops.push_back(op->m_param[i]); if (op->get_type() == Token::_column) { bool found = false; for (auto i = res.begin(); i != res.end(); ++i) { if (cmpii((*i)->get_token(), op->get_token())) { found = true; break; } } if (!found) res.push_back(op); } } return res; } void Query::replace_star_column_with_all_columns() { bool found_star = false; for (auto i = m_select.begin(); i != m_select.end(); ++i) { if (strcmp((*i)->get_token(), "*") == 0) { found_star = true; break; } } if (found_star) { for (auto i = m_select.begin(); i != m_select.end(); ++i) delete *i; m_select.clear(); if (!m_from_name.empty()) { Packet_handler* handler = get_packet_handler(m_from_name); for (auto i = handler->packet_columns.begin(); i != handler->packet_columns.end(); ++i) m_select.push_back(new OP(Token(Token::_column, i->name))); } } } void Query::process_from() { replace_star_column_with_all_columns(); if (m_from_name.empty()) return; std::vector all_ops; all_ops.insert(all_ops.end(), m_select.begin(), m_select.end()); if (m_where) all_ops.push_back(m_where); // skip m_having, it can't access source columns for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i) all_ops.push_back(i->m_op); for (auto i = m_group_by.m_terms.begin(); i != m_group_by.m_terms.end(); ++i) all_ops.push_back(i->m_op); auto used_columns = find_unique_column_ops(all_ops); // add from table with used columns Packet_handler* handler = get_packet_handler(m_from_name); for (auto j = handler->packet_columns.begin(); j != handler->packet_columns.end(); ++j) for (auto i = used_columns.begin(); i != used_columns.end(); ++i) if (cmpii(j->name, (*i)->get_token())) m_used_from_column_ids.push_back(j->id); m_from = handler->create_table(m_used_from_column_ids); } void Query::process_select(Row** rows, Row* dest, GenericAccessor* dest_accessors) { for (unsigned int i = 0, size = m_select.size(); i < size; ++i) { OP* op = m_select[i]; if (!op) continue; if (op->m_has_aggregate_function) { // defer evaluating aggregate functions, just eval their operands op->evaluate_aggregate_operands(rows); } else { Variant v; op->evaluate(rows, v); dest_accessors[i].set(dest, v); } } } void Query::combine_aggregates_in_select(Row* base_row, Row* other_row) { for (unsigned int i = 0; i < m_select.size(); ++i) { OP* op = m_select[i]; if (op && op->m_has_aggregate_function) op->combine_aggregate(base_row, other_row); } } void Query::process_aggregates_in_select(Row** rows, Row* dest, GenericAccessor dest_accessors[]) { for (unsigned int i = 0; i < m_select.size(); ++i) { OP* op = m_select[i]; if (op && op->m_has_aggregate_function) { Variant v; op->evaluate(rows, v); dest_accessors[i].set(dest, v); } } } bool Query::process_where(Row** rows) { if (!m_where) return true; Variant v; m_where->evaluate(rows, v); return v.get_bool(); } bool Query::process_having(Row** rows) { if (!m_having) return true; Variant v; m_having->evaluate(rows, v); return v.get_bool(); } std::vector process_group_by_key(Ordering_terms& group_by, Row** rows) { int size = group_by.m_terms.size(); std::vector res(size); for (int i = 0; i < size; ++i) group_by.m_terms[i].m_op->evaluate(rows, res[i]); return res; } bool Query::has_aggregate_functions() { // this assumes the ops have been compiled for (auto it = m_select.begin(); it != m_select.end(); it++) if ((*it)->m_has_aggregate_function) return true; return false; } void Query::execute(Reader& reader) { std::vector tables; std::vector search_results_last, search_results_first, search_results_only; // set up tables process_from(); if (m_from) tables.push_back(m_from); tables.push_back(m_result); for (int i = 0; i < int(tables.size()); ++i) search_results_last.push_back(i); for (int i = int(tables.size()) - 1; i >= 0; --i) search_results_first.push_back(i); search_results_only.push_back(tables.size() - 1); std::vector row_ptrs(tables.size()); Row** rows = &row_ptrs[0]; std::vector result_accessors_vector; // compile for (auto i = m_select.begin(); i != m_select.end(); ++i) { *i = (*i)->compile(tables, search_results_last, *this); Column* col = m_result->add_column((*i)->get_name(), (*i)->ret_type()); GenericAccessor a; a.m_offset = col->m_offset; a.m_type = col->m_type; result_accessors_vector.push_back(a); } if (m_where) m_where = m_where->compile(tables, search_results_last, *this); if (m_having) m_having = m_having->compile(tables, search_results_only, *this); if (m_group_by.exist()) m_group_by.compile(tables, search_results_last, *this); if (m_order_by.exist()) { // copy any missing columns to result table as hidden so we can // order by them std::vector ops; for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i) ops.push_back(i->m_op); std::vector column_ops = find_unique_column_ops(ops); for (auto i = column_ops.begin(); i != column_ops.end(); ++i) { const char* name = (*i)->get_token(); auto lookup = lookup_column_in_tables(tables, search_results_first, name); if (lookup.first and lookup.second < int(tables.size()) - 1) { // found, but not in result table OP* copying_op = new OP(**i); copying_op = copying_op->compile(tables, search_results_last, *this); m_select.push_back(copying_op); Column* col = m_result->add_column(copying_op->get_name(), copying_op->ret_type(), -1, Column::HIDDEN); GenericAccessor a; a.m_offset = col->m_offset; a.m_type = col->m_type; result_accessors_vector.push_back(a); } } // we only provide access to result table for "order by"; in order // to make the sort thing work correctly the result table currently // has to be at index 0 std::vector tables_result_only = { m_result }; std::vector tables_result_only_search = { 0 }; m_order_by.compile(tables_result_only, tables_result_only_search, *this); } // execute GenericAccessor* result_accessors = &result_accessors_vector[0]; bool aggregate_functions = has_aggregate_functions(); int count = 0; bool limiter = !m_order_by.exist() && !m_group_by.exist() && !aggregate_functions && m_limit >= 0; if (m_from) { bool first_row = true; Packet_handler* handler = get_packet_handler(m_from_name); reader.seek_to_start(); const int src_i = 0, dest_i = tables.size() - 1; rows[src_i] = m_from->create_row(); if (m_group_by.exist() || aggregate_functions) { std::unordered_map, Row*> groups; rows[dest_i] = 0; while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) { // fill in groups if (rows[dest_i]) rows[dest_i]->reset_text_columns(m_result->m_text_column_offsets); else rows[dest_i] = m_result->create_row(); process_select(rows, rows[dest_i], result_accessors); if (process_where(rows)) { auto key = process_group_by_key(m_group_by, rows); Row*& entry = groups[key]; if (entry) { combine_aggregates_in_select(entry, rows[dest_i]); } else { entry = rows[dest_i]; rows[dest_i] = 0; } } first_row = false; rows[src_i]->reset_text_columns(m_from->m_text_column_offsets); } if (rows[dest_i]) m_result->delete_row(rows[dest_i]); // put groups into result for (auto i = groups.begin(); i != groups.end(); ++i) { rows[dest_i] = i->second; // propagate the aggregate results through the evaluation tree process_aggregates_in_select(rows, rows[dest_i], result_accessors); if (process_having(rows)) m_result->add_row(rows[dest_i]); else m_result->delete_row(rows[dest_i]); } } else { rows[dest_i] = m_result->create_row(); while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) { // fill in result process_select(rows, rows[dest_i], result_accessors); if (process_where(rows)) { bool commit = true; if (limiter) { int l = count++; if (m_offset > 0) l -= m_offset; if (m_limit >= 0 && l >= m_limit) break; if (l < 0) commit = false; } if (commit) { m_result->add_row(rows[dest_i]); rows[dest_i] = m_result->create_row(); } } first_row = false; rows[src_i]->reset_text_columns(m_from->m_text_column_offsets); } m_result->delete_row(rows[dest_i]); } m_from->delete_row(rows[src_i]); } else { const int dest_i = tables.size() - 1; rows[dest_i] = m_result->create_row(); process_select(rows, rows[dest_i], result_accessors); if (process_where(rows)) m_result->add_row(rows[dest_i]); else m_result->delete_row(rows[dest_i]); } if (m_order_by.exist()) m_result->per_sort(m_order_by); if (m_limit >= 0 && !limiter) m_result->limit(m_limit, m_offset); } DB::DB() { Column::init_defs(); } DB::~DB() { } bool DB::query(const char* q) { return false; } Table* DB::get_table(const char* i_name) { std::string name = lower(i_name); Table* t = 0; auto it = m_tables.find(name); if (it != m_tables.end()) t = it->second; return t; } Table* DB::create_or_use_table(const char* i_name) { std::string name = lower(i_name); Table* t = get_table(name.c_str()); if (!t) t = create_table(name.c_str()); return t; } Table* DB::create_table(const char* i_name) { std::string name = lower(i_name); Table* t = new Table(name.c_str()); m_tables[std::string(name.c_str())] = t; return t; } Column::Column(const char* name, Coltype::Type type, int id, bool hidden) : m_name(name) , m_type(type) , m_def(Column::m_coldefs[type]) , m_id(id) , m_offset(0) { m_hidden = hidden; } void Trim_func::evaluate(Row** rows, Variant& v) { Variant str; m_param[0]->evaluate(rows, str); RefCountStringHandle str_handle(str.get_text()); const char* s = (*str_handle)->data; const char* t; RefCountStringHandle trim_handle; if (m_param[1]) { Variant trim; m_param[1]->evaluate(rows, trim); trim_handle.set(trim.get_text()); t = (*trim_handle)->data; } else t = " "; int l = strlen(t); if (l <= 0) { v = *str_handle; return; } int slen = strlen(s); int start = 0, end = slen; // left trim while (end - start >= l && memcmp(s + start, t, l) == 0) start += l; // right trim while (end - start >= l && memcmp(s + end - l, t, l) == 0) end -= l; if (start == 0 && end == slen) v = *str_handle; else { RefCountStringHandle res(RefCountString::construct(s, start, end)); v = *res; } } Cc_func::Cc_func(const OP& op) : OP(op) { #ifdef HAVE_LIBMAXMINDDB if (__cc_mmdb) { return; } std::string db; char* env = getenv("PACKETQ_MAXMIND_CC_DB"); if (env) { db = env; } if (db.empty()) { std::list paths = { "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP" }; if ((env = getenv("PACKETQ_MAXMIND_PATH"))) { paths.push_front(std::string(env)); } auto i = paths.begin(); for (; i != paths.end(); i++) { db = (*i) + "/GeoLite2-Country.mmdb"; struct stat s; if (!stat(db.c_str(), &s)) { break; } } if (i == paths.end()) { return; } } MMDB_s* mmdb = new (std::nothrow) MMDB_s; if (!mmdb) { return; } int ret = MMDB_open(db.c_str(), 0, mmdb); if (ret != MMDB_SUCCESS) { fprintf(stderr, "Warning: cannot open MaxMind CC database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret)); delete mmdb; return; } __cc_mmdb = mmdb; #endif } void Cc_func::evaluate(Row** rows, Variant& v) { #ifdef HAVE_LIBMAXMINDDB if (!__cc_mmdb) { RefCountStringHandle res(RefCountString::construct("")); v = *res; return; } Variant str; m_param[0]->evaluate(rows, str); RefCountStringHandle str_handle(str.get_text()); int gai_error, ret; MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__cc_mmdb, (*str_handle)->data, &gai_error, &ret); if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) { RefCountStringHandle res(RefCountString::construct("")); v = *res; return; } MMDB_entry_data_s entry_data; ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "country", "iso_code", NULL); if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UTF8_STRING) { RefCountStringHandle res(RefCountString::construct("")); v = *res; return; } RefCountStringHandle res(RefCountString::construct(entry_data.utf8_string, 0, entry_data.data_size)); v = *res; #else RefCountStringHandle res(RefCountString::construct("")); v = *res; #endif } Asn_func::Asn_func(const OP& op) : OP(op) { #ifdef HAVE_LIBMAXMINDDB if (__asn_mmdb) { return; } std::string db; char* env = getenv("PACKETQ_MAXMIND_ASN_DB"); if (env) { db = env; } if (db.empty()) { std::list paths = { "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP" }; if ((env = getenv("PACKETQ_MAXMIND_PATH"))) { paths.push_front(std::string(env)); } auto i = paths.begin(); for (; i != paths.end(); i++) { db = (*i) + "/GeoLite2-ASN.mmdb"; struct stat s; if (!stat(db.c_str(), &s)) { break; } } if (i == paths.end()) { return; } } MMDB_s* mmdb = new (std::nothrow) MMDB_s; if (!mmdb) { return; } int ret = MMDB_open(db.c_str(), 0, mmdb); if (ret != MMDB_SUCCESS) { fprintf(stderr, "Warning: cannot open MaxMind ASN database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret)); delete mmdb; return; } __asn_mmdb = mmdb; #endif } void Asn_func::evaluate(Row** rows, Variant& v) { #ifdef HAVE_LIBMAXMINDDB if (!__asn_mmdb) { v = -1; return; } Variant str; m_param[0]->evaluate(rows, str); RefCountStringHandle str_handle(str.get_text()); int gai_error, ret; MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__asn_mmdb, (*str_handle)->data, &gai_error, &ret); if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) { v = -1; return; } MMDB_entry_data_s entry_data; ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "autonomous_system_number", NULL); if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UINT32) { v = -1; return; } v = (int_column)entry_data.uint32; #else v = -1; #endif } DB g_db; Coldef Column::m_coldefs[COLTYPE_MAX]; } // namespace packetq