From 59378a0f8f7ead7ba260a8d1f9d4e41a62629f84 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Wed, 25 May 2016 20:12:47 +0300 Subject: [PATCH 01/22] Initial query engine structure --- README.md | 32 +- pyFly.py | 0 pyFlyd.py | 6 + src/lib/utils.py | 0 src/query-engine/.query-parser.py.swo | Bin 0 -> 24576 bytes src/query-engine/query-engine.py | 15 + src/query-engine/query-optimizer.py | 26 ++ src/query-engine/query-parser.py | 410 ++++++++++++++++++++++++++ 8 files changed, 487 insertions(+), 2 deletions(-) create mode 100644 pyFly.py create mode 100644 pyFlyd.py create mode 100644 src/lib/utils.py create mode 100644 src/query-engine/.query-parser.py.swo create mode 100644 src/query-engine/query-engine.py create mode 100644 src/query-engine/query-optimizer.py create mode 100644 src/query-engine/query-parser.py diff --git a/README.md b/README.md index a6fdc73..3467f95 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,32 @@ # PyFlyDB - -PyFlyDB is a python a graph database that implements [cypher](http://www.opencypher.org/) query language +## Имплементация на графова база данни (данните могат да бъдат обработване като граф) над езика __link__ Open Cypher. + +### Базата притежава следните елементи +#### Данни: +##### Върхове - основна единица в базата. +###### Притежава: +* етикети (човек) +* свойства (име) +##### Ребра - насочена връзка между върхове. Притежава: +* етикет (познава) +* свойства (име) + +#### Функции върху данните: +##### Добавяне на върхове/ребра +##### Търсене на: +* на върхове по подадена част от качествата му (етикети, свойства) - `(a:Person {name: "Sam"})` +* на ребра - `(a)-[b:Loves]-(c)` +* подаване на подграф - +##### Търсене на връх +##### Търсене на подграф +##### Търсен + + +pattern filtering vs WHERE syntax + +допълнителни филтриране -- Return, Distinct, Order by, Where + +2 начина на съхранение + +оптимизации diff --git a/pyFly.py b/pyFly.py new file mode 100644 index 0000000..e69de29 diff --git a/pyFlyd.py b/pyFlyd.py new file mode 100644 index 0000000..4a425b0 --- /dev/null +++ b/pyFlyd.py @@ -0,0 +1,6 @@ +import re + + +re.search + +re.match diff --git a/src/lib/utils.py b/src/lib/utils.py new file mode 100644 index 0000000..e69de29 diff --git a/src/query-engine/.query-parser.py.swo b/src/query-engine/.query-parser.py.swo new file mode 100644 index 0000000000000000000000000000000000000000..2f078b810f29a49e1bf60bde4926903fa592385e GIT binary patch literal 24576 zcmeI43zQ^RS%52KH!&f>oWS9L54`Lw>F!NW?{0P@IAk`NotY#fJCDpGo27T?RCm|R zG}}|vRCVuevssQxcqt%)5R%B#m_(F-hKNE`4v!!S&+s@JAiPu{3Zh^@9_shsTeqsZ zcQz0XD#z1x_M5J%`~L6q|L?7>?znPlT0Ob6+u=ILasHxv$^K&x)Sd7DjN{Z@-)%Ij zb#Z#AYobvPhUdl~%8Pb;j*W2pksv%2wcMJwz2i5wx4P}Mz~4UCy|me#*tqAs9p@If8IHhHpalPUCi#Jn!mV%%+zh`CN8tz@hE-UB zA=m;Bp5Zv3gipX-a1%Tmx^NVlFavq`CHO@+8_t3+V*vR_cpuyZzYnj30BSG>J7EV5 z!4`M~oC2ReoiyPNcpKadzXoksgr~u;!W8U;0x0;m(;Vjk_yT+$J_q;1hu~dsJNyRB z!V}=@RN6goH{1cYz%yYloDFBe4>0I_555Zzz-Qni@E*7wZh|+$8{j4IVrau6EWkXR z3g5%<^C7qg?uI+zo$wBLIlKs72v@^?xDu9N4~)X2;B5Fh2B#0f2jQ>a_3*pErR=q3 z%a-(kmxtYE$5VON?-oYX;_Sq1;yM%Pz$%%$COXr8}->1k+lgu8HPcapFCRgTJmU<&V-ohtXI8oc0~vA>(K@w zoblP|>9LuK1vRRU9e14LI@Med=0=oBi8SP~sG9D2weBjnsH!8B>Y8g*wcrG;NJ2WZ zbCdI9i?j0!mAwZlbMsTP^HYlla4F|z=Lws;%KVU$Jeh|*a(j{fGb(@g=)Y4lxn&AJ zy|iy}sxrB6a(Z%RaiOv>w{L23adKWqS;WWm*y8x*$!p1eIX5gJht%+}n!IXmesW=9 zYIbIUN5@$<#hRU;Ad0;Qa^`VjYGH9|W_&SMG|&5|7B5fkFQ1&Bv=8%>i%at}JS{Bk zt&Hy*TUwaZDJBum&CgyswNL!VF_ZB{9+grNO)3 z_$FDC$oTx^nB*+`q?Tr;u2@QPKcw<4w@sV%RlQLsaGN&Uh}x>_wF@Q>Lz23(zT9T$ zx%IB`<<$#{jFYNKN+&d=Bxk0;s^P1e8+j^Np&99Vlmx@JylAg^ae#(j_m0|<1hqp^ zNgW6}g!EO^@@kEhF1cG-@j}mU$3e6(*4=h(jd0|lMz%yQk+<&p?M5w9)voG94S!V! z$W2eqUzUhMX9T%Sk4??=)^VmMDz044G@pfF-BZ=ZDs{B#9hG5%bZdunH#aq*qORX| zkLtvdp+&b3j^3_7nDVKB%@erop zWX4!YfXmT;`C|%O3)ZQxx|G)5CO&L-YzXT?nevXCpbt@8B6wKCx=o;9TEWJJoTghh zdrE2rkJyvECpV^;={!I|(}zA^%nlv;3Zl?AiK__0WJ$$W8iit3uyaiREGM4hIP>E= z%6=bx?c-q!Oh;PGlql0cRNC6AZq%sR3fAp~zOU+4mDfG7sP@YUTvVk}sgP|$aTT9@ zf?Uv@Zlk$Q${u>{4g*`%Vpgs+=_C1$uOHf8y@+enj%!*xyRBBUTVVjyBUg{lI80b@ z#G_nkJk(bDP|`m0z5W@Oy|hQ35L)L;76k1eXhx;ln%nU6+iW&N)M7}-0 zZquw+XvCRPiGR8JN|!`|@1l(`Q$7jOSW!*S?~PdPQ9F9el8p&jM4f7dVPQi-NTjkZ ziv7skkcuviHHlFqLzHDRp_82k!+F$p{hHTPJ>t){JE>I3hS$=PEAJdJq2oxytlJJ& z5R#{juDI!>`Exe%MV@5FOnu2rq=gozUr?i?{Svbg^^~(j$!G#b+Lcxiovck|KN-QisA z90_T=W_4T*>fOA{%DSl9Ub$LF=ibz%retR^RD>zhwBsvmjrEce?ioA{U6X!(U z&;nMywsCBicQ2Dcow+@G)CKxZ5*}p~a%^$0eqzg{uN^kX^wnFBT{yDgOmm#DI-V1z z)UCU1ll@Vm-!;-Z-EQ4kQWEzM$(L0)wjSF#GTmhgU>ZrUdc7_WjrCT*f-D(=?2B5^ zY@(i^ZbbG~=sBYQA3udP`>6y){}1?iE4uw#;DvA$mS7QvU=w@?9sYfAJ-iwYK^ey3 z`)51OKf@jHJh&G2!WdivkAr*B@vnyi@I+AXQS|%Q!|UJ`5JCl};a9+cPovA<2{*zT zj6)7ig}cvioDaaCfDfY255c41T=)k1_&eZQcrFy-d^iKXh0gs=_yYVH+yF0u7sDag z1EQC2f-j+O-vT$oAHW;nI`Ci-hT$A|Cp!17@G6MlYM6jwD8hfBOaCi;13m@!!W|&G zcL=-TEI1P$q;5V9cfot%R`^Y@y7oNxSAf*vB`~=D7bTznx9KI>s=d{GQ;WxxKmGD3oKgQP*>rV5|O!8dj=QFf`OUELq~tkq=r$wT;GWJ4-4j37c_XQ4q9}1*JI3 z6$dkptSI$M{>U0FXvyL%wT7f?-1Hp*wTh^}R@1tFH#QOIsAS#XVGE31#B0&CRl9tU z1hpMdZscNlhZ1RvgW;rRY$j$2(qH`8A2N`|QM_8bgjF^+ly#5q?CN4>lf?~I&#>WE zo0ujpzW8ECr^TOmrL6N`uOKgLz(||_iEfm-Y)Whcbo0qwA<+`V>d+M#S~u(>nMHf% zt%xyI^K`f(%_KGNq}4j478t6;i#DExiM>~kV%pqEM+&+N z1!zZ^UM}A3NFn!fl~p-ztTXf^G-W!H)^nHXJ!xyvUa_whRaw^qi)UJOpSvn2%_i}O z$6@KeroKh*aWTDlb%hafofUFgfO)M98yj`h?la766MLdAo$nphYr*wM9ci@JL`&{8 z84Fj!U|p>=!pJlxT{!-1i#&XhhfNrkfTcLx5#_z47eW zDAnhQOoeW(Z6+COGRntR12bQw6>QS9LDE^WFHZUc+CdIW$eLa&mN%WL z#DgCB(&9!HGs=%jLybHJK5Hk^T}?X{Y~M~DRlD2rP_^wYi1Z5Gz>IN<*#T9NDi$Ls z<(09fz2>%=lRFrC(@rlX8A_T}8b7N8Y`?K&QoB!53JGV7R%rqHySAR6iz;KG@2&kI zwpf_Kf|ZfDA7@QeWB}FFc9i~rd$A)QlCnkXF8WVsdb#Nf)U?eSz0Ned*mjjcE8l0v3FQ{m*(TQQ?h$Jv}$q-wI`zC;sS{zuuP61~(Rrhdof zP=2$?nTacI#H1B(!b#C=Rx`ud!;wcOmp`g3vKf<%#?zmS?rK)==Q;A)UK4?|2I;Fb z`q*Ah0-V8p} zVKOd= zK6o0O2Ty`cVE6q07Tds|!fT-o5rp7D9VTELWY7Px@NMh^55iaA-EbQ`AD#!-!gE3F z2{m{+6k#j;Jp3Fy9v%m$!*{VEd=2h|7r|AKht04F{skMt7vLti1}=k3;mL3z?1ruI zSU3Y@|Nm=nKgizyr{Vo@7u*If1+hKIzW*p(2&cnl*aTvO_;>ggd=TCP?}i)Tci?64 z+i(<)z~yimgHr>8Qv-Up#Ol(6Qv>NSkN>e#1KNm#X;L4|_(`01*s;-h2R6rI zBesl?_8L-q8z-E3V4tV|%ma$=+)Qehli;!OJI;N`rUaJXFgT30)0oOo?8QE2PY$Vk z!zbFJ;%r7+-$b1iep3}*rjC=H$1!_&HnB`evNPF|E{Zv_A?7(EbTQJF%>1z+c5t!t z#^we*NUYibVXR&p`EaCY5~pNFCwJljDSL>fh)hN=buj^uAKCwxk@fkp{{K?$KaZaO zIk+D_2=9egz65IoS4Zj7? zfy1x_i?9IptiV&aKNa4Eo#4;m?eI!yLj?O_H{{?vcoIAw9tY>Zx3C|489oj#2lf;YempaE+z2NUoJco3cc9=IFCPVg#_Zv&3P zez+2rU=%Kb-5~#=!WYr+?}NXC<1hoi3>U&<;L&g{oCcfW2k86qEx;$?2Dla)unY&` z7vS6I`1gUF|9>0Y4mZLL@N{?roCcd=6WFr{Vh?yXTn8_J1F#E*;EU+&uZHWO1#+f; z1h&GN@J-sxXF<;KOZ#!5_mWomaIaoV)hJf3QslW|Z7rWGKO?_<@W{6O!NN5Mj};2b zY}RJ2hE6uAYzLF~$K@=SHhE=z8SCPRJ(Omh$is~6D9f&mIc{xTjLj|L*t~rszt;ag zIkvrXME_hclHC9hU)d~~^=8Jxn=Q!3c0SoR%%>(!YbzD&-U)9r7CuSYUMKqNk?+A{ zrELd~KOCuAaY)2^YjQ&=o9%hEWpcf zGMU1Ix#@LGOM-{=L3`Su1SSg3>?QkyqtdUkB^kxUooU3`8XZ<^Y?+nQjHbJy#z{xN zJ>7B~&LkA?P;tZ|kUE$BKAEe{re?CCtusERhXynE8?|rUJ;X*ByP@mrowjUzYk!ir zSkxllta#(40tf2M<}31+Pb;Fn3ZPx|x0gw_HTv6&E>~7aCv772MraP|#f`~~a{Ig$ zjsyARalWy-#;tBWX(jYT9Sh$Io>E6^L8n>wb8-+Q-Z}8SGN`qq`oi^hY8{+w-V$@30T~)s4 z)%j#b=SX6(u}PnyTr97a*zM)h*nzlXVn_pdLiF|(C z!A5U;(`DIRr%Lsi2eY=PZX=Jmg0tspwQ<->i#M$0k;#Usd;QguZ60iUN?*_cIibf0 zr1wmd>?T5xX)f-|MAHsC-u4w)cxOC3#AukQY`rnBhrL6d*D?wU3zYb?E0h@daEenR z^uNBiQ^Ci3vkz-Xln$L85|fij(pZ_e8}jv9&rt-^+!a?me9|_mjJ93m+LrZH-{@%% zw$MiGtuBEi(AP|?yX{W)Yp(Hty=V^H(|xUGWW6GFSx@R?PDKvY@V$W?xu~bdM9f4p zrjD%fIhuT2iW@Z=Cb_w|kg?S zN^wpC6Q#VElLd;I)}Ga&`7jpgk3prf^@$<*zKRJDFOK+^%Zfsz79RDMOXj&@JIG&lj!?rI)un8Z*NlwfcanW;@NkJY> zc1Cwl!DjI@tG_%9LMv3sMOLsCy|Jg$jvHjh948?jkBQ6`XM&lZPJgiqhaiVq_FrPX$s6SM8IiRW=UMpyI_ QueryModel + - Optimize query + - execute the query using the storage_api + query - String + """ + query_model = QueryParser.parse(query) + query_model = QueryOptimizer.optimize(query_model) diff --git a/src/query-engine/query-optimizer.py b/src/query-engine/query-optimizer.py new file mode 100644 index 0000000..8faee1e --- /dev/null +++ b/src/query-engine/query-optimizer.py @@ -0,0 +1,26 @@ + +class QueryOptimizer: + + """Docstring for QueryOptimizer. """ + + def __init__(self, query): + """TODO: to be defined1. + + :query: TODO + + """ + self._query = query + + @staticmethod + def optimize(query): + """ + Optimizes a passed query + + :query: This is a object used for dark magic by magitions with + names that must not be spoken. + + :returns: TODO + + """ + pass + diff --git a/src/query-engine/query-parser.py b/src/query-engine/query-parser.py new file mode 100644 index 0000000..0a907c1 --- /dev/null +++ b/src/query-engine/query-parser.py @@ -0,0 +1,410 @@ +import re +import collections +import itertools + + +def split_list(unsplitted, sep_list, PN=False): + """ + Splits a string by list of separators + + Unary prefix by default. + """ + def apply_notation(splitted_list, splitter): + # TODO splitter may provide type of the notation (infix, postfix, suffix) + if PN: + # Binary operators - in_fix + for split_pos in range(0, len(splitted_list), 3): + splitted_list.insert(split_pos, splitter) + else: + # unary operators - prefix + for split_pos in range(1, len(splitted_list)): + splitted_list.insert(split_pos, splitter) + if not unsplitted or len(sep_list) == 0: + return unsplitted + + splitter = sep_list[0] + rest = sep_list[1:] + + if isinstance(unsplitted, list): + splitted_list = [] + for sub_str in unsplitted: + splitted_com = sub_str.split(splitter) + if len(splitted_com) > 0: + apply_notation(splitted_com, splitter) + + splitted_list.append(splitted_com) + unsplitted = list(itertools.chain(*splitted_list)) + else: + unsplitted = unsplitted.split(splitter) + apply_notation(unsplitted, splitter) + + return split_list(unsplitted, rest) + + +class InvalidSyntaxError(Exception): + pass + + +class Command: + + """ Defines a basic command for the db (MATCH, WHERE, ...) """ + + def __init__(self, clause, expression): + """TODO: to be defined1. + + Args: + clause (Clause): TODO + expression (Expression): TODO + + + """ + self._clause = clause + self._expression = expression + + +class SubQuery: + + """Docstring for QueryModel. """ + + def __init__(self, commands) + """TODO: to be defined1. + + Args: + commands (List[Command]): + """ + self._commands = commands + + +class Query: + + """Docstring for Query. """ + + def __init__(self, queries): + """ + Defines a whole query. + + Args: + queries (List[SubQuery]): + """ + self._queries = queries + + + +def sytax_check(query): + ''' + TODO + Some big regex to check for proper syntax + ''' + pass + + + +MAIN_CLAUSES = [ + 'MATCH', + 'MERGE', + # This matches or creates semantics by using + # indexes and locks. You can specify different + # operations in case of a MATCH (part of the + # pattern already existed) or on CREATE + # (pattern did not exist yet). + + 'CREATE UNIQUE', + 'CREATE', + 'SET', # This updates properties and labels on nodes + 'REMOVE', # +and/or relationships. + 'DELETE', # It deletes nodes and relationships + + 'PROFILE', +] + +SUB_CLAUSES = [ + 'RETURN', + 'WHERE', + 'WITH', + 'DISTINCT', + 'ORDER BY' +] + +# -- EXPRESSIONS -- + +MATCH_SPLITTERS = ['-'] +MULTI_ELEMENTS_SPLITTER = [','] + + +OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] +ops = { + 'or': Operator('or', lambda a, b: a || b) +} + +COMMANDS = {} + + +class NumberOfOperandsError(Exception): + pass + +class InvalidOperationError(Exception): + pass + + +class Literal(object): + + """Defines a literal for an expression.""" + + def __init__(self, value): + """TODO: to be defined1. + + Args: + value (any): TODO + """ + self._value = value + + +class Identifier: + """ + Defines an identifier for a query. It can be + specified by name and populated on sub-query execution. + """ + def __init__(self, letter, value=None): + self._letter = letter + self._value = value + + @property + def value(self): + return self._value + + +class Operator: + def __init__(self, op, processor, operands=0): + self.operation = op + self.priority = priority + self.processor = processor + self.operands = operands + + def execute(*args): + ''' + Pass the required number of operands to the + operator + ''' + return self.processor(*args) + + +class Expression: + + ''' + A where (and other elements?) expression handler + Contains a list of elements - variables, consts and operations + ''' + + def __init__(self, elements): + self.elements = elements + + # TODO probably not ... + def validate_expression(self): + pass + + +class Edge: + + """ + TODO: make it immutable + An edge: + - has label + - identifier - used to keep the name of the matched edge + - has [properties] + - direction - true/false + - nodeIn, nodeOut - in case a direction if given, the edge direction + is determined from the node sequence given + """ + + def __init__(self, label, nodeIn, nodeOut, direction, identifier=None, properties={}): + self.__label = label + self.__properties = properties + self.__nodeIn = nodeIn + self.__nodeOut = nodeOut + self.__directed = directed + self.identifier = identifier + + def isDirected(self): + return self.direction + + def getNodes(self): + """ + Get a directed node pair out - in (direction flag needed) + """ + return (self.nodeOut, self.nodeIn) + + def getLabel(self): + return self.__label + + def getProperties(self): + return self.__properties + + +class ReturnEdge(Edge): + + """ + A returned edge must have an identifier, if not + it shouldn't be returned. + """ + + def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, properties): + Edge.__init__(self, label, properties) + self.__id = _id + + # TODO implement setters + + +class Node: + + """ + TODO: make it immutable + A node: + - identifier -- used to define the result variable + - has label/s + - has [properties] + """ + + def __init__(self, labels, identifier=None, properties={}): + self.identifier = identifier + self.labels = labels + self.properties = properties + + +class ReturnNode(Node): + + def __init__(self, identifier, _id, properties, labels=[]): + Node.__init__(self, identifier, properties, labels) + self.__id = _id + + +class QueryParser: + """ + Creates a Query object out of a query string. + """ + + @staticmethod + def parse_query(query): + def __get_properties(node_string): + """ + {name: "Emu", ..} - dict use eval + returns prop dict or None + """ + properties_string = re.search('\{.*\}', node_string) + return eval(properties_string) if properties_string else None + + def __get_labels(node_string): + """ + varName:Label1:Label2:... + returns { + varName: ..., + labels: [] + } or None + """ + varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) + return varibalbe_str.group(0).split(':') if varibalbe_str else None + + def __parse_node(node_string): + """ + Accepts (var:label {}) + + Do paring of string to node data and + return Node Object + """ + properties = __get_properties(node_string) + labels = __get_labels(__parse_node) + + def __parse_edge(self, node_string): + """ + An edge can + Returns an edge with specified properties and orientation. + """ + properties = __get_properties(node_string) + labels = __get_labels(__parse_node) + + def parse_expression(expression_string, expression_type): + + + def parse_sub_query(sub_query): + # Break to smaller parts with sub clauses - RETURN, WHERE + # List of: Clause, expressions (, separated) + clauses_split = split_list(query_part, SUB_CLAUSES) + # process expressions (of MATCH, WHERE, ... + +# TODO expression type is defined by the clause it refers to -- use that cluase + expressions_split = parse_expression(sub_split) + + """ + Parses an incoming query + CREATE ... + MATCH ... + as follows: + * get subqueries -- [CREATE ..., MATCH, ...] + * parse each sub query - + * define operation + * extract expression + * extract sub commands (WHERE, RETURN) + * TODO -- optimize query + * run sub query + * result in identifiers + * process next items with results from first + + Returns a list: + [ [operation, [*args]], - the comma separated elements + [ops, [*args], ...] + [ [MATCH, Node, edge, ...] ..] + """ + + # Process query by parts. + # Sub queries are defined by specific Clauses + sub_queries = split_list(query, MAIN_CLAUSES) + + parsed_sub_queries = [parse_sub_query(sub_query) for sub_query in sub_queries] + + return parsed_sub_queries +# TODO variables ??? + + @staticmethod + def __execute_command(command, *args): + # use commands dict + pas + command = op[0] + subcommands = op[1:] + QueryEngine.__execute_command(op, *subcommands) + + +""" +Spliting + By main operation keyword - MATCH, CREATE ... + Subspliting: + CREATE -- ',' (),(), ()-[]->() + MATCH -- ','; '-' + -- split by, and then by - ... it must be: (node),[edge],(node)<,[edge], ... + -- ()-->() + -- ()--() + -- (a)-->()<--(b) -- path + -- ({x:1}) + -- (a)-[{b:3}]->(b) + -- (a)-[:A]-() + + WHERE e.name IN ['a', 'b'] AND e.b > 5 OR ... + """ + +# query -> main_parts -> +""" +CREATE (m:Person {name:'b'}) +MATCH n WHERE n.name='B' RETURN n.name +==> Split by main clauses +'MATCH (n), (a)-->(b)<--c<--(d), WHERE n.name RETURN n.name' +==> Split by sub clauses ((a n1 n2 n3)) +[ +['MATCH', (n)--()], +['WHERE', 'n.name=\'B\''], +['RETURN', 'n.name'] +] +-> [commands] -- [M, W, R], +[expressions] -- list, tree ? + + +list -- a > 5 AND b < 3 --> [and, >, a, 5, <, b, 3] + +""" From 410ac56491136ffd8357affdad1ec64095ac7d2b Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 9 Jun 2016 11:33:02 +0300 Subject: [PATCH 02/22] Update structure --- .gitignore | 1 + src/query-engine/errors/syntax.py | 10 + src/query-engine/query-engine.py | 15 -- src/query-engine/query_ast/clauses.py | 0 src/query-engine/query_ast/expression.py | 0 src/query-engine/query_ast/operatiors.py | 39 +++ src/query-engine/query_ast/query.py | 43 ++++ src/query-engine/query_engine.py | 26 ++ ...{query-optimizer.py => query_optimizer.py} | 0 .../{query-parser.py => query_parser.py} | 225 ++++++------------ src/query-engine/syntax_checker.py | 15 ++ tests/query_engine/test_query_parser.py | 0 12 files changed, 208 insertions(+), 166 deletions(-) create mode 100644 src/query-engine/errors/syntax.py delete mode 100644 src/query-engine/query-engine.py create mode 100644 src/query-engine/query_ast/clauses.py create mode 100644 src/query-engine/query_ast/expression.py create mode 100644 src/query-engine/query_ast/operatiors.py create mode 100644 src/query-engine/query_ast/query.py create mode 100644 src/query-engine/query_engine.py rename src/query-engine/{query-optimizer.py => query_optimizer.py} (100%) rename src/query-engine/{query-parser.py => query_parser.py} (64%) create mode 100644 src/query-engine/syntax_checker.py create mode 100644 tests/query_engine/test_query_parser.py diff --git a/.gitignore b/.gitignore index e8bed93..923c7da 100755 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,4 @@ local_settings.py source/data/ *.db celerybeat-schedule.db +.vimrc.local diff --git a/src/query-engine/errors/syntax.py b/src/query-engine/errors/syntax.py new file mode 100644 index 0000000..d1226eb --- /dev/null +++ b/src/query-engine/errors/syntax.py @@ -0,0 +1,10 @@ +class InvalidSyntaxError(Exception): + pass + +class NumberOfOperandsError(InvalidSyntaxError): + pass + +class InvalidOperationError(InvalidSyntaxError): + pass + + diff --git a/src/query-engine/query-engine.py b/src/query-engine/query-engine.py deleted file mode 100644 index 7880cc1..0000000 --- a/src/query-engine/query-engine.py +++ /dev/null @@ -1,15 +0,0 @@ -class QueryEngine: - - def __init__(self, storage_api): - self.storage_api = storage_api - - def execute_query(self, query): - """ - Executes a passed query. Follows the steps: - - Parse query -> QueryModel - - Optimize query - - execute the query using the storage_api - query - String - """ - query_model = QueryParser.parse(query) - query_model = QueryOptimizer.optimize(query_model) diff --git a/src/query-engine/query_ast/clauses.py b/src/query-engine/query_ast/clauses.py new file mode 100644 index 0000000..e69de29 diff --git a/src/query-engine/query_ast/expression.py b/src/query-engine/query_ast/expression.py new file mode 100644 index 0000000..e69de29 diff --git a/src/query-engine/query_ast/operatiors.py b/src/query-engine/query_ast/operatiors.py new file mode 100644 index 0000000..f7ebb17 --- /dev/null +++ b/src/query-engine/query_ast/operatiors.py @@ -0,0 +1,39 @@ +# TODO Needs redesign +class Operator: + ''' + Base operator class. + ''' + + def __init__(self, op, processor, operands=0): + self.operation = op + self.priority = priority + self.processor = processor + self.operands = operands + + def execute(*args): + ''' + Pass the required number of operands to the + operator + ''' + return self.processor(*args) + +class Equals(Operator): + + """Defines operator =""" + + def __init__(self, op, processor, operands): + """TODO: to be defined1. + + Args: + op (TODO): TODO + processor (TODO): TODO + operands (TODO): TODO + + + """ + Operator.__init__(self) + + self._op = op + self._processor = processor + self._operands = operands + diff --git a/src/query-engine/query_ast/query.py b/src/query-engine/query_ast/query.py new file mode 100644 index 0000000..92efe8d --- /dev/null +++ b/src/query-engine/query_ast/query.py @@ -0,0 +1,43 @@ +class Command: + + """ Defines a basic command for the db (MATCH, WHERE, ...) """ + + def __init__(self, clause, expression): + """TODO: to be defined1. + + Args: + clause (Clause): TODO + expression (Expression): TODO + + + """ + self._clause = clause + self._expression = expression + + +class SubQuery: + + """Docstring for QueryModel. """ + + def __init__(self, commands) + """TODO: to be defined1. + + Args: + commands (List[Command]): + """ + self._commands = commands + + +class Query: + + """Docstring for Query. """ + + def __init__(self, queries): + """ + Defines a whole query. + + Args: + queries (List[SubQuery]): + """ + self._queries = queries + diff --git a/src/query-engine/query_engine.py b/src/query-engine/query_engine.py new file mode 100644 index 0000000..5bb3204 --- /dev/null +++ b/src/query-engine/query_engine.py @@ -0,0 +1,26 @@ +class QueryEngine: + + def __init__(self, storage_api): + self.storage_api = storage_api + + def execute_query(self, query): + """ + Executes a passed query. Follows the steps: + - Parse query -> QueryModel + - Optimize query + - execute the query using the storage_api + query - String + """ + query = QueryParser.parse(query) + query = QueryOptimizer.optimize(query_model) + self.__execute_query() + + + def __execute_command(command, *args): + # use commands dict + pass + command = op[0] + subcommands = op[1:] + QueryEngine.__execute_command(op, *subcommands) + + diff --git a/src/query-engine/query-optimizer.py b/src/query-engine/query_optimizer.py similarity index 100% rename from src/query-engine/query-optimizer.py rename to src/query-engine/query_optimizer.py diff --git a/src/query-engine/query-parser.py b/src/query-engine/query_parser.py similarity index 64% rename from src/query-engine/query-parser.py rename to src/query-engine/query_parser.py index 0a907c1..2b6ac2a 100644 --- a/src/query-engine/query-parser.py +++ b/src/query-engine/query_parser.py @@ -2,25 +2,35 @@ import collections import itertools +''' +- split to clauses (Match) +- split to sub-query (Where) -> Clause, expr, Cluase, expr ... +- Parse expression by clause + + +new MatchClause() +''' + + def split_list(unsplitted, sep_list, PN=False): """ Splits a string by list of separators - Unary prefix by default. - """ - def apply_notation(splitted_list, splitter): - # TODO splitter may provide type of the notation (infix, postfix, suffix) - if PN: - # Binary operators - in_fix - for split_pos in range(0, len(splitted_list), 3): - splitted_list.insert(split_pos, splitter) - else: - # unary operators - prefix - for split_pos in range(1, len(splitted_list)): - splitted_list.insert(split_pos, splitter) - if not unsplitted or len(sep_list) == 0: - return unsplitted + Unary prefix by default. + """ + def apply_notation(splitted_list, splitter): + # TODO splitter may provide type of the notation (infix, postfix, suffix) + if PN: + # Binary operators - in_fix + for split_pos in range(0, len(splitted_list), 3): + splitted_list.insert(split_pos, splitter) + else: + # unary operators - prefix + for split_pos in range(1, len(splitted_list)): + splitted_list.insert(split_pos, splitter) + if not unsplitted or len(sep_list) == 0: + return unsplitted splitter = sep_list[0] rest = sep_list[1:] @@ -41,89 +51,31 @@ def apply_notation(splitted_list, splitter): return split_list(unsplitted, rest) -class InvalidSyntaxError(Exception): - pass - - -class Command: - - """ Defines a basic command for the db (MATCH, WHERE, ...) """ - - def __init__(self, clause, expression): - """TODO: to be defined1. - - Args: - clause (Clause): TODO - expression (Expression): TODO - - - """ - self._clause = clause - self._expression = expression - - -class SubQuery: - - """Docstring for QueryModel. """ - - def __init__(self, commands) - """TODO: to be defined1. - - Args: - commands (List[Command]): - """ - self._commands = commands - - -class Query: - - """Docstring for Query. """ - - def __init__(self, queries): - """ - Defines a whole query. - - Args: - queries (List[SubQuery]): - """ - self._queries = queries - - - -def sytax_check(query): - ''' - TODO - Some big regex to check for proper syntax - ''' - pass - - - MAIN_CLAUSES = [ - 'MATCH', - 'MERGE', - # This matches or creates semantics by using - # indexes and locks. You can specify different - # operations in case of a MATCH (part of the - # pattern already existed) or on CREATE - # (pattern did not exist yet). - - 'CREATE UNIQUE', - 'CREATE', - 'SET', # This updates properties and labels on nodes - 'REMOVE', # +and/or relationships. - 'DELETE', # It deletes nodes and relationships - - 'PROFILE', -] + 'MATCH', + 'MERGE', + # This matches or creates semantics by using + # indexes and locks. You can specify different + # operations in case of a MATCH (part of the + # pattern already existed) or on CREATE + # (pattern did not exist yet). + + 'CREATE UNIQUE', + 'CREATE', + 'SET', # This updates properties and labels on nodes + 'REMOVE', # +and/or relationships. + 'DELETE', # It deletes nodes and relationships + + 'PROFILE', + ] SUB_CLAUSES = [ - 'RETURN', - 'WHERE', - 'WITH', - 'DISTINCT', - 'ORDER BY' -] + 'RETURN', + 'WHERE', + 'WITH', + 'DISTINCT', + 'ORDER BY' + ] # -- EXPRESSIONS -- @@ -133,19 +85,11 @@ def sytax_check(query): OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] ops = { - 'or': Operator('or', lambda a, b: a || b) -} + 'or': Operator('or', lambda a, b: a || b) + } COMMANDS = {} - -class NumberOfOperandsError(Exception): - pass - -class InvalidOperationError(Exception): - pass - - class Literal(object): """Defines a literal for an expression.""" @@ -173,21 +117,6 @@ def value(self): return self._value -class Operator: - def __init__(self, op, processor, operands=0): - self.operation = op - self.priority = priority - self.processor = processor - self.operands = operands - - def execute(*args): - ''' - Pass the required number of operands to the - operator - ''' - return self.processor(*args) - - class Expression: ''' @@ -277,6 +206,8 @@ def __init__(self, identifier, _id, properties, labels=[]): self.__id = _id + + class QueryParser: """ Creates a Query object out of a query string. @@ -284,24 +215,6 @@ class QueryParser: @staticmethod def parse_query(query): - def __get_properties(node_string): - """ - {name: "Emu", ..} - dict use eval - returns prop dict or None - """ - properties_string = re.search('\{.*\}', node_string) - return eval(properties_string) if properties_string else None - - def __get_labels(node_string): - """ - varName:Label1:Label2:... - returns { - varName: ..., - labels: [] - } or None - """ - varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) - return varibalbe_str.group(0).split(':') if varibalbe_str else None def __parse_node(node_string): """ @@ -309,9 +222,28 @@ def __parse_node(node_string): Do paring of string to node data and return Node Object - """ - properties = __get_properties(node_string) - labels = __get_labels(__parse_node) + """ + def __get_properties(node_string): + """ + {name: "Emu", ..} - dict use eval + returns prop dict or None + """ + properties_string = re.search('\{.*\}', node_string) + return eval(properties_string) if properties_string else None + + def __get_labels(node_string): + """ + varName:Label1:Label2:... + returns { + varName: ..., + labels: [] + } or None + """ + properties = __get_properties(node_string) + labels = __get_labels(__parse_node) + + varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) + return varibalbe_str.group(0).split(':') if varibalbe_str else None def __parse_edge(self, node_string): """ @@ -324,8 +256,8 @@ def __parse_edge(self, node_string): def parse_expression(expression_string, expression_type): - def parse_sub_query(sub_query): - # Break to smaller parts with sub clauses - RETURN, WHERE + def parse_sub_query(sub_query): + # Break to smaller parts with sub clauses - RETURN, WHERE # List of: Clause, expressions (, separated) clauses_split = split_list(query_part, SUB_CLAUSES) # process expressions (of MATCH, WHERE, ... @@ -363,15 +295,6 @@ def parse_sub_query(sub_query): return parsed_sub_queries # TODO variables ??? - @staticmethod - def __execute_command(command, *args): - # use commands dict - pas - command = op[0] - subcommands = op[1:] - QueryEngine.__execute_command(op, *subcommands) - - """ Spliting By main operation keyword - MATCH, CREATE ... diff --git a/src/query-engine/syntax_checker.py b/src/query-engine/syntax_checker.py new file mode 100644 index 0000000..056836b --- /dev/null +++ b/src/query-engine/syntax_checker.py @@ -0,0 +1,15 @@ + +def _check_clauses(): + pass +def _check_operators(): + pass +def _check_operat + +def sytax_check(query): + ''' + TODO + Some big regex to check for proper syntax + ''' + pass + +class diff --git a/tests/query_engine/test_query_parser.py b/tests/query_engine/test_query_parser.py new file mode 100644 index 0000000..e69de29 From 6d93156424b514a58c54a598e2527c915b9695da Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Wed, 15 Jun 2016 11:49:46 +0300 Subject: [PATCH 03/22] FAIL - Update structure; Add test; --- .../query_ast/clauses.py => __init__.py} | 0 src/query-engine/.query-parser.py.swo | Bin 24576 -> 0 bytes src/query-engine/query_engine.py | 26 -- src/query-engine/query_parser.py | 333 ------------------ .../__init__.py} | 0 .../errors/syntax.py | 0 src/query_engine/query_ast/__init__.py | 1 + src/query_engine/query_ast/clauses.py | 88 +++++ src/query_engine/query_ast/expression.py | 29 ++ src/query_engine/query_ast/models.py | 127 +++++++ .../query_ast/operators.py} | 5 + .../query_ast/query.py | 2 +- src/query_engine/query_engine.py | 34 ++ .../query_optimizer.py | 0 src/query_engine/query_parser.py | 220 ++++++++++++ .../syntax_checker.py | 0 src/query_engine/test_query_parser.py | 201 +++++++++++ 17 files changed, 706 insertions(+), 360 deletions(-) rename src/{query-engine/query_ast/clauses.py => __init__.py} (100%) delete mode 100644 src/query-engine/.query-parser.py.swo delete mode 100644 src/query-engine/query_engine.py delete mode 100644 src/query-engine/query_parser.py rename src/{query-engine/query_ast/expression.py => query_engine/__init__.py} (100%) rename src/{query-engine => query_engine}/errors/syntax.py (100%) create mode 100644 src/query_engine/query_ast/__init__.py create mode 100644 src/query_engine/query_ast/clauses.py create mode 100644 src/query_engine/query_ast/expression.py create mode 100644 src/query_engine/query_ast/models.py rename src/{query-engine/query_ast/operatiors.py => query_engine/query_ast/operators.py} (90%) rename src/{query-engine => query_engine}/query_ast/query.py (95%) create mode 100644 src/query_engine/query_engine.py rename src/{query-engine => query_engine}/query_optimizer.py (100%) create mode 100644 src/query_engine/query_parser.py rename src/{query-engine => query_engine}/syntax_checker.py (100%) create mode 100644 src/query_engine/test_query_parser.py diff --git a/src/query-engine/query_ast/clauses.py b/src/__init__.py similarity index 100% rename from src/query-engine/query_ast/clauses.py rename to src/__init__.py diff --git a/src/query-engine/.query-parser.py.swo b/src/query-engine/.query-parser.py.swo deleted file mode 100644 index 2f078b810f29a49e1bf60bde4926903fa592385e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24576 zcmeI43zQ^RS%52KH!&f>oWS9L54`Lw>F!NW?{0P@IAk`NotY#fJCDpGo27T?RCm|R zG}}|vRCVuevssQxcqt%)5R%B#m_(F-hKNE`4v!!S&+s@JAiPu{3Zh^@9_shsTeqsZ zcQz0XD#z1x_M5J%`~L6q|L?7>?znPlT0Ob6+u=ILasHxv$^K&x)Sd7DjN{Z@-)%Ij zb#Z#AYobvPhUdl~%8Pb;j*W2pksv%2wcMJwz2i5wx4P}Mz~4UCy|me#*tqAs9p@If8IHhHpalPUCi#Jn!mV%%+zh`CN8tz@hE-UB zA=m;Bp5Zv3gipX-a1%Tmx^NVlFavq`CHO@+8_t3+V*vR_cpuyZzYnj30BSG>J7EV5 z!4`M~oC2ReoiyPNcpKadzXoksgr~u;!W8U;0x0;m(;Vjk_yT+$J_q;1hu~dsJNyRB z!V}=@RN6goH{1cYz%yYloDFBe4>0I_555Zzz-Qni@E*7wZh|+$8{j4IVrau6EWkXR z3g5%<^C7qg?uI+zo$wBLIlKs72v@^?xDu9N4~)X2;B5Fh2B#0f2jQ>a_3*pErR=q3 z%a-(kmxtYE$5VON?-oYX;_Sq1;yM%Pz$%%$COXr8}->1k+lgu8HPcapFCRgTJmU<&V-ohtXI8oc0~vA>(K@w zoblP|>9LuK1vRRU9e14LI@Med=0=oBi8SP~sG9D2weBjnsH!8B>Y8g*wcrG;NJ2WZ zbCdI9i?j0!mAwZlbMsTP^HYlla4F|z=Lws;%KVU$Jeh|*a(j{fGb(@g=)Y4lxn&AJ zy|iy}sxrB6a(Z%RaiOv>w{L23adKWqS;WWm*y8x*$!p1eIX5gJht%+}n!IXmesW=9 zYIbIUN5@$<#hRU;Ad0;Qa^`VjYGH9|W_&SMG|&5|7B5fkFQ1&Bv=8%>i%at}JS{Bk zt&Hy*TUwaZDJBum&CgyswNL!VF_ZB{9+grNO)3 z_$FDC$oTx^nB*+`q?Tr;u2@QPKcw<4w@sV%RlQLsaGN&Uh}x>_wF@Q>Lz23(zT9T$ zx%IB`<<$#{jFYNKN+&d=Bxk0;s^P1e8+j^Np&99Vlmx@JylAg^ae#(j_m0|<1hqp^ zNgW6}g!EO^@@kEhF1cG-@j}mU$3e6(*4=h(jd0|lMz%yQk+<&p?M5w9)voG94S!V! z$W2eqUzUhMX9T%Sk4??=)^VmMDz044G@pfF-BZ=ZDs{B#9hG5%bZdunH#aq*qORX| zkLtvdp+&b3j^3_7nDVKB%@erop zWX4!YfXmT;`C|%O3)ZQxx|G)5CO&L-YzXT?nevXCpbt@8B6wKCx=o;9TEWJJoTghh zdrE2rkJyvECpV^;={!I|(}zA^%nlv;3Zl?AiK__0WJ$$W8iit3uyaiREGM4hIP>E= z%6=bx?c-q!Oh;PGlql0cRNC6AZq%sR3fAp~zOU+4mDfG7sP@YUTvVk}sgP|$aTT9@ zf?Uv@Zlk$Q${u>{4g*`%Vpgs+=_C1$uOHf8y@+enj%!*xyRBBUTVVjyBUg{lI80b@ z#G_nkJk(bDP|`m0z5W@Oy|hQ35L)L;76k1eXhx;ln%nU6+iW&N)M7}-0 zZquw+XvCRPiGR8JN|!`|@1l(`Q$7jOSW!*S?~PdPQ9F9el8p&jM4f7dVPQi-NTjkZ ziv7skkcuviHHlFqLzHDRp_82k!+F$p{hHTPJ>t){JE>I3hS$=PEAJdJq2oxytlJJ& z5R#{juDI!>`Exe%MV@5FOnu2rq=gozUr?i?{Svbg^^~(j$!G#b+Lcxiovck|KN-QisA z90_T=W_4T*>fOA{%DSl9Ub$LF=ibz%retR^RD>zhwBsvmjrEce?ioA{U6X!(U z&;nMywsCBicQ2Dcow+@G)CKxZ5*}p~a%^$0eqzg{uN^kX^wnFBT{yDgOmm#DI-V1z z)UCU1ll@Vm-!;-Z-EQ4kQWEzM$(L0)wjSF#GTmhgU>ZrUdc7_WjrCT*f-D(=?2B5^ zY@(i^ZbbG~=sBYQA3udP`>6y){}1?iE4uw#;DvA$mS7QvU=w@?9sYfAJ-iwYK^ey3 z`)51OKf@jHJh&G2!WdivkAr*B@vnyi@I+AXQS|%Q!|UJ`5JCl};a9+cPovA<2{*zT zj6)7ig}cvioDaaCfDfY255c41T=)k1_&eZQcrFy-d^iKXh0gs=_yYVH+yF0u7sDag z1EQC2f-j+O-vT$oAHW;nI`Ci-hT$A|Cp!17@G6MlYM6jwD8hfBOaCi;13m@!!W|&G zcL=-TEI1P$q;5V9cfot%R`^Y@y7oNxSAf*vB`~=D7bTznx9KI>s=d{GQ;WxxKmGD3oKgQP*>rV5|O!8dj=QFf`OUELq~tkq=r$wT;GWJ4-4j37c_XQ4q9}1*JI3 z6$dkptSI$M{>U0FXvyL%wT7f?-1Hp*wTh^}R@1tFH#QOIsAS#XVGE31#B0&CRl9tU z1hpMdZscNlhZ1RvgW;rRY$j$2(qH`8A2N`|QM_8bgjF^+ly#5q?CN4>lf?~I&#>WE zo0ujpzW8ECr^TOmrL6N`uOKgLz(||_iEfm-Y)Whcbo0qwA<+`V>d+M#S~u(>nMHf% zt%xyI^K`f(%_KGNq}4j478t6;i#DExiM>~kV%pqEM+&+N z1!zZ^UM}A3NFn!fl~p-ztTXf^G-W!H)^nHXJ!xyvUa_whRaw^qi)UJOpSvn2%_i}O z$6@KeroKh*aWTDlb%hafofUFgfO)M98yj`h?la766MLdAo$nphYr*wM9ci@JL`&{8 z84Fj!U|p>=!pJlxT{!-1i#&XhhfNrkfTcLx5#_z47eW zDAnhQOoeW(Z6+COGRntR12bQw6>QS9LDE^WFHZUc+CdIW$eLa&mN%WL z#DgCB(&9!HGs=%jLybHJK5Hk^T}?X{Y~M~DRlD2rP_^wYi1Z5Gz>IN<*#T9NDi$Ls z<(09fz2>%=lRFrC(@rlX8A_T}8b7N8Y`?K&QoB!53JGV7R%rqHySAR6iz;KG@2&kI zwpf_Kf|ZfDA7@QeWB}FFc9i~rd$A)QlCnkXF8WVsdb#Nf)U?eSz0Ned*mjjcE8l0v3FQ{m*(TQQ?h$Jv}$q-wI`zC;sS{zuuP61~(Rrhdof zP=2$?nTacI#H1B(!b#C=Rx`ud!;wcOmp`g3vKf<%#?zmS?rK)==Q;A)UK4?|2I;Fb z`q*Ah0-V8p} zVKOd= zK6o0O2Ty`cVE6q07Tds|!fT-o5rp7D9VTELWY7Px@NMh^55iaA-EbQ`AD#!-!gE3F z2{m{+6k#j;Jp3Fy9v%m$!*{VEd=2h|7r|AKht04F{skMt7vLti1}=k3;mL3z?1ruI zSU3Y@|Nm=nKgizyr{Vo@7u*If1+hKIzW*p(2&cnl*aTvO_;>ggd=TCP?}i)Tci?64 z+i(<)z~yimgHr>8Qv-Up#Ol(6Qv>NSkN>e#1KNm#X;L4|_(`01*s;-h2R6rI zBesl?_8L-q8z-E3V4tV|%ma$=+)Qehli;!OJI;N`rUaJXFgT30)0oOo?8QE2PY$Vk z!zbFJ;%r7+-$b1iep3}*rjC=H$1!_&HnB`evNPF|E{Zv_A?7(EbTQJF%>1z+c5t!t z#^we*NUYibVXR&p`EaCY5~pNFCwJljDSL>fh)hN=buj^uAKCwxk@fkp{{K?$KaZaO zIk+D_2=9egz65IoS4Zj7? zfy1x_i?9IptiV&aKNa4Eo#4;m?eI!yLj?O_H{{?vcoIAw9tY>Zx3C|489oj#2lf;YempaE+z2NUoJco3cc9=IFCPVg#_Zv&3P zez+2rU=%Kb-5~#=!WYr+?}NXC<1hoi3>U&<;L&g{oCcfW2k86qEx;$?2Dla)unY&` z7vS6I`1gUF|9>0Y4mZLL@N{?roCcd=6WFr{Vh?yXTn8_J1F#E*;EU+&uZHWO1#+f; z1h&GN@J-sxXF<;KOZ#!5_mWomaIaoV)hJf3QslW|Z7rWGKO?_<@W{6O!NN5Mj};2b zY}RJ2hE6uAYzLF~$K@=SHhE=z8SCPRJ(Omh$is~6D9f&mIc{xTjLj|L*t~rszt;ag zIkvrXME_hclHC9hU)d~~^=8Jxn=Q!3c0SoR%%>(!YbzD&-U)9r7CuSYUMKqNk?+A{ zrELd~KOCuAaY)2^YjQ&=o9%hEWpcf zGMU1Ix#@LGOM-{=L3`Su1SSg3>?QkyqtdUkB^kxUooU3`8XZ<^Y?+nQjHbJy#z{xN zJ>7B~&LkA?P;tZ|kUE$BKAEe{re?CCtusERhXynE8?|rUJ;X*ByP@mrowjUzYk!ir zSkxllta#(40tf2M<}31+Pb;Fn3ZPx|x0gw_HTv6&E>~7aCv772MraP|#f`~~a{Ig$ zjsyARalWy-#;tBWX(jYT9Sh$Io>E6^L8n>wb8-+Q-Z}8SGN`qq`oi^hY8{+w-V$@30T~)s4 z)%j#b=SX6(u}PnyTr97a*zM)h*nzlXVn_pdLiF|(C z!A5U;(`DIRr%Lsi2eY=PZX=Jmg0tspwQ<->i#M$0k;#Usd;QguZ60iUN?*_cIibf0 zr1wmd>?T5xX)f-|MAHsC-u4w)cxOC3#AukQY`rnBhrL6d*D?wU3zYb?E0h@daEenR z^uNBiQ^Ci3vkz-Xln$L85|fij(pZ_e8}jv9&rt-^+!a?me9|_mjJ93m+LrZH-{@%% zw$MiGtuBEi(AP|?yX{W)Yp(Hty=V^H(|xUGWW6GFSx@R?PDKvY@V$W?xu~bdM9f4p zrjD%fIhuT2iW@Z=Cb_w|kg?S zN^wpC6Q#VElLd;I)}Ga&`7jpgk3prf^@$<*zKRJDFOK+^%Zfsz79RDMOXj&@JIG&lj!?rI)un8Z*NlwfcanW;@NkJY> zc1Cwl!DjI@tG_%9LMv3sMOLsCy|Jg$jvHjh948?jkBQ6`XM&lZPJgiqhaiVq_FrPX$s6SM8IiRW=UMpyI_ QueryModel - - Optimize query - - execute the query using the storage_api - query - String - """ - query = QueryParser.parse(query) - query = QueryOptimizer.optimize(query_model) - self.__execute_query() - - - def __execute_command(command, *args): - # use commands dict - pass - command = op[0] - subcommands = op[1:] - QueryEngine.__execute_command(op, *subcommands) - - diff --git a/src/query-engine/query_parser.py b/src/query-engine/query_parser.py deleted file mode 100644 index 2b6ac2a..0000000 --- a/src/query-engine/query_parser.py +++ /dev/null @@ -1,333 +0,0 @@ -import re -import collections -import itertools - -''' -- split to clauses (Match) -- split to sub-query (Where) -> Clause, expr, Cluase, expr ... -- Parse expression by clause - - -new MatchClause() -''' - - - -def split_list(unsplitted, sep_list, PN=False): - """ - Splits a string by list of separators - - Unary prefix by default. - """ - def apply_notation(splitted_list, splitter): - # TODO splitter may provide type of the notation (infix, postfix, suffix) - if PN: - # Binary operators - in_fix - for split_pos in range(0, len(splitted_list), 3): - splitted_list.insert(split_pos, splitter) - else: - # unary operators - prefix - for split_pos in range(1, len(splitted_list)): - splitted_list.insert(split_pos, splitter) - if not unsplitted or len(sep_list) == 0: - return unsplitted - - splitter = sep_list[0] - rest = sep_list[1:] - - if isinstance(unsplitted, list): - splitted_list = [] - for sub_str in unsplitted: - splitted_com = sub_str.split(splitter) - if len(splitted_com) > 0: - apply_notation(splitted_com, splitter) - - splitted_list.append(splitted_com) - unsplitted = list(itertools.chain(*splitted_list)) - else: - unsplitted = unsplitted.split(splitter) - apply_notation(unsplitted, splitter) - - return split_list(unsplitted, rest) - - -MAIN_CLAUSES = [ - 'MATCH', - 'MERGE', - # This matches or creates semantics by using - # indexes and locks. You can specify different - # operations in case of a MATCH (part of the - # pattern already existed) or on CREATE - # (pattern did not exist yet). - - 'CREATE UNIQUE', - 'CREATE', - 'SET', # This updates properties and labels on nodes - 'REMOVE', # +and/or relationships. - 'DELETE', # It deletes nodes and relationships - - 'PROFILE', - ] - -SUB_CLAUSES = [ - 'RETURN', - 'WHERE', - 'WITH', - 'DISTINCT', - 'ORDER BY' - ] - -# -- EXPRESSIONS -- - -MATCH_SPLITTERS = ['-'] -MULTI_ELEMENTS_SPLITTER = [','] - - -OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] -ops = { - 'or': Operator('or', lambda a, b: a || b) - } - -COMMANDS = {} - -class Literal(object): - - """Defines a literal for an expression.""" - - def __init__(self, value): - """TODO: to be defined1. - - Args: - value (any): TODO - """ - self._value = value - - -class Identifier: - """ - Defines an identifier for a query. It can be - specified by name and populated on sub-query execution. - """ - def __init__(self, letter, value=None): - self._letter = letter - self._value = value - - @property - def value(self): - return self._value - - -class Expression: - - ''' - A where (and other elements?) expression handler - Contains a list of elements - variables, consts and operations - ''' - - def __init__(self, elements): - self.elements = elements - - # TODO probably not ... - def validate_expression(self): - pass - - -class Edge: - - """ - TODO: make it immutable - An edge: - - has label - - identifier - used to keep the name of the matched edge - - has [properties] - - direction - true/false - - nodeIn, nodeOut - in case a direction if given, the edge direction - is determined from the node sequence given - """ - - def __init__(self, label, nodeIn, nodeOut, direction, identifier=None, properties={}): - self.__label = label - self.__properties = properties - self.__nodeIn = nodeIn - self.__nodeOut = nodeOut - self.__directed = directed - self.identifier = identifier - - def isDirected(self): - return self.direction - - def getNodes(self): - """ - Get a directed node pair out - in (direction flag needed) - """ - return (self.nodeOut, self.nodeIn) - - def getLabel(self): - return self.__label - - def getProperties(self): - return self.__properties - - -class ReturnEdge(Edge): - - """ - A returned edge must have an identifier, if not - it shouldn't be returned. - """ - - def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, properties): - Edge.__init__(self, label, properties) - self.__id = _id - - # TODO implement setters - - -class Node: - - """ - TODO: make it immutable - A node: - - identifier -- used to define the result variable - - has label/s - - has [properties] - """ - - def __init__(self, labels, identifier=None, properties={}): - self.identifier = identifier - self.labels = labels - self.properties = properties - - -class ReturnNode(Node): - - def __init__(self, identifier, _id, properties, labels=[]): - Node.__init__(self, identifier, properties, labels) - self.__id = _id - - - - -class QueryParser: - """ - Creates a Query object out of a query string. - """ - - @staticmethod - def parse_query(query): - - def __parse_node(node_string): - """ - Accepts (var:label {}) - - Do paring of string to node data and - return Node Object - """ - def __get_properties(node_string): - """ - {name: "Emu", ..} - dict use eval - returns prop dict or None - """ - properties_string = re.search('\{.*\}', node_string) - return eval(properties_string) if properties_string else None - - def __get_labels(node_string): - """ - varName:Label1:Label2:... - returns { - varName: ..., - labels: [] - } or None - """ - properties = __get_properties(node_string) - labels = __get_labels(__parse_node) - - varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) - return varibalbe_str.group(0).split(':') if varibalbe_str else None - - def __parse_edge(self, node_string): - """ - An edge can - Returns an edge with specified properties and orientation. - """ - properties = __get_properties(node_string) - labels = __get_labels(__parse_node) - - def parse_expression(expression_string, expression_type): - - - def parse_sub_query(sub_query): - # Break to smaller parts with sub clauses - RETURN, WHERE - # List of: Clause, expressions (, separated) - clauses_split = split_list(query_part, SUB_CLAUSES) - # process expressions (of MATCH, WHERE, ... - -# TODO expression type is defined by the clause it refers to -- use that cluase - expressions_split = parse_expression(sub_split) - - """ - Parses an incoming query - CREATE ... - MATCH ... - as follows: - * get subqueries -- [CREATE ..., MATCH, ...] - * parse each sub query - - * define operation - * extract expression - * extract sub commands (WHERE, RETURN) - * TODO -- optimize query - * run sub query - * result in identifiers - * process next items with results from first - - Returns a list: - [ [operation, [*args]], - the comma separated elements - [ops, [*args], ...] - [ [MATCH, Node, edge, ...] ..] - """ - - # Process query by parts. - # Sub queries are defined by specific Clauses - sub_queries = split_list(query, MAIN_CLAUSES) - - parsed_sub_queries = [parse_sub_query(sub_query) for sub_query in sub_queries] - - return parsed_sub_queries -# TODO variables ??? - -""" -Spliting - By main operation keyword - MATCH, CREATE ... - Subspliting: - CREATE -- ',' (),(), ()-[]->() - MATCH -- ','; '-' - -- split by, and then by - ... it must be: (node),[edge],(node)<,[edge], ... - -- ()-->() - -- ()--() - -- (a)-->()<--(b) -- path - -- ({x:1}) - -- (a)-[{b:3}]->(b) - -- (a)-[:A]-() - - WHERE e.name IN ['a', 'b'] AND e.b > 5 OR ... - """ - -# query -> main_parts -> -""" -CREATE (m:Person {name:'b'}) -MATCH n WHERE n.name='B' RETURN n.name -==> Split by main clauses -'MATCH (n), (a)-->(b)<--c<--(d), WHERE n.name RETURN n.name' -==> Split by sub clauses ((a n1 n2 n3)) -[ -['MATCH', (n)--()], -['WHERE', 'n.name=\'B\''], -['RETURN', 'n.name'] -] --> [commands] -- [M, W, R], -[expressions] -- list, tree ? - - -list -- a > 5 AND b < 3 --> [and, >, a, 5, <, b, 3] - -""" diff --git a/src/query-engine/query_ast/expression.py b/src/query_engine/__init__.py similarity index 100% rename from src/query-engine/query_ast/expression.py rename to src/query_engine/__init__.py diff --git a/src/query-engine/errors/syntax.py b/src/query_engine/errors/syntax.py similarity index 100% rename from src/query-engine/errors/syntax.py rename to src/query_engine/errors/syntax.py diff --git a/src/query_engine/query_ast/__init__.py b/src/query_engine/query_ast/__init__.py new file mode 100644 index 0000000..7baf2a8 --- /dev/null +++ b/src/query_engine/query_ast/__init__.py @@ -0,0 +1 @@ +__all__ = ["clauses", "expression", "operators", "models", "query"] diff --git a/src/query_engine/query_ast/clauses.py b/src/query_engine/query_ast/clauses.py new file mode 100644 index 0000000..e69b643 --- /dev/null +++ b/src/query_engine/query_ast/clauses.py @@ -0,0 +1,88 @@ +def ensure_array(value): + return value if isinstance(value, list) else [value] + +""" +Either add some serious logic to items or generate them dynamicaly +(type) + +MAIN_CLAUSES = [ + 'MATCH', + 'MERGE', + # This matches or creates semantics by using + # indexes and locks. You can specify different + # operations in case of a MATCH (part of the + # pattern already existed) or on CREATE + # (pattern did not exist yet). + + 'CREATE UNIQUE', + 'CREATE', + 'SET', # This updates properties and labels on nodes + 'REMOVE', # +and/or relationships. + 'DELETE', # It deletes nodes and relationships + + 'PROFILE', +] + +SUB_CLAUSES = [ + 'RETURN', + 'WHERE', + 'WITH', + 'DISTINCT', + 'ORDER BY' +] +""" +class Clause: + pass + + +class Match(Clause): + + def __init__(self, expression): + self.expression = expression + + +class Create(Clause): + + def __init__(self, expr): + self.expr = expr + + +class Where(Clause): + + def __init__(self, expr): + self.expr = expr + + +class Return(Clause): + + def __init__(self, props=()): + self.props = ensure_array(props) + + #raise InvalidArguments('Return needs at least one item') + + +MAIN_CLAUSES = [ + 'MATCH', + 'MERGE', + # This matches or creates semantics by using + # indexes and locks. You can specify different + # operations in case of a MATCH (part of the + # pattern already existed) or on CREATE + # (pattern did not exist yet). + + 'CREATE UNIQUE', + 'CREATE', + 'SET', # This updates properties and labels on nodes + 'REMOVE', # +and/or relationships. + 'DELETE', # It deletes nodes and relationships + + 'PROFILE', +] + +SUB_CLAUSES = [ + 'RETURN', + 'WHERE', + 'WITH', + 'DISTINCT', + 'ORDER BY' +] diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py new file mode 100644 index 0000000..dcf0b55 --- /dev/null +++ b/src/query_engine/query_ast/expression.py @@ -0,0 +1,29 @@ +def ensure_array(value): + return value if isinstance(value, list) else [value] + + +class Expression: + ''' + A where (and other elements?) expression handler + Contains a list of elements - variables, consts and operations + ''' + + def __init__(self, elements): + self.elements = ensure_array(elements) + + # TODO probably not ... + def validate_expression(self): + pass + + +class GraphPatternExpression(Expression): + + def __init__(self, elements): + Expression.__init__(self, elements) + + +class OperatorExpression(Expression): + + def __init__(self, elements): + Expression.__init__(self, elements) + diff --git a/src/query_engine/query_ast/models.py b/src/query_engine/query_ast/models.py new file mode 100644 index 0000000..dea9229 --- /dev/null +++ b/src/query_engine/query_ast/models.py @@ -0,0 +1,127 @@ +# TODO extract to util +# TODO can be a decorator +def ensure_array(value): + return value if isinstance(value, list) else [value] + + +class Literal: + """Defines a literal for an expression.""" + + def __init__(self, value): + """ + Args: + value: + """ + self._value = value + + +class Property: + def __init__(self, key, value): + self.key = key + self.value = value + + +class Identifier: + """ + Defines an identifier for a query. It can be + """ + + def __init__(self, letter, value=None): + """ + Args: + letter: + value: + """ + self._letter = letter + self._value = value + + @property + def value(self): + return self._value + + +class Label: + def __init__(self, name): + self.name = name + + +class Edge: + """ + TODO: make it immutable + An edge: + - has label + - identifier - used to keep the name of the matched edge + - has [properties] + - direction - true/false + - nodeIn, nodeOut - in case a direction if given, the edge direction + is determined from the node sequence given + """ + + def __init__(self, node_in, node_out, label='', directed=False, + identifier=None, + properties=()): + """ + Args: + label: + node_in: + node_out: + directed: + identifier: + properties: + """ + self.__label = label + self.__properties = ensure_array(properties) + self.__node_in = node_in + self.__node_out = node_out + self.__directed = directed + self.identifier = identifier + + def isDirected(self): + return self.direction + + def getNodes(self): + """ + Get a directed node pair out - in (direction flag needed) + """ + return (self.nodeOut, self.nodeIn) + + def getLabel(self): + return self.__label + + def getProperties(self): + return self.__properties + + +class ReturnEdge(Edge): + """ + A returned edge must have an identifier, if not + it shouldn't be returned. + """ + + def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, + properties): + Edge.__init__(self, label, properties) + self.__id = _id + + # TODO implement setters + + +class Node: + """ + TODO: make it immutable + A node: + - identifier -- used to define the result variable + - has label/s + - has [properties] + """ + + def __init__(self, labels=(), identifier=None, properties=()): + self.identifier = identifier + self.properties = ensure_array(properties) + self.labels = ensure_array(labels) + + +class ReturnNode(Node): + def __init__(self, identifier, _id, properties, labels=[]): + Node.__init__(self, identifier, properties, labels) + self.__id = _id diff --git a/src/query-engine/query_ast/operatiors.py b/src/query_engine/query_ast/operators.py similarity index 90% rename from src/query-engine/query_ast/operatiors.py rename to src/query_engine/query_ast/operators.py index f7ebb17..4f0dbb0 100644 --- a/src/query-engine/query_ast/operatiors.py +++ b/src/query_engine/query_ast/operators.py @@ -1,3 +1,8 @@ + +""" +OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] +""" + # TODO Needs redesign class Operator: ''' diff --git a/src/query-engine/query_ast/query.py b/src/query_engine/query_ast/query.py similarity index 95% rename from src/query-engine/query_ast/query.py rename to src/query_engine/query_ast/query.py index 92efe8d..2f7bd15 100644 --- a/src/query-engine/query_ast/query.py +++ b/src/query_engine/query_ast/query.py @@ -19,7 +19,7 @@ class SubQuery: """Docstring for QueryModel. """ - def __init__(self, commands) + def __init__(self, commands): """TODO: to be defined1. Args: diff --git a/src/query_engine/query_engine.py b/src/query_engine/query_engine.py new file mode 100644 index 0000000..072ec4c --- /dev/null +++ b/src/query_engine/query_engine.py @@ -0,0 +1,34 @@ +from src.query_engine.query_parser import QueryParser + +class QueryEngine: + def __init__(self, storage_api): + self.storage_api = storage_api + + def execute_query(self, query): + + """ + Executes a passed query. Follows the steps: + - Parse query -> QueryModel + - Optimize query + - execute the query using the storage_api + query - String + """ + query = QueryParser.parse(query) + query = QueryOptimizer.optimize(query_model) + self.__execute_query() + + @staticmethod + def __apply_post_processors(): + '''Return, sort, ...''' + pass + + + + def __execute_command(command, *args): + # use commands dict + pass + + +command = op[0] +subcommands = op[1:] +QueryEngine.__execute_command(op, *subcommands) diff --git a/src/query-engine/query_optimizer.py b/src/query_engine/query_optimizer.py similarity index 100% rename from src/query-engine/query_optimizer.py rename to src/query_engine/query_optimizer.py diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py new file mode 100644 index 0000000..4614ae4 --- /dev/null +++ b/src/query_engine/query_parser.py @@ -0,0 +1,220 @@ +import re +import collections +import itertools + +from src.query_engine.query_ast.operators import * +from src.query_engine.query_ast.query import * +from src.query_engine.query_ast.models import * +from src.query_engine.query_ast.clauses import * +from src.query_engine.query_ast.expression import * + +''' +- split to clauses (Match) +- split to sub-query (Where) -> Clause, expr, Cluase, expr ... +- Parse expression by clause + + +new MatchClause() +''' + + +def split_list(unsplitted, sep_list, PN=False): + """ + Splits a string by list of separators + + Unary prefix by default. + """ + + def apply_notation(splitted_list, splitter): + # TODO splitter may provide type of the notation (infix, postfix, suffix) + if PN: + # Binary operators - in_fix + for split_pos in range(0, len(splitted_list), 3): + splitted_list.insert(split_pos, splitter) + else: + # unary operators - prefix + + + + for split_pos in range(1, len(splitted_list)): + + splitted_list.insert(split_pos, splitter) + if not unsplitted or len(sep_list) == 0: + return unsplitted + + splitter = sep_list[0] + rest = sep_list[1:] + # splitted = [] + + if isinstance(unsplitted, list): + splitted_list = [] + for sub_str in unsplitted: + splitted_com = sub_str.split(splitter) + if len(splitted_com) > 0: + apply_notation(splitted_com, splitter) + + splitted_list.append(splitted_com) + splitted = list(itertools.chain(*splitted_list)) + else: + splitted = unsplitted.split(splitter) + apply_notation(splitted, splitter) + + return split_list(splitted, rest) + +# -- EXPRESSIONS -- + +MATCH_SPLITTERS = ['-'] +MULTI_ELEMENTS_SPLITTER = [','] + +OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] + +COMMANDS = {} + +''' + ** PARSER ** +''' + + +def __parse_node(node_string): + """ + Accepts (var:label {}) + + Do paring of string to node data and + return Node Object + """ + properties = __get_properties(node_string) + + labels = __get_labels(__parse_node) + + varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) + return varibalbe_str.group(0).split(':') if varibalbe_str else None + + +def __get_properties(node_string): + """ + {name: "Emu", ..} - dict use eval + returns prop dict or None + """ + properties_string = re.search('\{.*\}', node_string) + return eval(properties_string) if properties_string else None + + +def __get_labels(node_string): + """ + varName:Label1:Label2:... + returns { + varName: ..., + labels: [] + } or None + """ + pass + + +def __parse_edge(self, node_string): + """ + An edge can + Returns an edge with specified properties and orientation. + """ + properties = __get_properties(node_string) + labels = __get_labels(__parse_node) + +# clauses +def generate_expression(str): + #TODO + return Expression() + +def generate_clause(str, expr): + pass + +class QueryParser: + """ + Creates a Query object out of a query string. + """ + + @staticmethod + def parse_query(query_str): + """ + Parses an incoming query + CREATE ... + MATCH ... + as follows: + * get subqueries -- [CREATE ..., MATCH, ...] + * parse each sub query - + * define operation + * extract expression + * extract sub commands (WHERE, RETURN) + * TODO -- optimize query + * run sub query + * result in identifiers + * process next items with results from first + + Returns a list: + [ [operation, [*args]], - the comma separated elements + [ops, [*args], ...] + [ [MATCH, Node, edge, ...] ..] + """ + def parse_expression(expression_string, expression_type): + pass + + def parse_sub_query(sub_query): + + # Break to smaller parts with sub clauses - RETURN, WHERE + # List of: Clause, expressions (, separated) + clauses_split = split_list(sub_query, SUB_CLAUSES) + + # process expressions (of MATCH, WHERE, ... + # TODO expression type is defined by the clause it refers to -- use that cluase + expressions_split = parse_expression(sub_split) + + + # Process query by parts. + # Sub queries are defined by specific Clauses + + # TODO trailing spaces + sub_queries_str = split_list(query_str, MAIN_CLAUSES) + + parsed_sub_queries = [parse_sub_query(sub_query) for sub_query in + sub_queries_str] + # TODO parse to Query object ? + + # createSubQueries() + # createQueryObject() + + return parsed_sub_queries # TODO variables ??? + + +""" +Spliting + By main operation keyword - MATCH, CREATE ... + Subspliting: + CREATE -- ',' (),(), ()-[]->() + MATCH -- ','; '-' + -- split by, and then by - ... it must be: (node),[edge],(node)<,[edge], ... + -- ()-->() + -- ()--() + -- (a)-->()<--(b) -- path + -- ({x:1}) + -- (a)-[{b:3}]->(b) + -- (a)-[:A]-() + + WHERE e.name IN ['a', 'b'] AND e.b > 5 OR ... + """ + +# query -> main_parts -> +""" +CREATE (m:Person {name:'b'}) +MATCH n WHERE n.name='B' RETURN n.name +==> Split by main clauses +'MATCH (n), (a)-->(b)<--c<--(d), WHERE n.name RETURN n.name' +==> Split by sub clauses ((a n1 n2 n3)) +[ +['MATCH', (n)--()], +['WHERE', 'n.name=\'B\''], +['RETURN', 'n.name'] +] +-> [commands] -- [M, W, R], +[expressions] -- list, tree ? + + +list -- a > 5 AND b < 3 --> [and, >, a, 5, <, b, 3] +""" diff --git a/src/query-engine/syntax_checker.py b/src/query_engine/syntax_checker.py similarity index 100% rename from src/query-engine/syntax_checker.py rename to src/query_engine/syntax_checker.py diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py new file mode 100644 index 0000000..25ba5f9 --- /dev/null +++ b/src/query_engine/test_query_parser.py @@ -0,0 +1,201 @@ +from unittest import TestCase +from src.query_engine.query_parser import * + +from src.query_engine.query_ast import * +from src.query_engine.query_ast.models import * +from src.query_engine.query_ast.clauses import * +from src.query_engine.query_ast.expression import * + + +# TODO [0] test string, [1] result object + +class TestQueryParser(TestCase): + def test_list_split(self): + self.assertEquals(split_list('a1 b c d', ['a1']), + [['a1', 'b c d']]) + + self.assertEquals(split_list('a1 b a2 d', ['a1', 'a2']), + [['a1', 'b'], ['a2', 'd']]) + + self.assertEquals(split_list('a1 b a1 d', ['a1']), + [['a1', 'b'], ['a1', 'd']]) + + self.assertEquals(split_list('a1 b a1 d', ['a1', 'a2']), + [['a1', 'b'], ['a1', 'd']]) + + self.assertEquals(split_list('a1 b a2 d a1 b', ['a1', 'a2']), + [['a1', 'b'], ['a2', 'd'], ['a1', 'b']]) + + def setUp(self): + self.parser = QueryParser() + + # + # Test Main Method + # + def test_compound(self): + COMPOUND_TEST_BIG = [( + 'MATCH (person:Person)-[:IS_FRIEND_OF]->(friend),' + '(friend)-[:LIKES]->(restaurant:Restaurant),' + '(restaurant)-[:LOCATED_IN]->(loc:Location),' + '(restaurant)-[:SERVES]->(type:Cuisine)' + + 'WHERE person.name = \'Philip\'' + 'AND loc.location = \'New York\'' + 'AND type.cuisine = \'Sushi\'' + + 'RETURN restaurant.name, count(*) AS occurrence' + 'ORDER BY occurrence DESC' + 'LIMIT 5' + ) + # TODO Translate + ] + + COMPOUND_TEST = [( + 'MATCH (neo:Database {name:"Neo4j"})\n' + 'MATCH (anna:Person {name:"Anna"})\n' + 'CREATE (anna)-[:FRIEND]->(:Person:Expert ' + '{name:"Amanda"})-[:WORKED_WITH]->(neo);' + ), + query.Query([ + query.SubQuery([ + Match(GraphPatternExpression([Node(labels=Label('Database'), + identifier=Identifier( + 'neo'), + properties=[ + Property('name', + 'Neo4j')])])), + ]), + query.SubQuery([ + Match(GraphPatternExpression([Node(labels=Label('Person'), + identifier=Identifier( + 'anna'), + properties=[ + Property('name', + 'Anna')])])), + ]), + query.SubQuery([ + Create(GraphPatternExpression([Edge(label='FRIEND', + directed=True, + node_in=Node( + identifier=Identifier( + 'anna')), + node_out=Node( + labels=( + Label('Person'), + Label( + 'Expert')), + properties= + Property('name', + 'Amanda'))), + Edge(label='WORKED_WITH', + directed=True, + node_out=Node( + identifier=Identifier( + 'neo')), + node_in=Node( + labels=( + Label('Person'), + Label( + 'Expert')), + properties= + Property('name', + 'Amanda')))])) + ]) + ]) + ] + self.assertEqual(self.parser.parse_query(COMPOUND_TEST[0]), + COMPOUND_TEST[1]) + + def test_operator_expressions(self): + pass + + def test_graph_expressions(self): + SIMPLE_TEST_MATCH_EDGE = [( + 'MATCH (you {name:"You"})-[:FRIEND]->(yourFriends)' + 'RETURN you, yourFriends' + ), + query.Query([ + query.SubQuery([ + Match(GraphPatternExpression([Edge(label='FRIEND', + directed=True, + node_in= + Node( + identifier=Identifier( + 'you'), + properties= + Property('name', + 'You')), + node_out=Node( + labels=Label( + 'yourFriends'), + properties= + Property('name', + 'Amanda')))])), + Return(['you', 'yourFriends']) + ]) + ]) + ] + + SIMPLE_TEST_CREATE_NODE = [( + 'CREATE (you:Person {name:"You"})' + 'RETURN you' + ), + query.Query([ + query.SubQuery([ + Create(GraphPatternExpression([ + Node( + identifier=Identifier('you'), + labels=Label('Person'), + properties= + Property('name', + 'You')) + ])) + ]) + ]) + + ] + + TEST_MORE_EDGES = [( + 'MATCH (user)-[:PURCHASED]->(product)<-[:PURCHASED]-()-[:PURCHASED]->(otherProduct)' + 'RETURN user.name' + ), + ( + query.Query([ + query.SubQuery([Match( + GraphPatternExpression([Edge(label='PURCHASED', + directed=True, + node_in=Node( + identifier=Identifier( + 'user')), + node_out=Node( + identifier=Identifier( + 'product'))), + Edge(label='PURCHASED', + directed=True, + node_out=Node( + identifier=Identifier( + 'product')), + node_in=Node()), + Edge(label='PURCHASED', + directed=True, + node_out=Node( + identifier=Identifier( + 'otherProduct')), + node_in=Node()) + ])), + Return('user.name') + ])]) + )] + + # TODO Where + + self.assertEqual(self.parser.parse_query(SIMPLE_TEST_CREATE_NODE[0]), + SIMPLE_TEST_CREATE_NODE[1]) + self.assertEqual(self.parser.parse_query(SIMPLE_TEST_MATCH_EDGE[0]), + SIMPLE_TEST_MATCH_EDGE[1]) + self.assertEqual(self.parser.parse_query(TEST_MORE_EDGES[0]), + TEST_MORE_EDGES[1]) + + +def test_exceptions(self): + pass From 69aec379ce69e1f5659b317f63c4d86739129e1d Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 16 Jun 2016 11:08:06 +0300 Subject: [PATCH 04/22] Progress. --- src/lib/utils.py | 2 + src/query_engine/errors/syntax.py | 10 +- src/query_engine/query_ast/clauses.py | 4 +- src/query_engine/query_ast/expression.py | 5 +- src/query_engine/query_ast/models.py | 6 +- src/query_engine/query_ast/utils.py | 14 +++ src/query_engine/query_engine.py | 2 +- src/query_engine/query_parser.py | 140 +++++++++++++++++------ src/query_engine/test_query_parser.py | 7 ++ 9 files changed, 140 insertions(+), 50 deletions(-) create mode 100644 src/query_engine/query_ast/utils.py diff --git a/src/lib/utils.py b/src/lib/utils.py index e69de29..cbae564 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -0,0 +1,2 @@ +def ensure_array(value): + return value if isinstance(value, list) else [value] diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index d1226eb..7714069 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -1,5 +1,13 @@ class InvalidSyntaxError(Exception): - pass + def __init__(self, value): + """""" + self.value = value + + +class UnsupportedExpressionType(InvalidSyntaxError): + def __init__(self, value): + """""" + InvalidSyntaxError.__init__(self, value) class NumberOfOperandsError(InvalidSyntaxError): pass diff --git a/src/query_engine/query_ast/clauses.py b/src/query_engine/query_ast/clauses.py index e69b643..51de42a 100644 --- a/src/query_engine/query_ast/clauses.py +++ b/src/query_engine/query_ast/clauses.py @@ -1,5 +1,4 @@ -def ensure_array(value): - return value if isinstance(value, list) else [value] +from src.lib.utils import ensure_array """ Either add some serious logic to items or generate them dynamicaly @@ -86,3 +85,4 @@ def __init__(self, props=()): 'DISTINCT', 'ORDER BY' ] + diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index dcf0b55..44b620b 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -1,6 +1,4 @@ -def ensure_array(value): - return value if isinstance(value, list) else [value] - +from src.lib.utils import ensure_array class Expression: ''' @@ -27,3 +25,4 @@ class OperatorExpression(Expression): def __init__(self, elements): Expression.__init__(self, elements) + diff --git a/src/query_engine/query_ast/models.py b/src/query_engine/query_ast/models.py index dea9229..4cba6d3 100644 --- a/src/query_engine/query_ast/models.py +++ b/src/query_engine/query_ast/models.py @@ -1,8 +1,4 @@ -# TODO extract to util -# TODO can be a decorator -def ensure_array(value): - return value if isinstance(value, list) else [value] - +from src.lib.utils import ensure_array class Literal: """Defines a literal for an expression.""" diff --git a/src/query_engine/query_ast/utils.py b/src/query_engine/query_ast/utils.py new file mode 100644 index 0000000..1ea7f10 --- /dev/null +++ b/src/query_engine/query_ast/utils.py @@ -0,0 +1,14 @@ +from src.query_engine.query_ast.expression import * +from src.query_engine.query_ast.clauses import * + +# TODO +def get_expression_type(clause): + if True: + return GraphPatternExpression + else: + return OperatorExpression + + +# TODO +def get_clause_type(clause_str): + return None \ No newline at end of file diff --git a/src/query_engine/query_engine.py b/src/query_engine/query_engine.py index 072ec4c..0ec4410 100644 --- a/src/query_engine/query_engine.py +++ b/src/query_engine/query_engine.py @@ -14,7 +14,7 @@ def execute_query(self, query): query - String """ query = QueryParser.parse(query) - query = QueryOptimizer.optimize(query_model) + #TODO query = QueryOptimizer.optimize(query_model) self.__execute_query() @staticmethod diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index 4614ae4..0c8d70d 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -2,11 +2,14 @@ import collections import itertools +from src.query_engine.errors.syntax import * + from src.query_engine.query_ast.operators import * from src.query_engine.query_ast.query import * from src.query_engine.query_ast.models import * from src.query_engine.query_ast.clauses import * from src.query_engine.query_ast.expression import * +from src.query_engine.query_ast import utils ''' - split to clauses (Match) @@ -18,6 +21,8 @@ ''' +# TODO FIX +# TODO caseInsensitive def split_list(unsplitted, sep_list, PN=False): """ Splits a string by list of separators @@ -33,16 +38,13 @@ def apply_notation(splitted_list, splitter): splitted_list.insert(split_pos, splitter) else: # unary operators - prefix - - - for split_pos in range(1, len(splitted_list)): splitted_list.insert(split_pos, splitter) if not unsplitted or len(sep_list) == 0: return unsplitted - splitter = sep_list[0] + splitter = sep_list[0] + ' ' # don't match rest = sep_list[1:] # splitted = [] @@ -61,6 +63,7 @@ def apply_notation(splitted_list, splitter): return split_list(splitted, rest) + # -- EXPRESSIONS -- MATCH_SPLITTERS = ['-'] @@ -74,7 +77,6 @@ def apply_notation(splitted_list, splitter): ** PARSER ** ''' - def __parse_node(node_string): """ Accepts (var:label {}) @@ -118,13 +120,62 @@ def __parse_edge(self, node_string): properties = __get_properties(node_string) labels = __get_labels(__parse_node) -# clauses -def generate_expression(str): - #TODO - return Expression() -def generate_clause(str, expr): - pass +def generate_clause(clause, raw_expr): + """ + + Args: + clause (str): + raw_expr (str|List(str)): + + Returns: + Clause: + """ + clause = utils.get_clause_type(clause) + return clause(raw_expr) + + +def parse_expression(expression, expression_type): + """ + + Args: + expression (str): + expression_type (Expression): The type of expression. Note it represents + a class, not an instance. + + Returns: + Expression: the generated expression + + Raises: + InvalidSyntaxError: + """ + + if expression_type == GraphPatternExpression: + parser = parse_graph_expression + elif expression_type == OperatorExpression: + parser = parse_operator_expression + else: + raise UnsupportedExpressionType(expression_type) + + return expression_type(parser(expression)) + + +def parse_clause(raw_clause): + """ + + Args: + raw_clause (List[srt]): [clause, expr] + + Returns: + Clause: The generated clause + """ + clause_str = raw_clause[0] + expr = raw_clause[1] + + expr = parse_expression(expr, + utils.get_expression_type(clause_str)) + return generate_clause(clause_str, expr) + class QueryParser: """ @@ -132,55 +183,68 @@ class QueryParser: """ @staticmethod - def parse_query(query_str): + def parse_query(query): """ Parses an incoming query - CREATE ... - MATCH ... - as follows: - * get subqueries -- [CREATE ..., MATCH, ...] - * parse each sub query - - * define operation - * extract expression - * extract sub commands (WHERE, RETURN) - * TODO -- optimize query - * run sub query - * result in identifiers - * process next items with results from first - - Returns a list: - [ [operation, [*args]], - the comma separated elements - [ops, [*args], ...] - [ [MATCH, Node, edge, ...] ..] + CREATE ... + MATCH ... + as follows: + * get subqueries -- [CREATE ..., MATCH, ...] + * parse each sub query - + * define operation + * extract expression + * extract sub commands (WHERE, RETURN) + * TODO -- optimize query + * run sub query + * result in identifiers + * process next items with results from first + + Args: + query (str): + + Returns: + Query: generated query + """ - def parse_expression(expression_string, expression_type): - pass - def parse_sub_query(sub_query): + def parse_sub_query(raw_sub_query): + """ + + Args: + raw_sub_query (List[str]): + A list containing the sub-query elements, + e.g. ['Match', '(you)'], + ['Match', '(you)', 'Return', 'you.a'] + + Returns: + SubQuery: generated SubQuery + """ # Break to smaller parts with sub clauses - RETURN, WHERE # List of: Clause, expressions (, separated) - clauses_split = split_list(sub_query, SUB_CLAUSES) + subclauses_split = split_list(raw_sub_query, SUB_CLAUSES) # process expressions (of MATCH, WHERE, ... # TODO expression type is defined by the clause it refers to -- use that cluase - expressions_split = parse_expression(sub_split) + subclauses = (parse_clause(subclause_list) + for subclause_list in subclauses_split) + return SubQuery(subclauses) # Process query by parts. # Sub queries are defined by specific Clauses # TODO trailing spaces - sub_queries_str = split_list(query_str, MAIN_CLAUSES) + sub_queries_str = split_list(query, MAIN_CLAUSES) - parsed_sub_queries = [parse_sub_query(sub_query) for sub_query in - sub_queries_str] + sub_queries = [parse_sub_query(sub_query) for sub_query in + sub_queries_str] # TODO parse to Query object ? # createSubQueries() # createQueryObject() - return parsed_sub_queries # TODO variables ??? + return Query(sub_queries) # TODO variables ??? """ diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py index 25ba5f9..fd6b38a 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_engine/test_query_parser.py @@ -26,6 +26,13 @@ def test_list_split(self): self.assertEquals(split_list('a1 b a2 d a1 b', ['a1', 'a2']), [['a1', 'b'], ['a2', 'd'], ['a1', 'b']]) + # Special conditions + self.assertEquals(split_list('Aa1 Ba a2 d aA1 b', ['aa1', 'a2']), + [['aa1', 'Ba'], ['a2', 'd'], ['aa1', 'b']]) + + self.assertEquals(split_list('Aa Baa aA baab', ['aa']), + [['aa', 'Baa'], ['aa', 'baab']]) + def setUp(self): self.parser = QueryParser() From 6af592d16c800fcbdaed43e0e35239c56de76200 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 16 Jun 2016 12:33:37 +0300 Subject: [PATCH 05/22] Parser - progress ... --- src/query_engine/errors/syntax.py | 27 ++++ src/query_engine/query_ast/expression.py | 22 +++- src/query_engine/query_parser.py | 157 +++++++++++++++++++---- 3 files changed, 176 insertions(+), 30 deletions(-) diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index 7714069..4b5de16 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -9,10 +9,37 @@ def __init__(self, value): """""" InvalidSyntaxError.__init__(self, value) + class NumberOfOperandsError(InvalidSyntaxError): pass + class InvalidOperationError(InvalidSyntaxError): pass +class InvalidExpressionError(InvalidSyntaxError): + def __init__(self, value): + InvalidSyntaxError.__init__(self, value) + + +class InvalidGraphExpressionError(InvalidExpressionError): + def __init__(self, value): + InvalidExpressionError.__init__(self, value) + + +class InvalidNodeError(InvalidGraphExpressionError): + def __init__(self, value, msg): + InvalidGraphExpressionError.__init__(self, value) + self.msg = msg + + +class InvalidEdgeError(InvalidGraphExpressionError): + def __init__(self, value, msg): + InvalidGraphExpressionError.__init__(self, value) + self.msg = msg + + +class InvalidOperatorExpression(InvalidExpressionError): + def __init__(self, value): + InvalidExpressionError.__init__(self, value) diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index 44b620b..710db65 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -1,5 +1,6 @@ from src.lib.utils import ensure_array + class Expression: ''' A where (and other elements?) expression handler @@ -14,15 +15,26 @@ def validate_expression(self): pass +class SimpleGraphPatternExpression(Expression): + def __init__(self, expr): + """ + + Args: + expr (List[Node|Edge]): + """ + Expression.__init__(self, expr) + + class GraphPatternExpression(Expression): + def __init__(self, simple_exprs): + """ - def __init__(self, elements): - Expression.__init__(self, elements) + Args: + simple_exprs (SimpleGraphPatternExpression): + """ + Expression.__init__(self, simple_exprs) class OperatorExpression(Expression): - def __init__(self, elements): Expression.__init__(self, elements) - - diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index 0c8d70d..aa8132c 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -77,48 +77,152 @@ def apply_notation(splitted_list, splitter): ** PARSER ** ''' -def __parse_node(node_string): + +def check_valid_edge(raw_node): + # Does it have brackets + error = None + if len(raw_node) - 2 != len(raw_node.strip('[]')): + error = 'Edge must be enclosed in []' + # TODO other checks + # elif + + if error: + raise InvalidEdgeError(raw_node, error) + + +def check_valid_node(raw_node): + # Does it have brackets + error = None + if len(raw_node) - 2 != len(raw_node.strip('()')): + error = 'Node must be enclosed in ()' + # TODO other checks + # elif + + if error: + raise InvalidNodeError(raw_node, error) + + +def get_properties(raw_node): """ - Accepts (var:label {}) + Args: + raw_node (str): + + Returns: + List[Property]: - Do paring of string to node data and - return Node Object """ - properties = __get_properties(node_string) + raw_properties = re.search('\{.*\}', raw_node) + eval(raw_properties) if raw_properties else None - labels = __get_labels(__parse_node) - varibalbe_str = re.search('[^(]\w*(\)|\{))]', node_string) - return varibalbe_str.group(0).split(':') if varibalbe_str else None +def get_labels(raw_node): + """ + Args: + raw_node (str): + Returns: + List[Label]: + """ + pass -def __get_properties(node_string): +def get_identifier(raw_node): """ - {name: "Emu", ..} - dict use eval - returns prop dict or None - """ - properties_string = re.search('\{.*\}', node_string) - return eval(properties_string) if properties_string else None + Args: + raw_node (str): + + Returns: + Identifier|None: + """ + identifier_re = '' -def __get_labels(node_string): +def parse_node(raw_node): """ - varName:Label1:Label2:... - returns { - varName: ..., - labels: [] - } or None + Node must follow the pattern: ([identifier]:[label:label...] [{properties}]) + Args: + raw_node (str): + Returns: + Node: + Raises: + InvalidNodeError: """ - pass + check_valid_node(raw_node) + raw_node = raw_node.strip() + node_split = re.search('[^(]\w*(\)|\{))]', raw_node) + + identifier = get_identifier(raw_node) + labels = get_labels(raw_node) + properties = get_properties(raw_node) + #return varibalbe_str.group(0).split(':') if varibalbe_str else None + return Node(identifier=identifier, labels=labels, properties=properties) -def __parse_edge(self, node_string): + +def parse_edge(self, raw_edge): """ An edge can Returns an edge with specified properties and orientation. """ - properties = __get_properties(node_string) - labels = __get_labels(__parse_node) + check_valid_node(raw_edge) + raw_edge = raw_edge.strip() + node_split = re.search('[^(]\w*(\)|\{))]', raw_edge) + + identifier = get_identifier(raw_edge) + labels = get_labels(raw_edge) + properties = get_properties(raw_edge) + + #return varibalbe_str.group(0).split(':') if varibalbe_str else None + + + +def parse_simple_graph_expr(raw_simple_expr): + """ + Expression must follow the pattern: + Node[Edge && Node ...] + Args: + raw_simple_expr (str): + Returns: + SimpleGraphPatternExpression: + + Raises: + InvalidGraphExpressionError: + + """ + # split to items + # parse Node, parse Edge + simple_expr_raw_elements = raw_simple_expr.split('-') + + parse_method = parse_node + simple_expr_elements = [] + + for elem in simple_expr_raw_elements: + simple_expr_elements.append(parse_method(elem)) + + # Alternate parse method + parse_method = parse_node if parse_method == parse_edge else parse_edge + + # TODO populate Edges + + return SimpleGraphPatternExpression(simple_expr_elements) + + +def parse_graph_expression(simple_graph_exprs): + """ + ()-[]-(); (); (), ()-[]-() + Args: + simple_graph_exprs (List[str]): + Returns: + GraphPatternExpression: + + """ + # Split by , + # split by - + # parse elements as follows -> node, edge, node edge ... + # TODO MOVE TO SPLITTER + simple_graph_exprs = [parse_simple_graph_expr(simple_expr) + for simple_expr in simple_graph_exprs] + + return GraphPatternExpression(simple_graph_exprs) def generate_clause(clause, raw_expr): @@ -149,6 +253,7 @@ def parse_expression(expression, expression_type): Raises: InvalidSyntaxError: """ + expression = expression.split(',') if expression_type == GraphPatternExpression: parser = parse_graph_expression @@ -157,7 +262,7 @@ def parse_expression(expression, expression_type): else: raise UnsupportedExpressionType(expression_type) - return expression_type(parser(expression)) + return parser(expression) def parse_clause(raw_clause): @@ -224,6 +329,8 @@ def parse_sub_query(raw_sub_query): # List of: Clause, expressions (, separated) subclauses_split = split_list(raw_sub_query, SUB_CLAUSES) + #TODO split expressions by ',' + # process expressions (of MATCH, WHERE, ... # TODO expression type is defined by the clause it refers to -- use that cluase subclauses = (parse_clause(subclause_list) From 71844116ce2a31bb8fd3d9dbc7841fc368af665e Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Sun, 19 Jun 2016 12:14:04 +0300 Subject: [PATCH 06/22] Labels, props, identifier ... --- src/query_engine/errors/syntax.py | 11 ++ src/query_engine/query_ast/expression.py | 2 +- src/query_engine/query_ast/models.py | 40 +++++- src/query_engine/query_ast/operators.py | 6 +- src/query_engine/query_parser.py | 135 +++++++++++++----- src/query_engine/test_query_parser.py | 174 +++++++++++++++++++++-- 6 files changed, 320 insertions(+), 48 deletions(-) diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index 4b5de16..5dbf34a 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -40,6 +40,17 @@ def __init__(self, value, msg): self.msg = msg +class InvalidEdgeLabelError(InvalidEdgeError): + def __init__(self, value, msg): + InvalidEdgeError.__init__(self, value) + self.msg = msg + + +class InvalidLabelsCountError(InvalidGraphExpressionError): + def __init__(self): + pass + + class InvalidOperatorExpression(InvalidExpressionError): def __init__(self, value): InvalidExpressionError.__init__(self, value) diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index 710db65..8a2a680 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -30,7 +30,7 @@ def __init__(self, simple_exprs): """ Args: - simple_exprs (SimpleGraphPatternExpression): + simple_exprs (List[SimpleGraphPatternExpression]): """ Expression.__init__(self, simple_exprs) diff --git a/src/query_engine/query_ast/models.py b/src/query_engine/query_ast/models.py index 4cba6d3..ac39063 100644 --- a/src/query_engine/query_ast/models.py +++ b/src/query_engine/query_ast/models.py @@ -1,5 +1,6 @@ from src.lib.utils import ensure_array + class Literal: """Defines a literal for an expression.""" @@ -10,12 +11,23 @@ def __init__(self, value): """ self._value = value + def __repr__(self): + return self._value + + def __eq__(self, other): + return self.__dict__ == other.__dict__ class Property: def __init__(self, key, value): self.key = key self.value = value + def __repr__(self): + return self.key + ':' + str(self.value) + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + class Identifier: """ @@ -35,11 +47,23 @@ def __init__(self, letter, value=None): def value(self): return self._value + def __repr__(self): + return self._letter + ' ' + self._value + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + class Label: def __init__(self, name): self.name = name + def __repr__(self): + return self.name + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + class Edge: """ @@ -53,7 +77,7 @@ class Edge: is determined from the node sequence given """ - def __init__(self, node_in, node_out, label='', directed=False, + def __init__(self, node_in=None, node_out=None, label='', directed=False, identifier=None, properties=()): """ @@ -72,6 +96,14 @@ def __init__(self, node_in, node_out, label='', directed=False, self.__directed = directed self.identifier = identifier + def __repr__(self): + return self.identifier + ':' + self.__label + \ + ' ' + self.__properties + ' ' + self.__node_in + \ + ' ' + self.__node_out + ' ' + str(self.__directed) + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + def isDirected(self): return self.direction @@ -116,6 +148,12 @@ def __init__(self, labels=(), identifier=None, properties=()): self.properties = ensure_array(properties) self.labels = ensure_array(labels) + def __repr__(self): + return self.identifier + ':' + self.labels + ' ' + self.properties + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + class ReturnNode(Node): def __init__(self, identifier, _id, properties, labels=[]): diff --git a/src/query_engine/query_ast/operators.py b/src/query_engine/query_ast/operators.py index 4f0dbb0..fd11d49 100644 --- a/src/query_engine/query_ast/operators.py +++ b/src/query_engine/query_ast/operators.py @@ -1,7 +1,5 @@ -""" OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] -""" # TODO Needs redesign class Operator: @@ -9,13 +7,13 @@ class Operator: Base operator class. ''' - def __init__(self, op, processor, operands=0): + def __init__(self, op, processor, priority, operands=0): self.operation = op self.priority = priority self.processor = processor self.operands = operands - def execute(*args): + def execute(self, *args): ''' Pass the required number of operands to the operator diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index aa8132c..4bc66a9 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -1,6 +1,7 @@ import re import collections import itertools +import logging as Logger from src.query_engine.errors.syntax import * @@ -12,6 +13,19 @@ from src.query_engine.query_ast import utils ''' +Support Notes: +- Nodes properties may have space between value and key +- Properties value may be - string, number +- Nodes may have more than 1 space between labels and properties +- Support whitespaces in id/labels ??? +- Case-sensitive labels and ids ?? +- Edges with properties + +- Only Edge search -> ()-[]-() + +- Support split by ',' expressions + + - split to clauses (Match) - split to sub-query (Where) -> Clause, expr, Cluase, expr ... - Parse expression by clause @@ -19,7 +33,14 @@ new MatchClause() ''' +# TODO flags -> convert to number; find all matches +# TODO use \w ?? or \S +IDENTIFIER_REGEX = re.compile('^(\w+)(?:|\s)?') +LABELS_REGEX = re.compile(':(\w+)(?:|\s)?') +# TODO allowed_val_chars = '\w|\'' +PROPERTIES_BODY_REGEX = re.compile('{(.*?)}') +PROPERTY_REGEX = re.compile('(?P\w+):\s*"?(?P[\w|.]+)"?,?\s*') # TODO FIX # TODO caseInsensitive @@ -30,6 +51,8 @@ def split_list(unsplitted, sep_list, PN=False): Unary prefix by default. """ + # TODO use re.split() !!! + def apply_notation(splitted_list, splitter): # TODO splitter may provide type of the notation (infix, postfix, suffix) if PN: @@ -69,15 +92,9 @@ def apply_notation(splitted_list, splitter): MATCH_SPLITTERS = ['-'] MULTI_ELEMENTS_SPLITTER = [','] -OPERATORS_BY_PRIORITY = ['OR', 'XOR', 'AND', 'OR', '>', '<', '<=', '>=', 'IN'] - -COMMANDS = {} - ''' ** PARSER ** ''' - - def check_valid_edge(raw_node): # Does it have brackets error = None @@ -102,8 +119,9 @@ def check_valid_node(raw_node): raise InvalidNodeError(raw_node, error) -def get_properties(raw_node): +def get_properties(raw_elem): """ + Gets properties; Parses to Number if possible. Args: raw_node (str): @@ -111,34 +129,84 @@ def get_properties(raw_node): List[Property]: """ - raw_properties = re.search('\{.*\}', raw_node) - eval(raw_properties) if raw_properties else None - - -def get_labels(raw_node): + def to_number(s): + """Tries to parse element to number.""" + try: + return float(s) + except ValueError: + try: + return int(s) + except ValueError: + return s + + # MATCH should work (match the beginning ...) + # Else raise error + # TODO or use findAll + # TODO check for {} brackets + properties = [] + raw_props = PROPERTIES_BODY_REGEX.search(raw_elem) + # get matched group + if raw_props and raw_props.group(1): + raw_props = raw_props.group(1) + Logger.debug('Processing properties: ', raw_props) + + # Use Named Groups to match elements + match = PROPERTY_REGEX.match(raw_props) + while match: + key = match.group('key').strip() + value = to_number(match.group('val').strip()) + properties.append(Property(key=key, value=value)) + + match = PROPERTY_REGEX.match(raw_props, match.end()) + + return properties + + +def get_labels(raw_elem, multi=True): """ + ;id {};id:... {} Args: raw_node (str): Returns: List[Label]: """ - pass - - -def get_identifier(raw_node): + match = LABELS_REGEX.findall(raw_elem) + # edge labels + if match and (len(match) > 1 and not multi): + raise InvalidLabelsCountError() + # Make to system labels + match = [Label(raw_label) for raw_label in match] + if match and not multi: + # Is edge label + match = match[0] + return match + + +def get_identifier(raw_elem): """ + ;id {};id:... {} Args: raw_node (str): - Returns: Identifier|None: """ - identifier_re = '' + match = IDENTIFIER_REGEX.search(raw_elem) + if match: + match = Identifier(match.group(1).strip()) + return match + + +def is_node(raw_elem): + pass + + +def is_edge(raw_elem): + pass def parse_node(raw_node): """ - Node must follow the pattern: ([identifier]:[label:label...] [{properties}]) + Node must follow the pattern: ([identifier][:label:label...] [{properties}]) Args: raw_node (str): Returns: @@ -147,32 +215,27 @@ def parse_node(raw_node): InvalidNodeError: """ check_valid_node(raw_node) - raw_node = raw_node.strip() - node_split = re.search('[^(]\w*(\)|\{))]', raw_node) identifier = get_identifier(raw_node) labels = get_labels(raw_node) properties = get_properties(raw_node) - #return varibalbe_str.group(0).split(':') if varibalbe_str else None return Node(identifier=identifier, labels=labels, properties=properties) -def parse_edge(self, raw_edge): +def parse_edge(raw_edge): """ An edge can Returns an edge with specified properties and orientation. """ - check_valid_node(raw_edge) + check_valid_edge(raw_edge) raw_edge = raw_edge.strip() - node_split = re.search('[^(]\w*(\)|\{))]', raw_edge) identifier = get_identifier(raw_edge) - labels = get_labels(raw_edge) + label = get_labels(raw_edge, True) properties = get_properties(raw_edge) - #return varibalbe_str.group(0).split(':') if varibalbe_str else None - + return Edge(identifier=identifier, label=label, properties=properties) def parse_simple_graph_expr(raw_simple_expr): @@ -192,16 +255,20 @@ def parse_simple_graph_expr(raw_simple_expr): # parse Node, parse Edge simple_expr_raw_elements = raw_simple_expr.split('-') - parse_method = parse_node simple_expr_elements = [] for elem in simple_expr_raw_elements: - simple_expr_elements.append(parse_method(elem)) + # Parse by element type + if is_node(elem): + parsed_elem = parse_node(elem) + elif is_edge(elem): + parsed_elem = parse_edge(elem) - # Alternate parse method - parse_method = parse_node if parse_method == parse_edge else parse_edge + else: + raise InvalidGraphExpressionError(elem) - # TODO populate Edges + simple_expr_elements.append(parsed_elem) + # TODO populate Edges return SimpleGraphPatternExpression(simple_expr_elements) @@ -329,7 +396,7 @@ def parse_sub_query(raw_sub_query): # List of: Clause, expressions (, separated) subclauses_split = split_list(raw_sub_query, SUB_CLAUSES) - #TODO split expressions by ',' + # TODO split expressions by ',' # process expressions (of MATCH, WHERE, ... # TODO expression type is defined by the clause it refers to -- use that cluase diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py index fd6b38a..5dd2d5d 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_engine/test_query_parser.py @@ -9,7 +9,162 @@ # TODO [0] test string, [1] result object + class TestQueryParser(TestCase): + def test_parse_graph_expression(self): + # test with , + # test with 1 node + # test with edge (directed or none) + # test with node and edge + # test with more edges + self.fail() + + def test_get_properties(self): + self.assertEquals(get_properties(':lab {a: 1}'), + [Property('a', 1)], + 'Normal, spaced, int') + self.assertEquals(get_properties(':lab {a: 1.2}'), + [Property('a', 1.2)], + 'Normal, spaced, float') + self.assertEquals(get_properties(':lab {a: "b"} '), + [Property('a', 'b')], + 'Normal spaced str') + self.assertEquals(get_properties(' {a: "b"} '), + [Property('a', 'b')], + 'Normal; only props') + self.assertEquals(get_properties(' {a:"b"} '), + [Property('a', 'b')], + 'Normal; no space') + + self.assertEquals(get_properties(' {a:"b", b:"c"} '), + [Property('a', 'b'), Property('b', 'c')], + 'Multi prop') + self.assertEquals(get_properties(' {a:"b", b:"c",c:1} '), + [Property('a', 'b'), Property('b', 'c'), + Property('c', 1)], + 'Multi prop') + self.assertEquals(get_properties('a:label '), []) + # TODO test EXCEPTIONS + + def test_get_labels(self): + self.assertEquals(get_labels('a:lab {a: 1}'), + [Label('lab')]) + self.assertEquals(get_labels('a:lab:lab1 {a: 1}'), + [Label('lab'), Label('lab1')]) + self.assertEquals(get_labels(':lab:lab1 {a: 1}'), + [Label('lab'), Label('lab1')]) + self.assertEquals(get_labels(':lab:lab1'), + [Label('lab'), Label('lab1')]) + self.assertEquals(get_labels(':lab:lab1:lab3'), + [Label('lab'), Label('lab1'), Label('lab3')]) + self.assertEquals(get_labels('a {}'), + []) + + self.assertRaises(InvalidLabelsCountError, get_labels, 'a:b:c', + multi=False), + self.assertEquals(get_labels('a:b', multi=False), + Label('b')), + + # TODO test EXCEPTIONS + + def test_get_identifier(self): + self.assertEquals(get_identifier('a'), Identifier('a')) + self.assertEquals(get_identifier('a:b '), Identifier('a')) + self.assertIs(get_identifier(':b'), None) + + + def test_parse_edge(self): + # self.assertRaises(InvalidEdgeLabelError, + # parse_edge, '[]') + self.assertEquals(parse_edge('[a]'), + Edge('a')) + self.assertEquals(parse_edge('[a {a: 1}]'), + Edge(identifier=Identifier('a'), + properties=[Property('a', 1)])) + self.assertEquals(parse_edge('[a:b {a: 1}]'), + Edge(identifier=Identifier('a'), + label=Label('b'), + properties=[Property('a', 1)])) + self.assertEquals(parse_edge('[:b {a: 1}]'), + Edge(label=Label('b'), + properties=[Property('a', 1)])) + self.assertEquals(parse_edge('[]'), + Edge()) + + # check direction + # + + # TODO clean up tests + def test_parse_node(self): + self.assertEquals(parse_node('()'), + Node(), + 'Just a node') + + self.assertEquals(parse_node('(id:lab1)'), + Node(identifier=id, labels=[Label('lab1')]), + 'Id and label') + + # + self.assertEquals(parse_node('(id)'), + Node(identifier=id), + 'Just id') + + self.assertEquals(parse_node('(id {a: 1})'), + Node(identifier=id, properties=[Property('a', 1)]), + 'Id and props') + + self.assertEquals(parse_node('(id:lab {a: 1})'), + Node(identifier=id, + labels=[Label('lab')], + properties=[Property('a', 1)]), + 'Id and props') + + self.assertEquals(parse_node('(:lab {a: 1})'), + Node(labels=[Label('lab')], + properties=[Property('a', 1)]), + 'Label and prop') + + self.assertEquals(parse_node('(:lab:lab1:lab2 {a: 1})'), + Node(labels=[Label('lab'), Label('lab1'), + Label('lab2')], + properties=[Property('a', 1)]), + 'Many Labels and prop') + + self.assertEquals(parse_node('(:lab:lab1:lab2)'), + Node(labels=[Label('lab'), Label('lab1'), + Label('lab2')]), + 'Many Labels') + + # PROPERTIES + + self.assertEquals(parse_node('({a: 1})'), + Node(properties=[Property('a', 1)]), + 'Spaced prop'), + + self.assertEquals(parse_node('({a:1})'), + Node(properties=[Property('a', 1)]), + 'No Spaced prop'), + + self.assertEquals(parse_node('({a:1.12})'), + Node(properties=[Property('a', 1.12)]), + 'Float prop'), + + self.assertEquals(parse_node('({a:"abc"})'), + Node(properties=[Property('a', "abc")]), + 'Prop with string') + + self.assertEquals(parse_node('({a:"abc"})'), + Node(properties=[Property('a', "abc")]), + 'Prop with string') + + self.assertEquals(parse_node('({a:"abc", b: 1})'), + Node(properties=[Property('a', 'abc'), + Property('b', 1)]), + 'Mixed properties') + # + # TODO raise Tests + # + def test_list_split(self): self.assertEquals(split_list('a1 b c d', ['a1']), [['a1', 'b c d']]) @@ -65,12 +220,13 @@ def test_compound(self): ), query.Query([ query.SubQuery([ - Match(GraphPatternExpression([Node(labels=Label('Database'), - identifier=Identifier( - 'neo'), - properties=[ - Property('name', - 'Neo4j')])])), + Match(GraphPatternExpression([SimpleGraphPatternExpression( + [Node(labels=Label('Database'), + identifier=Identifier( + 'neo'), + properties=[ + Property('name', + 'Neo4j')])])])), ]), query.SubQuery([ Match(GraphPatternExpression([Node(labels=Label('Person'), @@ -113,8 +269,10 @@ def test_compound(self): self.assertEqual(self.parser.parse_query(COMPOUND_TEST[0]), COMPOUND_TEST[1]) + # TODO test Expressions with ','hj + def test_operator_expressions(self): - pass + self.fail() def test_graph_expressions(self): SIMPLE_TEST_MATCH_EDGE = [( @@ -205,4 +363,4 @@ def test_graph_expressions(self): def test_exceptions(self): - pass + self.fail() From 84ef8990b084e63d76320d8eede54ce48c788d51 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Mon, 20 Jun 2016 12:31:22 +0300 Subject: [PATCH 07/22] Working - split, parse (node, edge ...) --- src/lib/utils.py | 16 +- src/query_engine/errors/syntax.py | 12 +- src/query_engine/query_ast/expression.py | 9 +- src/query_engine/query_ast/models.py | 62 +++--- src/query_engine/query_parser.py | 263 ++++++++++++++--------- src/query_engine/test_query_parser.py | 189 ++++++++++------ 6 files changed, 355 insertions(+), 196 deletions(-) diff --git a/src/lib/utils.py b/src/lib/utils.py index cbae564..2c47fe1 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -1,2 +1,16 @@ +import collections + + def ensure_array(value): - return value if isinstance(value, list) else [value] + return value if isinstance(value, collections.Iterable) else (value,) + + +def pairwise(iterable): + """s -> (s0,s1), (s1,s2), (s2, s3), ...""" + pairs = [] + a = iter(iterable) + try: + while True: + pairs.append((next(a), next(a))) + except StopIteration: + return pairs diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index 5dbf34a..6761083 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -28,14 +28,24 @@ def __init__(self, value): InvalidExpressionError.__init__(self, value) +class BadGraphExpressionElementError(InvalidGraphExpressionError): + def __init__(self, value): + self.value = value + + class InvalidNodeError(InvalidGraphExpressionError): def __init__(self, value, msg): InvalidGraphExpressionError.__init__(self, value) self.msg = msg +class EmptyGraphPatternExpressionError(InvalidGraphExpressionError): + def __init__(self, value): + self.value = value + + class InvalidEdgeError(InvalidGraphExpressionError): - def __init__(self, value, msg): + def __init__(self, value, msg=''): InvalidGraphExpressionError.__init__(self, value) self.msg = msg diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index 8a2a680..c21a879 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -1,7 +1,8 @@ from src.lib.utils import ensure_array +from src.lib.Printable import Printable -class Expression: +class Expression(Printable): ''' A where (and other elements?) expression handler Contains a list of elements - variables, consts and operations @@ -10,6 +11,12 @@ class Expression: def __init__(self, elements): self.elements = ensure_array(elements) + def __repr__(self): + return '<' + type(self).__name__ + '>' + str(self.elements) + + def __eq__(self, other): + return self.elements == other.elements + # TODO probably not ... def validate_expression(self): pass diff --git a/src/query_engine/query_ast/models.py b/src/query_engine/query_ast/models.py index ac39063..8b6e385 100644 --- a/src/query_engine/query_ast/models.py +++ b/src/query_engine/query_ast/models.py @@ -1,6 +1,11 @@ from src.lib.utils import ensure_array +class Printable: + def __repr__(self): + return "<" + type(self).__name__ + "> " + str(self.__dict__) + + class Literal: """Defines a literal for an expression.""" @@ -17,24 +22,25 @@ def __repr__(self): def __eq__(self, other): return self.__dict__ == other.__dict__ -class Property: + +class Property(Printable): def __init__(self, key, value): self.key = key self.value = value def __repr__(self): - return self.key + ':' + str(self.value) + return '<' + self.key + ': ' + str(self.value) + '>' def __eq__(self, other): return self.__dict__ == other.__dict__ -class Identifier: +class Identifier(Printable): """ Defines an identifier for a query. It can be """ - def __init__(self, letter, value=None): + def __init__(self, letter=None, value=None): """ Args: letter: @@ -47,15 +53,18 @@ def __init__(self, letter, value=None): def value(self): return self._value + def __str__(self): + return self._letter + ' ' + str(self._value) + def __repr__(self): - return self._letter + ' ' + self._value + return '<' + str(self._letter) + ' ' + str(self._value) + '>' def __eq__(self, other): return self.__dict__ == other.__dict__ -class Label: - def __init__(self, name): +class Label(Printable): + def __init__(self, name=None): self.name = name def __repr__(self): @@ -65,7 +74,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ -class Edge: +class Edge(Printable): """ TODO: make it immutable An edge: @@ -77,15 +86,14 @@ class Edge: is determined from the node sequence given """ - def __init__(self, node_in=None, node_out=None, label='', directed=False, - identifier=None, - properties=()): + def __init__(self, node_in=None, node_out=None, label=None, + directed=False, identifier=None, properties=()): """ Args: label: - node_in: - node_out: - directed: + node_in (Node): + node_out (Node): + directed (Boolean|String): left or right directed identifier: properties: """ @@ -97,26 +105,29 @@ def __init__(self, node_in=None, node_out=None, label='', directed=False, self.identifier = identifier def __repr__(self): - return self.identifier + ':' + self.__label + \ - ' ' + self.__properties + ' ' + self.__node_in + \ - ' ' + self.__node_out + ' ' + str(self.__directed) + return ' ' + str(self.identifier) + ':' + \ + str(self.__label) + ' { ' + str(self.__properties) + ' } < ' + \ + str(self.__node_in) + ' > < ' + str(self.__node_out) + ' > - ' + \ + str(self.__directed) def __eq__(self, other): return self.__dict__ == other.__dict__ - def isDirected(self): - return self.direction + def is_directed(self): + return self.__directed - def getNodes(self): + def get_nodes(self): """ Get a directed node pair out - in (direction flag needed) """ - return (self.nodeOut, self.nodeIn) + return (self.__node_in, self.__node_out) - def getLabel(self): + @property + def label(self): return self.__label - def getProperties(self): + @property + def properties(self): return self.__properties @@ -134,7 +145,7 @@ def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, # TODO implement setters -class Node: +class Node(Printable): """ TODO: make it immutable A node: @@ -148,9 +159,6 @@ def __init__(self, labels=(), identifier=None, properties=()): self.properties = ensure_array(properties) self.labels = ensure_array(labels) - def __repr__(self): - return self.identifier + ':' + self.labels + ' ' + self.properties - def __eq__(self, other): return self.__dict__ == other.__dict__ diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index 4bc66a9..90b3e89 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -1,6 +1,6 @@ import re -import collections -import itertools +from enum import Enum + import logging as Logger from src.query_engine.errors.syntax import * @@ -10,7 +10,8 @@ from src.query_engine.query_ast.models import * from src.query_engine.query_ast.clauses import * from src.query_engine.query_ast.expression import * -from src.query_engine.query_ast import utils +from src.query_engine.query_ast.utils import * +from src.lib import utils ''' Support Notes: @@ -33,58 +34,45 @@ new MatchClause() ''' +MATCH_NODE = '\((?P.*?)\)' +MATCH_EDGE = '\[(?P.*?)\]' +PARSE_GRAPH_EXPRESSION = re.compile('({})|({})'.format(MATCH_NODE, MATCH_EDGE)) + + +# TODO ONE BIG VALIDATION REGEX + +class EdgeDirections(Enum): + left = 1 + right = 2 + # TODO flags -> convert to number; find all matches # TODO use \w ?? or \S IDENTIFIER_REGEX = re.compile('^(\w+)(?:|\s)?') -LABELS_REGEX = re.compile(':(\w+)(?:|\s)?') + +# Match labels, without matching properties +LABELS_REGEX = re.compile('[\w:]*?:(\w+)(?:|\s)?') # TODO allowed_val_chars = '\w|\'' PROPERTIES_BODY_REGEX = re.compile('{(.*?)}') PROPERTY_REGEX = re.compile('(?P\w+):\s*"?(?P[\w|.]+)"?,?\s*') +BODY_REGEX = '(.*?)' +EDGE_BODY_REGEX = re.compile('?'.format(BODY_REGEX)) +NODE_BODY_REGEX = re.compile('\({}\)'.format(BODY_REGEX)) + + # TODO FIX # TODO caseInsensitive -def split_list(unsplitted, sep_list, PN=False): +def split_list(unsplitted, sep_list): """ Splits a string by list of separators - - Unary prefix by default. """ + splitted = re.split('\s*({})\s*'.format('|'.join(sep_list)), + unsplitted) - # TODO use re.split() !!! - - def apply_notation(splitted_list, splitter): - # TODO splitter may provide type of the notation (infix, postfix, suffix) - if PN: - # Binary operators - in_fix - for split_pos in range(0, len(splitted_list), 3): - splitted_list.insert(split_pos, splitter) - else: - # unary operators - prefix - for split_pos in range(1, len(splitted_list)): - - splitted_list.insert(split_pos, splitter) - if not unsplitted or len(sep_list) == 0: - return unsplitted - - splitter = sep_list[0] + ' ' # don't match - rest = sep_list[1:] - # splitted = [] - - if isinstance(unsplitted, list): - splitted_list = [] - for sub_str in unsplitted: - splitted_com = sub_str.split(splitter) - if len(splitted_com) > 0: - apply_notation(splitted_com, splitter) - - splitted_list.append(splitted_com) - splitted = list(itertools.chain(*splitted_list)) - else: - splitted = unsplitted.split(splitter) - apply_notation(splitted, splitter) - - return split_list(splitted, rest) + if len(splitted) > 0 and not splitted[0]: + splitted = splitted[1:] + return splitted # -- EXPRESSIONS -- @@ -95,25 +83,23 @@ def apply_notation(splitted_list, splitter): ''' ** PARSER ** ''' + + def check_valid_edge(raw_node): - # Does it have brackets + # XXX + # TODO FIX + # XXX error = None - if len(raw_node) - 2 != len(raw_node.strip('[]')): - error = 'Edge must be enclosed in []' - # TODO other checks - # elif if error: raise InvalidEdgeError(raw_node, error) def check_valid_node(raw_node): - # Does it have brackets + # XXX + # TODO FIX + # XXX error = None - if len(raw_node) - 2 != len(raw_node.strip('()')): - error = 'Node must be enclosed in ()' - # TODO other checks - # elif if error: raise InvalidNodeError(raw_node, error) @@ -129,13 +115,22 @@ def get_properties(raw_elem): List[Property]: """ + def to_number(s): """Tries to parse element to number.""" - try: - return float(s) - except ValueError: + + def is_int(n): + try: + int(n) + except ValueError: + return False + return float(n) == int(n) + + if is_int(s): + return int(s) + else: try: - return int(s) + return float(s) except ValueError: return s @@ -159,27 +154,35 @@ def to_number(s): match = PROPERTY_REGEX.match(raw_props, match.end()) - return properties + return tuple(properties) def get_labels(raw_elem, multi=True): """ ;id {};id:... {} Args: - raw_node (str): + raw_elem (str): + multi (bool): Returns: List[Label]: """ - match = LABELS_REGEX.findall(raw_elem) + matches = [] + # NOTE Use match to match the begging. + match = LABELS_REGEX.match(raw_elem) + while match: + matches.append(match.group(1)) + match = LABELS_REGEX.match(raw_elem, match.end()) + # edge labels - if match and (len(match) > 1 and not multi): + if matches and (len(matches) > 1 and not multi): raise InvalidLabelsCountError() # Make to system labels - match = [Label(raw_label) for raw_label in match] - if match and not multi: + matches = tuple(Label(raw_label.strip()) for raw_label in matches) + if not multi: # Is edge label - match = match[0] - return match + return matches[0] if matches else None + else: + return matches def get_identifier(raw_elem): @@ -196,46 +199,71 @@ def get_identifier(raw_elem): return match -def is_node(raw_elem): - pass - - -def is_edge(raw_elem): - pass - - def parse_node(raw_node): """ Node must follow the pattern: ([identifier][:label:label...] [{properties}]) Args: - raw_node (str): + raw_node (str): (data) Returns: Node: Raises: InvalidNodeError: """ check_valid_node(raw_node) + raw_node_body = NODE_BODY_REGEX.match(raw_node) + if raw_node_body: + raw_node_body = raw_node_body.group(1).strip() + else: + raise InvalidEdgeError(raw_node) - identifier = get_identifier(raw_node) - labels = get_labels(raw_node) - properties = get_properties(raw_node) + identifier = get_identifier(raw_node_body) + labels = get_labels(raw_node_body) + properties = get_properties(raw_node_body) return Node(identifier=identifier, labels=labels, properties=properties) -def parse_edge(raw_edge): +def parse_edge(raw_edge, node_left, node_right): """ - An edge can - Returns an edge with specified properties and orientation. + Args: + raw_edge (str): -[]-, <-[]-, ... + node_left (Node): + node_right (Node): + Returns: + Edge: """ + + def get_edge_direction(raw_edge): + # TODO raise on bad direction ... + if raw_edge[0] == '<': + return EdgeDirections.left + elif raw_edge[len(raw_edge) - 1] == '>': + return EdgeDirections.right + return None + check_valid_edge(raw_edge) - raw_edge = raw_edge.strip() + raw_edge_body = EDGE_BODY_REGEX.match(raw_edge) + if raw_edge_body: + raw_edge_body = raw_edge_body.group(1).strip() + else: + raise InvalidEdgeError(raw_edge) - identifier = get_identifier(raw_edge) - label = get_labels(raw_edge, True) - properties = get_properties(raw_edge) + identifier = get_identifier(raw_edge_body) + label = get_labels(raw_edge_body, False) + properties = get_properties(raw_edge_body) + direction = get_edge_direction(raw_edge) - return Edge(identifier=identifier, label=label, properties=properties) + if direction == EdgeDirections.left: + # Swap edges to keep the proper ordering + # simplifies the code + node_left, node_right = node_right, node_left + + return Edge(identifier=identifier, + label=label, + properties=properties, + directed=bool(direction), + node_in=node_left, + node_out=node_right) def parse_simple_graph_expr(raw_simple_expr): @@ -251,26 +279,48 @@ def parse_simple_graph_expr(raw_simple_expr): InvalidGraphExpressionError: """ + # split to items # parse Node, parse Edge - simple_expr_raw_elements = raw_simple_expr.split('-') + def collect_elements(raw_simple_expr): + raw_nodes = [] + raw_edges = [] + + match = PARSE_GRAPH_EXPRESSION.match(raw_simple_expr) + while match: + if match.group('node') or match.group() == '()': + raw_nodes.append(match.group('node')) + elif match.group('edge') or match.group() == '[]': + raw_edges.append(match.group('edge')) + else: + raise BadGraphExpressionElementError(match.group()) + match = PARSE_GRAPH_EXPRESSION.match(raw_simple_expr, match.end()) - simple_expr_elements = [] + return {'nodes': raw_edges, 'edges': raw_edges} - for elem in simple_expr_raw_elements: - # Parse by element type - if is_node(elem): - parsed_elem = parse_node(elem) - elif is_edge(elem): - parsed_elem = parse_edge(elem) + raw_elements = collect_elements(raw_simple_expr) - else: - raise InvalidGraphExpressionError(elem) + # TODO check number of nodes + + # Parse nodes + nodes = [parse_node(raw_node) for raw_node in raw_elements['nodes']] - simple_expr_elements.append(parsed_elem) + edges = [] + # Parse edges + nodes_iter = iter(nodes) + for raw_edge in raw_elements['nodes']: + edges.append(parse_edge(raw_edge, next(nodes_iter), next(nodes_iter))) # TODO populate Edges - return SimpleGraphPatternExpression(simple_expr_elements) + res = None + if edges: + res = edges + elif nodes: + if len(nodes) > 1: + raise InvalidGraphExpressionError + res = nodes[0] + + return SimpleGraphPatternExpression(res) def parse_graph_expression(simple_graph_exprs): @@ -292,6 +342,11 @@ def parse_graph_expression(simple_graph_exprs): return GraphPatternExpression(simple_graph_exprs) +def parse_operator_expression(simple_graph_exprs): + # TODO + pass + + def generate_clause(clause, raw_expr): """ @@ -341,11 +396,10 @@ def parse_clause(raw_clause): Returns: Clause: The generated clause """ - clause_str = raw_clause[0] - expr = raw_clause[1] + clause_str, expr = raw_clause expr = parse_expression(expr, - utils.get_expression_type(clause_str)) + get_expression_type(clause_str)) return generate_clause(clause_str, expr) @@ -381,7 +435,6 @@ def parse_query(query): def parse_sub_query(raw_sub_query): """ - Args: raw_sub_query (List[str]): A list containing the sub-query elements, @@ -394,14 +447,14 @@ def parse_sub_query(raw_sub_query): # Break to smaller parts with sub clauses - RETURN, WHERE # List of: Clause, expressions (, separated) - subclauses_split = split_list(raw_sub_query, SUB_CLAUSES) - - # TODO split expressions by ',' + clause = raw_sub_query[0] + subclauses_split = split_list(raw_sub_query[1], SUB_CLAUSES) # process expressions (of MATCH, WHERE, ... # TODO expression type is defined by the clause it refers to -- use that cluase - subclauses = (parse_clause(subclause_list) - for subclause_list in subclauses_split) + subclauses = (parse_clause(clause) + for clause in + utils.pairwise((clause, *subclauses_split))) return SubQuery(subclauses) @@ -409,7 +462,7 @@ def parse_sub_query(raw_sub_query): # Sub queries are defined by specific Clauses # TODO trailing spaces - sub_queries_str = split_list(query, MAIN_CLAUSES) + sub_queries_str = utils.pairwise(split_list(query, MAIN_CLAUSES)) sub_queries = [parse_sub_query(sub_query) for sub_query in sub_queries_str] diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py index 5dd2d5d..e57c5bc 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_engine/test_query_parser.py @@ -17,53 +17,105 @@ def test_parse_graph_expression(self): # test with edge (directed or none) # test with node and edge # test with more edges - self.fail() + ## self.assertRaises(InvalidGraphExpressionError, + ## parse_graph_expression, ['()-[]']) + ## self.assertRaises(EmptyGraphPatternExpressionError, + ## parse_graph_expression, ['()-[]-()']) + + self.assertEquals(parse_graph_expression(['(a)']), + GraphPatternExpression([SimpleGraphPatternExpression( + [Node(identifier=Identifier('a'))])])) + self.assertEquals(parse_graph_expression(['(a)-[]-(b)']), + GraphPatternExpression( + [SimpleGraphPatternExpression([Edge( + directed=False, + node_out=Node(identifier=Identifier('b')), + node_in=Node(identifier=Identifier('a')))])])) + self.assertEquals(parse_graph_expression(['(a)-[:b]-(b)']), + GraphPatternExpression( + [SimpleGraphPatternExpression([Edge( + label=Label('b'), + directed=False, + node_out=Node(identifier=Identifier('b')), + node_in=Node(identifier=Identifier('a')))])])) + self.assertEquals(parse_graph_expression(['(a)<-[]-(b)']), + GraphPatternExpression( + [SimpleGraphPatternExpression([Edge( + directed=True, + node_out=Node(identifier=Identifier('a')), + node_in=Node(identifier=Identifier('b')))])])) + + self.assertEquals(parse_graph_expression(['(a)<-[:b]-()-[:c]->(d)']), + GraphPatternExpression([SimpleGraphPatternExpression([ + Edge(label=Label('b'), + directed=True, + node_out=Node(identifier=Identifier('a')), + node_in=Node()), + Edge(label=Label('c'), + directed=True, + node_out=Node(identifier=Identifier('d')), + node_in=Node()) + ])])) + + self.assertEquals(parse_graph_expression(['(a)<-[]-(b)', + '(c)']), + GraphPatternExpression([ + SimpleGraphPatternExpression([Edge( + directed=True, + node_out=Node(identifier=Identifier('a')), + node_in=Node(identifier=Identifier('b')))]), + SimpleGraphPatternExpression( + [Node(identifier='c')])])) def test_get_properties(self): self.assertEquals(get_properties(':lab {a: 1}'), - [Property('a', 1)], + (Property('a', 1),), 'Normal, spaced, int') self.assertEquals(get_properties(':lab {a: 1.2}'), - [Property('a', 1.2)], + (Property('a', 1.2),), 'Normal, spaced, float') self.assertEquals(get_properties(':lab {a: "b"} '), - [Property('a', 'b')], + (Property('a', 'b'),), 'Normal spaced str') self.assertEquals(get_properties(' {a: "b"} '), - [Property('a', 'b')], + (Property('a', 'b'),), 'Normal; only props') self.assertEquals(get_properties(' {a:"b"} '), - [Property('a', 'b')], + (Property('a', 'b'),), 'Normal; no space') self.assertEquals(get_properties(' {a:"b", b:"c"} '), - [Property('a', 'b'), Property('b', 'c')], + (Property('a', 'b'), Property('b', 'c')), 'Multi prop') self.assertEquals(get_properties(' {a:"b", b:"c",c:1} '), - [Property('a', 'b'), Property('b', 'c'), - Property('c', 1)], + (Property('a', 'b'), Property('b', 'c'), + Property('c', 1)), 'Multi prop') - self.assertEquals(get_properties('a:label '), []) + self.assertEquals(get_properties('a:label '), ()) # TODO test EXCEPTIONS def test_get_labels(self): self.assertEquals(get_labels('a:lab {a: 1}'), - [Label('lab')]) + (Label('lab'),)) self.assertEquals(get_labels('a:lab:lab1 {a: 1}'), - [Label('lab'), Label('lab1')]) + (Label('lab'), Label('lab1'))) self.assertEquals(get_labels(':lab:lab1 {a: 1}'), - [Label('lab'), Label('lab1')]) + (Label('lab'), Label('lab1'))) self.assertEquals(get_labels(':lab:lab1'), - [Label('lab'), Label('lab1')]) + (Label('lab'), Label('lab1'))) self.assertEquals(get_labels(':lab:lab1:lab3'), - [Label('lab'), Label('lab1'), Label('lab3')]) + (Label('lab'), Label('lab1'), Label('lab3'))) self.assertEquals(get_labels('a {}'), - []) + ()) self.assertRaises(InvalidLabelsCountError, get_labels, 'a:b:c', multi=False), self.assertEquals(get_labels('a:b', multi=False), Label('b')), + self.assertIs(get_labels('', multi=False), + None), + self.assertEquals(get_labels('', multi=True), + ()) # TODO test EXCEPTIONS @@ -72,94 +124,110 @@ def test_get_identifier(self): self.assertEquals(get_identifier('a:b '), Identifier('a')) self.assertIs(get_identifier(':b'), None) - def test_parse_edge(self): + n1 = Node(identifier='a') + n2 = Node(identifier='b') # self.assertRaises(InvalidEdgeLabelError, # parse_edge, '[]') - self.assertEquals(parse_edge('[a]'), - Edge('a')) - self.assertEquals(parse_edge('[a {a: 1}]'), + self.assertEquals(parse_edge('-[a]-', n1, n2), + Edge(identifier=Identifier('a'), + node_in=n1, node_out=n2)) + self.assertEquals(parse_edge('-[a {a: 1}]-', n1, n2), Edge(identifier=Identifier('a'), - properties=[Property('a', 1)])) - self.assertEquals(parse_edge('[a:b {a: 1}]'), + properties=(Property('a', 1),), + node_in=n1, node_out=n2)), + self.assertEquals(parse_edge('-[a:b {a: 1}]-', n1, n2), Edge(identifier=Identifier('a'), label=Label('b'), - properties=[Property('a', 1)])) - self.assertEquals(parse_edge('[:b {a: 1}]'), + node_in=n1, node_out=n2, + properties=(Property('a', 1),))) + self.assertEquals(parse_edge('-[:b {a: 1}]-', n1, n2), + Edge(label=Label('b'), + node_in=n1, node_out=n2, + properties=(Property('a', 1),))) + self.assertEquals(parse_edge('-[]-', n1, n2), + Edge(node_in=n1, node_out=n2)) + # Check direction + self.assertEquals(parse_edge('-[:b {a: 1}]->', n1, n2), + Edge(label=Label('b'), + node_in=n1, node_out=n2, + directed=True, + properties=(Property('a', 1),))) + self.assertEquals(parse_edge('<-[:b {a: 1}]-', n1, n2), Edge(label=Label('b'), - properties=[Property('a', 1)])) - self.assertEquals(parse_edge('[]'), - Edge()) + node_in=n2, node_out=n1, + directed=True, + properties=(Property('a', 1),))) - # check direction - # + ## self.assertRaises(InvalidEdgeError, parse_edge, '-[]-', n1, n2) - # TODO clean up tests def test_parse_node(self): + # TODO clean up tests self.assertEquals(parse_node('()'), Node(), 'Just a node') self.assertEquals(parse_node('(id:lab1)'), - Node(identifier=id, labels=[Label('lab1')]), + Node(identifier=Identifier('id'), + labels=Label('lab1')), 'Id and label') # self.assertEquals(parse_node('(id)'), - Node(identifier=id), + Node(identifier=Identifier('id')), 'Just id') self.assertEquals(parse_node('(id {a: 1})'), - Node(identifier=id, properties=[Property('a', 1)]), + Node(identifier=Identifier('id'), properties=(Property('a', 1),)), 'Id and props') self.assertEquals(parse_node('(id:lab {a: 1})'), - Node(identifier=id, - labels=[Label('lab')], - properties=[Property('a', 1)]), + Node(identifier=Identifier('id'), + labels=Label('lab'), + properties=(Property('a', 1),)), 'Id and props') self.assertEquals(parse_node('(:lab {a: 1})'), - Node(labels=[Label('lab')], - properties=[Property('a', 1)]), + Node(labels=Label('lab'), + properties=(Property('a', 1),)), 'Label and prop') self.assertEquals(parse_node('(:lab:lab1:lab2 {a: 1})'), - Node(labels=[Label('lab'), Label('lab1'), - Label('lab2')], - properties=[Property('a', 1)]), + Node(labels=(Label('lab'), Label('lab1'), + Label('lab2')), + properties=(Property('a', 1),)), 'Many Labels and prop') self.assertEquals(parse_node('(:lab:lab1:lab2)'), - Node(labels=[Label('lab'), Label('lab1'), - Label('lab2')]), + Node(labels=(Label('lab'), Label('lab1'), + Label('lab2'))), 'Many Labels') # PROPERTIES self.assertEquals(parse_node('({a: 1})'), - Node(properties=[Property('a', 1)]), + Node(properties=(Property('a', 1),)), 'Spaced prop'), self.assertEquals(parse_node('({a:1})'), - Node(properties=[Property('a', 1)]), + Node(properties=(Property('a', 1),)), 'No Spaced prop'), self.assertEquals(parse_node('({a:1.12})'), - Node(properties=[Property('a', 1.12)]), + Node(properties=(Property('a', 1.12),)), 'Float prop'), self.assertEquals(parse_node('({a:"abc"})'), - Node(properties=[Property('a', "abc")]), + Node(properties=(Property('a', "abc"),)), 'Prop with string') self.assertEquals(parse_node('({a:"abc"})'), - Node(properties=[Property('a', "abc")]), + Node(properties=(Property('a', 'abc'))), 'Prop with string') self.assertEquals(parse_node('({a:"abc", b: 1})'), - Node(properties=[Property('a', 'abc'), - Property('b', 1)]), + Node(properties=(Property('a', 'abc'), + Property('b', 1))), 'Mixed properties') # # TODO raise Tests @@ -167,31 +235,31 @@ def test_parse_node(self): def test_list_split(self): self.assertEquals(split_list('a1 b c d', ['a1']), - [['a1', 'b c d']]) + ['a1', 'b c d']) self.assertEquals(split_list('a1 b a2 d', ['a1', 'a2']), - [['a1', 'b'], ['a2', 'd']]) + ['a1', 'b', 'a2', 'd']) self.assertEquals(split_list('a1 b a1 d', ['a1']), - [['a1', 'b'], ['a1', 'd']]) + ['a1', 'b', 'a1', 'd']) self.assertEquals(split_list('a1 b a1 d', ['a1', 'a2']), - [['a1', 'b'], ['a1', 'd']]) + ['a1', 'b', 'a1', 'd']) self.assertEquals(split_list('a1 b a2 d a1 b', ['a1', 'a2']), - [['a1', 'b'], ['a2', 'd'], ['a1', 'b']]) + ['a1', 'b', 'a2', 'd', 'a1', 'b']) # Special conditions self.assertEquals(split_list('Aa1 Ba a2 d aA1 b', ['aa1', 'a2']), - [['aa1', 'Ba'], ['a2', 'd'], ['aa1', 'b']]) + ['aa1', 'Ba', 'a2', 'd', 'aa1', 'b']) self.assertEquals(split_list('Aa Baa aA baab', ['aa']), - [['aa', 'Baa'], ['aa', 'baab']]) + ['aa', 'Baa', 'aa', 'baab']) def setUp(self): self.parser = QueryParser() - # + # lib # Test Main Method # def test_compound(self): @@ -361,6 +429,5 @@ def test_graph_expressions(self): self.assertEqual(self.parser.parse_query(TEST_MORE_EDGES[0]), TEST_MORE_EDGES[1]) - -def test_exceptions(self): - self.fail() + def test_exceptions(self): + self.fail() From 74e37bc5080ac851657155255177230f09b29689 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 23 Jun 2016 13:25:38 +0300 Subject: [PATCH 08/22] Fix up simple expression parsing (graph expr) --- src/lib/utils.py | 18 +++- src/query_engine/errors/syntax.py | 6 ++ src/query_engine/query_ast/clauses.py | 4 +- src/query_engine/query_ast/expression.py | 4 +- src/query_engine/query_ast/models.py | 48 +++++++---- src/query_engine/query_ast/utils.py | 5 ++ src/query_engine/query_parser.py | 102 ++++++++++++++++++----- src/query_engine/test_query_parser.py | 70 ++++++++++------ 8 files changed, 189 insertions(+), 68 deletions(-) diff --git a/src/lib/utils.py b/src/lib/utils.py index 2c47fe1..12bfecf 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -1,11 +1,17 @@ import collections +from itertools import tee -def ensure_array(value): - return value if isinstance(value, collections.Iterable) else (value,) +def ensure_tuple(value): + if isinstance(value, tuple): + return value + elif isinstance(value, collections.Iterable): + return tuple(value) + else: + return (value,) -def pairwise(iterable): +def pairize(iterable): """s -> (s0,s1), (s1,s2), (s2, s3), ...""" pairs = [] a = iter(iterable) @@ -14,3 +20,9 @@ def pairwise(iterable): pairs.append((next(a), next(a))) except StopIteration: return pairs + +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = tee(iterable) + next(b, None) + return zip(a, b) diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index 6761083..f5b93f8 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -44,6 +44,12 @@ def __init__(self, value): self.value = value +class InvalidGraphExpressionPropertiesError(InvalidGraphExpressionError): + def __init__(self, value, msg=''): + InvalidGraphExpressionError.__init__(self, value) + self.msg = msg + + class InvalidEdgeError(InvalidGraphExpressionError): def __init__(self, value, msg=''): InvalidGraphExpressionError.__init__(self, value) diff --git a/src/query_engine/query_ast/clauses.py b/src/query_engine/query_ast/clauses.py index 51de42a..47e23ba 100644 --- a/src/query_engine/query_ast/clauses.py +++ b/src/query_engine/query_ast/clauses.py @@ -1,4 +1,4 @@ -from src.lib.utils import ensure_array +from src.lib.utils import ensure_tuple """ Either add some serious logic to items or generate them dynamicaly @@ -55,7 +55,7 @@ def __init__(self, expr): class Return(Clause): def __init__(self, props=()): - self.props = ensure_array(props) + self.props = ensure_tuple(props) #raise InvalidArguments('Return needs at least one item') diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index c21a879..f27fbb6 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -1,4 +1,4 @@ -from src.lib.utils import ensure_array +from src.lib.utils import ensure_tuple from src.lib.Printable import Printable @@ -9,7 +9,7 @@ class Expression(Printable): ''' def __init__(self, elements): - self.elements = ensure_array(elements) + self.elements = ensure_tuple(elements) def __repr__(self): return '<' + type(self).__name__ + '>' + str(self.elements) diff --git a/src/query_engine/query_ast/models.py b/src/query_engine/query_ast/models.py index 8b6e385..f6abdba 100644 --- a/src/query_engine/query_ast/models.py +++ b/src/query_engine/query_ast/models.py @@ -1,4 +1,4 @@ -from src.lib.utils import ensure_array +from src.lib.utils import ensure_tuple class Printable: @@ -25,6 +25,11 @@ def __eq__(self, other): class Property(Printable): def __init__(self, key, value): + """ + Args: + key: + value (str|number|Identifier): + """ self.key = key self.value = value @@ -35,35 +40,50 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ -class Identifier(Printable): +class Identifier: """ Defines an identifier for a query. It can be """ - def __init__(self, letter=None, value=None): + def __init__(self, name=None, fields=(), value=None): """ Args: letter: value: """ - self._letter = letter + self._name = name self._value = value + self._fields = ensure_tuple(fields) @property def value(self): return self._value def __str__(self): - return self._letter + ' ' + str(self._value) + return self._name + '.' + '.'.join(self._fields) + ' ' +\ + str(self._value) def __repr__(self): - return '<' + str(self._letter) + ' ' + str(self._value) + '>' + return '<' + str(self._name) + ' ' + str(self._value) + '>' def __eq__(self, other): return self.__dict__ == other.__dict__ -class Label(Printable): +class Variable(Identifier): + """Keeps data about referenced identifiers.""" + def __init__(self, name, fields): + """ + Args: + name (str): + fields (List[str]): represents the variable properties + sequence -> a.b.c -> [b, c] + """ + Identifier.__init__(self, name) + self.fields = ensure_tuple(fields) + + +class Label: def __init__(self, name=None): self.name = name @@ -93,22 +113,22 @@ def __init__(self, node_in=None, node_out=None, label=None, label: node_in (Node): node_out (Node): - directed (Boolean|String): left or right directed - identifier: + directed (bool|str): left or right directed + identifier (Identifier|Variable): properties: """ self.__label = label - self.__properties = ensure_array(properties) + self.__properties = ensure_tuple(properties) self.__node_in = node_in self.__node_out = node_out self.__directed = directed self.identifier = identifier def __repr__(self): - return ' ' + str(self.identifier) + ':' + \ + return '[' + str(self.identifier) + ':' + \ str(self.__label) + ' { ' + str(self.__properties) + ' } < ' + \ str(self.__node_in) + ' > < ' + str(self.__node_out) + ' > - ' + \ - str(self.__directed) + str(self.__directed) + ']' def __eq__(self, other): return self.__dict__ == other.__dict__ @@ -156,8 +176,8 @@ class Node(Printable): def __init__(self, labels=(), identifier=None, properties=()): self.identifier = identifier - self.properties = ensure_array(properties) - self.labels = ensure_array(labels) + self.properties = ensure_tuple(properties) + self.labels = ensure_tuple(labels) def __eq__(self, other): return self.__dict__ == other.__dict__ diff --git a/src/query_engine/query_ast/utils.py b/src/query_engine/query_ast/utils.py index 1ea7f10..a16fb6a 100644 --- a/src/query_engine/query_ast/utils.py +++ b/src/query_engine/query_ast/utils.py @@ -3,6 +3,11 @@ # TODO def get_expression_type(clause): + # TODO type of expression + # is variable expression + # is some other type of expression ? + # is graph expresssion + # is Operations expression if True: return GraphPatternExpression else: diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index 90b3e89..d5763f6 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -16,7 +16,10 @@ ''' Support Notes: - Nodes properties may have space between value and key -- Properties value may be - string, number +- Properties value may be - string, number, identifier +-- any string +-- float or integer +-- letters_and_numbers. ... - Nodes may have more than 1 space between labels and properties - Support whitespaces in id/labels ??? - Case-sensitive labels and ids ?? @@ -32,11 +35,26 @@ - Parse expression by clause -new MatchClause() +TODO -- +- Set x.y = 10 +- WITH c, SUM(..) AS x + + +TODO -- support for properties +Variable fields: +- in node -> (var) +- Return, With, ... +- properties + +GraphExpressions +OperatorExpressions +IdExpressions + ''' -MATCH_NODE = '\((?P.*?)\)' -MATCH_EDGE = '\[(?P.*?)\]' -PARSE_GRAPH_EXPRESSION = re.compile('({})|({})'.format(MATCH_NODE, MATCH_EDGE)) +MATCH_NODE = '\(.*?\)' +MATCH_EDGE = '?' +PARSE_GRAPH_EXPRESSION = re.compile( + '(?P{})|(?P{})'.format(MATCH_NODE, MATCH_EDGE)) # TODO ONE BIG VALIDATION REGEX @@ -45,16 +63,23 @@ class EdgeDirections(Enum): left = 1 right = 2 + # TODO flags -> convert to number; find all matches # TODO use \w ?? or \S IDENTIFIER_REGEX = re.compile('^(\w+)(?:|\s)?') +VARIABLE_REGEX = re.compile('\w+(\.\w+)*(?:|\s)?') # Match labels, without matching properties LABELS_REGEX = re.compile('[\w:]*?:(\w+)(?:|\s)?') # TODO allowed_val_chars = '\w|\'' PROPERTIES_BODY_REGEX = re.compile('{(.*?)}') -PROPERTY_REGEX = re.compile('(?P\w+):\s*"?(?P[\w|.]+)"?,?\s*') +KEY = '(?P\w+)' +VAR = '(?P\w+(\.\w+)*)' +NUM = '(?P[\d.]+)' +STR = '("(?P(.+?))")' +# Matches item sequentianally by their type +PROPERTY_REGEX = re.compile('{}:\s*({}|{}|{}),?\s*'.format(KEY, NUM, VAR, STR)) BODY_REGEX = '(.*?)' EDGE_BODY_REGEX = re.compile('?'.format(BODY_REGEX)) @@ -149,7 +174,20 @@ def is_int(n): match = PROPERTY_REGEX.match(raw_props) while match: key = match.group('key').strip() - value = to_number(match.group('val').strip()) + if match.group('var'): + # it's an identifier + var = match.group('var').split('.') + value = Identifier(name=var[0], + fields=var[1:]) + elif match.group('num'): + # try to parse it to string + value = to_number(match.group('num').strip()) + elif match.group('val'): + # it's just a string + value = to_number(match.group('val').strip()) + else: + raise InvalidGraphExpressionPropertiesError(raw_props) + properties.append(Property(key=key, value=value)) match = PROPERTY_REGEX.match(raw_props, match.end()) @@ -185,6 +223,21 @@ def get_labels(raw_elem, multi=True): return matches +def get_variable(raw_elem): + """ + Args: + raw_elem (str): + Returns: + List[str] + """ + match = VARIABLE_REGEX.match(raw_elem) + if match: + # separate the properties + match = match.group(0).split('.') + match = Variable(name=match[0], fields=match[1:]) + return match + + def get_identifier(raw_elem): """ ;id {};id:... {} @@ -193,9 +246,10 @@ def get_identifier(raw_elem): Returns: Identifier|None: """ - match = IDENTIFIER_REGEX.search(raw_elem) + match = VARIABLE_REGEX.match(raw_elem) if match: - match = Identifier(match.group(1).strip()) + match = match.group(0).split('.') + match = Identifier(name=match[0], fields=match[1:]) return match @@ -288,15 +342,15 @@ def collect_elements(raw_simple_expr): match = PARSE_GRAPH_EXPRESSION.match(raw_simple_expr) while match: - if match.group('node') or match.group() == '()': + if match.group('node'): raw_nodes.append(match.group('node')) - elif match.group('edge') or match.group() == '[]': + elif match.group('edge'): raw_edges.append(match.group('edge')) else: raise BadGraphExpressionElementError(match.group()) match = PARSE_GRAPH_EXPRESSION.match(raw_simple_expr, match.end()) - return {'nodes': raw_edges, 'edges': raw_edges} + return {'nodes': raw_nodes, 'edges': raw_edges} raw_elements = collect_elements(raw_simple_expr) @@ -306,15 +360,15 @@ def collect_elements(raw_simple_expr): nodes = [parse_node(raw_node) for raw_node in raw_elements['nodes']] edges = [] + # Parse edges - nodes_iter = iter(nodes) - for raw_edge in raw_elements['nodes']: - edges.append(parse_edge(raw_edge, next(nodes_iter), next(nodes_iter))) - # TODO populate Edges + for raw_edge, edge_nodes in zip(raw_elements['edges'], + utils.pairwise(nodes)): + edges.append(parse_edge(raw_edge, edge_nodes[0], edge_nodes[1])) res = None if edges: - res = edges + res = tuple(edges) elif nodes: if len(nodes) > 1: raise InvalidGraphExpressionError @@ -336,8 +390,8 @@ def parse_graph_expression(simple_graph_exprs): # split by - # parse elements as follows -> node, edge, node edge ... # TODO MOVE TO SPLITTER - simple_graph_exprs = [parse_simple_graph_expr(simple_expr) - for simple_expr in simple_graph_exprs] + simple_graph_exprs = tuple(parse_simple_graph_expr(simple_expr) + for simple_expr in simple_graph_exprs) return GraphPatternExpression(simple_graph_exprs) @@ -381,6 +435,8 @@ def parse_expression(expression, expression_type): parser = parse_graph_expression elif expression_type == OperatorExpression: parser = parse_operator_expression + elif expression_type == VariableExpression: + parser = parse_variable_expression else: raise UnsupportedExpressionType(expression_type) @@ -454,7 +510,7 @@ def parse_sub_query(raw_sub_query): # TODO expression type is defined by the clause it refers to -- use that cluase subclauses = (parse_clause(clause) for clause in - utils.pairwise((clause, *subclauses_split))) + utils.pairize((clause, *subclauses_split))) return SubQuery(subclauses) @@ -462,14 +518,14 @@ def parse_sub_query(raw_sub_query): # Sub queries are defined by specific Clauses # TODO trailing spaces - sub_queries_str = utils.pairwise(split_list(query, MAIN_CLAUSES)) + sub_queries_str = utils.pairize(split_list(query, MAIN_CLAUSES)) sub_queries = [parse_sub_query(sub_query) for sub_query in sub_queries_str] # TODO parse to Query object ? - # createSubQueries() - # createQueryObject() + # TODO - go through the sub_queries and apply matching variables + ##apply_variables(sub_queries) return Query(sub_queries) # TODO variables ??? diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py index e57c5bc..2aa13f6 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_engine/test_query_parser.py @@ -23,30 +23,30 @@ def test_parse_graph_expression(self): ## parse_graph_expression, ['()-[]-()']) self.assertEquals(parse_graph_expression(['(a)']), - GraphPatternExpression([SimpleGraphPatternExpression( - [Node(identifier=Identifier('a'))])])) + GraphPatternExpression((SimpleGraphPatternExpression( + (Node(identifier=Identifier('a'))))))) self.assertEquals(parse_graph_expression(['(a)-[]-(b)']), GraphPatternExpression( - [SimpleGraphPatternExpression([Edge( + (SimpleGraphPatternExpression((Edge( directed=False, node_out=Node(identifier=Identifier('b')), - node_in=Node(identifier=Identifier('a')))])])) + node_in=Node(identifier=Identifier('a')))))))) self.assertEquals(parse_graph_expression(['(a)-[:b]-(b)']), GraphPatternExpression( - [SimpleGraphPatternExpression([Edge( + (SimpleGraphPatternExpression((Edge( label=Label('b'), directed=False, node_out=Node(identifier=Identifier('b')), - node_in=Node(identifier=Identifier('a')))])])) + node_in=Node(identifier=Identifier('a')))))))) self.assertEquals(parse_graph_expression(['(a)<-[]-(b)']), GraphPatternExpression( - [SimpleGraphPatternExpression([Edge( + (SimpleGraphPatternExpression((Edge( directed=True, node_out=Node(identifier=Identifier('a')), - node_in=Node(identifier=Identifier('b')))])])) + node_in=Node(identifier=Identifier('b')))))))) self.assertEquals(parse_graph_expression(['(a)<-[:b]-()-[:c]->(d)']), - GraphPatternExpression([SimpleGraphPatternExpression([ + GraphPatternExpression((SimpleGraphPatternExpression(( Edge(label=Label('b'), directed=True, node_out=Node(identifier=Identifier('a')), @@ -55,17 +55,17 @@ def test_parse_graph_expression(self): directed=True, node_out=Node(identifier=Identifier('d')), node_in=Node()) - ])])) + ))))) self.assertEquals(parse_graph_expression(['(a)<-[]-(b)', '(c)']), - GraphPatternExpression([ - SimpleGraphPatternExpression([Edge( + GraphPatternExpression(( + SimpleGraphPatternExpression((Edge( directed=True, node_out=Node(identifier=Identifier('a')), - node_in=Node(identifier=Identifier('b')))]), + node_in=Node(identifier=Identifier('b'))))), SimpleGraphPatternExpression( - [Node(identifier='c')])])) + (Node(identifier=Identifier('c'))))))) def test_get_properties(self): self.assertEquals(get_properties(':lab {a: 1}'), @@ -92,6 +92,21 @@ def test_get_properties(self): Property('c', 1)), 'Multi prop') self.assertEquals(get_properties('a:label '), ()) + + self.assertEquals(get_properties(' {a: c.b, b: a.b.d} '), + (Property('a', Identifier(name='c', fields=('b',))), + Property('b', + Identifier(name='a', fields=('b', 'd')))), + 'Variable properties') + self.assertEquals(get_properties(' {a: c.b, b:"c"} '), + (Property('a', Identifier(name='c', fields=('b',))), + Property('b', 'c')), + 'Variable properties') + self.assertEquals(get_properties(' {a: c.b.d.df, b:"c"} '), + (Property('a', Identifier(name='c', + fields=('b', 'd', 'df'))), + Property('b', 'c')), + 'Variable properties more') # TODO test EXCEPTIONS def test_get_labels(self): @@ -121,7 +136,12 @@ def test_get_labels(self): def test_get_identifier(self): self.assertEquals(get_identifier('a'), Identifier('a')) + self.assertEquals(get_identifier('a.b'), Identifier(name='a', + fields=('b',))) + self.assertEquals(get_identifier('a.bc.cf'), + Identifier(name='a', fields=('bc', 'cf'))) self.assertEquals(get_identifier('a:b '), Identifier('a')) + self.assertEquals(get_identifier('a.b:b '), Identifier('a', ('b',))) self.assertIs(get_identifier(':b'), None) def test_parse_edge(self): @@ -178,7 +198,8 @@ def test_parse_node(self): 'Just id') self.assertEquals(parse_node('(id {a: 1})'), - Node(identifier=Identifier('id'), properties=(Property('a', 1),)), + Node(identifier=Identifier('id'), + properties=(Property('a', 1),)), 'Id and props') self.assertEquals(parse_node('(id:lab {a: 1})'), @@ -256,6 +277,9 @@ def test_list_split(self): self.assertEquals(split_list('Aa Baa aA baab', ['aa']), ['aa', 'Baa', 'aa', 'baab']) + def test_parse_id_expression(self): + self.fail() + def setUp(self): self.parser = QueryParser() @@ -375,17 +399,15 @@ def test_graph_expressions(self): ), query.Query([ query.SubQuery([ - Create(GraphPatternExpression([ - Node( - identifier=Identifier('you'), - labels=Label('Person'), - properties= - Property('name', - 'You')) - ])) + Create(GraphPatternExpression( + (SimpleGraphPatternExpression(( + Node(identifier=Identifier('you'), + labels=Label('Person'), + properties=Property('name', 'You')) + ), ),) + )), ]) ]) - ] TEST_MORE_EDGES = [( From a86cea7468f35fd25d0183d7a79cf9710371a1c9 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Sat, 25 Jun 2016 13:54:57 +0300 Subject: [PATCH 09/22] Add clause support; Some working state ... --- src/query_engine/errors/syntax.py | 6 ++++ src/query_engine/query_ast/clauses.py | 37 +++++++++++++------ src/query_engine/query_ast/expression.py | 14 ++++++-- src/query_engine/query_ast/query.py | 46 ++++++++++++------------ src/query_engine/query_ast/utils.py | 34 +++++++++++------- src/query_engine/query_parser.py | 46 ++++++++++++++---------- src/query_engine/test_query_parser.py | 19 ++++++++-- 7 files changed, 130 insertions(+), 72 deletions(-) diff --git a/src/query_engine/errors/syntax.py b/src/query_engine/errors/syntax.py index f5b93f8..c316e6e 100644 --- a/src/query_engine/errors/syntax.py +++ b/src/query_engine/errors/syntax.py @@ -4,6 +4,12 @@ def __init__(self, value): self.value = value +class UnsupportedClauseError(InvalidSyntaxError): + def __init__(self, value): + """""" + self.value = value + + class UnsupportedExpressionType(InvalidSyntaxError): def __init__(self, value): """""" diff --git a/src/query_engine/query_ast/clauses.py b/src/query_engine/query_ast/clauses.py index 47e23ba..d1da374 100644 --- a/src/query_engine/query_ast/clauses.py +++ b/src/query_engine/query_ast/clauses.py @@ -1,4 +1,6 @@ -from src.lib.utils import ensure_tuple +from src.query_engine.errors.syntax import * +from src.query_engine.query_ast.expression import * +from src.lib.printable import Printable """ Either add some serious logic to items or generate them dynamicaly @@ -30,34 +32,48 @@ 'ORDER BY' ] """ -class Clause: - pass -class Match(Clause): +class Clause(Printable): + expression_type = None def __init__(self, expression): self.expression = expression + @classmethod + def get_expression_type(cls): + return cls.expression_type + + +class Match(Clause): + expression_type = GraphPatternExpression + + def __init__(self, expression): + super().__init__(expression) + class Create(Clause): + expression_type = GraphPatternExpression - def __init__(self, expr): - self.expr = expr + def __init__(self, expression): + super().__init__(expression) class Where(Clause): + expression_type = OperatorExpression - def __init__(self, expr): - self.expr = expr + def __init__(self, expression): + super().__init__(expression) class Return(Clause): + expression_type = GenericExpression - def __init__(self, props=()): + def __init__(self, expression, props=()): + super().__init__(expression) self.props = ensure_tuple(props) - #raise InvalidArguments('Return needs at least one item') + # raise InvalidArguments('Return needs at least one item') MAIN_CLAUSES = [ @@ -85,4 +101,3 @@ def __init__(self, props=()): 'DISTINCT', 'ORDER BY' ] - diff --git a/src/query_engine/query_ast/expression.py b/src/query_engine/query_ast/expression.py index f27fbb6..426dbf0 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_engine/query_ast/expression.py @@ -1,12 +1,12 @@ from src.lib.utils import ensure_tuple -from src.lib.Printable import Printable +from src.lib.printable import Printable class Expression(Printable): - ''' + """ A where (and other elements?) expression handler Contains a list of elements - variables, consts and operations - ''' + """ def __init__(self, elements): self.elements = ensure_tuple(elements) @@ -45,3 +45,11 @@ def __init__(self, simple_exprs): class OperatorExpression(Expression): def __init__(self, elements): Expression.__init__(self, elements) + + +class GenericExpression(Expression): + def __init__(self, elements): + super().__init__(elements) + + + diff --git a/src/query_engine/query_ast/query.py b/src/query_engine/query_ast/query.py index 2f7bd15..7f183f4 100644 --- a/src/query_engine/query_ast/query.py +++ b/src/query_engine/query_ast/query.py @@ -1,35 +1,28 @@ -class Command: +from src.lib.utils import ensure_tuple - """ Defines a basic command for the db (MATCH, WHERE, ...) """ - def __init__(self, clause, expression): - """TODO: to be defined1. - - Args: - clause (Clause): TODO - expression (Expression): TODO - - - """ - self._clause = clause - self._expression = expression - - class SubQuery: - """Docstring for QueryModel. """ def __init__(self, commands): - """TODO: to be defined1. - + """ Args: - commands (List[Command]): + commands (List[Cluase]): """ - self._commands = commands - + self._clauses = ensure_tuple(commands) + + @property + def clauses(self): + return self._clauses + + def __repr__(self): + return str(self._clauses) + + def __eq__(self, other): + return self._clauses == other.clauses -class Query: +class Query: """Docstring for Query. """ def __init__(self, queries): @@ -39,5 +32,10 @@ def __init__(self, queries): Args: queries (List[SubQuery]): """ - self._queries = queries - + self._queries = ensure_tuple(queries) + + def __repr__(self): + return str(self._queries) + + def __eq__(self, other): + return self._queries == other._queries diff --git a/src/query_engine/query_ast/utils.py b/src/query_engine/query_ast/utils.py index a16fb6a..19dd917 100644 --- a/src/query_engine/query_ast/utils.py +++ b/src/query_engine/query_ast/utils.py @@ -1,19 +1,27 @@ from src.query_engine.query_ast.expression import * from src.query_engine.query_ast.clauses import * -# TODO -def get_expression_type(clause): - # TODO type of expression - # is variable expression - # is some other type of expression ? - # is graph expresssion - # is Operations expression - if True: - return GraphPatternExpression - else: - return OperatorExpression +# TODO rename file ... + +STR_TO_CLAUSE = { + 'match': Match, + 'create': Create, + 'return': Return, + 'where': Where +} -# TODO def get_clause_type(clause_str): - return None \ No newline at end of file + """ + Args: + clause_str (str): + Returns: + Clause|None: + """ + clause = clause_str.lower() + clause = STR_TO_CLAUSE.get(clause) + if clause: + return clause + else: + raise UnsupportedClauseError(clause_str) + diff --git a/src/query_engine/query_parser.py b/src/query_engine/query_parser.py index d5763f6..45576e3 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_engine/query_parser.py @@ -92,6 +92,7 @@ def split_list(unsplitted, sep_list): """ Splits a string by list of separators """ + # TODO make it case insensitive splitted = re.split('\s*({})\s*'.format('|'.join(sep_list)), unsplitted) @@ -377,6 +378,27 @@ def collect_elements(raw_simple_expr): return SimpleGraphPatternExpression(res) +####################################################################### +# Expressions parsing # +####################################################################### + +def parse_generic_expression(raw_subexprs): + """ + Args: + raw_subexprs (List[str]): + Returns: + GenericExpression: + """ + + def parse_generic_subexpression(subexpession): + # TODO more cases + return get_identifier(subexpession) + + generic_subexpressions = \ + tuple(parse_generic_subexpression(expr) for expr in raw_subexprs) + return GenericExpression(generic_subexpressions) + + def parse_graph_expression(simple_graph_exprs): """ ()-[]-(); (); (), ()-[]-() @@ -401,20 +423,6 @@ def parse_operator_expression(simple_graph_exprs): pass -def generate_clause(clause, raw_expr): - """ - - Args: - clause (str): - raw_expr (str|List(str)): - - Returns: - Clause: - """ - clause = utils.get_clause_type(clause) - return clause(raw_expr) - - def parse_expression(expression, expression_type): """ @@ -435,8 +443,8 @@ def parse_expression(expression, expression_type): parser = parse_graph_expression elif expression_type == OperatorExpression: parser = parse_operator_expression - elif expression_type == VariableExpression: - parser = parse_variable_expression + elif expression_type == GenericExpression: + parser = parse_generic_expression else: raise UnsupportedExpressionType(expression_type) @@ -454,9 +462,11 @@ def parse_clause(raw_clause): """ clause_str, expr = raw_clause + clause_type = get_clause_type(clause_str) + expr = parse_expression(expr, - get_expression_type(clause_str)) - return generate_clause(clause_str, expr) + clause_type.get_expression_type()) + return clause_type(expr) class QueryParser: diff --git a/src/query_engine/test_query_parser.py b/src/query_engine/test_query_parser.py index 2aa13f6..506892a 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_engine/test_query_parser.py @@ -2,6 +2,7 @@ from src.query_engine.query_parser import * from src.query_engine.query_ast import * +from src.query_engine.query_ast.query import * from src.query_engine.query_ast.models import * from src.query_engine.query_ast.clauses import * from src.query_engine.query_ast.expression import * @@ -363,6 +364,17 @@ def test_compound(self): # TODO test Expressions with ','hj + def test_generic_expression(self): + self.assertEquals(parse_generic_expression(['a']), + GenericExpression(( + Identifier('a') + ))) + self.assertEquals(parse_generic_expression(['a.b.c']), + GenericExpression(( + Identifier(name='a', fields=('b', 'c')) + ))) + # TODO more cases + def test_operator_expressions(self): self.fail() @@ -397,15 +409,16 @@ def test_graph_expressions(self): 'CREATE (you:Person {name:"You"})' 'RETURN you' ), - query.Query([ - query.SubQuery([ + Query([ + SubQuery([ Create(GraphPatternExpression( (SimpleGraphPatternExpression(( Node(identifier=Identifier('you'), labels=Label('Person'), properties=Property('name', 'You')) - ), ),) + ),),) )), + Return(GenericExpression((Identifier('you'),))) ]) ]) ] From 05ef64a90c092414b120f43c609ce68493224b00 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Tue, 5 Jul 2016 13:25:23 +0300 Subject: [PATCH 10/22] Identifiers. --- src/lib/utils.py | 11 +++ src/query_engine/query_ast/query.py | 41 ---------- src/query_engine/query_engine.py | 34 -------- .../errors/syntax.py | 0 .../query_ast/__init__.py | 0 .../query_ast/clauses.py | 18 ++++- .../query_ast/expression.py | 32 +++++--- .../query_ast/models.py | 34 ++++++-- .../query_ast/operators.py | 0 src/query_processor/query_ast/query.py | 79 +++++++++++++++++++ .../query_ast/utils.py | 4 +- .../query_parser.py | 23 +++--- 12 files changed, 169 insertions(+), 107 deletions(-) delete mode 100644 src/query_engine/query_ast/query.py delete mode 100644 src/query_engine/query_engine.py rename src/{query_engine => query_processor}/errors/syntax.py (100%) rename src/{query_engine => query_processor}/query_ast/__init__.py (100%) rename src/{query_engine => query_processor}/query_ast/clauses.py (83%) rename src/{query_engine => query_processor}/query_ast/expression.py (54%) rename src/{query_engine => query_processor}/query_ast/models.py (83%) rename src/{query_engine => query_processor}/query_ast/operators.py (100%) create mode 100644 src/query_processor/query_ast/query.py rename src/{query_engine => query_processor}/query_ast/utils.py (79%) rename src/{query_engine => query_processor}/query_parser.py (96%) diff --git a/src/lib/utils.py b/src/lib/utils.py index 12bfecf..786270c 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -1,5 +1,6 @@ import collections from itertools import tee +from src.query_processor.models import * def ensure_tuple(value): @@ -26,3 +27,13 @@ def pairwise(iterable): a, b = tee(iterable) next(b, None) return zip(a, b) + + +def collect_identifiers(elems): + identifiers = set() + for elem in elems: + if isinstance(elem, Identifier): + identifiers.add(elem) + elif hasattr(elem, 'get_identifiers'): + identifiers.update(elem.get_identifiers()) + return identifiers diff --git a/src/query_engine/query_ast/query.py b/src/query_engine/query_ast/query.py deleted file mode 100644 index 7f183f4..0000000 --- a/src/query_engine/query_ast/query.py +++ /dev/null @@ -1,41 +0,0 @@ -from src.lib.utils import ensure_tuple - - -class SubQuery: - """Docstring for QueryModel. """ - - def __init__(self, commands): - """ - Args: - commands (List[Cluase]): - """ - self._clauses = ensure_tuple(commands) - - @property - def clauses(self): - return self._clauses - - def __repr__(self): - return str(self._clauses) - - def __eq__(self, other): - return self._clauses == other.clauses - - -class Query: - """Docstring for Query. """ - - def __init__(self, queries): - """ - Defines a whole query. - - Args: - queries (List[SubQuery]): - """ - self._queries = ensure_tuple(queries) - - def __repr__(self): - return str(self._queries) - - def __eq__(self, other): - return self._queries == other._queries diff --git a/src/query_engine/query_engine.py b/src/query_engine/query_engine.py deleted file mode 100644 index 0ec4410..0000000 --- a/src/query_engine/query_engine.py +++ /dev/null @@ -1,34 +0,0 @@ -from src.query_engine.query_parser import QueryParser - -class QueryEngine: - def __init__(self, storage_api): - self.storage_api = storage_api - - def execute_query(self, query): - - """ - Executes a passed query. Follows the steps: - - Parse query -> QueryModel - - Optimize query - - execute the query using the storage_api - query - String - """ - query = QueryParser.parse(query) - #TODO query = QueryOptimizer.optimize(query_model) - self.__execute_query() - - @staticmethod - def __apply_post_processors(): - '''Return, sort, ...''' - pass - - - - def __execute_command(command, *args): - # use commands dict - pass - - -command = op[0] -subcommands = op[1:] -QueryEngine.__execute_command(op, *subcommands) diff --git a/src/query_engine/errors/syntax.py b/src/query_processor/errors/syntax.py similarity index 100% rename from src/query_engine/errors/syntax.py rename to src/query_processor/errors/syntax.py diff --git a/src/query_engine/query_ast/__init__.py b/src/query_processor/query_ast/__init__.py similarity index 100% rename from src/query_engine/query_ast/__init__.py rename to src/query_processor/query_ast/__init__.py diff --git a/src/query_engine/query_ast/clauses.py b/src/query_processor/query_ast/clauses.py similarity index 83% rename from src/query_engine/query_ast/clauses.py rename to src/query_processor/query_ast/clauses.py index d1da374..92a5ef1 100644 --- a/src/query_engine/query_ast/clauses.py +++ b/src/query_processor/query_ast/clauses.py @@ -1,5 +1,5 @@ -from src.query_engine.errors.syntax import * -from src.query_engine.query_ast.expression import * +from src.query_processor.query_ast.expression import * +from src.query_processor.query_ast.models import * from src.lib.printable import Printable """ @@ -33,13 +33,25 @@ ] """ +# TODO more heiracal Clauses -class Clause(Printable): +class Clause(Printable, IdentifierHolder): expression_type = None def __init__(self, expression): + """ + + Args: + expression (Expression): + """ self.expression = expression + def get_identifiers(self): + if isinstance(self.expression, IdentifierExpression): + return self.expression.get_identifiers() + else: + return () + @classmethod def get_expression_type(cls): return cls.expression_type diff --git a/src/query_engine/query_ast/expression.py b/src/query_processor/query_ast/expression.py similarity index 54% rename from src/query_engine/query_ast/expression.py rename to src/query_processor/query_ast/expression.py index 426dbf0..18aa5aa 100644 --- a/src/query_engine/query_ast/expression.py +++ b/src/query_processor/query_ast/expression.py @@ -1,5 +1,7 @@ from src.lib.utils import ensure_tuple +from src.lib.utils import collect_identifiers from src.lib.printable import Printable +from src.query_processor.query_ast.models import * class Expression(Printable): @@ -22,34 +24,42 @@ def validate_expression(self): pass -class SimpleGraphPatternExpression(Expression): +class IdentifierExpression(Expression, IdentifierHolder): + def __init__(self, elements, identifiers): + """""" + super().__init__(elements) + self.identifiers = identifiers + + def get_identifiers(self): + return self.identifiers + + +class SimpleGraphPatternExpression(IdentifierExpression): def __init__(self, expr): """ Args: expr (List[Node|Edge]): """ - Expression.__init__(self, expr) + # collect identifiers + super().__init__(expr, collect_identifiers(expr)) -class GraphPatternExpression(Expression): - def __init__(self, simple_exprs): +class GraphPatternExpression(IdentifierExpression): + def __init__(simple_exprs, identifiers): """ Args: simple_exprs (List[SimpleGraphPatternExpression]): """ - Expression.__init__(self, simple_exprs) + super().__init__(simple_exprs, collect_identifiers(simple_exprs)) class OperatorExpression(Expression): - def __init__(self, elements): - Expression.__init__(self, elements) - - -class GenericExpression(Expression): def __init__(self, elements): super().__init__(elements) - +class GenericExpression(IdentifierExpression): + def __init__(self, elements): + super().__init__(elements, collect_identifiers(elements)) diff --git a/src/query_engine/query_ast/models.py b/src/query_processor/query_ast/models.py similarity index 83% rename from src/query_engine/query_ast/models.py rename to src/query_processor/query_ast/models.py index f6abdba..4b8c59f 100644 --- a/src/query_engine/query_ast/models.py +++ b/src/query_processor/query_ast/models.py @@ -1,9 +1,11 @@ from src.lib.utils import ensure_tuple +from src.lib.printable import Printable +from src.lib.utils import collect_identifiers -class Printable: - def __repr__(self): - return "<" + type(self).__name__ + "> " + str(self.__dict__) +class IdentifierHolder: + def get_identifiers(self): + raise NotImplementedError() class Literal: @@ -60,7 +62,7 @@ def value(self): return self._value def __str__(self): - return self._name + '.' + '.'.join(self._fields) + ' ' +\ + return self._name + '.' + '.'.join(self._fields) + ' ' + \ str(self._value) def __repr__(self): @@ -69,9 +71,14 @@ def __repr__(self): def __eq__(self, other): return self.__dict__ == other.__dict__ + def __hash__(self): + # XXX it's a quick fix -- __hash__ must be compatible with __eq__ + return id(self) + class Variable(Identifier): """Keeps data about referenced identifiers.""" + def __init__(self, name, fields): """ Args: @@ -94,7 +101,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ -class Edge(Printable): +class Edge(Printable, IdentifierHolder): """ TODO: make it immutable An edge: @@ -127,7 +134,7 @@ def __init__(self, node_in=None, node_out=None, label=None, def __repr__(self): return '[' + str(self.identifier) + ':' + \ str(self.__label) + ' { ' + str(self.__properties) + ' } < ' + \ - str(self.__node_in) + ' > < ' + str(self.__node_out) + ' > - ' + \ + str(self.__node_in) + ' > < ' + str(self.__node_out) + ' > - ' + \ str(self.__directed) + ']' def __eq__(self, other): @@ -142,6 +149,16 @@ def get_nodes(self): """ return (self.__node_in, self.__node_out) + def get_identifiers(self): + ids = [] + if self.identifier: + ids.append(self.identifier) + if self.__node_in and self.__node_in.get_identifiers(): + ids.append(*self.__node_in.get_identifiers()) + if self.__node_out and self.__node_in.get_identifiers(): + ids.append(*self.__node_out.get_identifiers()) + return ids + @property def label(self): return self.__label @@ -165,7 +182,7 @@ def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, # TODO implement setters -class Node(Printable): +class Node(Printable, IdentifierHolder): """ TODO: make it immutable A node: @@ -179,6 +196,9 @@ def __init__(self, labels=(), identifier=None, properties=()): self.properties = ensure_tuple(properties) self.labels = ensure_tuple(labels) + def get_identifiers(self): + return [self.identifier] if self.identifier else [] + def __eq__(self, other): return self.__dict__ == other.__dict__ diff --git a/src/query_engine/query_ast/operators.py b/src/query_processor/query_ast/operators.py similarity index 100% rename from src/query_engine/query_ast/operators.py rename to src/query_processor/query_ast/operators.py diff --git a/src/query_processor/query_ast/query.py b/src/query_processor/query_ast/query.py new file mode 100644 index 0000000..53b7eaf --- /dev/null +++ b/src/query_processor/query_ast/query.py @@ -0,0 +1,79 @@ +from src.lib.utils import ensure_tuple +from src.lib.utils import collect_identifiers + + +class SubQuery: + """Docstring for QueryModel. """ + + def __init__(self, clauses): + """ + Args: + clauses (List[Cluase]): + identifiers (List[Identifiers]): Keeps sub query identifiers for + faster lookup + """ + self._clauses = ensure_tuple(clauses) + + def get_identifiers(self): + return collect_identifiers(self._clauses) + + @property + def clauses(self): + return self._clauses + + def __repr__(self): + return str(self._clauses) + + def __eq__(self, other): + return self._clauses == other.clauses + + +class Query: + """The root of all evil.""" + + def __init__(self, queries): + """ + Defines a whole query. + + Keeps identifiers list for faster lookup. + Args: + queries (List[SubQuery]): + """ + self._queries = ensure_tuple(queries) + self.identifiers_map = Query.get_identifiers_map(queries) + + @staticmethod + def get_identifiers_map(sub_queries): + """ + Collects Identifiers from the sub-queries and generates a name-dict for + objects (same name may be present in multiple identifiers) + Args: + sub_queries (List[SubQuery]): + Returns: + dict: + """ + name_to_identifiers = {} + # collect identifiers lists from the subqueries + identifiers = [sub_query.collect_identifiers() for + sub_query in + sub_queries] + identifiers = set().union(*identifiers) + + # now populate the map + for identifier in identifiers: + name = identifier.name + if name_to_identifiers.get(name): + name_to_identifiers[name].add(identifier) + else: + name_to_identifiers[name] = {identifier} + return name_to_identifiers + + @property + def queries(self): + return self._queries + + def __repr__(self): + return str(self._queries) + + def __eq__(self, other): + return self._queries == other._queries diff --git a/src/query_engine/query_ast/utils.py b/src/query_processor/query_ast/utils.py similarity index 79% rename from src/query_engine/query_ast/utils.py rename to src/query_processor/query_ast/utils.py index 19dd917..261faa9 100644 --- a/src/query_engine/query_ast/utils.py +++ b/src/query_processor/query_ast/utils.py @@ -1,5 +1,5 @@ -from src.query_engine.query_ast.expression import * -from src.query_engine.query_ast.clauses import * +from src.query_processor.query_ast.expression import * +from src.query_processor.query_ast.clauses import * # TODO rename file ... diff --git a/src/query_engine/query_parser.py b/src/query_processor/query_parser.py similarity index 96% rename from src/query_engine/query_parser.py rename to src/query_processor/query_parser.py index 45576e3..aa4c26d 100644 --- a/src/query_engine/query_parser.py +++ b/src/query_processor/query_parser.py @@ -3,14 +3,14 @@ import logging as Logger -from src.query_engine.errors.syntax import * - -from src.query_engine.query_ast.operators import * -from src.query_engine.query_ast.query import * -from src.query_engine.query_ast.models import * -from src.query_engine.query_ast.clauses import * -from src.query_engine.query_ast.expression import * -from src.query_engine.query_ast.utils import * +from src.query_processor.errors.syntax import * + +from src.query_processor.query_ast.operators import * +from src.query_processor.query_ast.query import * +from src.query_processor.query_ast.models import * +from src.query_processor.query_ast.clauses import * +from src.query_processor.query_ast.expression import * +from src.query_processor.query_ast.utils import * from src.lib import utils ''' @@ -355,7 +355,7 @@ def collect_elements(raw_simple_expr): raw_elements = collect_elements(raw_simple_expr) - # TODO check number of nodes + # TODO check number of nodeshttps://gist.github.com/mkaz/141394d9ee97bed99121 # Parse nodes nodes = [parse_node(raw_node) for raw_node in raw_elements['nodes']] @@ -377,6 +377,9 @@ def collect_elements(raw_simple_expr): return SimpleGraphPatternExpression(res) +def optimize_identifiers(expr): + """Reuse same identifier objects for different elements.""" + return expr ####################################################################### # Expressions parsing # @@ -448,6 +451,8 @@ def parse_expression(expression, expression_type): else: raise UnsupportedExpressionType(expression_type) + expression = optimize_identifiers(expression) + return parser(expression) From 460b28b7273cd67fefe0a0dd7fbec5f7121cdea8 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Tue, 5 Jul 2016 13:26:23 +0300 Subject: [PATCH 11/22] Better abstraction (query processor) -- in progress. --- .../process_manager/process_manager.py | 0 .../__init__.py | 0 src/query_processor/plan_executor.py | 71 +++++++++++++++++++ .../query_optimizer.py | 0 src/query_processor/query_processor.py | 33 +++++++++ .../syntax_checker.py | 0 .../test_query_parser.py | 12 ++-- 7 files changed, 110 insertions(+), 6 deletions(-) rename pyFly.py => src/process_manager/process_manager.py (100%) rename src/{query_engine => query_processor}/__init__.py (100%) create mode 100644 src/query_processor/plan_executor.py rename src/{query_engine => query_processor}/query_optimizer.py (100%) create mode 100644 src/query_processor/query_processor.py rename src/{query_engine => query_processor}/syntax_checker.py (100%) rename src/{query_engine => query_processor}/test_query_parser.py (98%) diff --git a/pyFly.py b/src/process_manager/process_manager.py similarity index 100% rename from pyFly.py rename to src/process_manager/process_manager.py diff --git a/src/query_engine/__init__.py b/src/query_processor/__init__.py similarity index 100% rename from src/query_engine/__init__.py rename to src/query_processor/__init__.py diff --git a/src/query_processor/plan_executor.py b/src/query_processor/plan_executor.py new file mode 100644 index 0000000..cf84dae --- /dev/null +++ b/src/query_processor/plan_executor.py @@ -0,0 +1,71 @@ + + +class PlanExecutor: + + def __init__(self, storage_manager, execution_scheduler): + self.storage_manager = storage_manager + self.scheduler = execution_scheduler + + + @staticmethod + def _post_processors(): + '''Return, sort, ...''' + pass + + async def execute(query, *args): + # TODO + """ + Registers operation to the scheduler and waits for it's + result + Args: + query (Query): + *args: + + Returns: + + """ + name_to_identifiers_map = query.identifiers + + def populate_post_queries(queries, population_data): + """ + Applies data from results of a query. (populates identifiers) + N.B. query param is altered + + TODO optimize -- collect needed fields for the result and search only + those in the db + Args: + queries (Query): + population_data (dict): + Returns: + None: It updates the input object + """ + def update_identifier_data(query, name, value): + if query.get_identifiers_map().get(key): + query.get_identifiers_map()[key] + + for key, value in population_data.iteritems(): + for query in queries: + update_identifier_data(query, key, value) + query.get_identifiers_map() + + + + + async def execute_sub_query(sub_query): + """ + Args: + sub_query: + + Returns: + dict: Identifier -> List[Node|Edge|...] + """ + return {} + + sub_queries = query.queries + cur_query = query.queries[0] + for idx in range(1, len(sub_queries)): + results = await execute_sub_query(cur_query) + # populate following queries (as they might share an identifier) + populate_post_queries(sub_queries[idx:], results) + cur_query = sub_queries[idx] + diff --git a/src/query_engine/query_optimizer.py b/src/query_processor/query_optimizer.py similarity index 100% rename from src/query_engine/query_optimizer.py rename to src/query_processor/query_optimizer.py diff --git a/src/query_processor/query_processor.py b/src/query_processor/query_processor.py new file mode 100644 index 0000000..a7f834d --- /dev/null +++ b/src/query_processor/query_processor.py @@ -0,0 +1,33 @@ +from src.query_processor.query_parser import QueryParser +import asyncio + + +class QueryProcessor: + def __init__(self, plan_executor): + self.executor = plan_executor + + async def process(self, query): + """ + Executes a passed query. Follows the steps: + - Parse query -> QueryModel + - Query rewrite + - Optimize query + - execute the query using the storage_api + Args: + query (str): + """ + query = QueryParser.parse_query(query) + plan = QueryProcessor.query_rewrite(query) + + # TODO query = QueryOptimizer.optimize(query) + + # plan executor + self.executor.execute(plan) + + + @staticmethod + def query_rewrite(query): + # TODO + return query + + diff --git a/src/query_engine/syntax_checker.py b/src/query_processor/syntax_checker.py similarity index 100% rename from src/query_engine/syntax_checker.py rename to src/query_processor/syntax_checker.py diff --git a/src/query_engine/test_query_parser.py b/src/query_processor/test_query_parser.py similarity index 98% rename from src/query_engine/test_query_parser.py rename to src/query_processor/test_query_parser.py index 506892a..5668dac 100644 --- a/src/query_engine/test_query_parser.py +++ b/src/query_processor/test_query_parser.py @@ -1,11 +1,11 @@ from unittest import TestCase -from src.query_engine.query_parser import * +from src.query_processor.query_parser import * -from src.query_engine.query_ast import * -from src.query_engine.query_ast.query import * -from src.query_engine.query_ast.models import * -from src.query_engine.query_ast.clauses import * -from src.query_engine.query_ast.expression import * +from src.query_processor.query_ast import * +from src.query_processor.query_ast.query import * +from src.query_processor.query_ast.models import * +from src.query_processor.query_ast.clauses import * +from src.query_processor.query_ast.expression import * # TODO [0] test string, [1] result object From e2d85082252490564ec4f93beca6d90f4f6824a5 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Tue, 5 Jul 2016 22:16:29 +0300 Subject: [PATCH 12/22] Fun with sockets (and threads) -- Shell and communication manager. --- pyfly_shell.py | 37 ++++++++++++++++ src/communications_manager.py | 81 +++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 pyfly_shell.py create mode 100644 src/communications_manager.py diff --git a/pyfly_shell.py b/pyfly_shell.py new file mode 100644 index 0000000..c294d3e --- /dev/null +++ b/pyfly_shell.py @@ -0,0 +1,37 @@ +# Echo client program +import socket + +# Cap response to 2048 bytes + +QUERY_END_SYMBOL = ';' + +PROMPT = '> ' + +HOST = 'localhost' # The remote host +PORT = 50003 # The same port as used by the server +print('Welcome to PyFlyDB shell. Make your queries.') +print('Use Ctrl+C to exit.') +print('Establishing connection') +with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.connect((HOST, PORT)) + except OSError as e: + s.close() + print(e, 'Exiting ...') + print('Try restarting. ') + exit() + print('Connected!') + print('Enter your commands') + + # Prompt loop + while True: + query = [] + line = '' + while not line or line[-1] != QUERY_END_SYMBOL: + line = input(PROMPT if not query else '... ').strip() + query.append(line) + # Send the final query + s.sendall(bytearray(' '.join(query), encoding='utf-8')) + print('Waiting response ...') + data = s.recv(2048) + print('Received', repr(data)) diff --git a/src/communications_manager.py b/src/communications_manager.py new file mode 100644 index 0000000..975a0d7 --- /dev/null +++ b/src/communications_manager.py @@ -0,0 +1,81 @@ +import logging +import socket +from threading import Thread + +# TODO proper SIGUP handling (close connections ?) + +### SUPPORTS ### +# Support only UTF-8 messages +# A message must end on ; in order to be processed +# Parallel request processing +### + +QUERY_END_SYMBOL = ';' +UTF8 = 'utf-8' + +logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG) +Logger = logging.getLogger('Communications') + +HOST = '' # Symbolic name meaning all available interfaces (lo, eth0, ...) +PORT = 50003 # Arbitrary non-privileged port +MAX_CONNECTIONS = 2 + +connection_trds = [] + +def is_request_end(data): + return data.strip()[-1] == QUERY_END_SYMBOL + + +class SocketCommunicationsManager: + def __init__(self, query_processor): + self.processor = query_processor + + def run(self): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((HOST, PORT)) + Logger.info('Socket created: %s:%s', HOST or 'ALL', PORT) + s.listen(1) + while True: + Logger.info('Listening ...') + conn, addr = s.accept() + + t = Thread(target=self.connection_worker, args=(conn, addr)) + t.start() + connection_trds.append(t) + # TODO send signals to threads upon close + + # TODO is this needed ? + for t in connection_trds: + t.join() + + def connection_worker(self, conn, addr): + """The assigned worker for the established connection.""" + + def process_request(query): + Logger.debug('Processing request: %s', query) + result = self.processor.process(query) + + print('Sending result ...') + conn.sendall(bytearray(result, encoding=UTF8)) + + Logger.info('Connections established: %s', conn) + with conn: + Logger.info('Connected by %s', addr) + query = [] + while True: + data = str(conn.recv(1024), encoding=UTF8) + if not data: + # on connection close + break + query.append(data) + Logger.debug('Data: %s', data) + # check and run + # executor + if is_request_end(data): + process_request(''.join(query)) + + Logger.info('Connection closed %s', addr) + + # TODO what to do on connection close ? + def close_connection(self): + pass From 68698d9c6cd0a5f9e7ec8f16355b6f6fb79075c0 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Wed, 6 Jul 2016 12:27:14 +0300 Subject: [PATCH 13/22] Fix identifier collecting. --- src/lib/utils.py | 4 +-- src/query_processor/query_ast/expression.py | 2 +- src/query_processor/query_ast/models.py | 35 ++++++++++++++------- src/query_processor/query_ast/query.py | 4 +-- src/query_processor/query_parser.py | 25 +++++++++++++++ 5 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/lib/utils.py b/src/lib/utils.py index 786270c..cfae43b 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -1,6 +1,6 @@ import collections from itertools import tee -from src.query_processor.models import * +from src.query_processor.query_ast import models def ensure_tuple(value): @@ -32,7 +32,7 @@ def pairwise(iterable): def collect_identifiers(elems): identifiers = set() for elem in elems: - if isinstance(elem, Identifier): + if isinstance(elem, models.Identifier): identifiers.add(elem) elif hasattr(elem, 'get_identifiers'): identifiers.update(elem.get_identifiers()) diff --git a/src/query_processor/query_ast/expression.py b/src/query_processor/query_ast/expression.py index 18aa5aa..3039f51 100644 --- a/src/query_processor/query_ast/expression.py +++ b/src/query_processor/query_ast/expression.py @@ -46,7 +46,7 @@ def __init__(self, expr): class GraphPatternExpression(IdentifierExpression): - def __init__(simple_exprs, identifiers): + def __init__(self, simple_exprs): """ Args: diff --git a/src/query_processor/query_ast/models.py b/src/query_processor/query_ast/models.py index 4b8c59f..1d1af33 100644 --- a/src/query_processor/query_ast/models.py +++ b/src/query_processor/query_ast/models.py @@ -1,6 +1,13 @@ -from src.lib.utils import ensure_tuple from src.lib.printable import Printable -from src.lib.utils import collect_identifiers +from src.lib.utils import ensure_tuple + + +class PropertiesHolder: + def properties_as_dict(self): + props = {} + for prop in self.properties: + props[prop.key] = prop.value + return props class IdentifierHolder: @@ -61,6 +68,10 @@ def __init__(self, name=None, fields=(), value=None): def value(self): return self._value + @property + def name(self): + return self._name + def __str__(self): return self._name + '.' + '.'.join(self._fields) + ' ' + \ str(self._value) @@ -76,17 +87,17 @@ def __hash__(self): return id(self) -class Variable(Identifier): +class Variable: """Keeps data about referenced identifiers.""" - def __init__(self, name, fields): + def __init__(self, identifier, fields): """ Args: name (str): fields (List[str]): represents the variable properties sequence -> a.b.c -> [b, c] """ - Identifier.__init__(self, name) + self.id = identifier self.fields = ensure_tuple(fields) @@ -101,7 +112,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ -class Edge(Printable, IdentifierHolder): +class Edge(Printable, IdentifierHolder, PropertiesHolder): """ TODO: make it immutable An edge: @@ -121,7 +132,7 @@ def __init__(self, node_in=None, node_out=None, label=None, node_in (Node): node_out (Node): directed (bool|str): left or right directed - identifier (Identifier|Variable): + variable (Identifier|Variable): properties: """ self.__label = label @@ -155,7 +166,7 @@ def get_identifiers(self): ids.append(self.identifier) if self.__node_in and self.__node_in.get_identifiers(): ids.append(*self.__node_in.get_identifiers()) - if self.__node_out and self.__node_in.get_identifiers(): + if self.__node_out and self.__node_out.get_identifiers(): ids.append(*self.__node_out.get_identifiers()) return ids @@ -174,7 +185,7 @@ class ReturnEdge(Edge): it shouldn't be returned. """ - def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, + def __init__(self, direction, label, nodeLeft, nodeRight, _id, variable, properties): Edge.__init__(self, label, properties) self.__id = _id @@ -182,7 +193,7 @@ def __init__(self, direction, label, nodeLeft, nodeRight, _id, identifier, # TODO implement setters -class Node(Printable, IdentifierHolder): +class Node(Printable, IdentifierHolder, PropertiesHolder): """ TODO: make it immutable A node: @@ -204,6 +215,6 @@ def __eq__(self, other): class ReturnNode(Node): - def __init__(self, identifier, _id, properties, labels=[]): - Node.__init__(self, identifier, properties, labels) + def __init__(self, variable, _id, properties, labels=[]): + Node.__init__(self, variable, properties, labels) self.__id = _id diff --git a/src/query_processor/query_ast/query.py b/src/query_processor/query_ast/query.py index 53b7eaf..64dcb29 100644 --- a/src/query_processor/query_ast/query.py +++ b/src/query_processor/query_ast/query.py @@ -54,7 +54,7 @@ def get_identifiers_map(sub_queries): """ name_to_identifiers = {} # collect identifiers lists from the subqueries - identifiers = [sub_query.collect_identifiers() for + identifiers = [sub_query.get_identifiers() for sub_query in sub_queries] identifiers = set().union(*identifiers) @@ -69,7 +69,7 @@ def get_identifiers_map(sub_queries): return name_to_identifiers @property - def queries(self): + def sub_queries(self): return self._queries def __repr__(self): diff --git a/src/query_processor/query_parser.py b/src/query_processor/query_parser.py index aa4c26d..b89cf83 100644 --- a/src/query_processor/query_parser.py +++ b/src/query_processor/query_parser.py @@ -223,6 +223,31 @@ def get_labels(raw_elem, multi=True): else: return matches +# ## TODO ONLY ONE INSTANCE +# def get_identifier_by_name(name): +# """Keep only one identifier instance by name.""" +# identifier = identifiers.get(name) +# if not identifier: +# identifier = Identifier(name=name) +# # add to existing +# identifiers[name] = identifier +# return identifier +# +# def get_variable(raw_elem): +# """ +# ;id {};id:... {} +# Args: +# raw_node (str): +# Returns: +# Variable|None: +# """ +# match = VARIABLE_REGEX.match(raw_elem) +# if match: +# match = match.group(0).split('.') +# id = get_identifier_by_name(match[0]) +# fields = match[1:] +# match = Variable(identifier=id, fields=fields ) +# return match def get_variable(raw_elem): """ From f662068c5b4dd99e00b9c06ffe46df4294b79881 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Wed, 6 Jul 2016 22:10:00 +0300 Subject: [PATCH 14/22] Update comm_manger. --- src/communications_manager.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/communications_manager.py b/src/communications_manager.py index 975a0d7..4dae1d7 100644 --- a/src/communications_manager.py +++ b/src/communications_manager.py @@ -10,8 +10,6 @@ # Parallel request processing ### -QUERY_END_SYMBOL = ';' -UTF8 = 'utf-8' logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG) Logger = logging.getLogger('Communications') @@ -19,8 +17,10 @@ HOST = '' # Symbolic name meaning all available interfaces (lo, eth0, ...) PORT = 50003 # Arbitrary non-privileged port MAX_CONNECTIONS = 2 +QUERY_END_SYMBOL = ';' +UTF8 = 'utf-8' + -connection_trds = [] def is_request_end(data): return data.strip()[-1] == QUERY_END_SYMBOL @@ -29,7 +29,9 @@ def is_request_end(data): class SocketCommunicationsManager: def __init__(self, query_processor): self.processor = query_processor + self.connection_trds = [] + # TODO rename def run(self): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind((HOST, PORT)) @@ -41,11 +43,11 @@ def run(self): t = Thread(target=self.connection_worker, args=(conn, addr)) t.start() - connection_trds.append(t) + self.connection_trds.append(t) # TODO send signals to threads upon close # TODO is this needed ? - for t in connection_trds: + for t in self.connection_trds: t.join() def connection_worker(self, conn, addr): From ca7c459ab6c6dcc265c9437fbebe50580eb600d1 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Wed, 6 Jul 2016 22:14:31 +0300 Subject: [PATCH 15/22] Support Lodash in labels. --- src/query_processor/query_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query_processor/query_parser.py b/src/query_processor/query_parser.py index b89cf83..dc4c905 100644 --- a/src/query_processor/query_parser.py +++ b/src/query_processor/query_parser.py @@ -70,7 +70,7 @@ class EdgeDirections(Enum): VARIABLE_REGEX = re.compile('\w+(\.\w+)*(?:|\s)?') # Match labels, without matching properties -LABELS_REGEX = re.compile('[\w:]*?:(\w+)(?:|\s)?') +LABELS_REGEX = re.compile('[\w:]*?:([\w_]+)(?:|\s)?') # TODO allowed_val_chars = '\w|\'' PROPERTIES_BODY_REGEX = re.compile('{(.*?)}') From 2dcea773eb3c5affe5fa235b32e627516b7e0121 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 00:38:06 +0300 Subject: [PATCH 16/22] Fix Clause splitting. --- src/query_processor/query_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query_processor/query_parser.py b/src/query_processor/query_parser.py index dc4c905..8c6c58c 100644 --- a/src/query_processor/query_parser.py +++ b/src/query_processor/query_parser.py @@ -93,7 +93,7 @@ def split_list(unsplitted, sep_list): Splits a string by list of separators """ # TODO make it case insensitive - splitted = re.split('\s*({})\s*'.format('|'.join(sep_list)), + splitted = re.split('\s*({})\s+'.format('|'.join(sep_list)), unsplitted) if len(splitted) > 0 and not splitted[0]: From 80ec7fb5b31b2016b67084d1a1c531a15dd7f202 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 00:39:42 +0300 Subject: [PATCH 17/22] Test fixing (Fails) --- src/query_processor/query_ast/utils.py | 1 + src/query_processor/test_query_parser.py | 81 ++++++++++++++++++++---- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/query_processor/query_ast/utils.py b/src/query_processor/query_ast/utils.py index 261faa9..498c1fa 100644 --- a/src/query_processor/query_ast/utils.py +++ b/src/query_processor/query_ast/utils.py @@ -1,5 +1,6 @@ from src.query_processor.query_ast.expression import * from src.query_processor.query_ast.clauses import * +from src.query_processor.errors.syntax import UnsupportedClauseError # TODO rename file ... diff --git a/src/query_processor/test_query_parser.py b/src/query_processor/test_query_parser.py index 5668dac..633dec1 100644 --- a/src/query_processor/test_query_parser.py +++ b/src/query_processor/test_query_parser.py @@ -1,17 +1,17 @@ from unittest import TestCase -from src.query_processor.query_parser import * -from src.query_processor.query_ast import * -from src.query_processor.query_ast.query import * from src.query_processor.query_ast.models import * from src.query_processor.query_ast.clauses import * from src.query_processor.query_ast.expression import * +from src.query_processor.query_ast.query import * +from src.query_processor.query_parser import * # TODO [0] test string, [1] result object - class TestQueryParser(TestCase): + # TODO test identifier - should return same object + def test_parse_graph_expression(self): # test with , # test with 1 node @@ -277,6 +277,7 @@ def test_list_split(self): self.assertEquals(split_list('Aa Baa aA baab', ['aa']), ['aa', 'Baa', 'aa', 'baab']) + # TODO substrings def test_parse_id_expression(self): self.fail() @@ -311,8 +312,8 @@ def test_compound(self): 'CREATE (anna)-[:FRIEND]->(:Person:Expert ' '{name:"Amanda"})-[:WORKED_WITH]->(neo);' ), - query.Query([ - query.SubQuery([ + Query([ + SubQuery([ Match(GraphPatternExpression([SimpleGraphPatternExpression( [Node(labels=Label('Database'), identifier=Identifier( @@ -321,7 +322,7 @@ def test_compound(self): Property('name', 'Neo4j')])])])), ]), - query.SubQuery([ + SubQuery([ Match(GraphPatternExpression([Node(labels=Label('Person'), identifier=Identifier( 'anna'), @@ -329,7 +330,7 @@ def test_compound(self): Property('name', 'Anna')])])), ]), - query.SubQuery([ + SubQuery([ Create(GraphPatternExpression([Edge(label='FRIEND', directed=True, node_in=Node( @@ -383,8 +384,8 @@ def test_graph_expressions(self): 'MATCH (you {name:"You"})-[:FRIEND]->(yourFriends)' 'RETURN you, yourFriends' ), - query.Query([ - query.SubQuery([ + Query([ + SubQuery([ Match(GraphPatternExpression([Edge(label='FRIEND', directed=True, node_in= @@ -428,8 +429,8 @@ def test_graph_expressions(self): 'RETURN user.name' ), ( - query.Query([ - query.SubQuery([Match( + Query([ + SubQuery([Match( GraphPatternExpression([Edge(label='PURCHASED', directed=True, node_in=Node( @@ -464,5 +465,61 @@ def test_graph_expressions(self): self.assertEqual(self.parser.parse_query(TEST_MORE_EDGES[0]), TEST_MORE_EDGES[1]) + def test_query_utils(self): + idAnna1 = Identifier('anna') + idAnna2 = Identifier('anna') + idNeo1 = Identifier('neo') + idNeo2 = Identifier('neo') + query = Query([ + SubQuery([ + Match(GraphPatternExpression([SimpleGraphPatternExpression( + [Node(labels=Label('Database'), + identifier=idNeo2, + properties=[ + Property('name', + 'Neo4j')])])])), + ]), + SubQuery([ + Match(GraphPatternExpression([Node(labels=Label('Person'), + identifier=idAnna1, + properties=[ + Property('name', + 'Anna')])])), + ]), + SubQuery([ + Create(GraphPatternExpression([Edge(label='FRIEND', + directed=True, + node_in=Node( + identifier=idAnna2), + node_out=Node( + labels=( + Label('Person'), + Label( + 'Expert')), + properties= + Property('name', + 'Amanda'))), + Edge(label='WORKED_WITH', + directed=True, + node_out=Node( + identifier=idNeo1), + node_in=Node( + labels=( + Label('Person'), + Label( + 'Expert')), + properties= + Property('name', + 'Amanda')))])) + ]) + ]) + + self.assertEquals(Query.get_identifiers_map(query.sub_queries), + { + 'anna': {idAnna1, idAnna2}, + 'neo': {idNeo1, idNeo2} + }) + + def test_exceptions(self): self.fail() From 0ea14ddc537fb22772bde3f7cf40940ad90e31f2 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 15:07:34 +0300 Subject: [PATCH 18/22] Update communication. --- pyfly_shell.py | 70 ++++++++++++++++++++++------------- src/communications_manager.py | 26 ++++++++++--- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/pyfly_shell.py b/pyfly_shell.py index c294d3e..6f9e45e 100644 --- a/pyfly_shell.py +++ b/pyfly_shell.py @@ -9,29 +9,47 @@ HOST = 'localhost' # The remote host PORT = 50003 # The same port as used by the server -print('Welcome to PyFlyDB shell. Make your queries.') -print('Use Ctrl+C to exit.') -print('Establishing connection') -with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - try: - s.connect((HOST, PORT)) - except OSError as e: - s.close() - print(e, 'Exiting ...') - print('Try restarting. ') - exit() - print('Connected!') - print('Enter your commands') - - # Prompt loop - while True: - query = [] - line = '' - while not line or line[-1] != QUERY_END_SYMBOL: - line = input(PROMPT if not query else '... ').strip() - query.append(line) - # Send the final query - s.sendall(bytearray(' '.join(query), encoding='utf-8')) - print('Waiting response ...') - data = s.recv(2048) - print('Received', repr(data)) + + +def default_input_method(cur_query): + return input(PROMPT if not cur_query else '... ').strip() + + +class PyflyShell: + def __init__(self, host=HOST, port=PORT, input_method=default_input_method): + """""" + self.input_method = input_method + self.host = host + self.port = port + + def run(self): + print('Welcome to PyFlyDB shell. Make your queries.') + print('Use Ctrl+C to exit.') + print('Establishing connection') + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.connect((self.host, self.port)) + except OSError as e: + s.close() + print(e, 'Exiting ...') + print('Try restarting. ') + exit() + print('Connected!') + print('Enter your commands') + + # Prompt loop + while True: + query = [] + line = '' + while not line or line[-1] != QUERY_END_SYMBOL: + line = self.input_method(query) + query.append(line) + # Send the final query + s.sendall(bytearray(' '.join(query), encoding='utf-8')) + print('Waiting response ...') + data = s.recv(2048) + print('Received', repr(data)) + + +if __name__ == '__main__': + PyflyShell().run() diff --git a/src/communications_manager.py b/src/communications_manager.py index 4dae1d7..841188c 100644 --- a/src/communications_manager.py +++ b/src/communications_manager.py @@ -1,5 +1,6 @@ import logging import socket +import sys from threading import Thread # TODO proper SIGUP handling (close connections ?) @@ -10,8 +11,12 @@ # Parallel request processing ### +logging.basicConfig( + level=logging.NOTSET, + format='%(threadName)10s %(name)18s: %(message)s', + stream=sys.stderr +) -logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG) Logger = logging.getLogger('Communications') HOST = '' # Symbolic name meaning all available interfaces (lo, eth0, ...) @@ -55,7 +60,7 @@ def connection_worker(self, conn, addr): def process_request(query): Logger.debug('Processing request: %s', query) - result = self.processor.process(query) + result = self.processor.process(query[:-1]) print('Sending result ...') conn.sendall(bytearray(result, encoding=UTF8)) @@ -63,21 +68,32 @@ def process_request(query): Logger.info('Connections established: %s', conn) with conn: Logger.info('Connected by %s', addr) - query = [] + query_builder = [] while True: data = str(conn.recv(1024), encoding=UTF8) if not data: # on connection close break - query.append(data) + query_builder.append(data) Logger.debug('Data: %s', data) # check and run # executor if is_request_end(data): - process_request(''.join(query)) + process_request(''.join(query_builder)) Logger.info('Connection closed %s', addr) # TODO what to do on connection close ? def close_connection(self): pass + + +# XXX testing +class DummyRepeaterProcessor: + def process(self, query): + Logger.debug(query) + return query + + +if __name__ == '__main__': + SocketCommunicationsManager(DummyRepeaterProcessor()).run() From a9700fe4227f900d6e803317c0b77ab010457d28 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 15:21:06 +0300 Subject: [PATCH 19/22] Update with notes. --- src/query_processor/query_ast/clauses.py | 3 +++ src/query_processor/query_parser.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/query_processor/query_ast/clauses.py b/src/query_processor/query_ast/clauses.py index 92a5ef1..05584e3 100644 --- a/src/query_processor/query_ast/clauses.py +++ b/src/query_processor/query_ast/clauses.py @@ -34,6 +34,9 @@ """ # TODO more heiracal Clauses +# - clause possible possitions (after, before, only in the end) +# ... + class Clause(Printable, IdentifierHolder): expression_type = None diff --git a/src/query_processor/query_parser.py b/src/query_processor/query_parser.py index 8c6c58c..ee46be7 100644 --- a/src/query_processor/query_parser.py +++ b/src/query_processor/query_parser.py @@ -36,11 +36,13 @@ TODO -- +- RETURN - only in the end - Set x.y = 10 - WITH c, SUM(..) AS x +- x = (:A)-[]-(:B) -- pattern variables http://neo4j.com/docs/developer-manual/current/#_pattern_variables -TODO -- support for properties +-- support for properties Variable fields: - in node -> (var) - Return, With, ... From a0198c3b8231b1546e993ce327a414408058981a Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 16:42:09 +0300 Subject: [PATCH 20/22] Init daemonizer (Problem with socket server daemonizing). --- pyfly_init.py | 33 ++++++++++++++ pyflyd.py | 109 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + setup.py | 7 ++- 4 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 pyfly_init.py create mode 100755 pyflyd.py diff --git a/pyfly_init.py b/pyfly_init.py new file mode 100644 index 0000000..16ebeb3 --- /dev/null +++ b/pyfly_init.py @@ -0,0 +1,33 @@ +import asyncio + +from src.communications_manager import SocketCommunicationsManager +from src.process_manager.process_manager import ProcessManager +from src.query_processor.query_processor import QueryProcessor + +loop = asyncio.get_event_loop() +loop.run_forever() + +# communication_manager = None +# query_engine = None +# plan_executor = None +# process_manager = None + +""" +Set up environment +""" + + +def init(): + # XXX + # storage_manager = StorageManager() + process_manager = ProcessManager() + query_processor = QueryProcessor(process_manager=process_manager, + storage_manager=None) #,storage_manager) + + communications_manager = SocketCommunicationsManager(query_processor) + # Initialize the main process + communications_manager.run() + + +if __name__ == '__main__': + init() diff --git a/pyflyd.py b/pyflyd.py new file mode 100755 index 0000000..ded9f00 --- /dev/null +++ b/pyflyd.py @@ -0,0 +1,109 @@ + +import argparse +import grp +import logging +import logging.handlers +import signal +import sys + +import daemon +import lockfile + +from src.communications_manager import SocketCommunicationsManager + +import pyfly_init +import os + +# Deafults +DATA_DIR = os.path.dirname(os.path.realpath(__file__)) # Script dir '/var/lib/pyfly' +LOG_FILENAME = "/tmp/pyfly.log" +LOG_LEVEL = logging.INFO + +PY_FLY_GRP = 'pyfly' + +# Define and parse command line arguments +parser = argparse.ArgumentParser(description="Runs the PyFly Graph database.") +parser.add_argument("-l", "--log", + help="file to write log to (default '" + LOG_FILENAME + "')") +parser.add_argument("--data-dir", dest='data_dir', + help="data directory (default '" + DATA_DIR + "')") + +# If the log file is specified on the command line then override the default +args = parser.parse_args() +if args.log: + LOG_FILENAME = args.log +if args.data_dir: + DATA_DIR = args.data_dir + +####################################################################### +# LOGGING # +####################################################################### +# Configure logging to log to a file, making a new file at midnight +# and keeping the last 3 day's data +# Give the logger a unique name (good practice) +logger = logging.getLogger(__name__) +# Set the log level to LOG_LEVEL +logger.setLevel(LOG_LEVEL) +# Make a handler that writes to a file, making a new file at midnight and +# keeping 3 backups +handler = logging.handlers.TimedRotatingFileHandler(LOG_FILENAME, + when="midnight", + backupCount=3) +# Format each log message like this +formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s') +# Attach the formatter to the handler +handler.setFormatter(formatter) +# Attach the handler to the logger +logger.addHandler(handler) + + +# Make a class we can use to capture stdout and sterr in the log +class MyLogger: + def __init__(self, logger, level): + """Needs a logger and a logger level.""" + self.logger = logger + self.level = level + + def write(self, message): + # Only log if there is a message (not just a new line) + if message.rstrip() != "": + self.logger.log(self.level, message.rstrip()) + + def flush(self): + pass + + +# Replace stdout with logging to file at INFO level +sys.stdout = MyLogger(logger, logging.INFO) +# Replace stderr with logging to file at ERROR level +sys.stderr = MyLogger(logger, logging.ERROR) + +####################################################################### +# DAEMON # +####################################################################### +print('Starting ...') +context = daemon.DaemonContext( + working_directory=DATA_DIR, + umask=0o002, + pidfile=lockfile.FileLock('/var/run/pyfly.pid'), +) + +# TODO +context.signal_map = { + signal.SIGTERM: exit, #program_cleanup, + signal.SIGHUP: 'terminate', + # signal.SIGUSR1: reload_program_config, +} + +mail_gid = grp.getgrnam(PY_FLY_GRP).gr_gid +context.gid = mail_gid + +# context.files_preserve = [important_file, interesting_file] + +# initial_program_setup() + +# run the daemon +with context: + print('Running ...') + pyfly_init.init() + # SocketCommunicationsManager().run() diff --git a/requirements.txt b/requirements.txt index b4cd666..3e8837d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +daemon SQLAlchemy==1.0.13 diff --git a/setup.py b/setup.py index 5fbf0f5..9f0f0d5 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ - -m setuptools import setup, find_packages +from setuptools import setup, find_packages setup( name="pyfly", version="0.1", packages=find_packages(), - install_requires=['SQLAlchemy>=1.0.13'], + install_requires=['SQLAlchemy>=1.0.13', + 'python-daemon>=2.1.0'], author="alexnad", author_email="alexandernadjarian@gmail.com", description="native python graph database", @@ -13,4 +13,3 @@ keywords="graph database NoSQL databases", url="https://github.com/alexnad/PyFlyDB" ) - From 2d18ba2819ed02e871cb7731736036ee69f7c8c2 Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Thu, 7 Jul 2016 22:51:33 +0300 Subject: [PATCH 21/22] Update structure. - unfinished [query_rewriter, plan_executor] --- .gitignore | 2 + pyFlyd.py | 6 - src/communications_manager.py | 15 +- src/lib/utils.py | 1 + src/process_manager/process_manager.py | 57 +++ src/query_processor/plan_executor.py | 32 +- src/query_processor/query_ast/__init__.py | 2 +- src/query_processor/query_ast/plan.py | 39 ++ src/query_processor/query_processor.py | 24 +- src/query_processor/query_rewriter.py | 41 ++ src/query_processor/test_query_parser.py | 525 ---------------------- tests/query_engine/test_query_parser.py | 0 12 files changed, 168 insertions(+), 576 deletions(-) delete mode 100644 pyFlyd.py create mode 100644 src/query_processor/query_ast/plan.py create mode 100644 src/query_processor/query_rewriter.py delete mode 100644 src/query_processor/test_query_parser.py delete mode 100644 tests/query_engine/test_query_parser.py diff --git a/.gitignore b/.gitignore index 923c7da..c65f8da 100755 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,5 @@ source/data/ *.db celerybeat-schedule.db .vimrc.local +test* +note* diff --git a/pyFlyd.py b/pyFlyd.py deleted file mode 100644 index 4a425b0..0000000 --- a/pyFlyd.py +++ /dev/null @@ -1,6 +0,0 @@ -import re - - -re.search - -re.match diff --git a/src/communications_manager.py b/src/communications_manager.py index 841188c..185dbf4 100644 --- a/src/communications_manager.py +++ b/src/communications_manager.py @@ -26,13 +26,19 @@ UTF8 = 'utf-8' +# XXX testing +class DummyRepeaterProcessor: + def process(self, query): + Logger.debug(query) + return query + def is_request_end(data): return data.strip()[-1] == QUERY_END_SYMBOL class SocketCommunicationsManager: - def __init__(self, query_processor): + def __init__(self, query_processor=DummyRepeaterProcessor): self.processor = query_processor self.connection_trds = [] @@ -88,12 +94,5 @@ def close_connection(self): pass -# XXX testing -class DummyRepeaterProcessor: - def process(self, query): - Logger.debug(query) - return query - - if __name__ == '__main__': SocketCommunicationsManager(DummyRepeaterProcessor()).run() diff --git a/src/lib/utils.py b/src/lib/utils.py index cfae43b..0ca7bb1 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -31,6 +31,7 @@ def pairwise(iterable): def collect_identifiers(elems): identifiers = set() + elems = ensure_tuple(elems) for elem in elems: if isinstance(elem, models.Identifier): identifiers.add(elem) diff --git a/src/process_manager/process_manager.py b/src/process_manager/process_manager.py index e69de29..8a7a0aa 100644 --- a/src/process_manager/process_manager.py +++ b/src/process_manager/process_manager.py @@ -0,0 +1,57 @@ +import asyncio +import queue +from enum import Enum + +import concurrent.futures + +from src.query_processor.query_ast.plan import Operation + +# TODO set some state of operations +# TODO stop threads on exit ! + +DEFAULT_PROCESSES_LIMIT = 3 +QUEUE_MAX_SIZE = 30 +MAX_BLOCK_TIME = 60 # seconds + + +class TaskStatuses(Enum): + NEW = 'new' + WAITING = 'waiting' + RUNNING = 'running' + FINISHEd = 'finished' + + +class OperationTask: + def __init__(self, operation, future=None): + """ + An operation task. + Args: + operation: + future (Future): Whether a notification to be sent + """ + self.operation = operation + self.future = future + self.state = TaskStatuses.NEW + + +### MAIN ### + + +class ProcessManager(concurrent.futures.ThreadPoolExecutor): + def __init__(self, processes_limit=DEFAULT_PROCESSES_LIMIT): + super().__init__(processes_limit) + + def submit(self, operation): + """ + Schedules an operation for execution + Args: + operation (Operation): + + Returns: + Future: + """ + return super().submit(operation.method, *operation.args) + + def operation_worker(self): + # get task + pass diff --git a/src/query_processor/plan_executor.py b/src/query_processor/plan_executor.py index cf84dae..eecc060 100644 --- a/src/query_processor/plan_executor.py +++ b/src/query_processor/plan_executor.py @@ -1,5 +1,4 @@ - class PlanExecutor: def __init__(self, storage_manager, execution_scheduler): @@ -12,13 +11,13 @@ def _post_processors(): '''Return, sort, ...''' pass - async def execute(query, *args): + def execute(self, query_plan, *args): # TODO """ Registers operation to the scheduler and waits for it's result Args: - query (Query): + query_plan (QueryPlan): *args: Returns: @@ -48,24 +47,15 @@ def update_identifier_data(query, name, value): update_identifier_data(query, key, value) query.get_identifiers_map() + # TODO execute atomically ?? + for operation in query_plan.operations: + future = self.scheduler.submit(operation) + # on future ready: + # - populate results + # - execute next + # -- rework operation (as op may be split) + result = future.result() + populate_post_queries() - async def execute_sub_query(sub_query): - """ - Args: - sub_query: - - Returns: - dict: Identifier -> List[Node|Edge|...] - """ - return {} - - sub_queries = query.queries - cur_query = query.queries[0] - for idx in range(1, len(sub_queries)): - results = await execute_sub_query(cur_query) - # populate following queries (as they might share an identifier) - populate_post_queries(sub_queries[idx:], results) - cur_query = sub_queries[idx] - diff --git a/src/query_processor/query_ast/__init__.py b/src/query_processor/query_ast/__init__.py index 7baf2a8..dccc3ea 100644 --- a/src/query_processor/query_ast/__init__.py +++ b/src/query_processor/query_ast/__init__.py @@ -1 +1 @@ -__all__ = ["clauses", "expression", "operators", "models", "query"] +__all__ = ["clauses", "expression", "operators", "models", "query", "plan"] diff --git a/src/query_processor/query_ast/plan.py b/src/query_processor/query_ast/plan.py new file mode 100644 index 0000000..f59ed8b --- /dev/null +++ b/src/query_processor/query_ast/plan.py @@ -0,0 +1,39 @@ +""" +MATCH (a:b {c: 'd'}) RETURN a.b -> [find(Node, Identifier('a'), props] +""" + + +class Runnable: + def run(self): + raise NotImplementedError + + +class Operation(Runnable): + def __init__(self, method, *args): + """A wrapper for executor""" + self.method = method + self.args = args + + def run(self): + self.method(*self.args) + + +class QueryPlan: + def __init__(self, operations): + self.operations = operations + + +class LogicalQueryPlan(QueryPlan): + def __init__(self, query): + """ + + Args: + query (Query): + """ + pass + + +class PhysicalQueryPlan(QueryPlan): + def __init__(self, logical_query): + """""" + pass diff --git a/src/query_processor/query_processor.py b/src/query_processor/query_processor.py index a7f834d..00f63eb 100644 --- a/src/query_processor/query_processor.py +++ b/src/query_processor/query_processor.py @@ -1,12 +1,14 @@ +from query_processor.plan_executor import PlanExecutor +from query_processor.query_rewriter import QueryRewriter from src.query_processor.query_parser import QueryParser -import asyncio class QueryProcessor: - def __init__(self, plan_executor): - self.executor = plan_executor + def __init__(self, process_manager, storage_manager): + self.query_rewriter = QueryRewriter()#storage_manager) + self.plan_executor = PlanExecutor(execution_scheduler=process_manager) - async def process(self, query): + async def process(self, raw_query): """ Executes a passed query. Follows the steps: - Parse query -> QueryModel @@ -16,18 +18,10 @@ async def process(self, query): Args: query (str): """ - query = QueryParser.parse_query(query) - plan = QueryProcessor.query_rewrite(query) + parsed_query = QueryParser.parse_query(raw_query) + query_plan = self.query_rewriter.rewrite(parsed_query) # TODO query = QueryOptimizer.optimize(query) # plan executor - self.executor.execute(plan) - - - @staticmethod - def query_rewrite(query): - # TODO - return query - - + self.plan_executor.execute(query_plan) diff --git a/src/query_processor/query_rewriter.py b/src/query_processor/query_rewriter.py new file mode 100644 index 0000000..4b35faf --- /dev/null +++ b/src/query_processor/query_rewriter.py @@ -0,0 +1,41 @@ +from query_processor.query_ast.plan import QueryPlan + +class DummyStorageManager: + def find_node(self, identifier=None, *properties): + pass + + def find_edge(self, identifier=None, *properties): + pass + +clause_to_method = { + +} + + + + +class QueryRewriter: + + def __init__(self, storage_manager=None): + self.storage_manager = storage_manager + + def rewrite(self, query): + """ + + Args: + query (Query): + + find -> populate -> find for each -> ... + + Returns: + QueryPlan: + """ + # TODO + # identifiers -> to single instance + + for sub_query in query.sub_queries(): + sub_query + + return query + + diff --git a/src/query_processor/test_query_parser.py b/src/query_processor/test_query_parser.py deleted file mode 100644 index 633dec1..0000000 --- a/src/query_processor/test_query_parser.py +++ /dev/null @@ -1,525 +0,0 @@ -from unittest import TestCase - -from src.query_processor.query_ast.models import * -from src.query_processor.query_ast.clauses import * -from src.query_processor.query_ast.expression import * -from src.query_processor.query_ast.query import * -from src.query_processor.query_parser import * - - -# TODO [0] test string, [1] result object - -class TestQueryParser(TestCase): - # TODO test identifier - should return same object - - def test_parse_graph_expression(self): - # test with , - # test with 1 node - # test with edge (directed or none) - # test with node and edge - # test with more edges - ## self.assertRaises(InvalidGraphExpressionError, - ## parse_graph_expression, ['()-[]']) - ## self.assertRaises(EmptyGraphPatternExpressionError, - ## parse_graph_expression, ['()-[]-()']) - - self.assertEquals(parse_graph_expression(['(a)']), - GraphPatternExpression((SimpleGraphPatternExpression( - (Node(identifier=Identifier('a'))))))) - self.assertEquals(parse_graph_expression(['(a)-[]-(b)']), - GraphPatternExpression( - (SimpleGraphPatternExpression((Edge( - directed=False, - node_out=Node(identifier=Identifier('b')), - node_in=Node(identifier=Identifier('a')))))))) - self.assertEquals(parse_graph_expression(['(a)-[:b]-(b)']), - GraphPatternExpression( - (SimpleGraphPatternExpression((Edge( - label=Label('b'), - directed=False, - node_out=Node(identifier=Identifier('b')), - node_in=Node(identifier=Identifier('a')))))))) - self.assertEquals(parse_graph_expression(['(a)<-[]-(b)']), - GraphPatternExpression( - (SimpleGraphPatternExpression((Edge( - directed=True, - node_out=Node(identifier=Identifier('a')), - node_in=Node(identifier=Identifier('b')))))))) - - self.assertEquals(parse_graph_expression(['(a)<-[:b]-()-[:c]->(d)']), - GraphPatternExpression((SimpleGraphPatternExpression(( - Edge(label=Label('b'), - directed=True, - node_out=Node(identifier=Identifier('a')), - node_in=Node()), - Edge(label=Label('c'), - directed=True, - node_out=Node(identifier=Identifier('d')), - node_in=Node()) - ))))) - - self.assertEquals(parse_graph_expression(['(a)<-[]-(b)', - '(c)']), - GraphPatternExpression(( - SimpleGraphPatternExpression((Edge( - directed=True, - node_out=Node(identifier=Identifier('a')), - node_in=Node(identifier=Identifier('b'))))), - SimpleGraphPatternExpression( - (Node(identifier=Identifier('c'))))))) - - def test_get_properties(self): - self.assertEquals(get_properties(':lab {a: 1}'), - (Property('a', 1),), - 'Normal, spaced, int') - self.assertEquals(get_properties(':lab {a: 1.2}'), - (Property('a', 1.2),), - 'Normal, spaced, float') - self.assertEquals(get_properties(':lab {a: "b"} '), - (Property('a', 'b'),), - 'Normal spaced str') - self.assertEquals(get_properties(' {a: "b"} '), - (Property('a', 'b'),), - 'Normal; only props') - self.assertEquals(get_properties(' {a:"b"} '), - (Property('a', 'b'),), - 'Normal; no space') - - self.assertEquals(get_properties(' {a:"b", b:"c"} '), - (Property('a', 'b'), Property('b', 'c')), - 'Multi prop') - self.assertEquals(get_properties(' {a:"b", b:"c",c:1} '), - (Property('a', 'b'), Property('b', 'c'), - Property('c', 1)), - 'Multi prop') - self.assertEquals(get_properties('a:label '), ()) - - self.assertEquals(get_properties(' {a: c.b, b: a.b.d} '), - (Property('a', Identifier(name='c', fields=('b',))), - Property('b', - Identifier(name='a', fields=('b', 'd')))), - 'Variable properties') - self.assertEquals(get_properties(' {a: c.b, b:"c"} '), - (Property('a', Identifier(name='c', fields=('b',))), - Property('b', 'c')), - 'Variable properties') - self.assertEquals(get_properties(' {a: c.b.d.df, b:"c"} '), - (Property('a', Identifier(name='c', - fields=('b', 'd', 'df'))), - Property('b', 'c')), - 'Variable properties more') - # TODO test EXCEPTIONS - - def test_get_labels(self): - self.assertEquals(get_labels('a:lab {a: 1}'), - (Label('lab'),)) - self.assertEquals(get_labels('a:lab:lab1 {a: 1}'), - (Label('lab'), Label('lab1'))) - self.assertEquals(get_labels(':lab:lab1 {a: 1}'), - (Label('lab'), Label('lab1'))) - self.assertEquals(get_labels(':lab:lab1'), - (Label('lab'), Label('lab1'))) - self.assertEquals(get_labels(':lab:lab1:lab3'), - (Label('lab'), Label('lab1'), Label('lab3'))) - self.assertEquals(get_labels('a {}'), - ()) - - self.assertRaises(InvalidLabelsCountError, get_labels, 'a:b:c', - multi=False), - self.assertEquals(get_labels('a:b', multi=False), - Label('b')), - self.assertIs(get_labels('', multi=False), - None), - self.assertEquals(get_labels('', multi=True), - ()) - - # TODO test EXCEPTIONS - - def test_get_identifier(self): - self.assertEquals(get_identifier('a'), Identifier('a')) - self.assertEquals(get_identifier('a.b'), Identifier(name='a', - fields=('b',))) - self.assertEquals(get_identifier('a.bc.cf'), - Identifier(name='a', fields=('bc', 'cf'))) - self.assertEquals(get_identifier('a:b '), Identifier('a')) - self.assertEquals(get_identifier('a.b:b '), Identifier('a', ('b',))) - self.assertIs(get_identifier(':b'), None) - - def test_parse_edge(self): - n1 = Node(identifier='a') - n2 = Node(identifier='b') - # self.assertRaises(InvalidEdgeLabelError, - # parse_edge, '[]') - self.assertEquals(parse_edge('-[a]-', n1, n2), - Edge(identifier=Identifier('a'), - node_in=n1, node_out=n2)) - self.assertEquals(parse_edge('-[a {a: 1}]-', n1, n2), - Edge(identifier=Identifier('a'), - properties=(Property('a', 1),), - node_in=n1, node_out=n2)), - self.assertEquals(parse_edge('-[a:b {a: 1}]-', n1, n2), - Edge(identifier=Identifier('a'), - label=Label('b'), - node_in=n1, node_out=n2, - properties=(Property('a', 1),))) - self.assertEquals(parse_edge('-[:b {a: 1}]-', n1, n2), - Edge(label=Label('b'), - node_in=n1, node_out=n2, - properties=(Property('a', 1),))) - self.assertEquals(parse_edge('-[]-', n1, n2), - Edge(node_in=n1, node_out=n2)) - # Check direction - self.assertEquals(parse_edge('-[:b {a: 1}]->', n1, n2), - Edge(label=Label('b'), - node_in=n1, node_out=n2, - directed=True, - properties=(Property('a', 1),))) - self.assertEquals(parse_edge('<-[:b {a: 1}]-', n1, n2), - Edge(label=Label('b'), - node_in=n2, node_out=n1, - directed=True, - properties=(Property('a', 1),))) - - ## self.assertRaises(InvalidEdgeError, parse_edge, '-[]-', n1, n2) - - def test_parse_node(self): - # TODO clean up tests - self.assertEquals(parse_node('()'), - Node(), - 'Just a node') - - self.assertEquals(parse_node('(id:lab1)'), - Node(identifier=Identifier('id'), - labels=Label('lab1')), - 'Id and label') - - # - self.assertEquals(parse_node('(id)'), - Node(identifier=Identifier('id')), - 'Just id') - - self.assertEquals(parse_node('(id {a: 1})'), - Node(identifier=Identifier('id'), - properties=(Property('a', 1),)), - 'Id and props') - - self.assertEquals(parse_node('(id:lab {a: 1})'), - Node(identifier=Identifier('id'), - labels=Label('lab'), - properties=(Property('a', 1),)), - 'Id and props') - - self.assertEquals(parse_node('(:lab {a: 1})'), - Node(labels=Label('lab'), - properties=(Property('a', 1),)), - 'Label and prop') - - self.assertEquals(parse_node('(:lab:lab1:lab2 {a: 1})'), - Node(labels=(Label('lab'), Label('lab1'), - Label('lab2')), - properties=(Property('a', 1),)), - 'Many Labels and prop') - - self.assertEquals(parse_node('(:lab:lab1:lab2)'), - Node(labels=(Label('lab'), Label('lab1'), - Label('lab2'))), - 'Many Labels') - - # PROPERTIES - - self.assertEquals(parse_node('({a: 1})'), - Node(properties=(Property('a', 1),)), - 'Spaced prop'), - - self.assertEquals(parse_node('({a:1})'), - Node(properties=(Property('a', 1),)), - 'No Spaced prop'), - - self.assertEquals(parse_node('({a:1.12})'), - Node(properties=(Property('a', 1.12),)), - 'Float prop'), - - self.assertEquals(parse_node('({a:"abc"})'), - Node(properties=(Property('a', "abc"),)), - 'Prop with string') - - self.assertEquals(parse_node('({a:"abc"})'), - Node(properties=(Property('a', 'abc'))), - 'Prop with string') - - self.assertEquals(parse_node('({a:"abc", b: 1})'), - Node(properties=(Property('a', 'abc'), - Property('b', 1))), - 'Mixed properties') - # - # TODO raise Tests - # - - def test_list_split(self): - self.assertEquals(split_list('a1 b c d', ['a1']), - ['a1', 'b c d']) - - self.assertEquals(split_list('a1 b a2 d', ['a1', 'a2']), - ['a1', 'b', 'a2', 'd']) - - self.assertEquals(split_list('a1 b a1 d', ['a1']), - ['a1', 'b', 'a1', 'd']) - - self.assertEquals(split_list('a1 b a1 d', ['a1', 'a2']), - ['a1', 'b', 'a1', 'd']) - - self.assertEquals(split_list('a1 b a2 d a1 b', ['a1', 'a2']), - ['a1', 'b', 'a2', 'd', 'a1', 'b']) - - # Special conditions - self.assertEquals(split_list('Aa1 Ba a2 d aA1 b', ['aa1', 'a2']), - ['aa1', 'Ba', 'a2', 'd', 'aa1', 'b']) - - self.assertEquals(split_list('Aa Baa aA baab', ['aa']), - ['aa', 'Baa', 'aa', 'baab']) - # TODO substrings - - def test_parse_id_expression(self): - self.fail() - - def setUp(self): - self.parser = QueryParser() - - # lib - # Test Main Method - # - def test_compound(self): - COMPOUND_TEST_BIG = [( - 'MATCH (person:Person)-[:IS_FRIEND_OF]->(friend),' - '(friend)-[:LIKES]->(restaurant:Restaurant),' - '(restaurant)-[:LOCATED_IN]->(loc:Location),' - '(restaurant)-[:SERVES]->(type:Cuisine)' - - 'WHERE person.name = \'Philip\'' - 'AND loc.location = \'New York\'' - 'AND type.cuisine = \'Sushi\'' - - 'RETURN restaurant.name, count(*) AS occurrence' - 'ORDER BY occurrence DESC' - 'LIMIT 5' - ) - # TODO Translate - ] - - COMPOUND_TEST = [( - 'MATCH (neo:Database {name:"Neo4j"})\n' - 'MATCH (anna:Person {name:"Anna"})\n' - 'CREATE (anna)-[:FRIEND]->(:Person:Expert ' - '{name:"Amanda"})-[:WORKED_WITH]->(neo);' - ), - Query([ - SubQuery([ - Match(GraphPatternExpression([SimpleGraphPatternExpression( - [Node(labels=Label('Database'), - identifier=Identifier( - 'neo'), - properties=[ - Property('name', - 'Neo4j')])])])), - ]), - SubQuery([ - Match(GraphPatternExpression([Node(labels=Label('Person'), - identifier=Identifier( - 'anna'), - properties=[ - Property('name', - 'Anna')])])), - ]), - SubQuery([ - Create(GraphPatternExpression([Edge(label='FRIEND', - directed=True, - node_in=Node( - identifier=Identifier( - 'anna')), - node_out=Node( - labels=( - Label('Person'), - Label( - 'Expert')), - properties= - Property('name', - 'Amanda'))), - Edge(label='WORKED_WITH', - directed=True, - node_out=Node( - identifier=Identifier( - 'neo')), - node_in=Node( - labels=( - Label('Person'), - Label( - 'Expert')), - properties= - Property('name', - 'Amanda')))])) - ]) - ]) - ] - self.assertEqual(self.parser.parse_query(COMPOUND_TEST[0]), - COMPOUND_TEST[1]) - - # TODO test Expressions with ','hj - - def test_generic_expression(self): - self.assertEquals(parse_generic_expression(['a']), - GenericExpression(( - Identifier('a') - ))) - self.assertEquals(parse_generic_expression(['a.b.c']), - GenericExpression(( - Identifier(name='a', fields=('b', 'c')) - ))) - # TODO more cases - - def test_operator_expressions(self): - self.fail() - - def test_graph_expressions(self): - SIMPLE_TEST_MATCH_EDGE = [( - 'MATCH (you {name:"You"})-[:FRIEND]->(yourFriends)' - 'RETURN you, yourFriends' - ), - Query([ - SubQuery([ - Match(GraphPatternExpression([Edge(label='FRIEND', - directed=True, - node_in= - Node( - identifier=Identifier( - 'you'), - properties= - Property('name', - 'You')), - node_out=Node( - labels=Label( - 'yourFriends'), - properties= - Property('name', - 'Amanda')))])), - Return(['you', 'yourFriends']) - ]) - ]) - ] - - SIMPLE_TEST_CREATE_NODE = [( - 'CREATE (you:Person {name:"You"})' - 'RETURN you' - ), - Query([ - SubQuery([ - Create(GraphPatternExpression( - (SimpleGraphPatternExpression(( - Node(identifier=Identifier('you'), - labels=Label('Person'), - properties=Property('name', 'You')) - ),),) - )), - Return(GenericExpression((Identifier('you'),))) - ]) - ]) - ] - - TEST_MORE_EDGES = [( - 'MATCH (user)-[:PURCHASED]->(product)<-[:PURCHASED]-()-[:PURCHASED]->(otherProduct)' - 'RETURN user.name' - ), - ( - Query([ - SubQuery([Match( - GraphPatternExpression([Edge(label='PURCHASED', - directed=True, - node_in=Node( - identifier=Identifier( - 'user')), - node_out=Node( - identifier=Identifier( - 'product'))), - Edge(label='PURCHASED', - directed=True, - node_out=Node( - identifier=Identifier( - 'product')), - node_in=Node()), - Edge(label='PURCHASED', - directed=True, - node_out=Node( - identifier=Identifier( - 'otherProduct')), - node_in=Node()) - ])), - Return('user.name') - ])]) - )] - - # TODO Where - - self.assertEqual(self.parser.parse_query(SIMPLE_TEST_CREATE_NODE[0]), - SIMPLE_TEST_CREATE_NODE[1]) - self.assertEqual(self.parser.parse_query(SIMPLE_TEST_MATCH_EDGE[0]), - SIMPLE_TEST_MATCH_EDGE[1]) - self.assertEqual(self.parser.parse_query(TEST_MORE_EDGES[0]), - TEST_MORE_EDGES[1]) - - def test_query_utils(self): - idAnna1 = Identifier('anna') - idAnna2 = Identifier('anna') - idNeo1 = Identifier('neo') - idNeo2 = Identifier('neo') - query = Query([ - SubQuery([ - Match(GraphPatternExpression([SimpleGraphPatternExpression( - [Node(labels=Label('Database'), - identifier=idNeo2, - properties=[ - Property('name', - 'Neo4j')])])])), - ]), - SubQuery([ - Match(GraphPatternExpression([Node(labels=Label('Person'), - identifier=idAnna1, - properties=[ - Property('name', - 'Anna')])])), - ]), - SubQuery([ - Create(GraphPatternExpression([Edge(label='FRIEND', - directed=True, - node_in=Node( - identifier=idAnna2), - node_out=Node( - labels=( - Label('Person'), - Label( - 'Expert')), - properties= - Property('name', - 'Amanda'))), - Edge(label='WORKED_WITH', - directed=True, - node_out=Node( - identifier=idNeo1), - node_in=Node( - labels=( - Label('Person'), - Label( - 'Expert')), - properties= - Property('name', - 'Amanda')))])) - ]) - ]) - - self.assertEquals(Query.get_identifiers_map(query.sub_queries), - { - 'anna': {idAnna1, idAnna2}, - 'neo': {idNeo1, idNeo2} - }) - - - def test_exceptions(self): - self.fail() diff --git a/tests/query_engine/test_query_parser.py b/tests/query_engine/test_query_parser.py deleted file mode 100644 index e69de29..0000000 From b83977f6f1930504ac7801c62ab84282eeac5acf Mon Sep 17 00:00:00 2001 From: dandimitrov Date: Fri, 8 Jul 2016 16:24:43 +0300 Subject: [PATCH 22/22] Better style. --- src/query_processor/errors/syntax.py | 28 +++++++-------------- src/query_processor/query_ast/clauses.py | 2 +- src/query_processor/query_ast/expression.py | 2 +- src/query_processor/query_ast/models.py | 8 +++--- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/query_processor/errors/syntax.py b/src/query_processor/errors/syntax.py index c316e6e..7c9fe6c 100644 --- a/src/query_processor/errors/syntax.py +++ b/src/query_processor/errors/syntax.py @@ -1,19 +1,15 @@ class InvalidSyntaxError(Exception): def __init__(self, value): - """""" + """Base syntax error.""" self.value = value class UnsupportedClauseError(InvalidSyntaxError): - def __init__(self, value): - """""" - self.value = value + pass class UnsupportedExpressionType(InvalidSyntaxError): - def __init__(self, value): - """""" - InvalidSyntaxError.__init__(self, value) + pass class NumberOfOperandsError(InvalidSyntaxError): @@ -25,18 +21,15 @@ class InvalidOperationError(InvalidSyntaxError): class InvalidExpressionError(InvalidSyntaxError): - def __init__(self, value): - InvalidSyntaxError.__init__(self, value) + pass class InvalidGraphExpressionError(InvalidExpressionError): - def __init__(self, value): - InvalidExpressionError.__init__(self, value) + pass class BadGraphExpressionElementError(InvalidGraphExpressionError): - def __init__(self, value): - self.value = value + pass class InvalidNodeError(InvalidGraphExpressionError): @@ -46,8 +39,7 @@ def __init__(self, value, msg): class EmptyGraphPatternExpressionError(InvalidGraphExpressionError): - def __init__(self, value): - self.value = value + pass class InvalidGraphExpressionPropertiesError(InvalidGraphExpressionError): @@ -69,10 +61,8 @@ def __init__(self, value, msg): class InvalidLabelsCountError(InvalidGraphExpressionError): - def __init__(self): - pass + pass class InvalidOperatorExpression(InvalidExpressionError): - def __init__(self, value): - InvalidExpressionError.__init__(self, value) + pass diff --git a/src/query_processor/query_ast/clauses.py b/src/query_processor/query_ast/clauses.py index 05584e3..56ea098 100644 --- a/src/query_processor/query_ast/clauses.py +++ b/src/query_processor/query_ast/clauses.py @@ -38,7 +38,7 @@ # ... -class Clause(Printable, IdentifierHolder): +class Clause(Printable, IdentifierHolderMixin): expression_type = None def __init__(self, expression): diff --git a/src/query_processor/query_ast/expression.py b/src/query_processor/query_ast/expression.py index 3039f51..6f3a216 100644 --- a/src/query_processor/query_ast/expression.py +++ b/src/query_processor/query_ast/expression.py @@ -24,7 +24,7 @@ def validate_expression(self): pass -class IdentifierExpression(Expression, IdentifierHolder): +class IdentifierExpression(Expression, IdentifierHolderMixin): def __init__(self, elements, identifiers): """""" super().__init__(elements) diff --git a/src/query_processor/query_ast/models.py b/src/query_processor/query_ast/models.py index 1d1af33..1bf1aef 100644 --- a/src/query_processor/query_ast/models.py +++ b/src/query_processor/query_ast/models.py @@ -2,7 +2,7 @@ from src.lib.utils import ensure_tuple -class PropertiesHolder: +class PropertiesHolderMixin: def properties_as_dict(self): props = {} for prop in self.properties: @@ -10,7 +10,7 @@ def properties_as_dict(self): return props -class IdentifierHolder: +class IdentifierHolderMixin: def get_identifiers(self): raise NotImplementedError() @@ -112,7 +112,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ -class Edge(Printable, IdentifierHolder, PropertiesHolder): +class Edge(Printable, IdentifierHolderMixin, PropertiesHolderMixin): """ TODO: make it immutable An edge: @@ -193,7 +193,7 @@ def __init__(self, direction, label, nodeLeft, nodeRight, _id, variable, # TODO implement setters -class Node(Printable, IdentifierHolder, PropertiesHolder): +class Node(Printable, IdentifierHolderMixin, PropertiesHolderMixin): """ TODO: make it immutable A node: