"""
Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression
http://www.w3.org/TR/sparql11-query/#sparqlQuery
"""
import functools
import operator
import collections
from functools import reduce
from rdflib import Literal, Variable, URIRef, BNode
from rdflib.plugins.sparql.sparql import Prologue, Query
from rdflib.plugins.sparql.parserutils import CompValue, Expr
from rdflib.plugins.sparql.operators import (
and_,
TrueFilter,
simplify as simplifyFilters,
)
from rdflib.paths import InvPath, AlternativePath, SequencePath, MulPath, NegatedPath
from pyparsing import ParseResults
# ---------------------------
# Some convenience methods
from rdflib.term import Identifier
def OrderBy(p, expr):
    """
    Build an ``OrderBy`` algebra node with the keys ``p`` and ``expr``
    """
    node = CompValue("OrderBy", p=p, expr=expr)
    return node
def ToMultiSet(p):
    """
    Build a ``ToMultiSet`` algebra node wrapping ``p``
    """
    node = CompValue("ToMultiSet", p=p)
    return node
def Union(p1, p2):
    """
    Build a ``Union`` algebra node with the operands ``p1`` and ``p2``
    """
    node = CompValue("Union", p1=p1, p2=p2)
    return node
def Join(p1, p2):
    """
    Build a ``Join`` algebra node with the operands ``p1`` and ``p2``
    """
    node = CompValue("Join", p1=p1, p2=p2)
    return node
def Minus(p1, p2):
    """
    Build a ``Minus`` algebra node with the operands ``p1`` and ``p2``
    """
    node = CompValue("Minus", p1=p1, p2=p2)
    return node
def Graph(term, graph):
    """
    Build a ``Graph`` algebra node; ``graph`` is stored under the key ``p``
    """
    node = CompValue("Graph", term=term, p=graph)
    return node
def BGP(triples=None):
    """
    Build a ``BGP`` algebra node; a falsy ``triples`` becomes a fresh empty list
    """
    node = CompValue("BGP", triples=triples or [])
    return node
def LeftJoin(p1, p2, expr):
    """
    Build a ``LeftJoin`` algebra node with the keys ``p1``, ``p2`` and ``expr``
    """
    node = CompValue("LeftJoin", p1=p1, p2=p2, expr=expr)
    return node
def Filter(expr, p):
    """
    Build a ``Filter`` algebra node with the keys ``expr`` and ``p``
    """
    node = CompValue("Filter", expr=expr, p=p)
    return node
def Extend(p, expr, var):
    """
    Build an ``Extend`` algebra node with the keys ``p``, ``expr`` and ``var``
    """
    node = CompValue("Extend", p=p, expr=expr, var=var)
    return node
def Values(res):
    """
    Build a ``values`` algebra node (lower-case name) holding ``res``
    """
    node = CompValue("values", res=res)
    return node
def Project(p, PV):
    """
    Build a ``Project`` algebra node with the keys ``p`` and ``PV``
    """
    node = CompValue("Project", p=p, PV=PV)
    return node
def Group(p, expr=None):
    """
    Build a ``Group`` algebra node with the keys ``p`` and ``expr``
    """
    node = CompValue("Group", p=p, expr=expr)
    return node
def _knownTerms(triple, varsknown, varscount):
    """
    Sort key for one triple pattern, as used by reorderTriples

    Returns a 3-tuple:
    the number of Variable/BNode terms of the triple not yet in varsknown,
    the negated sum of the varscount entries of the triple's terms,
    and whether the object (third term) is not a Literal
    """
    unbound = sum(
        1
        for term in triple
        if term not in varsknown and isinstance(term, (Variable, BNode))
    )
    popularity = sum(varscount.get(term, 0) for term in triple)
    object_not_literal = not isinstance(triple[2], Literal)
    return (unbound, -popularity, object_not_literal)
def reorderTriples(l_):
    """
    Reorder triple patterns so that we execute the
    ones with most bindings first

    Returns a new list holding the same triples in the chosen order
    """
    # decorated entries are (sort key, triple); the key is filled in by sorting
    decorated = [(None, pattern) for pattern in l_]
    bound = set()
    occurrences = collections.defaultdict(int)

    for _, pattern in decorated:
        for term in pattern:
            if isinstance(term, (Variable, BNode)):
                occurrences[term] += 1

    # Done in steps: sort the remaining patterns by their key, mark the
    # variables of the leading block (all patterns sharing the lowest count of
    # unknown terms) as bound, then re-sort the tail with that knowledge.
    pos = 0
    while pos < len(decorated):
        decorated[pos:] = sorted(
            (_knownTerms(entry[1], bound, occurrences), entry[1])
            for entry in decorated[pos:]
        )
        top = decorated[pos][0][0]  # unknown-term count of the leading block
        offset = 0
        while pos + offset < len(decorated) and decorated[pos + offset][0][0] == top:
            for term in decorated[pos + offset][1]:
                if isinstance(term, (Variable, BNode)):
                    bound.add(term)
            offset += 1
        pos += 1

    return [entry[1] for entry in decorated]
def triples(l):
    """
    Concatenate the sequences in ``l``, cut the result into consecutive
    3-tuples and return them reordered by reorderTriples

    Raises Exception if the concatenated length is not a multiple of three
    """
    flat = reduce(operator.add, l)
    if len(flat) % 3:
        raise Exception("these aint triples")
    grouped = (
        (flat[start], flat[start + 1], flat[start + 2])
        for start in range(0, len(flat), 3)
    )
    return reorderTriples(grouped)
def translatePName(p, prologue):
    """
    Expand prefixed/relative URIs

    Returns the replacement for ``p``, or None if ``p`` is neither a
    "pname"/"literal" CompValue nor a URIRef
    """
    if isinstance(p, CompValue):
        kind = p.name
        if kind == "pname":
            return prologue.absolutize(p)
        if kind == "literal":
            text, lang = p.string, p.lang
            datatype = prologue.absolutize(p.datatype)
            return Literal(text, lang=lang, datatype=datatype)
        return None
    if isinstance(p, URIRef):
        return prologue.absolutize(p)
    return None
def translatePath(p):
    """
    Translate PropertyPath expressions

    Returns the path object for the recognised Path* CompValues and
    None for everything else
    """
    if not isinstance(p, CompValue):
        return None

    kind = p.name
    if kind == "PathAlternative":
        # a single alternative is just that part
        return p.part[0] if len(p.part) == 1 else AlternativePath(*p.part)

    if kind == "PathSequence":
        return p.part[0] if len(p.part) == 1 else SequencePath(*p.part)

    if kind == "PathElt":
        if not p.mod:
            return p.part
        if not isinstance(p.part, list):
            return MulPath(p.part, p.mod)
        if len(p.part) != 1:
            raise Exception("Denkfehler!")
        return MulPath(p.part[0], p.mod)

    if kind == "PathEltOrInverse":
        if not isinstance(p.part, list):
            return InvPath(p.part)
        if len(p.part) != 1:
            raise Exception("Denkfehler!")
        return InvPath(p.part[0])

    if kind == "PathNegatedPropertySet":
        if isinstance(p.part, list):
            return NegatedPath(AlternativePath(*p.part))
        return NegatedPath(p.part)

    return None
def translateExists(e):
    """
    Translate the graph pattern used by EXISTS and NOT EXISTS
    http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
    """

    def _translate_graph(node):
        if not isinstance(node, CompValue):
            return
        if node.name not in ("Builtin_EXISTS", "Builtin_NOTEXISTS"):
            return
        node.graph = translateGroupGraphPattern(node.graph)
        if node.graph.name == "Filter":
            # filters inside (NOT) EXISTS can see vars bound outside
            node.graph.no_isolated_scope = True

    return traverse(e, visitPost=_translate_graph)
def collectAndRemoveFilters(parts):
    """
    FILTER expressions apply to the whole group graph pattern in which
    they appear.
    http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters

    Removes every "Filter" part from ``parts`` in place and returns the
    conjunction of their translated expressions, or None without filters
    """
    collected = []
    idx = 0
    while idx < len(parts):
        part = parts[idx]
        if part.name != "Filter":
            idx += 1
            continue
        collected.append(translateExists(part.expr))
        parts.pop(idx)  # idx now points at the next part already

    return and_(*collected) if collected else None
def translateGroupOrUnionGraphPattern(graphPattern):
    """
    Translate each pattern in ``graphPattern.graph`` and chain the results
    into left-nested Union nodes; returns None if there are no patterns
    """
    result = None
    for alternative in graphPattern.graph:
        translated = translateGroupGraphPattern(alternative)
        result = Union(result, translated) if result else translated
    return result
def translateGraphGraphPattern(graphPattern):
    """
    Translate the inner pattern and wrap it in a Graph node for
    ``graphPattern.term``
    """
    inner = translateGroupGraphPattern(graphPattern.graph)
    return Graph(graphPattern.term, inner)
def translateInlineData(graphPattern):
    """
    Translate an inline data block via translateValues and wrap the result
    in a ToMultiSet node
    """
    values = translateValues(graphPattern)
    return ToMultiSet(values)
def translateGroupGraphPattern(graphPattern):
    """
    http://www.w3.org/TR/sparql11-query/#convertGraphPattern

    Returns the algebra node for the group; a SubSelect is translated as a
    whole and wrapped in ToMultiSet. The filters of the group are removed
    from ``graphPattern.part`` and applied around the final result
    """
    if graphPattern.name == "SubSelect":
        return ToMultiSet(translate(graphPattern)[0])

    if not graphPattern.part:
        graphPattern.part = []  # empty { }

    filters = collectAndRemoveFilters(graphPattern.part)

    # first pass: turn TriplesBlocks into BGPs, everything else is kept as is
    merged = []
    for part in graphPattern.part:
        if part.name != "TriplesBlock":
            merged.append(part)
            continue
        # merge adjacent TripleBlocks
        if not merged or merged[-1].name != "BGP":
            merged.append(BGP())
        merged[-1]["triples"] += triples(part.triples)

    # second pass: fold the parts, left to right, into one algebra expression
    translated_then_joined = {
        "GroupOrUnionGraphPattern": translateGroupOrUnionGraphPattern,
        "GraphGraphPattern": translateGraphGraphPattern,
        "InlineData": translateInlineData,
    }
    joined_as_is = ("ServiceGraphPattern", "BGP", "Extend")

    G = BGP()
    for part in merged:
        kind = part.name
        if kind == "OptionalGraphPattern":
            A = translateGroupGraphPattern(part.graph)
            if A.name == "Filter":
                G = LeftJoin(G, A.p, A.expr)
            else:
                G = LeftJoin(G, A, TrueFilter)
        elif kind == "MinusGraphPattern":
            G = Minus(p1=G, p2=translateGroupGraphPattern(part.graph))
        elif kind in translated_then_joined:
            G = Join(p1=G, p2=translated_then_joined[kind](part))
        elif kind in joined_as_is:
            G = Join(p1=G, p2=part)
        elif kind == "Bind":
            G = Extend(G, part.expr, part.var)
        else:
            raise Exception(
                "Unknown part in GroupGraphPattern: %s - %s" % (type(part), kind)
            )

    if filters:
        G = Filter(expr=filters, p=G)

    return G
class StopTraversal(Exception):
    """
    Raised by a visit function to abort a traversal early;
    ``rv`` is the value that traverse() then returns
    """

    def __init__(self, rv):
        # the value handed back to the caller of traverse()
        self.rv = rv
def _traverse(e, visitPre=lambda n: None, visitPost=lambda n: None):
"""
Traverse a parse-tree, visit each node
if visit functions return a value, replace current node
"""
_e = visitPre(e)
if _e is not None:
return _e
if e is None:
return None
if isinstance(e, (list, ParseResults)):
return [_traverse(x, visitPre, visitPost) for x in e]
elif isinstance(e, tuple):
return tuple([_traverse(x, visitPre, visitPost) for x in e])
elif isinstance(e, CompValue):
for k, val in e.items():
e[k] = _traverse(val, visitPre, visitPost)
_e = visitPost(e)
if _e is not None:
return _e
return e
def _traverseAgg(e, visitor=lambda n, v: None):
    """
    Traverse a parse-tree bottom-up and aggregate the results

    The visitor is called as visitor(node, results) where results is the
    list of values returned for the children of the node (for a CompValue
    the children that are None are left out); its return value is the
    result for the node
    """
    if isinstance(e, (list, ParseResults, tuple)):
        results = [_traverseAgg(child, visitor) for child in e]
    elif isinstance(e, CompValue):
        results = [
            _traverseAgg(child, visitor)
            for _, child in e.items()
            if child is not None
        ]
    else:
        results = []

    return visitor(e, results)
def traverse(tree, visitPre=lambda n: None, visitPost=lambda n: None, complete=None):
    """
    Traverse tree, visit each node with visit function
    visit function may raise StopTraversal to stop traversal
    if complete!=None, it is returned on complete traversal,
    otherwise the transformed tree is returned

    When StopTraversal is raised, its ``rv`` is returned instead
    """
    try:
        transformed = _traverse(tree, visitPre, visitPost)
    except StopTraversal as stop:
        return stop.rv
    return transformed if complete is None else complete
def _hasAggregate(x):
    """
    Traverse parse(sub)Tree
    return true if any aggregates are used

    Visit function: signals the hit by raising StopTraversal(True)
    """
    if isinstance(x, CompValue) and x.name.startswith("Aggregate_"):
        raise StopTraversal(True)
def _aggs(e, A):
    """
    Collect Aggregates in A
    replaces aggregates with variable references

    The variable is named after the position of the aggregate in A and is
    also stored on the aggregate under the key "res"
    """
    # TODO: nested Aggregates?
    if not (isinstance(e, CompValue) and e.name.startswith("Aggregate_")):
        return None

    A.append(e)
    aggvar = Variable("__agg_%d__" % len(A))
    e["res"] = aggvar
    return aggvar
def _findVars(x, res):
    """
    Find all variables in a tree

    Visit function adding to the set ``res``; for Bind and SubSelect the node
    itself is returned so that the traversal does not descend into it
    """
    if isinstance(x, Variable):
        res.add(x)

    if not isinstance(x, CompValue):
        return None

    if x.name == "Bind":
        res.add(x.var)
        return x  # stop recursion and finding vars in the expr

    if x.name == "SubSelect":
        if x.projection:
            res.update(v.var or v.evar for v in x.projection)
        return x

    return None
def _addVars(x, children):
    """
    find which variables may be bound by this part of the query

    Visitor for _traverseAgg: ``children`` holds the variable sets of the
    child nodes. CompValues get the result stored under the key "_vars"
    """
    if isinstance(x, Variable):
        return set([x])

    if not isinstance(x, CompValue):
        return reduce(operator.or_, children, set())

    if x.name == "RelationalExpression":
        x["_vars"] = set()
    elif x.name == "Extend":
        # vars only used in the expr for a bind should not be included
        kept = [child for child, part in zip(children, x) if part != "expr"]
        x["_vars"] = reduce(operator.or_, kept, set())
    else:
        x["_vars"] = set(reduce(operator.or_, children, set()))
        if x.name == "SubSelect":
            # outside of a sub-select only its projected variables count
            if x.projection:
                return set(v.var or v.evar for v in x.projection)
            return set()

    return x["_vars"]
def _sample(e, v=None):
    """
    For each unaggregated variable V in expr
    Replace V with Sample(V)

    Visit function; the variable equal to ``v`` is left alone
    """
    if isinstance(e, CompValue) and e.name.startswith("Aggregate_"):
        return e  # do not replace vars in aggregates

    if isinstance(e, Variable) and v != e:
        return CompValue("Aggregate_Sample", vars=e)

    return None
def _simplifyFilters(e):
    """
    Visit function: run simplifyFilters on Expr nodes, leave the rest alone
    """
    if not isinstance(e, Expr):
        return None
    return simplifyFilters(e)
def translateAggregates(q, M):
    """
    Collect the aggregates used by the query ``q`` and replace them with
    variable references

    Returns (AggregateJoin node over M, E) where E is a list of
    (aggregate result variable, projected variable) pairs
    """
    E = []
    A = []
    collect = functools.partial(_aggs, A=A)

    # collect/replace aggs in :
    # select expr as ?var
    if q.projection:
        for item in q.projection:
            if item.evar:
                item.expr = traverse(
                    item.expr, functools.partial(_sample, v=item.evar)
                )
                item.expr = traverse(item.expr, collect)

    # having clause
    if traverse(q.having, _hasAggregate, complete=False):
        q.having = traverse(q.having, _sample)
        traverse(q.having, collect)

    # order by
    if traverse(q.orderby, _hasAggregate, complete=False):
        q.orderby = traverse(q.orderby, _sample)
        traverse(q.orderby, collect)

    # sample all other select vars
    # TODO: only allowed for vars in group-by?
    if q.projection:
        for item in q.projection:
            if item.var:
                rv = Variable("__agg_%d__" % (len(A) + 1))
                A.append(CompValue("Aggregate_Sample", vars=item.var, res=rv))
                E.append((rv, item.var))

    return CompValue("AggregateJoin", A=A, p=M), E
def translateValues(v):
    """
    Turn a VALUES clause into a Values node holding one dict per row

    NOTE: without variables or without values a plain empty list is
    returned instead of a Values node
    """
    # if len(v.var)!=len(v.value):
    #    raise Exception("Unmatched vars and values in ValueClause: "+str(v))
    res = []
    if not v.var or not v.value:
        return res

    if isinstance(v.value[0], list):
        # several variables: every row is a list of values
        res.extend(dict(zip(v.var, vals)) for vals in v.value)
    else:
        # a single variable: every row is the bare value
        only_var = v.var[0]
        res.extend({only_var: val} for val in v.value)

    return Values(res)
def translate(q):
    """
    http://www.w3.org/TR/sparql11-query/#convertSolMod

    Translate the query ``q`` (where clause plus solution modifiers) into an
    algebra expression

    Returns (M, PV): the algebra expression and the list of projected
    variables. Every projected variable is listed in PV once, even if the
    projection names it repeatedly
    """
    _traverse(q, _simplifyFilters)

    q.where = traverse(q.where, visitPost=translatePath)

    # TODO: Var scope test
    VS = set()
    traverse(q.where, functools.partial(_findVars, res=VS))

    # all query types have a where part
    M = translateGroupGraphPattern(q.where)

    aggregate = False
    if q.groupby:
        conditions = []
        # convert "GROUP BY (?expr as ?var)" to an Extend
        for c in q.groupby.condition:
            if isinstance(c, CompValue) and c.name == "GroupAs":
                M = Extend(M, c.expr, c.var)
                c = c.var
            conditions.append(c)

        M = Group(p=M, expr=conditions)
        aggregate = True
    elif (
        traverse(q.having, _hasAggregate, complete=False)
        or traverse(q.orderby, _hasAggregate, complete=False)
        or any(
            traverse(x.expr, _hasAggregate, complete=False)
            for x in q.projection or []
            if x.evar
        )
    ):
        # if any aggregate is used, implicit group by
        M = Group(p=M)
        aggregate = True

    if aggregate:
        M, E = translateAggregates(q, M)
    else:
        E = []

    # HAVING
    if q.having:
        M = Filter(expr=and_(*q.having.condition), p=M)

    # VALUES
    if q.valuesClause:
        M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause)))

    if not q.projection:
        # select *
        PV = list(VS)
    else:
        PV = list()
        for v in q.projection:
            if v.var:
                # PV holds variables, so the duplicate check has to look at
                # the variable and not at the projection item itself
                if v.var not in PV:
                    PV.append(v.var)
            elif v.evar:
                if v.evar not in PV:
                    PV.append(v.evar)

                E.append((v.expr, v.evar))
            else:
                raise Exception("I expected a var or evar here!")

    for e, v in E:
        M = Extend(M, e, v)

    # ORDER BY
    if q.orderby:
        M = OrderBy(
            M,
            [
                CompValue("OrderCondition", expr=c.expr, order=c.order)
                for c in q.orderby.condition
            ],
        )

    # PROJECT
    M = Project(M, PV)

    if q.modifier:
        if q.modifier == "DISTINCT":
            M = CompValue("Distinct", p=M)
        elif q.modifier == "REDUCED":
            M = CompValue("Reduced", p=M)

    if q.limitoffset:
        offset = 0
        if q.limitoffset.offset is not None:
            offset = q.limitoffset.offset.toPython()

        if q.limitoffset.limit is not None:
            M = CompValue(
                "Slice", p=M, start=offset, length=q.limitoffset.limit.toPython()
            )
        else:
            # no LIMIT: the Slice node carries no "length" key
            M = CompValue("Slice", p=M, start=offset)

    return M, PV
def simplify(n):
    """
    Remove joins to empty BGPs

    Visit function: a Join with an empty BGP on one side is replaced by its
    other side; the triples of a BGP are reordered
    """
    if not isinstance(n, CompValue):
        return None

    if n.name == "Join":
        left, right = n.p1, n.p2
        if left.name == "BGP" and len(left.triples) == 0:
            return right
        if right.name == "BGP" and len(right.triples) == 0:
            return left
    elif n.name == "BGP":
        n["triples"] = reorderTriples(n.triples)
        return n

    return None
def analyse(n, children):
    """
    Some things can be lazily joined.
    This propagates whether they can up the tree
    and sets lazy flags for all joins

    Visitor for _traverseAgg: ``children`` holds the results for the child
    nodes; the result says whether ``n`` can be evaluated lazily
    """
    if not isinstance(n, CompValue):
        return True

    if n.name == "Join":
        n["lazy"] = all(children)
        return False

    if n.name in ("Slice", "Distinct"):
        return False

    return all(children)
def translatePrologue(p, base, initNs=None, prologue=None):
    """
    Apply the declarations ``p`` to a prologue and return it

    A new Prologue with an empty base is created unless ``prologue`` is
    given. ``base`` and the ``initNs`` bindings are applied first, so that
    the Base/PrefixDecl declarations in ``p`` take precedence
    """
    if prologue is None:
        prologue = Prologue()
        prologue.base = ""

    if base:
        prologue.base = base

    for prefix, namespace in (initNs or {}).items():
        prologue.bind(prefix, namespace)

    for declaration in p:
        if declaration.name == "Base":
            prologue.base = declaration.iri
        elif declaration.name == "PrefixDecl":
            prologue.bind(declaration.prefix, prologue.absolutize(declaration.iri))

    return prologue
def translateQuads(quads):
    """
    Split a quad pattern into its plain triples and its per-graph triples

    Returns (alltriples, allquads) where allquads maps a graph term to the
    list of triples given for it
    """
    alltriples = triples(quads.triples) if quads.triples else []

    allquads = collections.defaultdict(list)
    for quad in quads.quadsNotTriples or ():
        if quad.triples:
            allquads[quad.term] += triples(quad.triples)

    return alltriples, allquads
def translateUpdate1(u, prologue):
    """
    Translate a single update operation ``u`` in place and return it

    The prologue is attached to the operation as ``u.prologue``.
    Raises Exception for an unknown kind of operation
    """
    kind = u.name
    if kind in ("Load", "Clear", "Drop", "Create", "Add", "Move", "Copy"):
        pass  # no translation needed
    elif kind in ("InsertData", "DeleteData", "DeleteWhere"):
        plain, per_graph = translateQuads(u.quads)
        u["quads"] = per_graph
        u["triples"] = plain
        # TODO: check for bnodes in triples (DeleteWhere, DeleteData)
    elif kind == "Modify":
        if u.delete:
            u.delete["triples"], u.delete["quads"] = translateQuads(u.delete.quads)
        if u.insert:
            u.insert["triples"], u.insert["quads"] = translateQuads(u.insert.quads)
        u["where"] = translateGroupGraphPattern(u.where)
    else:
        raise Exception("Unknown type of update operation: %s" % u)

    u.prologue = prologue
    return u
def translateUpdate(q, base=None, initNs=None):
    """
    Returns a list of SPARQL Update Algebra expressions

    The prologue is carried over from one operation of the request to the
    next one
    """
    res = []
    if not q.request:
        return res

    prologue = None
    for declarations, operation in zip(q.prologue, q.request):
        prologue = translatePrologue(declarations, base, initNs, prologue)

        # absolutize/resolve prefixes
        resolve = functools.partial(translatePName, prologue=prologue)
        operation = traverse(operation, visitPost=resolve)
        operation = _traverse(operation, _simplifyFilters)
        operation = traverse(operation, visitPost=translatePath)

        res.append(translateUpdate1(operation, prologue))

    return res
def translateQuery(q, base=None, initNs=None):
    """
    Translate a query-parsetree to a SPARQL Algebra Expression

    Return a rdflib.plugins.sparql.sparql.Query object
    """
    # We get in: (prologue, query)
    prologue = translatePrologue(q[0], base, initNs)

    # absolutize/resolve prefixes
    resolve = functools.partial(translatePName, prologue=prologue)
    q[1] = traverse(q[1], visitPost=resolve)

    P, PV = translate(q[1])
    query = q[1]
    datasetClause = query.datasetClause

    if query.name == "ConstructQuery":
        template = triples(query.template) if query.template else None
        res = CompValue(
            query.name, p=P, template=template, datasetClause=datasetClause
        )
    else:
        res = CompValue(query.name, p=P, datasetClause=datasetClause, PV=PV)

    res = traverse(res, visitPost=simplify)
    _traverseAgg(res, visitor=analyse)
    _traverseAgg(res, _addVars)

    return Query(prologue, res)
class ExpressionNotCoveredException(Exception):
    """
    Raised by translateAlgebra for expressions it cannot turn into query text
    """
def translateAlgebra(query_algebra: Query):
    """
    Turn the algebra of a select query back into SPARQL query text

    The text is built by substitution: the SelectQuery node writes a first
    template, and every node visited afterwards replaces its own "{Name}"
    placeholder with its rendering, which in turn contains the placeholders
    of its children. The text is kept in memory, nothing is written to disk.

    NOTE: the tree is walked with traverse(), which reassigns the values of
    the visited CompValues; EXISTS, NOT EXISTS and SERVICE nodes are thereby
    replaced by their graph in ``query_algebra``.

    :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree).
    :return: The query form generated from the SPARQL 1.1 algebra tree for select queries.
    :raises ExpressionNotCoveredException: for expressions that are not
        covered, including algebra trees that do not start with a SelectQuery
    """
    # the query text built so far; None until the SelectQuery node was visited
    query_text = None

    def current_text():
        # Guard against substituting into a text that was never started
        if query_text is None:
            raise ExpressionNotCoveredException(
                "Only algebra trees rooted at a SelectQuery are covered."
            )
        return query_text

    def overwrite(text):
        nonlocal query_text
        query_text = text

    def find_nth(haystack, needle, n):
        # start of the n-th case-insensitive match of the (lower-case) needle
        lowered = haystack.lower()
        start = lowered.find(needle)
        while start >= 0 and n > 1:
            start = lowered.find(needle, start + len(needle))
            n -= 1
        return start

    def replace(
        old,
        new,
        search_from_match: str = None,
        search_from_match_occurrence: int = None,
        count: int = 1,
    ):
        # Replace ``count`` occurrences of ``old`` (-1: all). With a match and
        # an occurrence given, only the text from that occurrence of the match
        # onwards is searched.
        nonlocal query_text
        text = current_text()
        if search_from_match and search_from_match_occurrence:
            position = find_nth(text, search_from_match, search_from_match_occurrence)
            query_text = text[:position] + text[position:].replace(old, new, count)
        else:
            query_text = text.replace(old, new, count)

    def convert_node_arg(node_arg):
        # terms are rendered right away, nodes become a placeholder that is
        # filled in when the node itself is visited
        if isinstance(node_arg, Identifier):
            return node_arg.n3()
        elif isinstance(node_arg, CompValue):
            return "{" + node_arg.name + "}"
        elif isinstance(node_arg, Expr):
            return "{" + node_arg.name + "}"
        elif isinstance(node_arg, str):
            return node_arg
        else:
            raise ExpressionNotCoveredException(
                "The expression {0} might not be covered yet.".format(node_arg)
            )

    def triples_text(triple_patterns):
        # Identifiers or Paths
        # Negated path throws a type error. Probably n3() method of negated paths should be fixed
        return "".join(
            triple[0].n3() + " " + triple[1].n3() + " " + triple[2].n3() + "."
            for triple in triple_patterns
        )

    def call(function, *args):
        return function + "(" + ", ".join(args) + ")"

    # Function-like builtins that are rendered as NAME(arg, ...).
    # Rows: (suffix the node name has to end with, name used in the
    # placeholder, spelling of the function, attributes holding the arguments)
    one_arg = ("arg",)
    two_args = ("arg1", "arg2")
    suffix_functions = [("BOUND", "Builtin_BOUND", "bound", one_arg)]
    suffix_functions += [
        ("Builtin_" + fn, "Builtin_" + fn, fn, one_arg)
        for fn in (
            "isIRI",
            "isBLANK",
            "isLITERAL",
            "isNUMERIC",
            "STR",
            "LANG",
            "DATATYPE",
            "IRI",
            "BNODE",
            "STRLEN",
            "UCASE",
            "LCASE",
            "ENCODE_FOR_URI",
        )
    ]
    suffix_functions += [
        ("sameTerm", "Builtin_sameTerm", "SAMETERM", two_args),
        ("STRDT", "Builtin_STRDT", "STRDT", two_args),
    ]
    suffix_functions += [
        ("Builtin_" + fn, "Builtin_" + fn, fn, two_args)
        for fn in (
            "STRLANG",
            "STRSTARTS",
            "STRENDS",
            "CONTAINS",
            "STRBEFORE",
            "STRAFTER",
            "LANGMATCHES",
        )
    ]
    suffix_functions += [
        ("Builtin_" + fn, "Builtin_" + fn, fn, ()) for fn in ("UUID", "STRUUID")
    ]
    # Builtins that are matched by their exact node name:
    # numerics, dates and times, hash functions
    exact_functions = {
        "Builtin_" + fn: (fn, one_arg)
        for fn in (
            "ABS",
            "ROUND",
            "CEIL",
            "FLOOR",
            "YEAR",
            "MONTH",
            "DAY",
            "HOURS",
            "MINUTES",
            "SECONDS",
            "TIMEZONE",
            "TZ",
            "MD5",
            "SHA1",
            "SHA256",
            "SHA384",
            "SHA512",
        )
    }
    exact_functions.update({"Builtin_" + fn: (fn, ()) for fn in ("RAND", "NOW")})

    def function_text(node, name):
        # Render a function-like builtin from the tables above;
        # names found in neither table are silently left alone
        for suffix, builtin, function, params in suffix_functions:
            if name.endswith(suffix):
                args = [convert_node_arg(getattr(node, param)) for param in params]
                replace("{" + builtin + "}", call(function, *args))
                return
        if name in exact_functions:
            function, params = exact_functions[name]
            args = [convert_node_arg(getattr(node, param)) for param in params]
            replace("{" + name + "}", call(function, *args))

    def sparql_query_text(node):
        """
        https://www.w3.org/TR/sparql11-query/#sparqlSyntax

        visitPre function: fills in the placeholder of ``node``. Returns None
        to let the traversal descend into the node, or the graph of the node
        where the graph was already traversed here

        :param node:
        :return:
        """
        if not isinstance(node, CompValue):
            return None
        name = node.name

        # 18.2 Query Forms
        if name == "SelectQuery":
            overwrite("-*-SELECT-*- " + "{" + node.p.name + "}")

        # 18.2 Graph Patterns
        elif name == "BGP":
            replace("{BGP}", triples_text(node.triples))
            # The dummy -*-SELECT-*- is placed during a SelectQuery or Multiset pattern in order to be able
            # to match extended variables in a specific Select-clause (see "Extend" below)
            replace("-*-SELECT-*-", "SELECT", count=-1)
            # If there is no "Group By" clause the placeholder will simply be deleted. Otherwise there will be
            # no matching {GroupBy} placeholder because it has already been replaced by "group by variables"
            replace("{GroupBy}", "", count=-1)
            replace("{Having}", "", count=-1)
        elif name == "Join":
            replace("{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}")
        elif name == "LeftJoin":
            replace(
                "{LeftJoin}",
                "{" + node.p1.name + "}OPTIONAL{{" + node.p2.name + "}}",
            )
        elif name == "Filter":
            if isinstance(node.expr, CompValue):
                expr = node.expr.name
            else:
                raise ExpressionNotCoveredException(
                    "This expression might not be covered yet."
                )
            if node.p:
                # Filter with p=AggregateJoin = Having
                if node.p.name == "AggregateJoin":
                    replace("{Filter}", "{" + node.p.name + "}")
                    replace("{Having}", "HAVING({" + expr + "})")
                else:
                    replace("{Filter}", "FILTER({" + expr + "}) {" + node.p.name + "}")
            else:
                replace("{Filter}", "FILTER({" + expr + "})")
        elif name == "Union":
            replace("{Union}", "{{" + node.p1.name + "}}UNION{{" + node.p2.name + "}}")
        elif name == "Graph":
            replace("{Graph}", "GRAPH " + node.term.n3() + " {{" + node.p.name + "}}")
        elif name == "Extend":
            # the variable is turned into "(expr as ?var)" in the select
            # clause that was opened last
            select_occurrences = current_text().lower().count("-*-select-*-")
            replace(
                node.var.n3(),
                "(" + convert_node_arg(node.expr) + " as " + node.var.n3() + ")",
                search_from_match="-*-select-*-",
                search_from_match_occurrence=select_occurrences,
            )
            replace("{Extend}", "{" + node.p.name + "}")
        elif name == "Minus":
            replace("{Minus}", "{" + node.p1.name + "}MINUS{{" + node.p2.name + "}}")
        elif name == "Group":
            group_by_vars = []
            if node.expr:
                for var in node.expr:
                    if isinstance(var, Identifier):
                        group_by_vars.append(var.n3())
                    else:
                        raise ExpressionNotCoveredException(
                            "This expression might not be covered yet."
                        )
            replace("{Group}", "{" + node.p.name + "}")
            if node.expr:
                replace("{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " ")
        elif name == "AggregateJoin":
            replace("{AggregateJoin}", "{" + node.p.name + "}")
            for agg_func in node.A:
                if isinstance(agg_func.res, Identifier):
                    identifier = agg_func.res.n3()
                else:
                    raise ExpressionNotCoveredException(
                        "This expression might not be covered yet."
                    )
                agg_func_name = agg_func.name.split("_")[1]
                distinct = ""
                if agg_func.distinct:
                    distinct = agg_func.distinct + " "
                if agg_func_name == "GroupConcat":
                    replace(
                        identifier,
                        "GROUP_CONCAT"
                        + "("
                        + distinct
                        + agg_func.vars.n3()
                        + ";SEPARATOR="
                        + agg_func.separator.n3()
                        + ")",
                    )
                else:
                    replace(
                        identifier,
                        agg_func_name.upper()
                        + "("
                        + distinct
                        + convert_node_arg(agg_func.vars)
                        + ")",
                    )
                # For non-aggregated variables the aggregation function "sample" is automatically assigned.
                # However, we do not want to have "sample" wrapped around non-aggregated variables. That is
                # why we replace it. If "sample" is used on purpose it will not be replaced as the alias
                # must be different from the variable in this case.
                replace(
                    "(SAMPLE({0}) as {0})".format(convert_node_arg(agg_func.vars)),
                    convert_node_arg(agg_func.vars),
                )
        elif name == "GroupGraphPatternSub":
            replace(
                "GroupGraphPatternSub",
                " ".join([convert_node_arg(pattern) for pattern in node.part]),
            )
        elif name == "TriplesBlock":
            replace("{TriplesBlock}", triples_text(node.triples))

        # 18.2 Solution modifiers
        elif name == "ToList":
            raise ExpressionNotCoveredException(
                "This expression might not be covered yet."
            )
        elif name == "OrderBy":
            order_conditions = []
            for c in node.expr:
                if isinstance(c.expr, Identifier):
                    var = c.expr.n3()
                    if c.order is not None:
                        cond = var + "(" + c.order + ")"
                    else:
                        cond = var
                    order_conditions.append(cond)
                else:
                    raise ExpressionNotCoveredException(
                        "This expression might not be covered yet."
                    )
            replace("{OrderBy}", "{" + node.p.name + "}")
            replace("{OrderConditions}", " ".join(order_conditions) + " ")
        elif name == "Project":
            project_variables = []
            for var in node.PV:
                if isinstance(var, Identifier):
                    project_variables.append(var.n3())
                else:
                    raise ExpressionNotCoveredException(
                        "This expression might not be covered yet."
                    )
            order_by_pattern = ""
            if node.p.name == "OrderBy":
                order_by_pattern = "ORDER BY {OrderConditions}"
            replace(
                "{Project}",
                " ".join(project_variables)
                + "{{"
                + node.p.name
                + "}}"
                + "{GroupBy}"
                + order_by_pattern
                + "{Having}",
            )
        elif name == "Distinct":
            replace("{Distinct}", "DISTINCT {" + node.p.name + "}")
        elif name == "Reduced":
            replace("{Reduced}", "REDUCED {" + node.p.name + "}")
        elif name == "Slice":
            slice_clause = "OFFSET " + str(node.start)
            # a Slice built for a query without LIMIT has no length
            if node.length is not None:
                slice_clause += " LIMIT " + str(node.length)
            replace("{Slice}", "{" + node.p.name + "}" + slice_clause)
        elif name == "ToMultiSet":
            if node.p.name == "values":
                replace("{ToMultiSet}", "{{" + node.p.name + "}}")
            else:
                replace(
                    "{ToMultiSet}", "{-*-SELECT-*- " + "{" + node.p.name + "}" + "}"
                )

        # 18.2 Property Path

        # 17 Expressions and Testing Values
        # # 17.3 Operator Mapping
        elif name == "RelationalExpression":
            expr = convert_node_arg(node.expr)
            op = node.op
            # IN / NOT IN come with a list of expressions on the right side
            if isinstance(node.other, (list, ParseResults)):
                other = (
                    "("
                    + ", ".join(convert_node_arg(item) for item in node.other)
                    + ")"
                )
            else:
                other = convert_node_arg(node.other)
            condition = "{left} {operator} {right}".format(
                left=expr, operator=op, right=other
            )
            replace("{RelationalExpression}", condition)
        elif name == "ConditionalAndExpression":
            inner_nodes = " && ".join([convert_node_arg(expr) for expr in node.other])
            replace(
                "{ConditionalAndExpression}",
                convert_node_arg(node.expr) + " && " + inner_nodes,
            )
        elif name == "ConditionalOrExpression":
            inner_nodes = " || ".join([convert_node_arg(expr) for expr in node.other])
            replace(
                "{ConditionalOrExpression}",
                "(" + convert_node_arg(node.expr) + " || " + inner_nodes + ")",
            )
        elif name == "MultiplicativeExpression":
            multiplication = convert_node_arg(node.expr)
            for i, symbol in enumerate(node.op):
                multiplication += symbol + " " + convert_node_arg(node.other[i]) + " "
            replace("{MultiplicativeExpression}", multiplication)
        elif name == "AdditiveExpression":
            addition = convert_node_arg(node.expr)
            for i, symbol in enumerate(node.op):
                addition += symbol + " " + convert_node_arg(node.other[i]) + " "
            replace("{AdditiveExpression}", addition)
        elif name == "UnaryNot":
            replace("{UnaryNot}", "!" + convert_node_arg(node.expr))

        # Other
        elif name == "values":
            columns = []
            for key in node.res[0].keys():
                if isinstance(key, Identifier):
                    columns.append(key.n3())
                else:
                    raise ExpressionNotCoveredException(
                        "The expression {0} might not be covered yet.".format(key)
                    )
            values = "VALUES (" + " ".join(columns) + ")"

            rows = ""
            for elem in node.res:
                row = []
                for term in elem.values():
                    if isinstance(term, Identifier):
                        # n3() is not part of Identifier class but every subclass has it
                        row.append(term.n3())
                    elif isinstance(term, str):
                        row.append(term)
                    else:
                        raise ExpressionNotCoveredException(
                            "The expression {0} might not be covered yet.".format(term)
                        )
                rows += "(" + " ".join(row) + ")"

            replace("values", values + "{" + rows + "}")
        elif name == "ServiceGraphPattern":
            replace(
                "{ServiceGraphPattern}",
                "SERVICE " + convert_node_arg(node.term) + "{" + node.graph.name + "}",
            )
            traverse(node.graph, visitPre=sparql_query_text)
            return node.graph

        # # 17.4 Function Definitions
        # # # 17.4.1 Functional Forms
        elif name.endswith("IF"):
            arg2 = convert_node_arg(node.arg2)
            arg3 = convert_node_arg(node.arg3)
            if_expression = (
                "IF(" + "{" + node.arg1.name + "}, " + arg2 + ", " + arg3 + ")"
            )
            replace("{Builtin_IF}", if_expression)
        elif name.endswith("COALESCE"):
            replace(
                "{Builtin_COALESCE}",
                "COALESCE(" + ", ".join(convert_node_arg(arg) for arg in node.arg) + ")",
            )
        elif name.endswith("Builtin_EXISTS"):
            # The node's name which we get with node.graph.name returns "Join" instead of GroupGraphPatternSub
            # According to https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rExistsFunc
            # ExistsFunc can only have a GroupGraphPattern as parameter. However, when we print the query algebra
            # we get a GroupGraphPatternSub
            replace("{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}")
            traverse(node.graph, visitPre=sparql_query_text)
            return node.graph
        elif name.endswith("Builtin_NOTEXISTS"):
            # The node's name which we get with node.graph.name returns "Join" instead of GroupGraphPatternSub
            # According to https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rNotExistsFunc
            # NotExistsFunc can only have a GroupGraphPattern as parameter. However, when we print the query algebra
            # we get a GroupGraphPatternSub
            replace(
                "{Builtin_NOTEXISTS}", "NOT EXISTS " + "{{" + node.graph.name + "}}"
            )
            traverse(node.graph, visitPre=sparql_query_text)
            return node.graph

        # # # 17.4.3 Functions on Strings
        elif name.endswith("Builtin_SUBSTR"):
            args = [node.arg.n3(), node.start]
            if node.length:
                args.append(node.length)
            replace("{Builtin_SUBSTR}", "SUBSTR(" + ", ".join(args) + ")")
        elif name.endswith("Builtin_CONCAT"):
            expr = "CONCAT({vars})".format(
                vars=", ".join(elem.n3() for elem in node.arg)
            )
            replace("{Builtin_CONCAT}", expr)
        elif name.endswith("REGEX"):
            args = [convert_node_arg(node.text), convert_node_arg(node.pattern)]
            replace("{Builtin_REGEX}", "REGEX(" + ", ".join(args) + ")")
        elif name.endswith("REPLACE"):
            replace(
                "{Builtin_REPLACE}",
                "REPLACE("
                + convert_node_arg(node.arg)
                + ", "
                + convert_node_arg(node.pattern)
                + ", "
                + convert_node_arg(node.replacement)
                + ")",
            )

        # all remaining function-like builtins (17.4.1 - 17.4.6)
        # # # # 17.4.1.5 logical-or: Covered in "RelationalExpression"
        # # # # 17.4.1.6 logical-and: Covered in "RelationalExpression"
        # # # # 17.4.1.7 RDFterm-equal: Covered in "RelationalExpression"
        # # # # IN: Covered in "RelationalExpression"
        # # # # NOT IN: Covered in "RelationalExpression"
        else:
            function_text(node, name)

        return None

    traverse(query_algebra.algebra, visitPre=sparql_query_text)
    return current_text()
def pprintAlgebra(q):
    """
    Print the algebra of a translated query, or of every operation of a
    translated update, as an indented tree
    """

    def pp(p, ind=" "):
        if not isinstance(p, CompValue):
            print(p)
            return

        print("%s(" % (p.name,))
        for key in p:
            label = "%s%s =" % (ind, key)
            print(label, end=" ")
            pp(p[key], ind + " ")
        print("%s)" % ind)

    try:
        pp(q.algebra)
    except AttributeError:
        # it's update, just a list
        for operation in q:
            pp(operation)
if __name__ == "__main__":
    # Parse the query given as first argument (a file name or the query text
    # itself), then print the parse tree and the translated algebra
    import sys
    from rdflib.plugins.sparql import parser
    import os.path

    if len(sys.argv) < 2:
        sys.exit("usage: %s <query file | query text>" % sys.argv[0])

    if os.path.exists(sys.argv[1]):
        # close the file again instead of leaving that to the garbage collector
        with open(sys.argv[1]) as query_file:
            q = query_file.read()
    else:
        q = sys.argv[1]

    pq = parser.parseQuery(q)
    print(pq)
    print("--------")
    tq = translateQuery(pq)
    pprintAlgebra(tq)