With the introduction of JSON-LD support in the Wikirate API, Wikirate data can now be treated as linked data. This unlocks the potential of combining Wikirate’s structured data with external knowledge graphs such as Wikidata by using standard semantic web tools.
In this guide, we walk through a practical example showing how to consume Wikirate data as JSON-LD, load it into a local RDF graph, and enrich it with additional company information from Wikidata such as industry classifications, founding dates, and descriptions. Using Python and RDFLib, we demonstrate how JSON-LD makes cross-platform data integration possible.
The result is a local knowledge graph that connects Wikirate and Wikidata through shared identifiers.
Install dependencies:
pip install requests rdflib
If you run this in production, store your API key in an environment variable (not inline in code).
We define a small EnrichmentField configuration. It specifies which Wikidata property to fetch and where to store it in our local RDF graph.
from dataclasses import dataclass
from typing import Optional
from rdflib import URIRef
from rdflib.namespace import XSD
@dataclass(frozen=True)
class EnrichmentField:
    """Describe one value to fetch from Wikidata and where to write it locally.

    Attributes:
        var: Variable name in SPARQL results, e.g. "industries".
        wd_prop: Wikidata direct property id like "P571", or a prefixed
            property like "schema:description".
        as_label: If True, treat the value as an entity and fetch its
            English label.
        target_predicate: Predicate used when writing results into the
            local graph.
        lang: Optional language tag for literal handling.
        datatype: Optional literal datatype, e.g. XSD.dateTime.
    """

    var: str
    wd_prop: Optional[str] = None
    as_label: bool = False
    target_predicate: Optional[URIRef] = None
    lang: Optional[str] = None
    datatype: Optional[URIRef] = None
Tip: Keeping enrichment "field-driven" makes it easy to add new Wikidata properties later without rewriting logic.
The core move is: request JSON-LD and let RDFLib parse it directly into a graph.
import requests
from typing import Optional
from rdflib import Graph
def load_jsonld_from_api(
    url: str,
    graph: Optional[Graph] = None,
    api_key: Optional[str] = None,
    timeout: float = 60,
) -> Graph:
    """Fetch a JSON-LD document and parse it into an RDF graph.

    Args:
        url: Endpoint to request; the response is asked for as
            ``application/ld+json``.
        graph: Graph that receives the parsed triples. A new graph is
            created when omitted.
        api_key: Value sent in the ``X-API-Key`` header. When omitted, the
            ``WIKIRATE_API_KEY`` environment variable is used, falling back
            to the ``"YOUR_API_KEY"`` placeholder.
        timeout: Seconds to wait for the server, so a stalled connection
            cannot hang the script indefinitely.

    Returns:
        The graph that received the triples (``graph`` itself when given).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    import os  # local import keeps this snippet self-contained

    if graph is None:
        graph = Graph()
    if api_key is None:
        # Keep the secret out of the source; the placeholder only preserves
        # the behaviour of the original example when nothing is configured.
        api_key = os.environ.get("WIKIRATE_API_KEY", "YOUR_API_KEY")

    headers = {
        "Accept": "application/ld+json",
        "X-API-Key": api_key,
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    # RDFLib understands JSON-LD natively, so no custom parsing is needed.
    graph.parse(data=response.text, format="json-ld")
    return graph
This is where JSON-LD pays off: no custom parsing, just load the document and you have RDF triples.
The ontology provides the semantic backbone (classes and properties). We add its triples to a working graph, then add API data.
from rdflib import Graph, Namespace
from rdflib.namespace import RDF, OWL
# Namespaces used throughout the rest of the walkthrough.
W = Namespace("https://wikirate.org/ontology/")
SCHEMA = Namespace("https://schema.org/")

# Load the ontology (classes and properties) into its own graph first.
g_onto = Graph()
g_onto.parse("https://wikirate.org/ontology/wikirate.ttl", format="turtle")
print(f"Triples in ontology: {len(g_onto)}")

# Optional sanity check: show every subject typed as owl:Class.
for owl_class in g_onto.subjects(RDF.type, OWL.Class):
    print("Class:", owl_class)

# Working graph: ontology triples first, then the API data on top.
g = Graph()
g += g_onto
g = load_jsonld_from_api("https://wikirate.org/Companies?format=jsonld", g)
print("Total triples (ontology + data):", len(g))
Once loaded, the data behaves like a local graph. You can iterate through instances of wikirate:Company.
from rdflib.namespace import RDF
# Print each wikirate:Company together with its name and country (if any).
for company in g.subjects(RDF.type, W.Company):
    print(
        company,
        "→",
        g.value(company, SCHEMA.name),
        "|",
        g.value(company, SCHEMA.addressCountry),
    )
This is useful for quick exploration and debugging before writing SPARQL queries.
We will enrich only the companies that already have wikirate:wikidataId.
First, pull those QIDs from the local graph with SPARQL.
# Select every company that already carries a wikirate:wikidataId.
QUERY = """
PREFIX wikirate: <https://wikirate.org/ontology/>
PREFIX schema: <https://schema.org/>
SELECT ?company ?name ?wikidata_id
WHERE {
?company a wikirate:Company ;
schema:name ?name ;
wikirate:wikidataId ?wikidata_id .
}
"""

# Result rows unpack in SELECT order: company, name, wikidata_id.
qids = []
for company, name, wikidata_id in g.query(QUERY):
    print(company, "→", name, "→", wikidata_id)
    # Store the plain Python value of the literal, not the RDF term.
    qids.append(wikidata_id.toPython())
We generate one Wikidata query that asks for multiple fields for multiple QIDs at once. This avoids per-company calls and keeps the enrichment efficient.
# One entry per Wikidata value that should be copied into the local graph.
fields = [
    # English description text, stored as schema:description.
    EnrichmentField(
        "desc",
        "schema:description",
        lang="en",
        target_predicate=SCHEMA.description,
    ),
    # Inception (P571), stored as a typed schema:foundingDate literal.
    EnrichmentField(
        "inception",
        "P571",
        datatype=XSD.dateTime,
        target_predicate=SCHEMA.foundingDate,
    ),
    # Industry (P452) entities, resolved to labels, stored as schema:industry.
    EnrichmentField(
        "industries",
        "P452",
        as_label=True,
        target_predicate=SCHEMA.industry,
    ),
]
def build_wikidata_query(qids: list[str], fields: list[EnrichmentField]) -> str:
    """Build one SPARQL query that fetches every field for every QID.

    Each field contributes its own ``OPTIONAL`` block, so a value missing
    for one field never suppresses the values of another. Fields with
    ``as_label`` set are resolved to labels (in the field's ``lang``, or
    English when none is given) and aggregated into a single "; "-separated
    string per item.

    Args:
        qids: Wikidata item ids such as ``"Q95"``. Duplicates are dropped
            while the first-seen order is kept.
        fields: Field descriptions; each needs ``var``, ``wd_prop``,
            ``as_label`` and ``lang`` attributes.

    Returns:
        The SPARQL query text, ready to send to the Wikidata endpoint.

    Raises:
        ValueError: If a QID is not of the form ``Q<number>`` or a field has
            no ``wd_prop``.
    """
    import re  # local import keeps this snippet self-contained

    qid_pattern = re.compile(r"Q[1-9]\d*")
    pid_pattern = re.compile(r"P[1-9]\d*")

    # The ids come from remote data and are spliced into the query text, so
    # reject anything that is not a plain QID instead of sending it along.
    unique_qids = list(dict.fromkeys(qids))
    for qid in unique_qids:
        if not isinstance(qid, str) or not qid_pattern.fullmatch(qid):
            raise ValueError(f"Not a Wikidata QID: {qid!r}")

    select_vars = ["?item", "?qid"]
    group_vars = ["?item", "?qid"]
    optional_blocks: list[str] = []

    for f in fields:
        if not f.wd_prop:
            raise ValueError(f"Field {f.var!r} has no wd_prop to query")

        # Bare property ids ("P571") are direct properties; anything else is
        # taken to be an already-prefixed name such as "schema:description".
        prop = f"wdt:{f.wd_prop}" if pid_pattern.fullmatch(f.wd_prop) else f.wd_prop

        if f.as_label:
            entity_var = f"?{f.var}Entity"
            label_var = f"?{f.var}Label"
            label_lang = f.lang or "en"
            select_vars.append(
                f'(GROUP_CONCAT(DISTINCT {label_var}; separator="; ") AS ?{f.var})'
            )
            optional_blocks.append(
                "  OPTIONAL {\n"
                f"    ?item {prop} {entity_var} .\n"
                f"    {entity_var} rdfs:label {label_var} .\n"
                f'    FILTER(LANG({label_var}) = "{label_lang}")\n'
                "  }"
            )
        else:
            select_vars.append(f"?{f.var}")
            # Non-aggregated variables must also appear in GROUP BY.
            group_vars.append(f"?{f.var}")
            pattern_lines = [f"    ?item {prop} ?{f.var} ."]
            if f.lang:
                pattern_lines.append(f'    FILTER(LANG(?{f.var}) = "{f.lang}")')
            optional_blocks.append(
                "  OPTIONAL {\n" + "\n".join(pattern_lines) + "\n  }"
            )

    values = " ".join(f"wd:{qid}" for qid in unique_qids)
    query_lines = [
        "PREFIX wd: <http://www.wikidata.org/entity/>",
        "PREFIX wdt: <http://www.wikidata.org/prop/direct/>",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        # The http:// form is intentional here; it differs from the https://
        # schema.org namespace used for the local graph.
        "PREFIX schema: <http://schema.org/>",
        f"SELECT {' '.join(select_vars)}",
        "WHERE {",
        f"  VALUES ?item {{ {values} }}",
        # Keep the bare QID next to the item IRI so results can be keyed by it.
        '  BIND(REPLACE(STR(?item), "^.*/", "") AS ?qid)',
        *optional_blocks,
        "}",
        f"GROUP BY {' '.join(group_vars)}",
    ]
    return "\n".join(query_lines) + "\n"
This "field-driven" builder keeps the example extensible.
def exec_wikidata_query(query: str) -> dict:
    """Send a SPARQL query to the Wikidata endpoint and return the decoded JSON.

    Args:
        query: SPARQL query text, passed as the ``query`` URL parameter.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    # NOTE(review): the contact address in the User-Agent looks like an
    # e-mail-obfuscation placeholder; confirm and substitute a real contact.
    response = requests.get(
        "https://query.wikidata.org/sparql",
        params={"query": query},
        headers={
            "Accept": "application/sparql-results+json",
            "User-Agent": "wikirate-jsonld-example/1.0 (contact: [email protected])",
        },
        timeout=60,
    )
    response.raise_for_status()
    return response.json()
def get_enrichment(data: dict, fields: list[EnrichmentField]) -> dict[str, dict[str, str]]:
    """Reshape SPARQL JSON results into a ``{qid: {var: value}}`` mapping.

    Only variables named by ``fields`` are copied, and only when the result
    row actually binds them. When several rows share a QID, values from
    later rows overwrite earlier ones.

    Args:
        data: Decoded SPARQL JSON results; rows are read from
            ``data["results"]["bindings"]`` and must bind ``qid``.
        fields: Field descriptions whose ``var`` names select the variables.

    Returns:
        One entry per QID seen, possibly with an empty inner mapping.
    """
    wanted = [field.var for field in fields]
    by_qid: dict[str, dict[str, str]] = {}
    for binding in data["results"]["bindings"]:
        facts = by_qid.setdefault(binding["qid"]["value"], {})
        facts.update(
            (name, binding[name]["value"]) for name in wanted if name in binding
        )
    return by_qid
We add a schema:sameAs link to the Wikidata entity and store the enrichment as new triples.
from rdflib import URIRef, Literal
from rdflib.namespace import RDF
def apply_enrichment(g: Graph,
                     enrichment: dict[str, dict[str, str]],
                     W: Namespace,
                     SCHEMA: Namespace,
                     fields: list[EnrichmentField]) -> Graph:
    """Write Wikidata enrichment values into the local graph.

    For every ``W.Company`` that has a ``W.wikidataId`` with a non-empty
    entry in ``enrichment``, a ``SCHEMA.sameAs`` link to the Wikidata entity
    is added, followed by one literal per field that has a target predicate
    and a non-empty value.

    Args:
        g: Graph to read companies from and add triples to (modified in place).
        enrichment: Mapping of QID to ``{field var: value}``.
        W: Namespace providing ``Company`` and ``wikidataId``.
        SCHEMA: Namespace providing ``sameAs``.
        fields: Field descriptions giving ``var``, ``target_predicate``,
            ``lang`` and ``datatype`` for each value.

    Returns:
        The same graph ``g``, for convenient chaining.
    """
    WD_ENTITY = "http://www.wikidata.org/entity/"

    # Snapshot the companies first: triples are added to ``g`` inside the
    # loop, and the graph should not be mutated while it is being iterated.
    for company in list(g.subjects(RDF.type, W.Company)):
        qid_lit = g.value(company, W.wikidataId)
        if not qid_lit:
            continue
        qid = str(qid_lit)
        facts = enrichment.get(qid)
        if not facts:
            continue

        # Full IRI of the Wikidata entity this company corresponds to.
        wd_iri = URIRef(WD_ENTITY + qid)

        # Add schema:sameAs ONLY if it is not already present
        if (company, SCHEMA.sameAs, wd_iri) not in g:
            g.add((company, SCHEMA.sameAs, wd_iri))

        for f in fields:
            if not f.target_predicate or f.var not in facts:
                continue
            val = facts[f.var]
            if not val:
                continue
            # A language tag takes precedence over a datatype.
            if f.lang:
                lit = Literal(val, lang=f.lang)
            elif f.datatype:
                lit = Literal(val, datatype=f.datatype)
            else:
                lit = Literal(val)
            g.add((company, f.target_predicate, lit))
    return g
# Read back the enriched companies, original and added properties alike.
QUERY = """
PREFIX wikirate: <https://wikirate.org/ontology/>
PREFIX schema: <https://schema.org/>
SELECT ?company ?name ?country ?wikidata ?desc ?industry ?foundingDate
WHERE {
?company a wikirate:Company ;
schema:name ?name ;
wikirate:wikidataId ?wikidata .
OPTIONAL { ?company schema:addressCountry ?country }
OPTIONAL { ?company schema:description ?desc }
OPTIONAL { ?company schema:industry ?industry }
OPTIONAL { ?company schema:foundingDate ?foundingDate }
}
LIMIT 20
"""

# Result rows unpack in SELECT order; unbound OPTIONAL values print as None.
for company, name, country, wikidata, desc, industry, founding_date in g.query(QUERY):
    print(company, "→", name, "→", country, "→", wikidata, "→", desc, "→", industry, "→", founding_date)

# Write the enriched graph as Turtle and as JSON-LD
# (JSON-LD compaction/framing depends on context settings).
for destination, rdf_format in (
    ("wikirate_enriched.ttl", "turtle"),
    ("wikirate_enriched.jsonld", "json-ld"),
):
    g.serialize(destination, format=rdf_format)
/Companies?format=jsonld returns one page; iterate hydra:next to fetch more.
Storing the enriched values under schema:industry etc. is pragmatic; you can also store Wikidata IRIs directly if you prefer.