module wikidata;

// (C) 2018-2021 by Matthias Rossmy
// This file is distributed under the "Fair Use License v2"

/* Example
class GeoObject : WdObject
{
    @WikiDataProp("?itemDescription") string desc;
}
class DatePopulation
{
    @WikiDataProp("point in time") string date;
    @WikiDataProp("population") string population;
}

auto results = WikiData.find("geographic region","Springfield").getList!GeoObject;

foreach(obj;results)
{
    auto history = obj.query("population").sortAsc("date").getList!DatePopulation;
    if(history.length>0)
    {
        writeln("Wikidata Object: ",obj.id);
        writeln("Description: ",obj.desc);
        foreach(item;history)
        {
            writeln(item.date," ",item.population);
        }
    }
}
*/

public import std.xml;

import std.stdio;
import std.net.curl;
import std.uri;
import std.path;
import std.traits;
import std.conv;
import std.ascii;
import std.string;

import easyd.base;

/**
 * Returns the value of the "name" attribute of an XML element,
 * or "" when the element (or its tag) is null or has no such attribute.
 */
string bindingName(Element e)
{
    if(e is null || e.tag is null) return "";
    // Use `in` instead of indexing: indexing a missing key raises a RangeError,
    // which is an Error and would not be caught by catch(Exception).
    auto name = "name" in e.tag.attr;
    return name is null ? "" : *name;
}

/**
 * Returns the text of the first child element of `e`, or "" when `e` has no child elements.
 */
string bindingValue(Element e)
{
    foreach(value;e.elements) return value.text;
    return "";
}

/**
 * Static entry point for building and running SPARQL queries against query.wikidata.org.
 */
class WikiData
{
    static string dataLang="en"; //language for the name parameter of the find function and for all data returned by Wikidata
    static string codingLang="en"; //language for properties, units, categories...

    static bool verbose=false; //when true, queries, replies and errors are written to stdout
    private static string[string] propCache; //property name -> property ID
    private static string[string] unitCache; //unit name -> unit ID
    private static string[string] catCache;  //category+name -> "(wd:...)" value list

    /**
     * Returns true when `name` has the shape of a native ID:
     * one uppercase ASCII letter followed by one or more ASCII digits (e.g. "Q42", "P361").
     */
    static bool isNativeID(string name)
    {
        if(name.length<2 || !name[0].isUpper) return false;
        // Strict digit check: a general "is numeric" test would also accept
        // signs, decimal points, exponents, "inf", "nan" and similar.
        foreach(char c; name[1..$])
        {
            if(!c.isDigit) return false;
        }
        return true;
    }

    /**
     * Shared implementation of propID and unitID.
     *
     * Returns `name` unchanged when it already is a native ID; otherwise looks it up
     * in `cache` and, on a miss, searches for instances of `category` labelled `name`
     * (in codingLang) and caches the ID of the first result.
     *
     * Throws: Exception(kind~" "~name~" does not exist") when the search yields no result
     *         (this includes the case that the request itself failed).
     */
    private static string resolveID(ref string[string] cache, string category, string name, string kind)
    {
        if(isNativeID(name)) return name;
        auto cached = (name in cache);
        if(cached !is null) return *cached;

        auto candidates = find(category,name).getList(codingLang);
        if(candidates.length==0) throw new Exception(kind~" "~name~" does not exist");
        auto id = candidates[0].id;
        cache[name] = id;
        return id;
    }

    /**
     * Resolves a property name to its ID (native IDs are passed through).
     * Throws: Exception when no matching property is found.
     */
    static string propID(string name)
    {
        return resolveID(propCache, "Q18616576", name, "Property");
    }

    /**
     * Resolves a unit name to its ID (native IDs are passed through).
     * Throws: Exception when no matching unit is found.
     */
    static string unitID(string name)
    {
        return resolveID(unitCache, "Q47574", name, "Unit");
    }

    /**
     * Returns all items labelled `name` (optionally restricted to `category`)
     * as a SPARQL VALUES body of the form "(wd:Q1)(wd:Q2)...".
     * Results are cached per (category, name) pair.
     *
     * Throws: Exception(name~" does not exist") when nothing is found.
     */
    static string name2IdList(string name, string category="")
    {
        // The category is part of the key so that the same name looked up
        // under different categories does not return a stale list.
        auto key = category~"\n"~name;
        auto cached = (key in catCache);
        if(cached !is null) return *cached;

        auto matches = find(category,name).getList(codingLang);
        if(matches.length==0) throw new Exception(name~" does not exist");
        string result;
        foreach(item;matches) result ~= ("(wd:"~item.id~")");
        catCache[key] = result;
        return result;
    }

    /**
     * Sends `sparql` to the query service and returns the "results" element of the reply.
     *
     * Returns: the "results" element, or null when the reply contains none
     *          or when any Exception occurs (it is printed only if verbose is set).
     */
    static Element rawQuery(string sparql)
    {
        try
        {
            if(verbose) writeln(sparql);
            // encodeComponent (not encode) so that reserved characters such as
            // '&', '+', '#' and '=' inside the query are escaped as well.
            auto url = "https://query.wikidata.org/sparql?query="~std.uri.encodeComponent(sparql);
            auto reply = get(url).idup;
            if(verbose) writeln(reply);
            auto doc = new Document(reply);
            foreach(item;doc.elements) if(item.tag.name=="results") return item;
            if(verbose) writeln("No result");
            return null;
        }
        catch(Exception ex)
        {
            if(verbose) writeln(ex.msg);
            return null;
        }
    }

    /**
     * Builder for one SPARQL SELECT query. The builder methods return `this`
     * so that calls can be chained; getXml/getList execute the query.
     */
    static class Query
    {
        string[] filters;            //triple patterns / FILTER / VALUES clauses, joined with ". " in WHERE
        string nameFilter;           //label to search for; consumed (reset to "") by getXml
        bool nameFilterCase;         //true: case-sensitive label match
        bool nameFilterExact;        //true: the label must match completely
        string bindings;             //space separated list of selected variables
        string analyzeVar="item";    //variable that property bindings are attached to
        string analyzeProp;          //property ID of the statement being analyzed (set by WdObject.query)
        string[string] binding2member; //result binding name -> member name of the result class
        string postfix;              //text appended after the WHERE block (e.g. ORDER BY)

        /**
         * Restricts ?item to things that are part of (P361) `what`.
         * `what` is either a native ID or a name that is resolved via name2IdList.
         */
        Query isPartOf(string what)
        {
            if(isNativeID(what))
            {
                filters ~= ("?item wdt:P361 wd:"~what);
            }else{
                filters ~= ("VALUES (?partOf) { "~name2IdList(what)~" }");
                filters ~= "?item wdt:P361 ?partOf";
            }
            return this;
        }

        /*Query where(string prop, string value) //TODO: make this function work with the cities example
        {
            auto pid = propID(prop);
            if(isNativeID(value))
            {
                filters ~= ("?item wdt:"~pid~" wd:"~value);
            }else{
                filters ~= ("VALUES (?partOf) { "~name2IdList(value,prop)~" }");
                filters ~= "?item wdt:"~pid~" ?partOf";
            }
            return this;
        }*/

        /**
         * Adds the variable `name` (with or without leading '?') to the SELECT list.
         * An empty name is ignored.
         */
        Query bind(string name)
        {
            if(name.length==0) return this;
            if(name[0]!='?') name = "?"~name;
            bindings ~= (name~" ");
            return this;
        }

        /**
         * Binds the value of property `prop` to ?bindTo and selects ?bindToLabel.
         *
         * For the default analyzeVar ("item") the wdt: prefix is used. Otherwise
         * ps: is used when `prop` is the analyzed property itself and pq: for any other property.
         */
        Query bind(string prop, string bindTo)
        {
            prop = WikiData.propID(prop);
            string prefix;
            if(analyzeVar=="item")
            {
                prefix = "wdt:";
            }
            else if(prop==analyzeProp)
            {
                prefix = "ps:";
            }
            else
            {
                prefix = "pq:";
            }
            filters ~= ("?"~analyzeVar~" "~prefix~prop~" ?"~bindTo);
            return bind(bindTo~"Label");
        }

        /**
         * Binds the quantity amount of property `prop`, restricted to values given in `unit`,
         * to ?bindTo and selects ?bindToLabel.
         *
         * Throws: Exception when analyzeVar is not "item" (not implemented for sub-properties).
         */
        Query bind(string prop, string unit, string bindTo)
        {
            prop = WikiData.propID(prop);
            unit = WikiData.unitID(unit);
            if(analyzeVar=="item")
            {
                filters ~= ("?"~analyzeVar~" p:"~prop~"/psv:"~prop~" [ wikibase:quantityAmount ?"~bindTo~"; wikibase:quantityUnit wd:"~unit~"; ]");
            }else{
                throw new Exception("Filtering sub-properties by unit is not implemented yet");
            }
            return bind(bindTo~"Label");
        }

        /**
         * Creates bindings for every field of `obj` that carries a WikiDataProp attribute
         * and records in binding2member which result binding fills which field.
         *
         * A prop starting with '?' is selected directly as a variable; any other prop
         * is bound as a property (with unit, if one is given) under the field's name.
         */
        private void bindHelper(T2)(T2 obj)
        {
            //writeln("BindHelper for ",typeid(T2).to!string);
            foreach (i,m; obj.tupleof)
            {
                //writeln(" Member ",__traits(identifier, obj.tupleof[i]));
                static if(hasUDA!(obj.tupleof[i], WikiDataProp))
                {
                    enum member = __traits(identifier, obj.tupleof[i]);
                    auto prop = getUDAs!(obj.tupleof[i], WikiDataProp)[0].prop;
                    auto unit = getUDAs!(obj.tupleof[i], WikiDataProp)[0].unit;
                    if(verbose) writeln("Auto-bind ",prop);
                    if(prop.length>0 && prop[0]=='?')
                    {
                        bind(prop);
                        binding2member[prop[1..$]] = member;
                    }else{
                        if(unit=="")
                        {
                            bind(prop,member);
                        }else{
                            bind(prop,unit,member);
                        }
                        binding2member[member~"Label"] = member;
                    }
                }
            }
        }

        /**
         * Appends one sort key to postfix. The "ORDER BY" keyword is written only once;
         * further keys are added to the existing clause, because a query may
         * contain only a single ORDER BY.
         */
        private Query appendOrder(string direction, string var)
        {
            if(var.length>0 && var[0]=='?') var = var[1..$];
            if(postfix.indexOf("ORDER BY")<0) postfix ~= "ORDER BY ";
            postfix ~= (direction~"(?"~var~") ");
            return this;
        }

        /// Sorts the results ascending by variable `var`; repeated calls add further sort keys.
        Query sortAsc(string var)
        {
            return appendOrder("ASC", var);
        }

        /// Sorts the results descending by variable `var`; repeated calls add further sort keys.
        Query sortDesc(string var)
        {
            return appendOrder("DESC", var);
        }

        /**
         * Assembles the SELECT query for result type T, executes it and returns the raw
         * "results" element (null on failure, see rawQuery).
         *
         * Params:
         *   dataLanguage = language for labels; "" means WikiData.dataLang
         *
         * Note: every call adds the automatic bindings for T to this Query again.
         */
        Element getXml(T=WdObject)(string dataLanguage="")
        {
            if(dataLanguage=="") dataLanguage=dataLang;
            if(nameFilter!="")
            {
                // NOTE(review): nameFilter is inserted into the query text without escaping,
                // so quotes or regex metacharacters in the name change the query.
                if(nameFilterCase)
                {
                    filters ~= ("?item ?label \""~nameFilter~"\"@"~dataLanguage);
                    if(nameFilterExact)
                    {
                        filters ~= "?item rdfs:label ?name";
                        filters ~= ("FILTER regex(?name, \"^"~nameFilter~"$\")");
                    }
                }else{
                    filters ~= "?item rdfs:label ?name";
                    if(nameFilterExact)
                    {
                        filters ~= ("FILTER regex(?name, \"^"~nameFilter~"$\", \"i\")");
                    }else{
                        filters ~= ("FILTER regex(?name, \""~nameFilter~"\", \"i\")");
                    }
                }
                nameFilter=""; // consumed: do not add the name filters again on a later call
            }

            auto querystr = "SELECT REDUCED ";
            static if(is(T:WdObject))
            {
                querystr ~= "?item ";
            }

            // Walk the base classes explicitly and then T itself to collect the annotated fields.
            auto dummy = new T;
            foreach(t; BaseClassesTuple!(Unqual!T))
            {
                bindHelper(cast(t)(dummy));
            }
            bindHelper(dummy);

            querystr ~= (bindings~" WHERE { ");
            foreach(f;filters) querystr ~= (f~". ");
            querystr ~= ("SERVICE wikibase:label { bd:serviceParam wikibase:language \""~dataLanguage~","~codingLang~"\". } } "~postfix);

            return rawQuery(querystr);
        }

        /**
         * Executes the query and converts every result row into a new T.
         *
         * For T derived from WdObject the "item" binding fills `id` (last path segment of
         * the value). All other bindings are assigned through binding2member via setMember.
         *
         * Returns: the list of results; empty when the query failed or matched nothing.
         */
        T[] getList(T=WdObject)(string dataLanguage="")
        {
            T[] resultlist;
            auto xml = getXml!T(dataLanguage);
            if(xml is null) return resultlist; // rawQuery signals failure with null

            foreach(result;xml.elements)
            {
                auto item = new T;
                foreach(binding;result.elements)
                {
                    static if(is(T:WdObject))
                    {
                        if(binding.bindingName=="item") item.id = binding.bindingValue.baseName;
                    }
                    auto targetMember = (binding.bindingName in binding2member);
                    if(targetMember !is null)
                    {
                        setMember(item,*targetMember,binding.bindingValue);
                    }
                }

                resultlist ~= item;
            }
            return resultlist;
        }
    }

    /**
     * Starts a query for items.
     *
     * Params:
     *   category      = native ID or name of a class the items must be an instance of
     *                   (P31, followed by any number of P279 steps); "" for no restriction
     *   name          = label to search for; "" for no label filter
     *   caseSensitive = match the label case-sensitively
     *   allowContain  = false: the label must match completely; true: it may merely contain `name`
     */
    static Query find(string category, string name="", bool caseSensitive=true, bool allowContain=false)
    {
        auto q=new Query;
        if(category!="")
        {
            if(isNativeID(category))
            {
                q.filters ~= ("?item (wdt:P31/wdt:P279*) wd:"~category);
            }else{
                q.filters ~= ("VALUES (?categories) { "~name2IdList(category)~" }");
                q.filters ~= "?item (wdt:P31/wdt:P279*) ?categories";
            }
        }
        if(name!="")
        {
            q.nameFilter = name;
            q.nameFilterCase = caseSensitive;
            q.nameFilterExact = !allowContain;
        }
        return q;
    }
}

/**
 * Base class for query results that represent an item; `id` holds the item's ID.
 */
class WdObject
{
    string id;

    /**
     * Returns the first value of property `prop` of this item,
     * or "" when there is none or the query failed.
     */
    string get(string prop)
    {
        string bindTo = "val";
        prop = WikiData.propID(prop);
        auto xml = WikiData.rawQuery("SELECT ?"~bindTo~" WHERE { wd:"~id~" wdt:"~prop~" ?"~bindTo~". }");
        if(xml is null) return ""; // rawQuery signals failure with null
        foreach(result;xml.elements)
        {
            foreach(binding;result.elements)
            {
                if(binding.bindingName==bindTo) return binding.bindingValue;
            }
        }
        return "";
    }

    /**
     * Starts a query over the statements of property `prop` of this item.
     * Subsequent bind calls attach to the statement variable ?propVar.
     */
    WikiData.Query query(string prop, string propVar="prop")
    {
        prop = WikiData.propID(prop);
        auto q=new WikiData.Query;
        q.filters ~= ("wd:"~id~" p:"~prop~" ?"~propVar);
        q.analyzeVar = propVar;
        q.analyzeProp = prop;
        return q;
    }
}

/**
 * WdObject that additionally receives the item's label in `name`.
 */
class WdNamedObject : WdObject
{
    @WikiDataProp("?itemLabel") string name;
}

/**
 * Attribute for fields of result classes.
 * `prop` is a property name/ID, or a query variable when it starts with '?';
 * `unit` optionally restricts quantity values to a unit.
 */
struct WikiDataProp
{
    string prop;
    string unit;
}