1 module wikidata;
2 
3 // (C) 2018-2021 by Matthias Rossmy
4 // This file is distributed under the "Fair Use License v2"
5 
6 /* Example
7 class GeoObject : WdObject
8 {
9 	@WikiDataProp("?itemDescription") string desc;
10 }
11 class DatePopulation
12 {
13 	@WikiDataProp("point in time") string date;
14 	@WikiDataProp("population") string population;
15 }
16 
17 auto results = WikiData.find("geographic region","Springfield").getList!GeoObject;
18 
19 foreach(obj;results)
20 {		
21 	auto history = obj.query("population").sortAsc("date").getList!DatePopulation;
22 	if(history.length>0)
23 	{
24 		writeln("Wikidata Object: ",obj.id);
25 		writeln("Description: ",obj.desc);
26 		foreach(item;history)
27 		{
28 			writeln(item.date," ",item.population);
29 		}
30 	}
31 }
32 */
33 
34 public import std.xml;
35 
36 import std.stdio;
37 import std.net.curl;
38 import std.uri;
39 import std.path;
40 import std.traits;
41 import std.conv;
42 import std.ascii;
43 import std..string;
44 
45 import easyd.base;
46 
// Returns the value of the "name" attribute of the given XML element,
// or "" when the element (or its tag) is null or has no such attribute.
//
// The attribute table is probed with `in` instead of being indexed:
// indexing an associative array with a missing key raises a RangeError,
// which is an Error and therefore is not caught by catch(Exception).
string bindingName(Element e)
{
	if(e is null || e.tag is null) return "";
	auto found = ("name" in e.tag.attr);
	return (found is null) ? "" : *found;
}
58 
// Returns the text of the first child element of e,
// or "" when e has no child elements.
string bindingValue(Element e)
{
	auto children = e.elements;
	if(children.length == 0) return "";
	return children[0].text();
}
64 
// Static entry point for building and running SPARQL queries against
// https://query.wikidata.org/sparql.
//
// Typical use: WikiData.find(category, name) returns a Query, which can be
// refined (isPartOf, bind, sortAsc, ...) and finally executed with
// getList!T or getXml!T.
class WikiData
{
	static string dataLang="en"; //language for the name parameter of the find function and for all data returned by Wikidata
	static string codingLang="en"; //language for properties, units, categories...
	
	static bool verbose=false; //when true, queries, raw replies and error messages are written to stdout
	private static string[string] propCache; //property name -> property ID
	private static string[string] unitCache; //unit name -> unit ID
	private static string[string] catCache;  //category + name -> "(wd:ID)(wd:ID)..." list
	
	// Returns true when name looks like a native Wikidata identifier:
	// one uppercase letter followed by one or more ASCII digits (e.g. "Q42", "P31").
	//
	// Only plain digits are accepted after the letter. A general numeric-string
	// test would also accept inputs such as "Pinf", "Q1.5" or "Q-5", which are
	// not identifiers and must be resolved by name instead.
	static bool isNativeID(string name)
	{
		if(name.length<2 || !name[0].isUpper) return false;
		foreach(c; name[1..$])
		{
			if(c<'0' || c>'9') return false;
		}
		return true;
	}
	
	// Resolves a property name to its ID. Native IDs are returned unchanged;
	// other names are looked up (in codingLang) among the instances of
	// Q18616576 and the first hit is cached and returned.
	// Throws: Exception when the lookup yields no result.
	static string propID(string name)
	{
		if(isNativeID(name)) return name;
		auto cacheResult = (name in propCache);
		if(cacheResult !is null) return *cacheResult;
		
		auto resultList = find("Q18616576",name).getList(codingLang);
		if(resultList.length==0) throw new Exception("Property "~name~" does not exist");
		auto result = resultList[0].id;
		propCache[name] = result;
		return result;
	}
	
	// Resolves a unit name to its ID. Native IDs are returned unchanged;
	// other names are looked up (in codingLang) among the instances of
	// Q47574 and the first hit is cached and returned.
	// Throws: Exception when the lookup yields no result.
	static string unitID(string name)
	{
		if(isNativeID(name)) return name;
		auto cacheResult = (name in unitCache);
		if(cacheResult !is null) return *cacheResult;
		
		auto resultList = find("Q47574",name).getList(codingLang);
		if(resultList.length==0) throw new Exception("Unit "~name~" does not exist");
		auto result = resultList[0].id;
		unitCache[name] = result;
		return result;
	}
	
	// Looks up all objects called name (optionally restricted to category) and
	// returns their IDs formatted for a SPARQL VALUES clause: "(wd:Q1)(wd:Q2)...".
	// Results are cached per (category, name) pair so that the same name
	// requested under a different category is not answered from a stale entry.
	// Throws: Exception when the lookup yields no result.
	static string name2IdList(string name, string category="")
	{
		auto cacheKey = category~"\0"~name;
		auto cacheResult = (cacheKey in catCache);
		if(cacheResult !is null) return *cacheResult;
		
		auto resultList = find(category,name).getList(codingLang);
		if(resultList.length==0) throw new Exception(name~" does not exist");
		string result;
		foreach(item;resultList) result ~= ("(wd:"~item.id~")");
		catCache[cacheKey] = result;
		return result;
	}
	
	// Sends the SPARQL text to the query service and returns the <results>
	// element of the XML reply.
	// Returns null when the reply has no <results> element or when any
	// Exception occurs (the message is printed only in verbose mode).
	static Element rawQuery(string sparql)
	{
		try
		{
			if(verbose) writeln(sparql);
			// encodeComponent (not encode): the query is a single URL parameter
			// value, so reserved characters such as '&', '+', '#' and '=' must be
			// percent-encoded too, otherwise they would corrupt the parameter.
			auto reply = get("https://query.wikidata.org/sparql?query="~std.uri.encodeComponent(sparql)).idup;
			if(verbose) writeln(reply);
			auto d = new Document(reply);
			foreach(item;d.elements) if(item.tag.name=="results") return item;
			if(verbose) writeln("No result");
			return null;
		}
		catch(Exception e)		
		{
			if(verbose) writeln(e.msg);
			return null;
		}
	}
	
	// Incrementally built SPARQL query. All builder methods return this so
	// that calls can be chained.
	static class Query
	{
		string[] filters;              //triple patterns / FILTER / VALUES clauses, joined with ". " in the WHERE block
		string nameFilter;             //label to search for; turned into filters by getXml
		bool nameFilterCase;           //true: case-sensitive name match
		bool nameFilterExact;          //true: whole label must match, false: label may contain the name
		string bindings;               //space-separated list of selected variables
		string analyzeVar="item";      //variable the bind() overloads attach properties to
		string analyzeProp;            //property ID of the statement being analyzed (set by WdObject.query)
		string[string] binding2member; //result binding name -> member name of the result class
		string postfix;                //text appended after the WHERE block (ORDER BY ...)
		
		// Restricts ?item to things that are part of `what` (property P361).
		// `what` is either a native ID or a name resolved through name2IdList.
		Query isPartOf(string what)
		{
			if(isNativeID(what))
			{
				filters ~= ("?item wdt:P361 wd:"~what);
			}else{
				filters ~= ("VALUES (?partOf) { "~name2IdList(what)~" }");
				filters ~= "?item wdt:P361 ?partOf";
			}
			return this;
		}
		
		/*Query where(string prop, string value) //TODO: make this function work with the cities example
		{
			auto pid = propID(prop);
			if(isNativeID(value))
			{
				filters ~= ("?item wdt:"~pid~" wd:"~value);
			}else{
				filters ~= ("VALUES (?partOf) { "~name2IdList(value,prop)~" }");
				filters ~= "?item wdt:"~pid~" ?partOf";
			}
			return this;
		}*/
		
		// Adds a variable to the SELECT list; a leading '?' is added if missing.
		Query bind(string name)
		{
			if(name[0]!='?') name = "?"~name;
			bindings ~= (name~" ");
			return this;
		}
		
		// Binds the value of property `prop` to variable ?bindTo and selects
		// ?<bindTo>Label. When analyzing a statement (analyzeVar != "item"),
		// the statement's own property uses the ps: prefix and every other
		// property the pq: prefix.
		Query bind(string prop, string bindTo)
		{
			prop = WikiData.propID(prop);
			if(analyzeVar=="item")
			{
				filters ~= ("?"~analyzeVar~" wdt:"~prop~" ?"~bindTo);
			}else{
				if(prop==analyzeProp)
				{
					filters ~= ("?"~analyzeVar~" ps:"~prop~" ?"~bindTo);
				}else{
					filters ~= ("?"~analyzeVar~" pq:"~prop~" ?"~bindTo);
				}
			}
			return bind(bindTo~"Label");
		}
		
		// Binds the quantity amount of property `prop`, restricted to values
		// given in `unit`, to ?bindTo and selects ?<bindTo>Label.
		// Throws: Exception when used on a statement query (analyzeVar != "item").
		Query bind(string prop, string unit, string bindTo)
		{
			prop = WikiData.propID(prop);
			unit = WikiData.unitID(unit);
			if(analyzeVar=="item")
			{
				filters ~= ("?"~analyzeVar~" p:"~prop~"/psv:"~prop~" [ wikibase:quantityAmount ?"~bindTo~"; wikibase:quantityUnit wd:"~unit~"; ]");
			}else{
				throw new Exception("Filtering sub-properties by unit is not implemented yet");
			}
			return bind(bindTo~"Label");
		}
		
		// Adds a binding for every member of obj's static type that carries a
		// @WikiDataProp attribute and records which result binding feeds which
		// member. A prop starting with '?' is selected as-is; any other prop is
		// bound to a variable named after the member.
		private void bindHelper(T2)(T2 obj)
		{
			//writeln("BindHelper for ",typeid(T2).to!string);
			foreach (i,m; obj.tupleof)
			{
				//writeln("  Member ",__traits(identifier, obj.tupleof[i]));
				static if(hasUDA!(obj.tupleof[i], WikiDataProp))
				{
					auto prop = getUDAs!(obj.tupleof[i], WikiDataProp)[0].prop;
					auto unit = getUDAs!(obj.tupleof[i], WikiDataProp)[0].unit;
					if(verbose) writeln("Auto-bind ",prop);
					if(prop[0]=='?')
					{
						bind(prop);
						binding2member[prop[1..$]] = __traits(identifier, obj.tupleof[i]);
					}else{
						if(unit=="")
						{
							bind(prop,__traits(identifier, obj.tupleof[i]));
						}else{
							bind(prop,unit,__traits(identifier, obj.tupleof[i]));
						}
						binding2member[__traits(identifier, obj.tupleof[i])~"Label"] = __traits(identifier, obj.tupleof[i]);
					}
				}
			}
		}
		
		// Appends an ascending ORDER BY clause on ?var.
		Query sortAsc(string var)
		{
			postfix ~= "ORDER BY ASC(?"~var~") ";
			return this;
		}
		
		// Appends a descending ORDER BY clause on ?var.
		Query sortDesc(string var)
		{
			postfix ~= "ORDER BY DESC(?"~var~") ";
			return this;
		}
		
		// Assembles the SPARQL text for result type T, runs it and returns the
		// <results> element (null on failure, see rawQuery).
		// dataLanguage defaults to WikiData.dataLang when empty.
		//
		// Note: this appends to filters and bindings and clears nameFilter, so
		// calling it again on the same Query adds the auto-bindings a second time.
		// NOTE(review): nameFilter is inserted into the query text unescaped.
		Element getXml(T=WdObject)(string dataLanguage="")
		{
			if(dataLanguage=="") dataLanguage=dataLang;
			if(nameFilter!="")
			{
				if(nameFilterCase)
				{
					filters ~= ("?item ?label \""~nameFilter~"\"@"~dataLanguage);
					if(nameFilterExact)
					{
						filters ~= "?item rdfs:label ?name";
						filters ~= ("FILTER regex(?name, \"^"~nameFilter~"$\")");
					}
				}else{
					filters ~= "?item rdfs:label ?name";
					if(nameFilterExact)
					{
						filters ~= ("FILTER regex(?name, \"^"~nameFilter~"$\", \"i\")");
					}else{
						filters ~= ("FILTER regex(?name, \""~nameFilter~"\", \"i\")");
					}
				}
				nameFilter="";
			}
			
			auto querystr = "SELECT REDUCED ";
			static if(is(T:WdObject))
			{
				querystr ~= "?item ";
			}
			
			// tupleof is evaluated on the static type passed to bindHelper, so the
			// base classes are visited one by one in addition to T itself.
			auto dummy = new T;
			foreach(t; BaseClassesTuple!(Unqual!T))
			{
				bindHelper(cast(t)(dummy));
			}
			bindHelper(dummy);
			
			querystr ~= (bindings~" WHERE { ");
			foreach(f;filters) querystr ~= (f~". ");
			querystr ~= ("SERVICE wikibase:label { bd:serviceParam wikibase:language \""~dataLanguage~","~codingLang~"\". } } "~postfix);
			
			return rawQuery(querystr);
		}
		
		// Runs the query and converts every result row into a new T.
		// For T derived from WdObject the "item" binding fills T.id (last path
		// segment of the value); all other bindings are copied to the members
		// recorded in binding2member.
		// Returns an empty list when the query failed or produced no <results>.
		T[] getList(T=WdObject)(string dataLanguage="")
		{
			auto xml = getXml!T(dataLanguage);
			T[] resultlist;
			// rawQuery reports failure with null; do not dereference it.
			if(xml is null) return resultlist;
			foreach(result;xml.elements)
			{
				auto item = new T;
				foreach(binding;result.elements)
				{
					static if(is(T:WdObject))
					{
						if(binding.bindingName=="item") item.id = binding.bindingValue.baseName;
					}
					auto targetMember = (binding.bindingName in binding2member);
					if(targetMember !is null)
					{
						setMember(item,*targetMember,binding.bindingValue);
					}
				}
				
				resultlist ~= item;
			}
			return resultlist;
		}
	}
	
	// Starts a query for items of the given category (native ID or name; ""
	// for no restriction) via the path wdt:P31/wdt:P279*, optionally filtered
	// by label.
	// caseSensitive and allowContain control how `name` is matched; the name
	// filter itself is generated later by Query.getXml.
	static Query find(string category, string name="", bool caseSensitive=true, bool allowContain=false)
	{
		auto q=new Query;
		if(category!="")
		{
			if(isNativeID(category))
			{
				q.filters ~= ("?item (wdt:P31/wdt:P279*) wd:"~category);
			}else{
				q.filters ~= ("VALUES (?categories) { "~name2IdList(category)~" }");
				q.filters ~= "?item (wdt:P31/wdt:P279*) ?categories";
			}
		}
		if(name!="")
		{
			q.nameFilter = name;
			q.nameFilterCase = caseSensitive;
			q.nameFilterExact = !allowContain;
		}
		return q;
	}
}
345 
// Base class for query results that represent a Wikidata object.
class WdObject
{
	string id; //Wikidata ID of this object, filled in by WikiData.Query.getList
	
	// Returns the first value of property `prop` (name or native ID) for this
	// object, or "" when there is no value or the query failed.
	// Throws: Exception (from WikiData.propID) when the property name is unknown.
	string get(string prop)
	{
		string bindTo = "val";
		prop = WikiData.propID(prop);
		auto xml = WikiData.rawQuery("SELECT ?"~bindTo~" WHERE { wd:"~id~" wdt:"~prop~" ?"~bindTo~". }");
		// rawQuery reports failure with null; do not dereference it.
		if(xml is null) return "";
		foreach(result;xml.elements) foreach(binding;result.elements) if(binding.bindingName==bindTo) return binding.bindingValue;
		return "";
	}
	
	// Starts a query over the statements of property `prop` on this object.
	// The statement node is bound to ?propVar; subsequent Query.bind calls
	// attach to that statement instead of ?item.
	WikiData.Query query(string prop, string propVar="prop")
	{
		prop = WikiData.propID(prop);
		auto q=new WikiData.Query;
		q.filters ~= ("wd:"~id~" p:"~prop~" ?"~propVar);
		q.analyzeVar = propVar;
		q.analyzeProp = prop;
		return q;
	}
}
369 
// A WdObject whose `name` member is filled from the ?itemLabel binding
// of the query result.
class WdNamedObject : WdObject
{
	@(WikiDataProp("?itemLabel"))
	string name;
}
374 
// Attribute (UDA) that marks a class member for automatic binding.
// prop: property name or ID, or a "?variable" to select an existing binding.
// unit: optional unit name or ID; leave empty for no unit restriction.
struct WikiDataProp
{
	string prop, unit;
}