Hi,
Is the search engine suitable for making German content searchable?
Yes, you need to set charset_table = non_cjk to include the non-Asian character sets. There is also German morphology processing, using either a lemmatizer or a stemmer (see Manticore Search Manual: Creating a table > NLP and tokenization > Morphology).
If you want to see German content search in action, you can have a look at our site, myHealthbox - Die Suchmaschine für Arzneimittelinformationen, where we have indexed thousands of documents and German data.
Hope this helps.
Roberto
1 Like
Vector search with auto-embeddings works with German content too, e.g.:
mysql> drop table if exists test; CREATE TABLE test (f text, vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME='aari1995/German_Semantic_STS_V2' FROM='f' ); insert into test(id, f) values(1, 'Brot'),(2, 'Vogel'),(3, 'Auto'),(4, 'schönes Wetter'),(5, 'gemütliches Haus'); select id, f, knn_dist() from test where knn(vec, 10, 'Gebäck'); select id, f, knn_dist() from test where knn(vec, 10, 'Wagen'); select id, f, knn_dist() from test where knn(vec, 10, 'gutes Klima'); select id, f, knn_dist() from test where knn(vec, 10, 'behagliche Wohnung');
--------------
drop table if exists test
--------------
Query OK, 0 rows affected (0.01 sec)
--------------
CREATE TABLE test (f text, vec FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME='aari1995/German_Semantic_STS_V2' FROM='f' )
--------------
Query OK, 0 rows affected (0.00 sec)
--------------
insert into test(id, f) values(1, 'Brot'),(2, 'Vogel'),(3, 'Auto'),(4, 'schönes Wetter'),(5, 'gemütliches Haus')
--------------
Query OK, 5 rows affected (0.74 sec)
--------------
select id, f, knn_dist() from test where knn(vec, 10, 'Gebäck')
--------------
+------+-------------------+------------+
| id   | f                 | knn_dist() |
+------+-------------------+------------+
|    1 | Brot              | 0.47242486 |
|    4 | schönes Wetter    | 0.92988324 |
|    5 | gemütliches Haus  | 0.95076525 |
|    3 | Auto              |   1.011164 |
|    2 | Vogel             | 1.01821077 |
+------+-------------------+------------+
5 rows in set (0.05 sec)
--- 5 out of 5 results in 54ms ---
--------------
select id, f, knn_dist() from test where knn(vec, 10, 'Wagen')
--------------
+------+-------------------+------------+
| id   | f                 | knn_dist() |
+------+-------------------+------------+
|    3 | Auto              | 0.48945644 |
|    2 | Vogel             | 0.88367367 |
|    5 | gemütliches Haus  | 0.91333634 |
|    4 | schönes Wetter    | 0.99366325 |
|    1 | Brot              | 1.00853014 |
+------+-------------------+------------+
5 rows in set (0.06 sec)
--- 5 out of 5 results in 53ms ---
--------------
select id, f, knn_dist() from test where knn(vec, 10, 'gutes Klima')
--------------
+------+-------------------+------------+
| id   | f                 | knn_dist() |
+------+-------------------+------------+
|    4 | schönes Wetter    | 0.38445532 |
|    5 | gemütliches Haus  | 0.75586104 |
|    3 | Auto              | 0.94646019 |
|    1 | Brot              | 0.94855773 |
|    2 | Vogel             | 0.97620988 |
+------+-------------------+------------+
5 rows in set (0.05 sec)
--- 5 out of 5 results in 53ms ---
--------------
select id, f, knn_dist() from test where knn(vec, 10, 'behagliche Wohnung')
--------------
+------+-------------------+------------+
| id   | f                 | knn_dist() |
+------+-------------------+------------+
|    5 | gemütliches Haus  |   0.319957 |
|    4 | schönes Wetter    | 0.80002886 |
|    1 | Brot              | 0.93668818 |
|    3 | Auto              | 0.94838274 |
|    2 | Vogel             | 0.98493361 |
+------+-------------------+------------+
5 rows in set (0.08 sec)
--- 5 out of 5 results in 83ms ---