diff --git a/changelog/unreleased/PR#4375-language-model-refactoring.yml b/changelog/unreleased/PR#4375-language-model-refactoring.yml new file mode 100644 index 000000000000..95d3f6b3745c --- /dev/null +++ b/changelog/unreleased/PR#4375-language-model-refactoring.yml @@ -0,0 +1,9 @@ +title: Refactor of the language-models module to accommodate upcoming LLM-oriented features while avoiding code duplication. +type: changed +authors: + - name: Nicolò Rinaldi + - name: Anna Ruggero + - name: Alessandro Benedetti +links: +- name: PR#4375 + url: https://github.com/apache/solr/pull/4375 diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelException.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/LanguageModelException.java similarity index 78% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelException.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/LanguageModelException.java index 8709ebf69298..b047daa21e7c 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelException.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/LanguageModelException.java @@ -14,17 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.store; +package org.apache.solr.languagemodels; -public class TextToVectorModelException extends RuntimeException { +public class LanguageModelException extends RuntimeException { private static final long serialVersionUID = 1L; - public TextToVectorModelException(String message) { + public LanguageModelException(String message) { super(message); } - public TextToVectorModelException(String message, Exception cause) { + public LanguageModelException(String message, Exception cause) { super(message, cause); } } diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLanguageModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLanguageModel.java new file mode 100644 index 000000000000..b0a6ecc44809 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrLanguageModel.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.model; + +import java.util.Map; + +/** + * Abstract base class for Solr-managed wrappers around langchain4j used in {@code language-models} + * module + */ +public abstract class SolrLanguageModel { + + // common parameters + protected static final String TIMEOUT_PARAM = "timeout"; + protected static final String MAX_RETRIES_PARAM = "maxRetries"; + + protected final String name; + protected final Map params; + + protected SolrLanguageModel(String name, Map params) { + this.name = name; + this.params = params; + } + + public String getName() { + return name; + } + + public Map getParams() { + return params; + } + + /** Returns the class name of the underlying langchain4j model instance. */ + public abstract String getModelClassName(); +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/SolrTextToVectorModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java similarity index 84% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/SolrTextToVectorModel.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java index 21f7f8035be7..cb34d7b621b0 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/SolrTextToVectorModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/SolrTextToVectorModel.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.model; +package org.apache.solr.languagemodels.model; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.model.embedding.EmbeddingModel; @@ -28,26 +28,21 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.languagemodels.textvectorisation.store.TextToVectorModelException; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.LanguageModelException; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * This object wraps a {@link dev.langchain4j.model.embedding.EmbeddingModel} to encode text to - * vector. It's meant to be used as a managed resource with the {@link - * ManagedTextToVectorModelStore} + * This object wraps a {@link EmbeddingModel} to encode text to vector. 
It's meant to be used as a + * managed resource with the {@link ManagedTextToVectorModelStore} */ -public class SolrTextToVectorModel implements Accountable { +public class SolrTextToVectorModel extends SolrLanguageModel implements Accountable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(SolrTextToVectorModel.class); - private static final String TIMEOUT_PARAM = "timeout"; private static final String MAX_SEGMENTS_PER_BATCH_PARAM = "maxSegmentsPerBatch"; - private static final String MAX_RETRIES_PARAM = "maxRetries"; - private final String name; - private final Map params; private final EmbeddingModel textToVector; private final int hashCode; @@ -56,7 +51,7 @@ public static SolrTextToVectorModel getInstance( String className, String name, Map params) - throws TextToVectorModelException { + throws LanguageModelException { try { /* * The idea here is to build a {@link dev.langchain4j.model.embedding.EmbeddingModel} using inversion @@ -125,15 +120,14 @@ public static SolrTextToVectorModel getInstance( textToVector = (EmbeddingModel) builder.getClass().getMethod("build").invoke(builder); return new SolrTextToVectorModel(name, textToVector, params); } catch (final Exception e) { - throw new TextToVectorModelException("Model loading failed for " + className, e); + throw new LanguageModelException("Model loading failed for " + className, e); } } public SolrTextToVectorModel( String name, EmbeddingModel textToVector, Map params) { - this.name = name; + super(name, params); this.textToVector = textToVector; - this.params = params; this.hashCode = calculateHashCode(); } @@ -170,20 +164,12 @@ private int calculateHashCode() { @Override public boolean equals(Object obj) { if (this == obj) return true; - if (!(obj instanceof SolrTextToVectorModel)) return false; - final SolrTextToVectorModel other = (SolrTextToVectorModel) obj; + if (!(obj 
instanceof SolrTextToVectorModel other)) return false; return Objects.equals(textToVector, other.textToVector) && Objects.equals(name, other.name); } - public String getName() { - return name; - } - - public String getEmbeddingModelClassName() { + @Override + public String getModelClassName() { return textToVector.getClass().getName(); } - - public Map getParams() { - return params; - } } diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/package-info.java similarity index 88% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/package-info.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/package-info.java index 5e79341f9927..f385bff798d3 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/package-info.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/model/package-info.java @@ -15,5 +15,5 @@ * limitations under the License. */ -/** Contains model store related classes. */ -package org.apache.solr.languagemodels.textvectorisation.store; +/** Contains model related classes. 
*/ +package org.apache.solr.languagemodels.model; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/package-info.java similarity index 86% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/package-info.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/package-info.java index cadec9601749..42e741828e9d 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/model/package-info.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/package-info.java @@ -15,5 +15,5 @@ * limitations under the License. */ -/** APIs and classes for implementing text to vector logic. */ -package org.apache.solr.languagemodels.textvectorisation.model; +/** Contains the whole module for Language Models. */ +package org.apache.solr.languagemodels; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserPlugin.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java similarity index 94% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserPlugin.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java index 3c213928cb3f..9cbfbf5dc1f4 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserPlugin.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/TextToVectorQParserPlugin.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.search; +package org.apache.solr.languagemodels.search; import java.io.IOException; import java.util.Arrays; @@ -26,8 +26,8 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.model.SolrTextToVectorModel; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/package-info.java new file mode 100644 index 000000000000..2a7237bd847f --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/search/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** APIs and classes for implementing language models QueryParsers. */ +package org.apache.solr.languagemodels.search; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedLanguageModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedLanguageModelStore.java new file mode 100644 index 000000000000..f7c547223907 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedLanguageModelStore.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import net.jcip.annotations.ThreadSafe; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.LanguageModelException; +import org.apache.solr.languagemodels.model.SolrLanguageModel; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract base class for {@link ManagedResource} wrappers that expose a {@link LanguageModelStore} + * via the REST API. Concrete subclasses supply the REST endpoint and the model instantiation logic. + */ +@ThreadSafe +public abstract class ManagedLanguageModelStore + extends ManagedResource implements ManagedResource.ChildResourceSupport { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final String MODELS_JSON_FIELD = "models"; + + protected static final String CLASS_KEY = "class"; + protected static final String NAME_KEY = "name"; + protected static final String PARAMS_KEY = "params"; + + private final LanguageModelStore store; + private Object managedData; + + protected ManagedLanguageModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + store = new LanguageModelStore(); + } + + /** + * Creates a model instance from the JSON map persisted in the managed resource storage. 
+ * + * @param loader the resource loader for the current core + * @param modelMap a map containing {@code "class"}, {@code "name"}, and {@code "params"} keys + * @return the instantiated model + */ + protected abstract ModelT fromModelMap(SolrResourceLoader loader, Map modelMap); + + private static LinkedHashMap toModelMap(SolrLanguageModel model) { + final LinkedHashMap modelMap = new LinkedHashMap<>(3, 1.0f); + modelMap.put(NAME_KEY, model.getName()); + modelMap.put(CLASS_KEY, model.getModelClassName()); + modelMap.put(PARAMS_KEY, model.getParams()); + return modelMap; + } + + @Override + protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData) + throws SolrException { + store.clear(); + this.managedData = managedData; + } + + public void loadStoredModels() { + log.info("------ managed models ~ loading ------"); + if ((managedData != null) && (managedData instanceof List)) { + @SuppressWarnings("unchecked") + final List> models = (List>) managedData; + for (final Map model : models) { + addModelFromMap(model); + } + } + } + + private void addModelFromMap(Map modelMap) { + try { + addModel(fromModelMap(solrResourceLoader, modelMap)); + } catch (final LanguageModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + public void addModel(ModelT model) throws SolrException { + try { + if (log.isInfoEnabled()) { + log.info("adding model {}", model.getName()); + } + store.addModel(model); + } catch (final LanguageModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (updates instanceof List) { + final List> models = (List>) updates; + for (final Map model : models) { + addModelFromMap(model); + } + } + if (updates instanceof Map) { + addModelFromMap((Map) updates); + } + return modelsAsManagedResources(store.getModels()); + } + + @Override 
+ public void doDeleteChild(BaseSolrResource endpoint, String childId) { + store.delete(childId); + storeManagedData(applyUpdatesToManagedData(null)); + } + + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + final SolrQueryResponse response = endpoint.getSolrResponse(); + response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels())); + } + + public ModelT getModel(String modelName) { + return store.getModel(modelName); + } + + private static List modelsAsManagedResources(List models) { + return models.stream().map(ManagedLanguageModelStore::toModelMap).collect(Collectors.toList()); + } + + @Override + public String toString() { + return getClass().getSimpleName() + " [store=" + store + "]"; + } + + // Inner Data Structure to deal with Store persistence + private class LanguageModelStore { + + private final Map availableModels; + + public LanguageModelStore() { + availableModels = Collections.synchronizedMap(new LinkedHashMap<>()); + } + + public ModelT getModel(String name) { + return availableModels.get(name); + } + + public void clear() { + availableModels.clear(); + } + + public List getModels() { + synchronized (availableModels) { + final List availableModelsValues = new ArrayList<>(availableModels.values()); + return Collections.unmodifiableList(availableModelsValues); + } + } + + @Override + public String toString() { + return "LanguageModelStore [availableModels=" + availableModels.keySet() + "]"; + } + + public ModelT delete(String modelName) { + return availableModels.remove(modelName); + } + + public void addModel(ModelT modelData) throws LanguageModelException { + final String name = modelData.getName(); + if (availableModels.putIfAbsent(name, modelData) != null) { + throw new LanguageModelException( + "model '" + name + "' already exists. 
Please use a different name"); + } + } + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java new file mode 100644 index 000000000000..d0f364062e7a --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/ManagedTextToVectorModelStore.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.store.rest; + +import java.util.Map; +import net.jcip.annotations.ThreadSafe; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.model.SolrTextToVectorModel; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; + +/** Managed Resource wrapper for the text-to-vector model store, exposed via REST */ +@ThreadSafe +public class ManagedTextToVectorModelStore + extends ManagedLanguageModelStore { + + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/text-to-vector-model-store"; + + public static void registerManagedTextToVectorModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, ManagedTextToVectorModelStore.class, managedResourceObserver); + } + + public static ManagedTextToVectorModelStore getManagedModelStore(SolrCore core) { + return (ManagedTextToVectorModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + @Override + @SuppressWarnings("unchecked") + protected SolrTextToVectorModel fromModelMap( + SolrResourceLoader solrResourceLoader, Map textToVectorModel) { + return SolrTextToVectorModel.getInstance( + solrResourceLoader, + (String) textToVectorModel.get(CLASS_KEY), // modelClassName + (String) textToVectorModel.get(NAME_KEY), // modelName + (Map) textToVectorModel.get(PARAMS_KEY)); + } + + public ManagedTextToVectorModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/package-info.java 
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/package-info.java new file mode 100644 index 000000000000..dd4548c93a5d --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/store/rest/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Contains the REST-related classes for the model stores. */ +package org.apache.solr.languagemodels.store.rest; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelStore.java deleted file mode 100644 index 7d24d25f57e3..000000000000 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/TextToVectorModelStore.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.languagemodels.textvectorisation.store; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; - -/** Simple store to manage CRUD operations on the {@link SolrTextToVectorModel} */ -public class TextToVectorModelStore { - - private final Map availableModels; - - public TextToVectorModelStore() { - availableModels = Collections.synchronizedMap(new LinkedHashMap<>()); - } - - public SolrTextToVectorModel getModel(String name) { - return availableModels.get(name); - } - - public void clear() { - availableModels.clear(); - } - - public List getModels() { - synchronized (availableModels) { - final List availableModelsValues = - new ArrayList(availableModels.values()); - return Collections.unmodifiableList(availableModelsValues); - } - } - - @Override - public String toString() { - return "ModelStore [availableModels=" + availableModels.keySet() + "]"; - } - - public SolrTextToVectorModel delete(String modelName) { - return availableModels.remove(modelName); - } - - public void addModel(SolrTextToVectorModel modeldata) throws TextToVectorModelException { - final String name = modeldata.getName(); - if (availableModels.putIfAbsent(modeldata.getName(), modeldata) != null) { - throw new 
TextToVectorModelException( - "model '" + name + "' already exists. Please use a different name"); - } - } -} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java deleted file mode 100644 index 70c03ffc47ea..000000000000 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/ManagedTextToVectorModelStore.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.languagemodels.textvectorisation.store.rest; - -import java.lang.invoke.MethodHandles; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import net.jcip.annotations.ThreadSafe; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.textvectorisation.store.TextToVectorModelException; -import org.apache.solr.languagemodels.textvectorisation.store.TextToVectorModelStore; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.rest.BaseSolrResource; -import org.apache.solr.rest.ManagedResource; -import org.apache.solr.rest.ManagedResourceObserver; -import org.apache.solr.rest.ManagedResourceStorage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Managed Resource wrapper for the {@link TextToVectorModelStore} to expose it via REST */ -@ThreadSafe -public class ManagedTextToVectorModelStore extends ManagedResource - implements ManagedResource.ChildResourceSupport { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - /** the model store rest endpoint */ - public static final String REST_END_POINT = "/schema/text-to-vector-model-store"; - - /** Managed model store: the name of the attribute containing all the models of a model store */ - private static final String MODELS_JSON_FIELD = "models"; - - /** name of the attribute containing a class */ - static final String CLASS_KEY = "class"; - - /** name of the attribute containing a name */ - static final String NAME_KEY = "name"; - - /** name of the attribute containing parameters */ - static final String PARAMS_KEY = "params"; - - public static void registerManagedTextToVectorModelStore( - 
SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { - solrResourceLoader - .getManagedResourceRegistry() - .registerManagedResource( - REST_END_POINT, ManagedTextToVectorModelStore.class, managedResourceObserver); - } - - public static ManagedTextToVectorModelStore getManagedModelStore(SolrCore core) { - return (ManagedTextToVectorModelStore) core.getRestManager().getManagedResource(REST_END_POINT); - } - - /** - * Returns the available models as a list of Maps objects. After an update the managed resources - * needs to return the resources in this format in order to store in json somewhere (zookeeper, - * disk...) - * - * @return the available models as a list of Maps objects - */ - private static List modelsAsManagedResources(List models) { - return models.stream() - .map(ManagedTextToVectorModelStore::toModelMap) - .collect(Collectors.toList()); - } - - @SuppressWarnings("unchecked") - public static SolrTextToVectorModel fromModelMap( - SolrResourceLoader solrResourceLoader, Map embeddingModel) { - return SolrTextToVectorModel.getInstance( - solrResourceLoader, - (String) embeddingModel.get(CLASS_KEY), // modelClassName - (String) embeddingModel.get(NAME_KEY), // modelName - (Map) embeddingModel.get(PARAMS_KEY)); - } - - private static LinkedHashMap toModelMap(SolrTextToVectorModel model) { - final LinkedHashMap modelMap = new LinkedHashMap<>(5, 1.0f); - modelMap.put(NAME_KEY, model.getName()); - modelMap.put(CLASS_KEY, model.getEmbeddingModelClassName()); - modelMap.put(PARAMS_KEY, model.getParams()); - return modelMap; - } - - private final TextToVectorModelStore store; - private Object managedData; - - public ManagedTextToVectorModelStore( - String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) - throws SolrException { - super(resourceId, loader, storageIO); - store = new TextToVectorModelStore(); - } - - @Override - protected ManagedResourceStorage createStorage( - 
ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException { - return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1); - } - - @Override - protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData) - throws SolrException { - store.clear(); - this.managedData = managedData; - } - - public void loadStoredModels() { - log.info("------ managed models ~ loading ------"); - - if ((managedData != null) && (managedData instanceof List)) { - @SuppressWarnings({"unchecked"}) - final List> textToVectorModels = (List>) managedData; - for (final Map textToVectorModel : textToVectorModels) { - addModelFromMap(textToVectorModel); - } - } - } - - private void addModelFromMap(Map modelMap) { - try { - addModel(fromModelMap(solrResourceLoader, modelMap)); - } catch (final TextToVectorModelException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } - } - - public void addModel(SolrTextToVectorModel model) throws TextToVectorModelException { - try { - if (log.isInfoEnabled()) { - log.info("adding model {}", model.getName()); - } - store.addModel(model); - } catch (final TextToVectorModelException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } - } - - @SuppressWarnings("unchecked") - @Override - protected Object applyUpdatesToManagedData(Object updates) { - if (updates instanceof List) { - final List> textToVectorModels = (List>) updates; - for (final Map textToVectorModel : textToVectorModels) { - addModelFromMap(textToVectorModel); - } - } - - if (updates instanceof Map) { - final Map map = (Map) updates; - addModelFromMap(map); - } - - return modelsAsManagedResources(store.getModels()); - } - - @Override - public void doDeleteChild(BaseSolrResource endpoint, String childId) { - store.delete(childId); - storeManagedData(applyUpdatesToManagedData(null)); - } - - /** - * Called to retrieve a named part (the given childId) of the resource at the given 
endpoint. - * Note: since we have a unique child managed store we ignore the childId. - */ - @Override - public void doGet(BaseSolrResource endpoint, String childId) { - final SolrQueryResponse response = endpoint.getSolrResponse(); - response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels())); - } - - public SolrTextToVectorModel getModel(String modelName) { - return store.getModel(modelName); - } - - @Override - public String toString() { - return "ManagedModelStore [store=" + store + "]"; - } -} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/package-info.java deleted file mode 100644 index ca70f6d96110..000000000000 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/store/rest/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Contains the {@link org.apache.solr.rest.ManagedResource} that encapsulate the model stores. 
*/ -package org.apache.solr.languagemodels.textvectorisation.store.rest; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessor.java similarity index 93% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessor.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessor.java index 116b4ba125bd..44c75988e7fc 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessor.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.update.processor; +package org.apache.solr.languagemodels.update.processor; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -23,7 +23,7 @@ import java.util.List; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; +import org.apache.solr.languagemodels.model.SolrTextToVectorModel; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -32,7 +32,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class TextToVectorUpdateProcessor extends UpdateRequestProcessor { +public class TextToVectorUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private IndexSchema schema; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java similarity index 94% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java index f89766337aef..f8c35906093c 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.update.processor; +package org.apache.solr.languagemodels.update.processor.factory; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.RequiredSolrParams; @@ -23,8 +23,9 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.model.SolrTextToVectorModel; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.update.processor.TextToVectorUpdateProcessor; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.rest.ManagedResource; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/package-info.java similarity index 85% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/package-info.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/package-info.java index 04aebea77890..94a7cf8104b8 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/search/package-info.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/factory/package-info.java @@ -15,5 +15,5 @@ * limitations under the License. */ -/** APIs and classes for implementing text to vector QueryParsers. */ -package org.apache.solr.languagemodels.textvectorisation.search; +/** Contains update request processor factories. 
*/ +package org.apache.solr.languagemodels.update.processor.factory; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/package-info.java similarity index 91% rename from solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/package-info.java rename to solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/package-info.java index 877a4fa081b8..c3d500fbec5d 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/package-info.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/update/processor/package-info.java @@ -16,4 +16,4 @@ */ /** Contains update request processor related classes. */ -package org.apache.solr.languagemodels.textvectorisation.update.processor; +package org.apache.solr.languagemodels.update.processor; diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model.json b/solr/modules/language-models/src/test-files/modelExamples/dummy-model.json deleted file mode 100644 index 115766e8612e..000000000000 --- a/solr/modules/language-models/src/test-files/modelExamples/dummy-model.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "class": "org.apache.solr.languagemodels.textvectorisation.model.DummyEmbeddingModel", - "name": "dummy-1", - "params": { - "embedding": [1.0, 2.0, 3.0, 4.0] - } -} diff --git a/solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json b/solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json deleted file mode 100644 index 76e704f3ac44..000000000000 --- a/solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "class": 
"org.apache.solr.languagemodels.textvectorisation.model.ExceptionThrowingEmbeddingModel", - "name": "exception-throwing-model", - "params": { - } -} diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml index 80054a9775a2..5da6c14c04e8 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml @@ -23,7 +23,7 @@ + class="org.apache.solr.languagemodels.search.TextToVectorQParserPlugin" /> - + _text_ vector dummy-1 diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models.xml index e910d9118267..3910f769a697 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models.xml @@ -23,7 +23,7 @@ + class="org.apache.solr.languagemodels.search.TextToVectorQParserPlugin" /> - + _text_ vector dummy-1 @@ -64,7 +64,7 @@ - + _text_ vector exception-throwing-model @@ -73,7 +73,7 @@ - + string_field vector dummy-1 @@ -83,7 +83,7 @@ - + string_field vector dummy-1 diff --git a/solr/modules/language-models/src/test-files/modelExamples/cohere-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/cohere-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/cohere-model.json rename to solr/modules/language-models/src/test-files/textToVectorModelExamples/cohere-model.json diff --git 
a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-ambiguous.json similarity index 53% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json rename to solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-ambiguous.json index c9fb4dc48dbb..48403b6b3497 100644 --- a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json +++ b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-ambiguous.json @@ -1,5 +1,5 @@ { - "class": "org.apache.solr.languagemodels.textvectorisation.model.DummyEmbeddingModel", + "class": "org.apache.solr.languagemodels.model.DummyEmbeddingModel", "name": "dummy-1", "params": { "embedding": [1.0, 2.0, 3.0, 4.0], diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-unsupported.json similarity index 53% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json rename to solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-unsupported.json index cddbacfdac39..1df1e351c941 100644 --- a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json +++ b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model-unsupported.json @@ -1,5 +1,5 @@ { - "class": "org.apache.solr.languagemodels.textvectorisation.model.DummyEmbeddingModel", + "class": "org.apache.solr.languagemodels.model.DummyEmbeddingModel", "name": "dummy-1", "params": { "embedding": [1.0, 2.0, 3.0, 4.0], diff --git a/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model.json new file 
mode 100644 index 000000000000..8ce8f53de947 --- /dev/null +++ b/solr/modules/language-models/src/test-files/textToVectorModelExamples/dummy-model.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.model.DummyEmbeddingModel", + "name": "dummy-1", + "params": { + "embedding": [1.0, 2.0, 3.0, 4.0] + } +} diff --git a/solr/modules/language-models/src/test-files/textToVectorModelExamples/exception-throwing-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/exception-throwing-model.json new file mode 100644 index 000000000000..c0ef1aeedd84 --- /dev/null +++ b/solr/modules/language-models/src/test-files/textToVectorModelExamples/exception-throwing-model.json @@ -0,0 +1,6 @@ +{ + "class": "org.apache.solr.languagemodels.model.ExceptionThrowingEmbeddingModel", + "name": "exception-throwing-model", + "params": { + } +} diff --git a/solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/huggingface-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json rename to solr/modules/language-models/src/test-files/textToVectorModelExamples/huggingface-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/mistralai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json rename to solr/modules/language-models/src/test-files/textToVectorModelExamples/mistralai-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/openai-model.json b/solr/modules/language-models/src/test-files/textToVectorModelExamples/openai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/openai-model.json rename to 
solr/modules/language-models/src/test-files/textToVectorModelExamples/openai-model.json diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java index a54e8e1875d5..f555f2e3b973 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java @@ -26,7 +26,7 @@ import java.util.List; import org.apache.commons.io.file.PathUtils; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.util.RestTestBase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,11 +38,12 @@ public class TestLanguageModelBase extends RestTestBase { protected static Path tmpSolrHome; protected static Path tmpConfDir; - public static final String MODEL_FILE_NAME = "_schema_text-to-vector-model-store.json"; + public static final String TEXT_TO_VECTOR_MODEL_FILE_NAME = + "_schema_text-to-vector-model-store.json"; protected static final String COLLECTION = "collection1"; protected static final String CONF_DIR = COLLECTION + "/conf"; - protected static Path embeddingModelStoreFile = null; + protected static Path textToVectorModelStoreFile = null; protected static String IDField = "id"; protected static String vectorField = "vector"; @@ -61,17 +62,17 @@ protected static void initFolders(boolean isPersistent) throws Exception { tmpSolrHome = createTempDir(); tmpConfDir = tmpSolrHome.resolve(CONF_DIR); PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath()); - final Path modelStore = tmpConfDir.resolve(MODEL_FILE_NAME); + final Path textToVectorStore = 
tmpConfDir.resolve(TEXT_TO_VECTOR_MODEL_FILE_NAME); if (isPersistent) { - embeddingModelStoreFile = modelStore; + textToVectorModelStoreFile = textToVectorStore; } - if (Files.exists(modelStore)) { + if (Files.exists(textToVectorStore)) { if (log.isInfoEnabled()) { - log.info("remove model store config file in {}", modelStore.toAbsolutePath()); + log.info("remove model store config file in {}", textToVectorStore.toAbsolutePath()); } - Files.delete(modelStore); + Files.delete(textToVectorStore); } System.setProperty("managed.schema.mutable", "true"); @@ -87,7 +88,8 @@ protected static void afterTest() throws Exception { } public static void loadModel(String fileName, String status) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = + TestLanguageModelBase.class.getResource("/textToVectorModelExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( @@ -97,7 +99,8 @@ public static void loadModel(String fileName, String status) throws Exception { } public static void loadModel(String fileName) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = + TestLanguageModelBase.class.getResource("/textToVectorModelExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModel.java similarity index 97% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModel.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModel.java index cbb966348f08..bb813537b52d 
100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModel.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModel.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.languagemodels.textvectorisation.model; +package org.apache.solr.languagemodels.model; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModelTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModelTest.java similarity index 96% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModelTest.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModelTest.java index 0948af705eb4..7515e0dc3ddb 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/DummyEmbeddingModelTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/DummyEmbeddingModelTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.model; +package org.apache.solr.languagemodels.model; import org.apache.solr.SolrTestCase; import org.junit.Test; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/ExceptionThrowingEmbeddingModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingEmbeddingModel.java similarity index 96% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/ExceptionThrowingEmbeddingModel.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingEmbeddingModel.java index c0c3b52bf399..01eca396381e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/model/ExceptionThrowingEmbeddingModel.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/model/ExceptionThrowingEmbeddingModel.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.model; +package org.apache.solr.languagemodels.model; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java similarity index 99% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java index 95395c185656..3f0b0a1f0c6d 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/search/TextToVectorQParserTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.search; +package org.apache.solr.languagemodels.search; import java.util.Arrays; import java.util.Locale; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java similarity index 94% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java index 244094b8764e..4b414a18de6e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestManagedLanguageModelStoreInitialization.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.store.rest; +package org.apache.solr.languagemodels.store.rest; import org.apache.solr.languagemodels.TestLanguageModelBase; import org.junit.After; import org.junit.Test; -public class TestManagedModelStoreInitialization extends TestLanguageModelBase { +public class TestManagedLanguageModelStoreInitialization extends TestLanguageModelBase { @After public void cleanUp() throws Exception { diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java similarity index 98% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManager.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java index 66b488848548..b3dd51a59445 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManager.java @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.store.rest; +package org.apache.solr.languagemodels.store.rest; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.languagemodels.TestLanguageModelBase; -import org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin; +import org.apache.solr.languagemodels.search.TextToVectorQParserPlugin; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceStorage; import org.apache.solr.rest.RestManager; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java similarity index 89% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManagerPersistence.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java index 92e8b68244e6..21f8ba4b6dc9 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestModelManagerPersistence.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/store/rest/TestModelManagerPersistence.java @@ -14,13 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.store.rest; +package org.apache.solr.languagemodels.store.rest; -import static java.nio.charset.StandardCharsets.UTF_8; - -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import org.apache.solr.common.util.Utils; import org.apache.solr.languagemodels.TestLanguageModelBase; import org.junit.After; import org.junit.Before; @@ -38,15 +33,6 @@ public void cleanup() throws Exception { afterTest(); } - @Test - public void testModelAreStoredCompact() throws Exception { - loadModel("cohere-model.json"); - - final String JSONOnDisk = Files.readString(embeddingModelStoreFile, StandardCharsets.UTF_8); - Object objectFromDisk = Utils.fromJSONString(JSONOnDisk); - assertEquals(new String(Utils.toJSON(objectFromDisk, -1), UTF_8), JSONOnDisk); - } - @Test public void testModelStorePersistence() throws Exception { // check models are empty diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java similarity index 98% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java index 2dd9cda1a585..75a9f206f14e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/TextToVectorUpdateProcessorTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.update.processor; +package org.apache.solr.languagemodels.update.processor; import java.io.IOException; import java.util.Map; @@ -24,7 +24,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.languagemodels.TestLanguageModelBase; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java similarity index 96% rename from solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java rename to solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java index 5ccb9d95e605..02617c646364 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactoryTest.java @@ -14,15 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.languagemodels.textvectorisation.update.processor; +package org.apache.solr.languagemodels.update.processor.factory; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.languagemodels.TestLanguageModelBase; -import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel; -import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.apache.solr.languagemodels.model.SolrTextToVectorModel; +import org.apache.solr.languagemodels.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.junit.After; diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc index 4d67da152419..e18632462674 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc @@ -423,7 +423,7 @@ See the README files associated with each module for details: The {solr-javadocs}/modules/language-models/index.html[`language-models`] module provides:: -{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.html[TextToVectorUpdateProcessorFactory]:: Update processor which vectorises a textual field in input and adds the resulting vector as the value of a new field. 
+{solr-javadocs}/modules/language-models/org/apache/solr/languagemodels/update/processor/factory/TextToVectorUpdateProcessorFactory.html[TextToVectorUpdateProcessorFactory]:: Update processor which vectorises a textual field in input and adds the resulting vector as the value of a new field. It uses external text to vectors language models to perform the vectorisation for each processed document. For more information: xref:query-guide:text-to-vector.adoc[Update Request Processor] diff --git a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc index aafafd861fb6..7e3843230cf7 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc @@ -85,7 +85,7 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details. [source,xml] ---- - + _text_ vector dummy-1 @@ -97,7 +97,7 @@ See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details. + [source,xml] ---- - + ---- [NOTE] @@ -301,7 +301,7 @@ To vectorise textual fields of your documents at indexing time you need to confi [source,xml] ---- - + _text_ vector dummy-1 @@ -355,7 +355,7 @@ This can be done in Solr defining two update request processors chains: one that ... - + _text_ vector dummy-1 @@ -394,7 +394,7 @@ You still define two chains, but this time the 'vectorisation' one only includes ---- - + _text_ vector dummy-1 @@ -434,9 +434,10 @@ Faceting or querying on the boolean 'vectorised' field can also give you a quick === Running a Text-to-Vector Query Before running a Text-to-Vector query, ensure that the `knn_text_to_vector` query parser is declared in `solrconfig.xml`: + [source,xml] ---- - + ---- To run a query that vectorises your query text, using a model you previously uploaded is simple: