"""Gradio demo that maps free-text product properties to ECLASS properties.

The script loads one pre-embedded ECLASS property corpus and one
SentenceTransformer model per supported language, exposes a ``predict``
search function, and wires it into a small Gradio Blocks interface.
"""
import os

import gradio as gr
import pandas as pd
import requests
import sentence_transformers
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

# Access token for the private ECLASS repositories; read once instead of
# once per resource. A missing variable still fails fast with KeyError.
HF_TOKEN = str(os.environ['private_token'])

# Endpoint queried with ?codedname=<classCode> to obtain the property filter.
FILTER_API_URL = 'https://bcon2-api.azurewebsites.net/api/eclass'
# Upper bound (seconds) for the filter request so a hung API cannot block the UI.
FILTER_API_TIMEOUT_S = 30
# Sentinel classCode meaning "search the whole corpus".
NO_FILTER = 'nofilter'
DEFAULT_TOP_K = 10
RESULT_COLUMNS = ['preferedName', 'irdi', 'score', 'definition']

# Import corpus embeddings
corpus_ger = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_ger', token=HF_TOKEN)['train'])
corpus_eng = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_eng', token=HF_TOKEN)['train'])
corpus_fr = pd.DataFrame(load_dataset('ECLASS-Standard/eclass_properties_fr', token=HF_TOKEN)['train'])

# Import models
model_ger = SentenceTransformer('ECLASS-Standard/gbert-base-eclass', token=HF_TOKEN)
model_eng = SentenceTransformer('ECLASS-Standard/mboth-distil-eng-quora-sentence', token=HF_TOKEN)
model_fr = SentenceTransformer('ECLASS-Standard/Sahajtomar-french_semantic', token=HF_TOKEN)

# Language name -> (embedding model, property corpus).
_RESOURCES = {
    'german': (model_ger, corpus_ger),
    'english': (model_eng, corpus_eng),
    'french': (model_fr, corpus_fr),
}


def _empty_predictions() -> pd.DataFrame:
    """Return a result frame with the expected columns and no rows."""
    return pd.DataFrame(columns=RESULT_COLUMNS)


def _fetch_class_irdis(classCode) -> list:
    """Ask the filter API which property identifiers belong to ``classCode``.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
    """
    # ``params`` lets requests URL-encode the user-supplied class code.
    response = requests.get(
        FILTER_API_URL,
        params={'codedname': classCode},
        timeout=FILTER_API_TIMEOUT_S,
    )
    response.raise_for_status()
    lines = response.text.split('\n')
    # Each identifier is taken as the 20 characters preceding the last
    # character of a line.
    # NOTE(review): presumably the last character is a delimiter or '\r' -
    # confirm against the API's response format.
    return [line[-21:-1] for line in lines]


# Definition of search function
def predict(name, description, language, classCode='nofilter', top_k=10) -> pd.DataFrame:
    """Find the ECLASS properties most similar to a described property.

    Args:
        name: Name of the property to look up.
        description: Description of the property; joined to ``name`` with
            ``'; '`` to form the query text.
        language: ``'german'``, ``'english'`` or ``'french'``; selects the
            model and corpus.
        classCode: ECLASS class code used to restrict the corpus through the
            filter API, or ``'nofilter'`` to search everything.
        top_k: Maximum number of matches. Floats are truncated to int and
            ``None`` falls back to 10.

    Returns:
        DataFrame with columns ``preferedName``, ``irdi``, ``score`` (rounded
        to two decimals) and ``definition``, one row per match. It has fewer
        than ``top_k`` rows when the (filtered) corpus is smaller, and no
        rows when nothing matches the filter or ``top_k`` is below 1.

    Raises:
        ValueError: if ``language`` is not supported.
        requests.RequestException: if the filter API request fails.
    """
    # Language selection
    try:
        model, corpus = _RESOURCES[language]
    except KeyError:
        raise ValueError(
            f'Unsupported language {language!r}; expected one of {sorted(_RESOURCES)}'
        ) from None

    # The value may arrive from a UI number field as a float or None.
    top_k = DEFAULT_TOP_K if top_k is None else int(top_k)
    if top_k < 1:
        return _empty_predictions()

    # Filter the ECLASS corpus
    if classCode == NO_FILTER:
        corpus_filtered = corpus
    else:
        properties_filtered_list = _fetch_class_irdis(classCode)
        corpus_filtered = corpus[corpus['irdi'].isin(properties_filtered_list)]
    # Reset the index on a copy so the shared module-level corpus is never
    # modified in place.
    corpus_filtered = corpus_filtered.reset_index(drop=True)
    if corpus_filtered.empty:
        return _empty_predictions()

    text = name + '; ' + description  # concatenate name and description
    query_embedding = model.encode(text, convert_to_tensor=True)  # build the query embedding

    # Convert the corpus embeddings into a tensor.
    # NOTE(review): presumably depends on the 0..n-1 index from the reset
    # above - confirm before changing the order of these steps.
    corpus_embeddings = torch.Tensor(corpus_filtered["Embeddings"])

    # Run the search; only the first (and only) query's hit list is needed.
    hits = semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]

    # Read out the model output. Iterating the hits themselves, not
    # range(top_k), avoids an IndexError when fewer hits come back.
    # Column positions: 0 = irdi, 1 = preferedName, 2 = definition.
    records = []
    for hit in hits:
        row_pos = hit['corpus_id']
        records.append({
            'preferedName': corpus_filtered.iat[row_pos, 1],
            'irdi': corpus_filtered.iat[row_pos, 0],
            'score': round(hit['score'], 2),
            'definition': corpus_filtered.iat[row_pos, 2],
        })
    return pd.DataFrame(records, columns=RESULT_COLUMNS)


# Header text of the demo page, kept at column 0 so no line is indented.
INTRO_MARKDOWN = """
# ECLASS-Search-Demo
This is a semantic search application that maps unknown product properties to the ECLASS standard. It is created by ECLASS e.V. in collaboration with the GART-labortory of the cologne university of applied science.
You can find further information in our [paper](https://www.researchgate.net/publication/382002596_Erstellung_eines_semantischen_Suchalgorithmus_zur_Abbildung_proprietarer_Merkmale_auf_den_ECLASS-Standard)"""

EXAMPLE_QUERIES = [
    ["LED grün", "", "german", "nofilter", 10],
    ["Abmessungen", "", "german", "27272803", 10],
    ["Kabel", "", "german", "27272803", 10],
    ["Umgebungstemperatur", "", "german", "27272803", 10],
    ["Reproduzierbarkeit", "", "german", "27272803", 10],
    ["Repeat accuracy", "", "english", "27272803", 10],
]

# gradio user interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=15):
            gr.Markdown(INTRO_MARKDOWN)
        with gr.Column(scale=1):
            # This Markdown cell contains only two newlines.
            gr.Markdown("\n\n")
    with gr.Row():
        # inputs
        name_tx = gr.Textbox(label="Name:", placeholder="Name of the Property", lines=1)
        description_tx = gr.Textbox(label="Description:", placeholder="Description of the Property", lines=1)
    with gr.Row():
        # inputs
        classcode_tx = gr.Textbox(
            value='nofilter',
            label="Filter with ECLASS ClassCode",
            placeholder="for no filter type: 'nofilter'",
            lines=1,
        )
        top_k_nu = gr.Number(value=10, label="Number of Matches")
        language_drop = gr.Dropdown(["german", "english", "french"], value='german', label="Select language")
    # button
    search = gr.Button("search")
    # output
    prediction_df = gr.Dataframe(headers=['preferedName', 'irdi', 'score', 'definition'])
    # Input order must match the positional parameters of predict().
    search_inputs = [name_tx, description_tx, language_drop, classcode_tx, top_k_nu]
    # defines search function for button "search"
    search.click(fn=predict, inputs=search_inputs, outputs=prediction_df)
    # examples
    gr.Examples(examples=EXAMPLE_QUERIES, inputs=search_inputs)

demo.launch(debug=True)