File size: 1,151 Bytes
c19ce61
 
a85bc9a
 
c19ce61
 
5334ec8
c19ce61
 
5334ec8
478308a
5334ec8
c19ce61
a85bc9a
c19ce61
a85bc9a
 
 
c19ce61
a85bc9a
 
 
 
 
5334ec8
a85bc9a
d7838fe
5334ec8
 
 
a85bc9a
 
 
 
 
 
c19ce61
a85bc9a
 
 
c19ce61
a85bc9a
478308a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os

import gradio as gr
import pandas as pd
import pymarc

from marcai.predict import predict
from marcai.process import process
from marcai.utils.parsing import record_dict
from marcai.pl import SimilarityVectorModel

# Absolute path of the directory that contains this script.
root = os.path.abspath(os.path.dirname(__file__))

# Identifier handed to SimilarityVectorModel.from_pretrained().
_MODEL_ID = "cdlib/marc-match-ai"

# Populated on first use by _get_model() so the pretrained model is loaded
# once per process instead of once per comparison request.
_model = None


def _get_model():
    """Return the shared pretrained model, loading it on the first call."""
    global _model
    if _model is None:
        # NOTE(review): reusing one instance assumes predict() does not
        # mutate the model -- confirm against marcai.predict.
        _model = SimilarityVectorModel.from_pretrained(_MODEL_ID)
    return _model


def _first_record(xml_file, label):
    """Parse *xml_file* as MARC XML and return its first record.

    Args:
        xml_file: Value accepted by ``pymarc.parse_xml_to_array``.
        label: Human-readable name of the input, used in error messages.

    Raises:
        ValueError: If parsing yields no records.
    """
    records = pymarc.parse_xml_to_array(xml_file)
    if not records:
        raise ValueError(f"{label} does not contain any MARC records.")
    return records[0]


def compare(file1, file2):
    """Score whether two uploaded MARC XML files describe the same record.

    Only the first record of each file is used.

    Args:
        file1: First MARC XML upload, as supplied by the Gradio file input.
        file2: Second MARC XML upload, as supplied by the Gradio file input.

    Returns:
        A dict with the keys ``"match"`` and ``"not match"``; the first holds
        the score returned by ``predict`` and the second its complement.

    Raises:
        ValueError: If either upload is missing or contains no records.
    """
    # Validate both inputs before doing any parsing so a missing upload is
    # reported clearly instead of failing inside the XML parser.
    missing = [
        label
        for label, upload in (("File 1", file1), ("File 2", file2))
        if upload is None
    ]
    if missing:
        raise ValueError(f"No file was uploaded for: {', '.join(missing)}.")

    # Load records
    record1 = _first_record(file1, "File 1")
    record2 = _first_record(file2, "File 2")

    # Turn into single-row dataframes
    df1 = pd.DataFrame.from_dict([record_dict(record1)])
    df2 = pd.DataFrame.from_dict([record_dict(record2)])

    df = process(df1, df2)

    model = _get_model()

    # Keep only the columns the model declares as its features.
    input_df = df[model.features]

    # Run model
    prediction = predict(model, input_df).item()

    return {"match": prediction, "not match": 1 - prediction}


# Build the two-upload UI around compare() and start serving it.
interface = gr.Interface(
    title="MARC Record Matcher",
    description="Upload two MARC XML files with one record each.",
    fn=compare,
    inputs=[
        gr.File(label="MARC XML File 1"),
        gr.File(label="MARC XML File 2"),
    ],
    # compare() returns a label -> score dict, rendered by the Label output.
    outputs=gr.Label(label="Classification"),
    allow_flagging="never",
)
interface.launch()