Commit 8f62992f by Robbie Hott

Added Java version of the CBW reconciliation example.

parent 5f25d92b
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse classpath descriptor for the CBW-Reconciler project. -->
<classpath>
<!-- Java sources live in src/ and are compiled into target/classes. -->
<classpathentry kind="src" output="target/classes" path="src">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- JRE container: the JavaSE-1.8 execution environment. -->
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- m2e container: exposes the dependencies declared in pom.xml. -->
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Default output folder for compiled classes. -->
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor for the CBW-Reconciler project. -->
<projectDescription>
<name>CBW-Reconciler</name>
<comment></comment>
<projects>
</projects>
<!-- Builders run for this project: the JDT Java builder, then the m2e Maven builder. -->
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<!-- Project natures: Maven (m2e) and Java (JDT). -->
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
<!-- Maven build descriptor for the CBW-Reconciler example (CBWReconcileUI + CBWReconcileWorker). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>CBW-Reconciler</groupId>
<artifactId>CBW-Reconciler</artifactId>
<version>0.0.1-SNAPSHOT</version>
<build>
<!-- Sources sit directly under src/ rather than the Maven default src/main/java. -->
<sourceDirectory>src</sourceDirectory>
<plugins>
<!-- Compile with Java 8 source and target levels. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<!-- CSV reading and writing (com.opencsv.CSVReader / CSVWriter in CBWReconcileWorker). -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>3.9</version>
</dependency>
<!-- Provides org.apache.commons.io.IOUtils, used to read the HTTP response body into a String. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
<version>1.3.2</version>
</dependency>
<!-- Provides org.json.JSONObject, used to parse the JSON returned by the SNAC server. -->
<dependency>
<groupId>org.json</groupId>
<artifactId>org.json</artifactId>
<version>chargebee-1.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
/**
* SNAC Reconciliation Example (CBW Java Example)
*
* For the full license, see the LICENSE file in the repository root
*
* @author Robbie Hott
* @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause
* @copyright 2017 the Rector and Visitors of the University of Virginia, and
* the Regents of the University of California
*/
import java.awt.BorderLayout;
import java.awt.FlowLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JProgressBar;
import javax.swing.JSeparator;
import javax.swing.SwingConstants;
import javax.swing.SwingUtilities;
import javax.swing.filechooser.FileNameExtensionFilter;
/**
 * CBW Reconciler UI
 *
 * Swing front end for the reconciliation example. The user picks a source CSV file and a
 * destination CSV file, then starts a {@link CBWReconcileWorker} that does the actual
 * reconciliation in the background while this frame shows its progress.
 *
 * @author Robbie Hott
 *
 */
public class CBWReconcileUI extends javax.swing.JFrame {

    /**
     * Serial ID
     */
    private static final long serialVersionUID = -8115653654144568030L;

    /**
     * Logger used instead of silently discarding unexpected failures.
     */
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(CBWReconcileUI.class.getName());

    /**
     * Width shared by the frame, the panels and the separators.
     */
    private static final int FRAME_WIDTH = 700;

    /**
     * GUI Variables
     */
    private JPanel title;
    private JLabel titleLabel;
    private JPanel bodyPanel;
    private JPanel reconcilePanel;
    private JSeparator jSeparator2;
    private JButton reconcileSNAC;
    private JSeparator jSeparator3;
    private JProgressBar reconcileProgressBar;
    private JSeparator jSeparator4;
    private JLabel reconcileProgressLabel;
    private JSeparator jSeparator1;
    private JLabel fromCSVFileLabel;
    private JLabel fromCSVFileLocationLabel;
    private JButton fromCSVFileButton;
    private JLabel toCSVFileLabel;
    private JLabel toCSVFileLocationLabel;
    private JButton toCSVFileButton;

    /**
     * Reconciliation File Variables (absolute paths; null until the user has chosen a file)
     */
    private String fromCSVFile;
    private String toCSVFile;

    /**
     * Main Method
     *
     * Creates an instance of this GUI on the event dispatch thread and shows it
     * centered on the screen.
     *
     * @param args Command-line arguments (unused)
     */
    public static void main(String[] args) {
        //Mac Niceness
        System.setProperty("com.apple.mrj.application.apple.menu.about.name", "CBW-SNAC Reconciler");
        SwingUtilities.invokeLater(() -> {
            CBWReconcileUI inst = new CBWReconcileUI();
            inst.setLocationRelativeTo(null);
            inst.setVisible(true);
        });
    }

    /**
     * Constructor
     *
     * Calls JFrame's constructor, switches to the system look and feel and then
     * builds the GUI.
     */
    public CBWReconcileUI() {
        super();
        applySystemLookAndFeel();
        initGUI();
    }

    /**
     * Switch to the platform's native look and feel.
     *
     * This is best effort only: if the look and feel cannot be installed the
     * default one is kept, so the failure is deliberately not propagated.
     */
    private static void applySystemLookAndFeel() {
        try {
            javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
        } catch (Exception ignored) {
            // Cosmetic only; the default look and feel is a fine fallback.
            LOGGER.log(java.util.logging.Level.FINE, "System look and feel unavailable", ignored);
        }
    }

    /**
     * Initialize GUI
     *
     * Builds the JFrame to display to the user: a title at the top and the
     * reconcile panel (file pickers, start button, progress display) below it.
     */
    private void initGUI() {
        try {
            getContentPane().setLayout(new BorderLayout());
            buildTitlePanel();

            bodyPanel = new JPanel();
            getContentPane().add(bodyPanel, BorderLayout.CENTER);
            bodyPanel.setLayout(new FlowLayout());
            buildReconcilePanel();

            this.setSize(FRAME_WIDTH, 400);
            this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        } catch (Exception e) {
            // Keep whatever part of the window was built, but do not hide the failure.
            LOGGER.log(java.util.logging.Level.SEVERE, "Failed to build the reconciler window", e);
        }
    }

    /**
     * Build the title banner shown at the top of the frame.
     */
    private void buildTitlePanel() {
        title = new JPanel();
        getContentPane().add(title, BorderLayout.NORTH);
        title.setSize(FRAME_WIDTH, 100);

        titleLabel = new JLabel("<html><body style='text-align: center; font-size: 25px;'>CBW-SNAC Reconciler</body></html>", SwingConstants.CENTER);
        title.add(titleLabel);
        titleLabel.setPreferredSize(new java.awt.Dimension(FRAME_WIDTH, 100));
    }

    /**
     * Build the panel holding the file pickers, the reconcile button and the
     * progress display. Components are added in display order.
     */
    private void buildReconcilePanel() {
        reconcilePanel = new JPanel();
        bodyPanel.add(reconcilePanel);
        reconcilePanel.setSize(FRAME_WIDTH, 200);
        reconcilePanel.setPreferredSize(new java.awt.Dimension(FRAME_WIDTH, 200));

        // Source CSV row
        fromCSVFileLabel = new JLabel();
        reconcilePanel.add(fromCSVFileLabel);
        fromCSVFileLabel.setText("CBW CSV File: ");

        fromCSVFileLocationLabel = new JLabel();
        reconcilePanel.add(fromCSVFileLocationLabel);
        fromCSVFileLocationLabel.setText("<choose>");

        fromCSVFileButton = new JButton();
        reconcilePanel.add(fromCSVFileButton);
        fromCSVFileButton.setText("Browse");
        fromCSVFileButton.addActionListener(evt -> {
            String chosen = chooseCsvFile("Choose a CSV File.", false);
            if (chosen != null) {
                fromCSVFile = chosen;
                fromCSVFileLocationLabel.setText(chosen);
            }
        });

        jSeparator1 = addSeparator(6);

        // Destination CSV row
        toCSVFileLabel = new JLabel();
        reconcilePanel.add(toCSVFileLabel);
        toCSVFileLabel.setText("Reconciled CSV File: ");

        toCSVFileLocationLabel = new JLabel();
        reconcilePanel.add(toCSVFileLocationLabel);
        toCSVFileLocationLabel.setText("<choose>");

        toCSVFileButton = new JButton();
        reconcilePanel.add(toCSVFileButton);
        toCSVFileButton.setText("Browse");
        toCSVFileButton.addActionListener(evt -> {
            // The destination usually does not exist yet, so a save dialog is used
            // to let the user pick a directory and type a new filename.
            String chosen = chooseCsvFile("Choose a Destination CSV File.", true);
            if (chosen != null) {
                toCSVFile = chosen;
                toCSVFileLocationLabel.setText(chosen);
            }
        });

        jSeparator2 = addSeparator(6);

        // Reconcile with SNAC button
        reconcileSNAC = new JButton();
        reconcilePanel.add(reconcileSNAC);
        reconcileSNAC.setText("Reconcile Against SNAC");
        reconcileSNAC.addActionListener(evt -> startReconcile());

        jSeparator3 = addSeparator(6);

        reconcileProgressBar = new JProgressBar(0, 100);
        reconcileProgressBar.setValue(0);
        reconcileProgressBar.setStringPainted(true);
        reconcilePanel.add(reconcileProgressBar);
        reconcileProgressBar.setPreferredSize(new java.awt.Dimension(600, 20));

        // Zero-height, full-width separator: forces the label onto its own row.
        jSeparator4 = addSeparator(0);

        reconcileProgressLabel = new JLabel();
        reconcilePanel.add(reconcileProgressLabel);
    }

    /**
     * Add a full-width separator to the reconcile panel.
     *
     * @param height Preferred height of the separator in pixels
     * @return The separator that was added
     */
    private JSeparator addSeparator(int height) {
        JSeparator separator = new JSeparator();
        reconcilePanel.add(separator);
        separator.setPreferredSize(new java.awt.Dimension(FRAME_WIDTH, height));
        return separator;
    }

    /**
     * Pop up a file chooser restricted to CSV files.
     *
     * @param dialogTitle Title to show on the dialog
     * @param forSaving true to show a save dialog, false to show an open dialog
     * @return Absolute path of the chosen file, or null if the user cancelled
     */
    private static String chooseCsvFile(String dialogTitle, boolean forSaving) {
        JFileChooser chooser = new JFileChooser();
        chooser.setDialogTitle(dialogTitle);
        chooser.setFileFilter(new FileNameExtensionFilter("CSV Files", "csv"));
        int returnVal = forSaving ? chooser.showSaveDialog(null) : chooser.showOpenDialog(null);
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            return null;
        }
        return chooser.getSelectedFile().getAbsolutePath();
    }

    /**
     * Start the reconciliation in the background.
     *
     * Requires both files to be chosen. While the worker runs the button is
     * disabled so a second click cannot start a competing run that writes to the
     * same destination file; it is re-enabled when the worker finishes.
     */
    private void startReconcile() {
        if (fromCSVFile == null || toCSVFile == null) {
            reconcileProgressLabel.setText("Please choose both a CBW CSV file and a reconciled CSV file.");
            return;
        }
        try {
            // do the reconcile loop in the background
            final CBWReconcileWorker rw = new CBWReconcileWorker(fromCSVFile, toCSVFile);
            rw.addPropertyChangeListener(evt -> onWorkerEvent(rw, evt));
            reconcileSNAC.setEnabled(false);
            rw.execute();
        } catch (RuntimeException e) {
            LOGGER.log(java.util.logging.Level.SEVERE, "Could not start reconciliation", e);
            reconcileSNAC.setEnabled(true);
            reconcileProgressLabel.setText("Could not start reconciliation: " + e);
        }
    }

    /**
     * React to a property change published by the worker.
     *
     * "progress" events update the progress bar and label; the "state" event
     * announcing completion refreshes the label one last time and re-enables
     * the reconcile button.
     *
     * @param worker The worker that published the event
     * @param evt The property change event
     */
    private void onWorkerEvent(CBWReconcileWorker worker, PropertyChangeEvent evt) {
        String property = evt.getPropertyName();
        // Compare by value: reference equality on strings only works by accident.
        if ("progress".equals(property)) {
            int progress = (Integer) evt.getNewValue();
            reconcileProgressBar.setValue(progress);
            reconcileProgressLabel.setText(worker.getProgressText());
        } else if ("state".equals(property)
                && javax.swing.SwingWorker.StateValue.DONE == evt.getNewValue()) {
            reconcileProgressLabel.setText(worker.getProgressText());
            reconcileSNAC.setEnabled(true);
        }
    }
}
/**
* SNAC Reconciliation Example (CBW Java Example)
*
* For the full license, see the LICENSE file in the repository root
*
* @author Robbie Hott
* @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause
* @copyright 2017 the Rector and Visitors of the University of Virginia, and
* the Regents of the University of California
*/
import java.io.BufferedInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.SwingWorker;
import org.json.JSONObject;
import com.opencsv.CSVReader;
import com.opencsv.CSVWriter;
/**
* CBW Reconcile Worker
*
* This class does the actual work of connecting to SNAC and requesting for reconciliation.
* It also handles the result from the server (JSON) and parses the data into a CSV.
*
* @author Robbie Hott
*
*/
public class CBWReconcileWorker extends SwingWorker<Void, Void> {
/**
* Filenames to use
*/
private String fromFile;
private String toFile;
/**
* Progress of the reconciliation
*/
double progress;
/**
* Where the application is currently looking
*/
String progressText;
/**
* Constructor
*
* Create a new worker using the given from and to filenames.
*
* @param from CSV file to read from
* @param to CSV file to write to
*/
public CBWReconcileWorker (String from, String to) {
fromFile = from;
toFile = to;
progress = 0.0;
progressText = "";
}
/**
* Background worker
*
* SwingWorker calls this method when it spawns the new worker thread. This method
* then calls the actual reconcile method to perform the reconcilation.
*/
public Void doInBackground() {
try {
reconcile();
} catch (Exception e) {
// Silently ignoring errors
}
return null;
}
/**
* Set the progress text
*
* @param text String to use for the progress text
*/
private void setProgressText(String text) {
progressText = text;
}
/**
* Get progress text
*
* Returns the current progress status (what individual in the CSV file the system is currently looking at)
*
* @return The progress text
*/
public String getProgressText() {
return progressText;
}
/**
* Main Reconcile Method
*
* This method performs the heart of the client-side reconciliation process.
*
* @throws Exception
*/
private void reconcile() throws Exception {
// Use a pre-packaged reader to read the given CSV file
CSVReader reader = new CSVReader(new FileReader(fromFile));
List<String[]> toReconcile = reader.readAll();
// Number of lines in the CSV file (minus the header)
int reconcileCount = toReconcile.size() - 1;
// Use a pre-packaged writer to write out the CSV file
CSVWriter writer = new CSVWriter(new FileWriter(toFile));
// Write out header of the CSV
String[] headers = {
"CBW Name",
"CBW ID",
"Snac Name",
"Snac ARK",
"Overall Reconciliation Score",
"Elastic Full Name Score",
"Elastic Name-Only Score",
"Elastic75 Score",
"Original Length Score",
"Original Length Difference Score",
"Entity Type Filter Score",
"SNAC Degree Score"
};
writer.writeNext(headers);
// Step through the input data lines
for (int i = 1; i < toReconcile.size(); i++) {
// Calculate a "percent done" maxing out at 95%
int percentage = (i * 95) / reconcileCount;
// Pull the current line of the CSV as array
String[] data = toReconcile.get(i);
// grab the name components from the CSV file and create a snac-like name heading
String nameOnly = data[4].trim() + ", " + data[2].trim() + " " + data[3].trim();
nameOnly = nameOnly.trim();
if (nameOnly.endsWith(",")) {
nameOnly = nameOnly.substring(0, nameOnly.length()-1);
}
// Create the given name
String name = nameOnly;
// If the input line has a 12th column (dates), then use a regex to grab 4-digit years
// and add them to the name
if (data.length >= 13) {
String pattern = "[0-9][0-9][0-9][0-9]";
Pattern r = Pattern.compile(pattern);
Matcher m = r.matcher(data[12]);
String date = "";
while (m.find()) {
date += data[12].substring(m.start(), m.end()) + "-";
}
if (date.length() > 0) {
date = date.substring(0, date.length()-1);
name = nameOnly + ", " + date;
}
}
// Update the progress for this run of the reconciliation
setProgress(percentage);
setProgressText(name);
// Create the JSON query string for the SNAC RestAPI
String query = "{"+
"\"command\" : \"reconcile\"," +
"\"constellation\" : { " +
"\"dataType\" : \"Constellation\"," +
"\"entityType\" : {" +
"\"term\" : \"person\"" +
"}," +
"\"nameEntries\" : [" +
"{" +
"\"dataType\" : \"NameEntry\"," +
"\"original\" : \""+ name +"\"," +
"\"preferenceScore\" : 1" +
"}" +
"]" +
"}" +
"}";
// Perform connection to SNAC
HttpURLConnection httpcon = (HttpURLConnection) ((new URL("http://snac-web.iath.virginia.edu:81/").openConnection()));
httpcon.setDoOutput(true);
httpcon.setRequestProperty("Content-Type", "application/json");
httpcon.setRequestMethod("PUT");
httpcon.connect();
// Write the query to the RestAPI
byte[] outputBytes = query.getBytes("UTF-8");
OutputStream os = httpcon.getOutputStream();
os.write(outputBytes);
os.close();
// Read the response from the RestAPI
InputStream in = new BufferedInputStream(httpcon.getInputStream());
String resultStr = org.apache.commons.io.IOUtils.toString(in, "UTF-8");
JSONObject resultObj = new JSONObject(resultStr);
in.close();
// Close the connection
httpcon.disconnect();
// If reconciliation succeeded, then process the results
if (resultObj.has("reconciliation")) {
for (int j = 0; j < resultObj.getJSONArray("reconciliation").length(); j++) {
JSONObject result = (JSONObject) resultObj.getJSONArray("reconciliation").get(j);
// only grab the first 6 results
if (j > 5) break;
if (!result.has("vector"))
continue;
JSONObject vector = result.getJSONObject("vector");
// Create the result data to add to the CSV output file
String[] output = {
name,
toReconcile.get(i)[0],
((JSONObject) result.getJSONObject("identity").getJSONArray("nameEntries").get(0)).getString("original"),
result.getJSONObject("identity").getString("ark"),
String.format("%.2f", result.getDouble("strength")),
vector.has("ElasticOriginalNameEntry") ? JSONObject.doubleToString(vector.getDouble("ElasticOriginalNameEntry")) : "0",
vector.has("ElasticNameOnly") ? JSONObject.doubleToString(vector.getDouble("ElasticNameOnly")) : "0",
vector.has("ElasticSeventyFive") ? JSONObject.doubleToString(vector.getDouble("ElasticSeventyFive")) : "0",
vector.has("OriginalLength") ? JSONObject.doubleToString(vector.getDouble("OriginalLength")) : "0",
vector.has("MultiStage:ElasticNameOnly:OriginalLengthDifference") ? JSONObject.doubleToString(vector.getDouble("MultiStage:ElasticNameOnly:OriginalLengthDifference")) : "0",
vector.has("MultiStage:ElasticNameOnly:EntityTypeFilter") ? JSONObject.doubleToString(vector.getDouble("MultiStage:ElasticNameOnly:EntityTypeFilter")) : "0",
vector.has("MultiStage:ElasticNameOnly:SNACDegree") ? JSONObject.doubleToString(vector.getDouble("MultiStage:ElasticNameOnly:SNACDegree")) : "0"
};
// Write the line to the CSV file
writer.writeNext(output);
}
}
}
// Close the CSV Writer
writer.close();
// Close the CSV Reader
reader.close();
// Update the progress to 100%
progressText = "DONE!";
setProgress(100);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment