There are two ways to use seq2seq diacritization: view the demo or call the web API.
View Demo Use web API
import json
import requests
# Endpoint and form fields for the Farasa seq2seq diacritization web API.
url = 'https://farasa.qcri.org/webapi/seq2seq_diacritize/'
text = 'يُشار إلى أن اللغة العربية'
api_key = "#####################"
dialect = "mor"
payload = {'text': text, 'api_key': api_key, "dialect": dialect}
# A timeout keeps the script from blocking forever if the server never answers.
data = requests.post(url, data=payload, timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON parse error on a 4xx/5xx reply.
data.raise_for_status()
result = json.loads(data.text)
print(result)
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
/**
 * Minimal command-line client for the Farasa seq2seq diacritization web API.
 *
 * <p>Posts the text, API key and dialect as a form-encoded body and prints the
 * server's response to standard output.
 */
public class FarasaDiacritizer {
    private static final String ENDPOINT = "https://farasa.qcri.org/webapi/seq2seq_diacritize/";

    /**
     * Sends one diacritization request and prints the response body.
     *
     * @param args unused
     * @throws IOException if the connection fails or the server returns an error status
     */
    public static void main(String[] args) throws IOException {
        var text = "يُشار إلى أن اللغة العربية يتحدثها أكثر من 422 مليون";
        var apiKey = "#####################";
        var dialect = "mor";

        // Each value is percent-encoded so spaces, '&', '=' and non-ASCII text
        // cannot corrupt the key=value structure of the body.
        var urlParameters = "text=" + encode(text)
                + "&api_key=" + encode(apiKey)
                + "&dialect=" + encode(dialect);
        byte[] postData = urlParameters.getBytes(StandardCharsets.UTF_8);

        // Opened outside the try so the finally block never sees a null connection.
        var con = (HttpURLConnection) new URL(ENDPOINT).openConnection();
        try {
            con.setConnectTimeout(10_000);
            con.setReadTimeout(30_000);
            con.setDoOutput(true);
            con.setRequestMethod("POST");
            con.setRequestProperty("User-Agent", "Java client");
            // The body is form-encoded key=value pairs, so declare that type rather than JSON.
            con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");

            try (var wr = new DataOutputStream(con.getOutputStream())) {
                wr.write(postData);
            }

            // Decode explicitly as UTF-8; the platform default charset may mangle Arabic text.
            try (var br = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } finally {
            con.disconnect();
        }
    }

    /** Percent-encodes a single form value as UTF-8. */
    private static String encode(String value) {
        return URLEncoder.encode(value, StandardCharsets.UTF_8);
    }
}
// Request parameters for the Farasa seq2seq diacritization endpoint.
var text = 'يُشار إلى أن اللغة العربية';
var api_key = "#####################";
var dialect = "mor";

var settings = {
  "async": true,
  "crossDomain": true,
  "url": "https://farasa.qcri.org/webapi/seq2seq_diacritize/",
  "method": "POST",
  "headers": { "content-type": "application/json", "cache-control": "no-cache" },
  // The body is already a JSON string, so jQuery must not turn it into a query string.
  "processData": false,
  // JSON.stringify escapes quotes, backslashes and newlines in the values,
  // which hand-built string concatenation does not.
  "data": JSON.stringify({ "text": text, "api_key": api_key, "dialect": dialect })
};

// Send the request and log whatever the server returns.
$.ajax(settings).done(function (response) { console.log(response); });
# POST a JSON body to the diacritization endpoint. The body is wrapped in single
# quotes so the shell passes the inner double quotes through unchanged.
curl --header "Content-Type: application/json" \
  -d '{"text": "يُشار إلى أن اللغة العربية", "api_key": "###################", "dialect": "mor"}' \
  https://farasa.qcri.org/webapi/seq2seq_diacritize/
java -jar dist/farasaDiacritizeJar.jar -i <input_file> -o <output_file>
package tryingfarasa;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import com.qcri.farasa.segmenter.Farasa;
import com.qcri.farasa.pos.FarasaPOSTagger;
import com.qcri.farasa.diacritize.DiacritizeText;
/**
 * Example of diacritizing a string with the Farasa library classes imported by this file.
 */
public class TryingFarasaPOS {
    /**
     * Constructs a {@code Farasa} instance and a {@code FarasaPOSTagger} built on it, passes
     * both to a {@code DiacritizeText}, then diacritizes a sample sentence and prints the result.
     *
     * @param args unused
     * @throws Exception if any Farasa component fails to initialise or to process the text
     */
    public static void main(String[] args) throws Exception {
        Farasa farasa = new Farasa();
        FarasaPOSTagger farasaPOS = new FarasaPOSTagger(farasa);

        // NOTE(review): path is hard-coded; presumably it must point at the Farasa data files
        // on the local machine — confirm before running.
        String dataDirectory = "/var/www/farasa/data/";

        // Pass the tagger created above; the name "tagger" was never declared.
        DiacritizeText dt = new DiacritizeText(dataDirectory, "all-text.txt.nocase.blm", farasa, farasaPOS);

        String diacritized = dt.diacritize("النص المراد معالجته");
        // Show the result instead of silently discarding it.
        System.out.println(diacritized);
    }
}