There are two options for downloading Farasa Diacritization: download just the JAR file, or download the entire source code as a zip archive. You can also view the demo or use the web API.
Please log in or register to download.
View Demo Use web API
import json
import requests
# Example client: POST a sentence to the Farasa diacritization web API and
# print the decoded JSON reply.
url = 'https://farasa.qcri.org/webapi/diacritize/'
text = 'يُشار إلى أن اللغة العربية'
api_key = "#####################"  # placeholder value
payload = {'text': text, 'api_key': api_key}
# Passing the dict as data= makes requests send it as a form-encoded body.
# An explicit timeout is set because requests otherwise waits without limit
# when the server does not answer.
data = requests.post(url, data=payload, timeout=30)
result = json.loads(data.text)
print(result)
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
/**
 * Minimal command-line example that POSTs a sentence to the Farasa
 * diacritization web API and prints the raw response body.
 *
 * <p>The request is sent as an {@code application/x-www-form-urlencoded}
 * form with two fields, {@code text} and {@code api_key}.
 */
public class FarasaDiacritizer {

    private static final String ENDPOINT = "https://farasa.qcri.org/webapi/diacritize/";

    /**
     * Sends the sample sentence to the web API and prints every line of the reply.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened, written to, or read from
     */
    public static void main(String[] args) throws IOException {
        var text = "يُشار إلى أن اللغة العربية يتحدثها أكثر من 422 مليون";
        var apiKey = "#####################";
        byte[] postData = buildFormBody(text, apiKey).getBytes(StandardCharsets.UTF_8);

        // Open the connection before the try block so that a failure here is
        // reported as-is instead of being masked by a null dereference in finally.
        HttpURLConnection con = (HttpURLConnection) new URL(ENDPOINT).openConnection();
        try {
            con.setDoOutput(true);
            con.setRequestMethod("POST");
            con.setRequestProperty("User-Agent", "Java client");
            // The body is a URL-encoded form, so the header must say so.
            con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");

            try (var wr = new DataOutputStream(con.getOutputStream())) {
                wr.write(postData);
            }

            // Decode explicitly as UTF-8: the platform default charset may not
            // be able to represent the Arabic text in the reply.
            try (var br = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } finally {
            con.disconnect();
        }
    }

    /**
     * Builds the URL-encoded form body for a diacritization request.
     *
     * <p>Both values are percent-encoded as UTF-8 so that spaces, {@code &},
     * {@code =} and non-ASCII characters cannot corrupt the form structure.
     *
     * @param text   the text to diacritize
     * @param apiKey the caller's API key
     * @return a string of the form {@code text=...&api_key=...}
     */
    static String buildFormBody(String text, String apiKey) {
        return "text=" + URLEncoder.encode(text, StandardCharsets.UTF_8)
                + "&api_key=" + URLEncoder.encode(apiKey, StandardCharsets.UTF_8);
    }
}
// Example: diacritize a sample sentence through the Farasa web API with
// jQuery and write the returned text into the #result element.
var text ='يُشار إلى أن اللغة العربية';
var api_key = "API-KEY";
var settings = {
  "async": true,
  "crossDomain": true,
  "url": "https://farasa.qcri.org/webapi/diacritize/",
  "method": "POST",
  "contentType": 'application/x-www-form-urlencoded; charset=UTF-8',
  // With processData disabled jQuery sends "data" verbatim, so each value
  // has to be percent-encoded here to keep the form body well-formed.
  "processData": false,
  "data": "text=" + encodeURIComponent(text) + "&api_key=" + encodeURIComponent(api_key)
};
$.ajax(settings).done(function (response) {
  // jQuery may already have parsed the reply into an object (depending on
  // the response type); only parse when a raw string was delivered.
  var parsed = typeof response === "string" ? JSON.parse(response) : response;
  $("#result").text(parsed.text);
});
# POST a JSON body with the "text" and "api_key" fields to the diacritization endpoint.
curl --header "Content-Type: application/json" -d "{\"text\":\"يُشار إلى أن اللغة العربية\", \"api_key\":\"###################\"}" https://farasa.qcri.org/webapi/diacritize/
java -jar dist/farasaDiacritizeJar.jar -i <input_file> -o <output_file>
package tryingfarasa;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import com.qcri.farasa.segmenter.Farasa;
import com.qcri.farasa.pos.FarasaPOSTagger;
import com.qcri.farasa.diacritize.DiacritizeText;
/**
 * Example of calling the Farasa diacritizer from Java code rather than
 * through the command-line JAR.
 *
 * <p>It constructs a {@code Farasa} instance and a {@code FarasaPOSTagger},
 * hands both to {@code DiacritizeText}, and diacritizes one sample string.
 */
public class TryingFarasaPOS {

    /**
     * Runs the example and prints the diacritized text.
     *
     * @param args unused
     * @throws Exception if any of the Farasa components fails to initialise or run
     */
    public static void main(String[] args) throws Exception {
        Farasa farasa = new Farasa();
        FarasaPOSTagger farasaPOS = new FarasaPOSTagger(farasa);

        // NOTE(review): presumably the directory holding the Farasa data files
        // and the model file name inside it; adjust to the local installation.
        String dataDirectory = "/var/www/farasa/data/";
        DiacritizeText dt = new DiacritizeText(dataDirectory, "all-text.txt.nocase.blm", farasa, farasaPOS);

        String diacritized = dt.diacritize("النص المراد معالجته");
        // Show the result; without this the computed value is discarded.
        System.out.println(diacritized);
    }
}