There are two ways to download Farasa Lemmatization: download just the JAR file, or download the entire source code as a zip archive. You can also view the demo or use the web API.
Please login/register to download.
View Demo Use web API
import json
import requests
# Endpoint of the Farasa lemmatization web API.
url = 'https://farasa.qcri.org/webapi/lemmatization/'
# Sample Arabic text to process.
text = 'يُشار إلى أن اللغة العربية'
# Placeholder: replace with your own API key.
api_key = "#####################"
# Passed via `data=`, so requests sends the fields as a form-encoded POST body.
payload = {'text': text, 'api_key': api_key}
# An explicit timeout keeps the script from blocking forever if the service
# does not answer (requests applies no timeout by default).
data = requests.post(url, data=payload, timeout=30)
# Decode the response body as JSON and show it.
result = json.loads(data.text)
print(result)
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
/**
 * Minimal command-line client for the Farasa lemmatization web API.
 *
 * <p>Sends a sample Arabic sentence together with an API key to the service
 * as a JSON document via HTTP POST and prints the response body to standard
 * output.
 */
public class FarasaLemmatization {

    /** Address of the lemmatization endpoint. */
    private static final String ENDPOINT = "https://farasa.qcri.org/webapi/lemmatization/";

    /**
     * Posts the sample text to the web API and prints the response.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened, or the request
     *         or response cannot be written or read
     */
    public static void main(String[] args) throws IOException {
        var text = "يُشار إلى أن اللغة العربية يتحدثها أكثر من 422 مليون";
        // Placeholder: replace with your own API key.
        var api_key = "#####################";

        // The body must match the declared Content-Type, so it is real JSON.
        byte[] postData = buildJsonPayload(text, api_key).getBytes(StandardCharsets.UTF_8);

        HttpURLConnection con = null;
        try {
            con = (HttpURLConnection) new URL(ENDPOINT).openConnection();
            con.setDoOutput(true);
            con.setRequestMethod("POST");
            con.setRequestProperty("User-Agent", "Java client");
            con.setRequestProperty("Content-Type", "application/json");

            try (var wr = new DataOutputStream(con.getOutputStream())) {
                wr.write(postData);
            }

            // Decode explicitly as UTF-8: the platform default charset may
            // not be able to represent the Arabic text in the response.
            try (var br = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } finally {
            // con is still null when opening the connection itself failed.
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Builds the JSON request body expected by the web API.
     *
     * @param text   the text to lemmatize
     * @param apiKey the caller's API key
     * @return a JSON object with the members {@code text} and {@code api_key}
     */
    static String buildJsonPayload(String text, String apiKey) {
        return "{\"text\":\"" + escapeJson(text)
                + "\",\"api_key\":\"" + escapeJson(apiKey) + "\"}";
    }

    /**
     * Escapes a value so that it can be embedded in a JSON string literal:
     * quotes, backslashes and control characters are replaced by their JSON
     * escape sequences; every other character is copied unchanged.
     */
    private static String escapeJson(String value) {
        var sb = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    sb.append("\\\"");
                    break;
                case '\\':
                    sb.append("\\\\");
                    break;
                case '\n':
                    sb.append("\\n");
                    break;
                case '\r':
                    sb.append("\\r");
                    break;
                case '\t':
                    sb.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters need the 4-digit hex form.
                        sb.append(String.format("\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
            }
        }
        return sb.toString();
    }
}
// Sample Arabic text to process and a placeholder for your own API key.
var text ='يُشار إلى أن اللغة العربية';
var api_key = "#####################";

// Request options for jQuery.ajax: POST a JSON document to the
// lemmatization endpoint.
var settings = {
  "async": true,
  "crossDomain": true,
  "url": "https://farasa.qcri.org/webapi/lemmatization/",
  "method": "POST",
  "headers": { "content-type": "application/json", "cache-control": "no-cache" },
  // The body is already a serialized string, so jQuery must not process it.
  "processData": false,
  // JSON.stringify escapes quotes and backslashes in the values.
  "data": JSON.stringify({ "text": text, "api_key": api_key })
};

// Send the request and log the response once it arrives.
$.ajax(settings).done(function (response) { console.log(response); });
# POST the sample text and the API key (placeholder) to the lemmatization endpoint as a JSON body.
curl --header "Content-Type: application/json" -d "{\"text\":\"يُشار إلى أن اللغة العربية\", \"api_key\":\"###################\"}" https://farasa.qcri.org/webapi/lemmatization/
# Build with Apache Ant.
# NOTE(review): the `java` and `jar` targets come from the project's build file,
# which is not shown here — presumably they compile the sources and package
# dist/farasaSeg.jar; confirm against the build file.
ant java
ant jar
# Run the packaged jar without input/output file arguments.
java -jar dist/farasaSeg.jar
Alternatively, pass a UTF-8 encoded text file as input to the package and specify the output file name as follows:
# -i <inputfile>: UTF-8 encoded text file to process; -o <output_file>: name of the file to write the output to.
java -jar dist/farasaSeg.jar -i <inputfile> -o <output_file>
package tryingfarasa;
import com.qcri.farasa.segmenter.Farasa;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Example of calling the Farasa segmenter from Java code.
 *
 * <p>The bare {@code ...} lines mark parts of the example that were left out
 * of this snippet.
 */
public class TryingSeg {
...
    /**
     * Segments one line of Arabic text and prints each returned item on its
     * own line.
     *
     * @param args unused in the visible part of the example
     */
    public static void main(String[] args) throws IOException, FileNotFoundException, ClassNotFoundException {
...
        Farasa farasa = new Farasa();
        // The list is typed so the for-each below can bind each element to a
        // String; a raw ArrayList only yields Object.
        ArrayList<String> output = farasa.segmentLine("النص المراد معالجته");
        for (String s : output) {
            System.out.println(s);
        }
...
    }
...
}