package mobi.eup.jpnews.util.news;

import android.content.Context;
import android.os.AsyncTask;
import androidx.media2.exoplayer.external.text.ttml.TtmlNode;
import com.google.gson.Gson;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import com.raizlabs.android.dbflow.sql.language.Operator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import mobi.eup.jpnews.R;
import mobi.eup.jpnews.listener.NewsContentJsonCallback;
import mobi.eup.jpnews.listener.StringCallback;
import mobi.eup.jpnews.model.news.NewsContentJson;
import mobi.eup.jpnews.model.userprofile.UserJLPT;
import mobi.eup.jpnews.util.language.StringHelper;
import mobi.eup.jpnews.util.language.WanaKanaJava;
import net.java.sen.SenFactory;
import net.java.sen.StringTagger;
import net.java.sen.dictionary.Token;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: classes5.dex */
public class AnalyzerTextHelper extends AsyncTask<String, Void, NewsContentJson> {
    /** Used to read string resources and assets; released once the result has been delivered. */
    private Context context;

    /** Receives the value returned by the background work, which may be {@code null}. */
    private final NewsContentJsonCallback onPostExecute;

    /** Receives progress messages while the task runs; may be {@code null}. */
    private final StringCallback stringCallback;

    /**
     * Creates a task that fetches and/or analyzes one article.
     *
     * @param context context used for string resources and assets
     * @param newsContentJsonCallback invoked with the result when the task finishes; may be {@code null}
     * @param stringCallback invoked with progress messages; may be {@code null}
     */
    public AnalyzerTextHelper(Context context, NewsContentJsonCallback newsContentJsonCallback, StringCallback stringCallback) {
        this.stringCallback = stringCallback;
        this.onPostExecute = newsContentJsonCallback;
        this.context = context;
    }

    /**
     * Annotates one piece of article text and records every distinct word it contains in the
     * article's per-level word lists.
     *
     * <p>Text that already contains {@code <rt>} markup is returned unchanged; only its plain-text
     * form is tokenized so that the words can be recorded. Any other text is rebuilt token by token:
     * a token whose reading differs from its surface is wrapped in {@code <ruby>} markup, and every
     * token is wrapped in a {@code <span>} whose class is {@code jlpt-n<level>} or {@code unknown}.
     * Characters between tokens (everything the tokenizer filtered out) are copied through as-is.
     *
     * <p>The level of a word is looked up by searching the raw text of the {@code lv_word.json}
     * asset for an entry whose {@code "word"} equals the token surface.
     *
     * @param newsContentJson article whose level-word lists are updated in place; its
     *     {@code getResult().getLevelWords()} must already be non-null
     * @param str text to annotate
     * @return the annotated text, or {@code str} itself when it already carried {@code <rt>} markup
     */
    private String convertResultDetail(NewsContentJson newsContentJson, String str) {
        final boolean alreadyAnnotated = str.contains("<rt>");
        List<Token> tokens = tokenizerJapanese(alreadyAnnotated ? StringHelper.html2String(str) : str);

        String levelJson;
        try {
            levelJson = StringHelper.getStringFromAsset(this.context, "lv_word.json");
        } catch (IOException | OutOfMemoryError ignored) {
            // Best effort: without the word list every token is simply reported as unknown.
            levelJson = "";
        }

        WanaKanaJava kana = new WanaKanaJava(false);
        Gson gson = new Gson();
        StringBuilder annotated = new StringBuilder();
        // Index in str just past the last token that was written to the output.
        int consumed = 0;

        for (Token token : tokens) {
            String surface = token.getSurface();
            String markup = surface;

            if (!alreadyAnnotated) {
                String reading = token.getMorpheme().getReadings().isEmpty()
                        ? ""
                        : kana.toHiragana(token.getMorpheme().getReadings().get(0));
                boolean needsFurigana = !kana.isKatakana(surface)
                        && !kana.isHiragana(surface)
                        && !reading.isEmpty()
                        && !reading.equals(surface);
                if (needsFurigana) {
                    markup = "<ruby>" + surface + "<rt>" + reading + "</rt></ruby>";
                }
            }

            // The surface is quoted so that tokens containing regex metacharacters are matched
            // literally instead of breaking (or silently changing) the lookup pattern.
            Matcher entry = Pattern.compile(
                    "\"id\":\\d+?,\"level\":\\d,\"phonetic\":\".{1," + (surface.length() * 3)
                            + "}?\",\"word\":\"" + Pattern.quote(surface) + "\"")
                    .matcher(levelJson);

            if (entry.find()) {
                try {
                    UserJLPT word = gson.fromJson("{" + entry.group() + "}", UserJLPT.class);
                    int level = word.getLevel();
                    if (!alreadyAnnotated) {
                        markup = "<span class=\"jlpt-n" + level + "\">" + markup + "</span>";
                    }
                    recordLevelWord(newsContentJson.getResult().getLevelWords(), level, surface);
                } catch (JsonSyntaxException ignored) {
                    // The matched fragment was not valid JSON: keep the token without a level
                    // span and do not record it.
                }
            } else {
                if (!alreadyAnnotated) {
                    markup = "<span class=\"unknown\">" + markup + "</span>";
                }
                recordUnknownWord(newsContentJson.getResult().getLevelWords(), surface);
            }

            if (!alreadyAnnotated) {
                int start = token.getStart();
                if (consumed < start) {
                    // Copy everything the tokenizer skipped, including a leading character
                    // before the very first token.
                    annotated.append(str, consumed, start);
                }
                annotated.append(markup);
                consumed = token.end();
            }
        }

        if (alreadyAnnotated) {
            return str;
        }
        if (consumed < str.length()) {
            // Trailing text after the last token; this is the whole input when nothing was tokenized.
            annotated.append(str, consumed, str.length());
        }
        return annotated.toString();
    }

    /** Adds {@code surface} once to the list for {@code level}; levels other than 1-4 go to list 5. */
    private static void recordLevelWord(NewsContentJson.LevelWords words, int level, String surface) {
        switch (level) {
            case 1:
                if (words.get1() == null) {
                    words.set1(new ArrayList<>());
                }
                if (!words.get1().contains(surface)) {
                    words.get1().add(surface);
                }
                break;
            case 2:
                if (words.get2() == null) {
                    words.set2(new ArrayList<>());
                }
                if (!words.get2().contains(surface)) {
                    words.get2().add(surface);
                }
                break;
            case 3:
                if (words.get3() == null) {
                    words.set3(new ArrayList<>());
                }
                if (!words.get3().contains(surface)) {
                    words.get3().add(surface);
                }
                break;
            case 4:
                if (words.get4() == null) {
                    words.set4(new ArrayList<>());
                }
                if (!words.get4().contains(surface)) {
                    words.get4().add(surface);
                }
                break;
            default:
                if (words.get5() == null) {
                    words.set5(new ArrayList<>());
                }
                if (!words.get5().contains(surface)) {
                    words.get5().add(surface);
                }
                break;
        }
    }

    /** Adds {@code surface} once to the list of words that have no entry in the level data. */
    private static void recordUnknownWord(NewsContentJson.LevelWords words, String surface) {
        if (words.getUnknown() == null) {
            words.setUnknown(new ArrayList<>());
        }
        if (!words.getUnknown().contains(surface)) {
            words.getUnknown().add(surface);
        }
    }

    /**
     * Extracts title, date, body text and lead image from a downloaded article page, using a set
     * of CSS selectors chosen by the host found in the URL. Pages from hosts without a dedicated
     * selector set yield an article whose body is the text of the whole {@code <body>}.
     *
     * <p>NOTE(review): several branches used to run {@code String.replace} with regex-style
     * patterns over the page HTML (for example to drop recommended-article or figure blocks)
     * before re-parsing it. {@code String.replace} matches literally, so those calls never took
     * effect and have been removed; the re-parse itself is kept so extraction still works on a
     * private copy of the page. If that clean-up is wanted it should be done with DOM removal,
     * after confirming it does not shift the index-based selectors ({@code get(6)}) used below.
     *
     * @param document the parsed page
     * @param str URL of the page; decides which selector set is used and is stored in the result
     * @return the extracted article
     * @throws RuntimeException if an element a branch dereferences directly is missing
     *     ({@code first()} returned {@code null} or {@code get(6)} was out of range)
     */
    private NewsContentJson crapJSoup(Document document, String str) {
        if (str.contains("nhk.or.jp/news/easy")) {
            String title = StringHelper.html2String(document.getElementsByClass("article-main__title").html());
            String date = document.getElementsByClass("article-main__date").text();
            String image = document.select(".article-main__figure").select("img").attr("src");
            if (image.isEmpty()) {
                image = document.select(".content--thumb").select("img").attr("src").replace("/news/html", "..");
            }
            String body = StringHelper.html2String(document.select(".article-main__body").html());
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("nhk.or.jp/news/html")) {
            String title = document.select("h1.content--title").text();
            String date = document.select("time").text();
            String image = document.select("img.lazy").attr("src");
            if (image.isEmpty()) {
                image = document.select(".content--thumb").select("img").attr("src").replace("/news/html", "..");
            }
            String body = document.select("div.content--detail-body").text();
            return new NewsContentJson(title, date, body, image, null, "normal", str);
        }
        if (str.contains("matcha-jp.com/easy")) {
            Document page = Jsoup.parse(document.html());
            String title = StringHelper.html2String(page.select("h1.title").html());
            String date = page.select("span.date").first().text();
            String image = page.select("main > div > img").attr("src");
            page.select("[class*=recommended_articles]").remove();
            // Drop the fallback parentheses around furigana before converting the body.
            String bodyHtml = page.select("main > div").get(6).html()
                    .replace("<rp>(</rp>", "")
                    .replace("<rp>)</rp>", "");
            return easyArticle(title, date, StringHelper.html2String(bodyHtml), image, str);
        }
        if (str.contains("matcha-jp.com/jp")) {
            Document page = Jsoup.parse(document.html());
            String title = page.select("header > h1").text();
            if (title.isEmpty()) {
                title = page.select("h1.title").text();
            }
            // Null-safe lookups so that the fallbacks are actually reachable.
            String date = firstText(page.select("time"));
            if (date.isEmpty()) {
                date = firstText(page.select("span.date"));
            }
            String image = page.select("main > div > article > header > picture > img").attr("src");
            if (image.isEmpty()) {
                image = page.select("main > div > img").attr("src");
            }
            String body = firstText(page.select("main > div > article > section"));
            if (body.isEmpty()) {
                body = page.select("main > div").get(6).text();
            }
            // An article is returned whether or not the header picture exists.
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("watanoc.com")) {
            Document page = Jsoup.parse(document.html());
            String title = page.select("article > header > h1 > span").first().text();
            String date = page.select("time.updated").first().text();
            // Strip audio file URLs that appear as plain text inside the entry.
            String body = page.select("div[class$=entry-content]").text()
                    .replaceAll("http://watanoc.com/wp-content/uploads/\\S+?\\.mp3", "");
            String image = page.select("[alt=watanoc]").first().attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("buzzfeed.com")) {
            Document page = Jsoup.parse(document.html());
            String title = page.select("h1.title__2wEoS_Bqpp").text();
            String date = page.select("span.timestamp__12oPwMqFR8").first().text();
            String body = page.select("div[data-module=subbuzz-text]").text();
            String image = page.select("img[class^=js-subbuzz__media]").first().attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("cnn.co.jp")) {
            String title = document.select("h1.ttl-headline-top").text();
            String date = document.select("div.metadata-updatetime").text();
            String body = document.select("div#leaf-body").text();
            String image = document.select("div.story-media-main").select("a").select("img").attr("src");
            return easyArticle(title, date, body, withPrefix("https://www.cnn.co.jp", image), str);
        }
        if (str.contains("engadget.com")) {
            Document page = Jsoup.parse(document.html());
            String title = page.select("h1[class^=Ff($ff-primary)]").text();
            String date = page.select("div[class$=C(engadgetFontLightGray)]").text();
            String body = page.select("div[data-testid=PostContentContainer]").text();
            String image = page.select("div[data-testid=PostContentContainer]").select("img").attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("huffingtonpost.jp")) {
            String title = document.select("h1.headline__title").text();
            String date = document.select("span[class^=timestamp__date]").text();
            String image = document.select("img.image__src").attr("src");
            // The subtitle (possibly empty) is prepended to the article text.
            String body = document.select("h2.headline__subtitle").text()
                    + document.select("div[class^=content-list-component]").text();
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("nippon.com")) {
            String title = document.select("h1.c-h1").text();
            String date = document.select("time.c-date").text();
            String image = document.select("img.c-detailmv").attr("src");
            if (image.isEmpty()) {
                image = document.select("div.editArea").select("img").attr("src");
            }
            String body = document.select("div.editArea").text();
            return easyArticle(title, date, body, withPrefix("https://www.nippon.com", image), str);
        }
        if (str.contains("rocketnews24.com")) {
            String title = document.select("h2.entry-title").text();
            String date = document.select("li.date").text();
            String image = document.select("div.entry-content").select("img").attr("src");
            // Remove the extra block before reading the body so its text is not included.
            document.select("div.extra-content").remove();
            String body = document.select("div.entry-content").text();
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("tbs.co.jp")) {
            String title = document.select("div.ls-title").first().text();
            String date = document.select("div.ls__data").first().text();
            String image = document.select("div.ls-print").select("img").attr("src");
            String body = document.select("div.ls-body").text();
            return easyArticle(title, date, body, withPrefix("http://news.tbs.co.jp", image), str);
        }
        if (str.contains("jp.techcrunch.com")) {
            String title = document.select("h1[class$=tweet-title]").text();
            String date = document.select("time.timestamp").text();
            String body = document.select("div[class^=article-entry]").select("p").text();
            String image = document.select("div[class^=article-entry]").select("img").attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("wired.jp")) {
            String title = document.select("span.th__inner").text();
            String date = document.select("time[class^=publish-date]").first().text();
            document.select("div[class^=article-related]").remove();
            String body = document.select("div.ab__inner").select("p").text();
            String image = document.select("img.tp__img").attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("news.yahoo.co.jp")) {
            if (str.contains("pickup")) {
                return easyArticle(
                        document.select("p.pickupMain_articleTitle").text(),
                        document.select("p.pickupMain_image_date").text(),
                        document.select("p.pickupMain_articleSummary").text(),
                        document.select("div[class^=pickupMain_image]").select("img").attr("src"),
                        str);
            }
            return easyArticle(
                    document.select("h1[class^=sc-cmTdod]").text(),
                    document.select("p[class^=sc-bYSBpT]").text(),
                    document.select("p[class^=sc-dVhcbM]").text(),
                    document.select("img[class^=sc-jDwBTQ]").attr("src"),
                    str);
        }
        if (str.contains("easyjapanese.net")) {
            String title = StringHelper.html2String(document.select("div#h3-bold-detail").html());
            String date = document.select("div.public-date").text();
            String body = StringHelper.html2String(document.select("div.content").select("p").html());
            String image = document.select("div.content-media").select("img").attr("src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("asahi.com")) {
            String title = document.select("div.Title").first().select("h1").text();
            String date = document.select("span.UpdateDate").text();
            String image = document.select("div.Image").select("img").attr("src");
            String body = document.select("div.ArticleText").select("p").text();
            return easyArticle(title, date, body, withPrefix("http:", image), str);
        }
        if (str.contains("businessinsider.jp")) {
            String title = document.select("h1.p-post-title").first().text();
            String date = document.select("li.p-post-bylineDate").text();
            String body = document.select("div[class^=p-post-content]").select("p").text();
            String image = document.select("p.p-post-contentFigureImage").select("img").attr("data-src");
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("chunichi.co.jp")) {
            String title = firstText(document.select("h1.hdg"));
            String date = document.select("div.info").select("span.data").text();
            String image = document.select("div.thumb").select("img").attr("src");
            String body = document.select("div.l-container").select("div.block").text();
            return easyArticle(title, date, body, withPrefix("http:", image), str);
        }
        if (str.contains("forbesjapan.com")) {
            String title = document.select("h1.article-headline").text();
            String body = document.select("div.kizi-honbun").text();
            String image = document.select("div.in-box").select("img").attr("data-original");
            return easyArticle(title, "", body, image, str);
        }
        if (str.contains("news.goo.ne.jp")) {
            String title = document.select("h1[class^=article-title]").text();
            if (title.isEmpty()) {
                title = document.select("h2[class^=topics-title]").text();
            }
            String date = document.select("p[class^=article-date]").text();
            String image = document.select("a#picdetail_image").select("img").attr("src");
            if (image.isEmpty()) {
                image = document.select("div.article-thumbs").select("img").attr("src");
            }
            String body = document.select("div[class^=article-text]").text();
            if (body.isEmpty()) {
                body = document.select("p.topics-text").text();
            }
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("news.livedoor.com")) {
            String title = document.select("h1.articleTtl").text();
            if (title.isEmpty()) {
                title = document.select("h1.topicsTtl").text();
            }
            String date = document.select("time.articleDate").text();
            if (date.isEmpty()) {
                date = document.select("time.topicsTime").text();
            }
            String image = document.select("p.topicsImg").select("img").attr("src");
            if (image.isEmpty()) {
                image = document.select("figure.captionWithImage").select("img").attr("src");
            }
            String body = document.select("p.articleBody").text();
            if (body.isEmpty()) {
                body = document.select("div.articleBody").text();
            }
            return easyArticle(title, date, body, image, str);
        }
        if (str.contains("news.microsoft.com")) {
            String title = document.select("h1[class^=entry-title]").text();
            String date = document.select("time[itemprop=datePublished]").first().text();
            String body = document.select("div[class^=entry-content]").select("p").text();
            String image = document.select("img").first().attr("src");
            return easyArticle(title, date, body, image, str);
        }
        // Unknown host: fall back to the text of the whole page, without title, date or image.
        return new NewsContentJson("", "", document.body().text(), null, null, "easy", str);
    }

    /** Builds an article result tagged with the {@code "easy"} type. */
    private static NewsContentJson easyArticle(String title, String date, String body, String image, String url) {
        return new NewsContentJson(title, date, body, image, null, "easy", url);
    }

    /** Returns the text of the first matched element, or an empty string when nothing matched. */
    private static String firstText(Elements elements) {
        Element first = elements.first();
        return first != null ? first.text() : "";
    }

    /** Prepends {@code prefix} to a non-empty {@code path}; an empty path is returned unchanged. */
    private static String withPrefix(String prefix, String path) {
        return path.isEmpty() ? path : prefix + path;
    }

    /**
     * Splits text into morphological tokens and drops the ones that are not worth annotating:
     * tokens with a null or empty surface, a fixed set of single punctuation characters, and
     * single-character hiragana tokens.
     *
     * @param str text to tokenize
     * @return the remaining tokens in their original order; empty when the tagger could not be
     *     created or the analysis failed
     */
    private List<Token> tokenizerJapanese(String str) {
        // Every skipped symbol is exactly one char, so a character lookup is equivalent to
        // comparing the surface against each symbol in turn.
        final String skippedSymbols = "、」「。(\u3000)＆,.-！（）【】*";
        List<Token> kept = new ArrayList<>();
        try {
            StringTagger tagger = SenFactory.getStringTagger(null);
            List<Token> analyzed = new ArrayList<>();
            tagger.analyze(str, analyzed);
            WanaKanaJava kana = new WanaKanaJava(false);
            for (Token token : analyzed) {
                String surface = token.getSurface();
                if (surface == null || surface.isEmpty()) {
                    continue;
                }
                if (surface.length() == 1
                        && (skippedSymbols.indexOf(surface.charAt(0)) >= 0 || kana.isHiragana(surface))) {
                    continue;
                }
                kept.add(token);
            }
        } catch (IOException | OutOfMemoryError e) {
            // Best effort: the caller receives whatever was collected (normally nothing).
            e.printStackTrace();
        }
        return kept;
    }

    /**
     * Fetches (when the input contains {@code "http"}) or wraps (otherwise) the given text, then
     * annotates its title, body and optional extra body and collects the words by level.
     * Progress messages are sent to the string callback, when one was supplied, between steps.
     *
     * @param strArr a single element: either an article URL or the raw text to analyze
     * @return the analyzed article, or {@code null} when no input was given, the page could not be
     *     fetched or parsed, or the article has no result
     */
    @Override // android.os.AsyncTask
    public NewsContentJson doInBackground(String... strArr) {
        if (strArr == null || strArr.length == 0 || strArr[0] == null) {
            // Nothing to analyze; report it the same way as a failed download.
            return null;
        }
        String input = strArr[0];

        NewsContentJson article;
        if (input.contains("http")) {
            try {
                article = crapJSoup(Jsoup.connect(input).get(), input);
            } catch (IOException | RuntimeException e) {
                // Network failure or a page that does not match the expected structure.
                e.printStackTrace();
                article = null;
            }
            publishStatus(R.string.prepareing, "...");
        } else {
            article = new NewsContentJson("", "", input, null, null, "easy", "");
        }

        publishStatus(R.string.analyzing, " 1/3...");
        if (article == null || article.getResult() == null) {
            return null;
        }

        article.getResult().setLevelWords(new NewsContentJson.LevelWords());
        if (article.getResult().getTitle() != null) {
            article.getResult().setTitle(convertResultDetail(article, article.getResult().getTitle()));
        }

        publishStatus(R.string.analyzing, " 2/3...");
        if (article.getResult().getContent() != null) {
            // A missing body is skipped instead of being passed on as null.
            if (article.getResult().getContent().getTextbody() != null) {
                article.getResult().getContent().setTextbody(
                        convertResultDetail(article, article.getResult().getContent().getTextbody()));
            }
            if (article.getResult().getContent().getTextmore() != null) {
                publishStatus(R.string.analyzing, " 3/3...");
                article.getResult().getContent().setTextmore(
                        convertResultDetail(article, article.getResult().getContent().getTextmore()));
            }
        }
        return article;
    }

    /** Sends the string resource {@code resId} followed by {@code suffix} to the progress callback, if any. */
    private void publishStatus(int resId, String suffix) {
        StringCallback callback = this.stringCallback;
        if (callback != null) {
            callback.execute(this.context.getString(resId) + suffix);
        }
    }

    /**
     * Hands the background result to the result callback, when one was supplied, and then drops
     * the context reference held by this task.
     *
     * @param newsContentJson the value returned by {@link #doInBackground(String...)}; may be {@code null}
     */
    @Override // android.os.AsyncTask
    public void onPostExecute(NewsContentJson newsContentJson) {
        super.onPostExecute(newsContentJson);
        NewsContentJsonCallback resultCallback = this.onPostExecute;
        if (resultCallback != null) {
            resultCallback.execute(newsContentJson);
        }
        this.context = null;
    }
}
