gabarito tp02

2022-04-08 08:22:32 -03:00 · 2022-04-08 08:22:32 -03:00 · 7d3067139b
parent 7e10a58dd0
commit 7d3067139b
2 changed files with 645 additions and 0 deletions
--- a/tps/gabarito/tp02/Film.java
+++ b/tps/gabarito/tp02/Film.java
@ -0,0 +1,311 @@
+import java.io.*;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+
+/**
+ * A movie whose attributes are scraped from a saved HTML page.
+ *
+ * <p>{@link #ler(String)} fills the instance from {@code ./filmes/<fileName>} and
+ * {@link #toString()} / {@link #imprimir()} render it on a single line.
+ *
+ * @author Thiago de Campos Ribeiro Nolasco
+ */
+public class Film {
+    // Attributes
+    private String name;
+    private String ogTitle;
+    private Date releaseDate;
+    private Integer duration;
+    private String genre;
+    private String ogLanguage;
+    private String situation;
+    private Float budget;
+    private String[] arrKeyWds;
+
+    // Format used both to parse the release date from the page and to print it.
+    SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy");
+
+    // Constructors
+
+    /** Creates a film with every attribute set to {@code null}. */
+    public Film() {
+        this(null, null, null, null, null, null, null, null);
+    }
+
+    /**
+     * Creates a film from explicit values; the key-word array starts as {@code null}.
+     *
+     * @param name        film name
+     * @param ogTitle     original title
+     * @param releaseDate release date
+     * @param duration    duration in minutes
+     * @param genre       genre
+     * @param ogLanguage  original language
+     * @param situation   situation (status) text
+     * @param budget      budget
+     */
+    public Film(String name, String ogTitle, Date releaseDate, Integer duration, String genre, String ogLanguage, String situation, Float budget) {
+        this.name = name;
+        this.ogTitle = ogTitle;
+        this.releaseDate = releaseDate;
+        this.duration = duration;
+        this.genre = genre;
+        this.ogLanguage = ogLanguage;
+        this.situation = situation;
+        this.budget = budget;
+        this.arrKeyWds = null;
+    }
+
+    // Getters and Setters
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getOgTitle() {
+        return ogTitle;
+    }
+
+    public void setOgTitle(String ogTitle) {
+        this.ogTitle = ogTitle;
+    }
+
+    public Date getReleaseDate() {
+        return releaseDate;
+    }
+
+    public void setReleaseDate(Date releaseDate) {
+        this.releaseDate = releaseDate;
+    }
+
+    public Integer getDuration() {
+        return duration;
+    }
+
+    public void setDuration(Integer duration) {
+        this.duration = duration;
+    }
+
+    public String getGenre() {
+        return genre;
+    }
+
+    public void setGenre(String genre) {
+        this.genre = genre;
+    }
+
+    public String getOgLanguage() {
+        return ogLanguage;
+    }
+
+    public void setOgLanguage(String ogLanguage) {
+        this.ogLanguage = ogLanguage;
+    }
+
+    public String getSituation() {
+        return situation;
+    }
+
+    public void setSituation(String situation) {
+        this.situation = situation;
+    }
+
+    public Float getBudget() {
+        return budget;
+    }
+
+    public void setBudget(Float budget) {
+        this.budget = budget;
+    }
+
+    public String[] getArrKeyWds() {
+        return arrKeyWds;
+    }
+
+    public void setArrKeyWds(String[] arrKeyWds) {
+        this.arrKeyWds = arrKeyWds;
+    }
+
+    /**
+     * Returns an independent copy of this film.
+     *
+     * <p>The mutable members (release date and key-word array) are duplicated so
+     * that changing them through the copy does not affect this instance.
+     *
+     * @return a new {@code Film} carrying the same attribute values
+     */
+    @Override
+    public Film clone(){
+        Film cloned = new Film();
+
+        cloned.name = this.name;
+        cloned.ogTitle = this.ogTitle;
+        cloned.releaseDate = (this.releaseDate == null) ? null : (Date) this.releaseDate.clone();
+        cloned.duration = this.duration;
+        cloned.genre = this.genre;
+        cloned.ogLanguage = this.ogLanguage;
+        cloned.situation = this.situation;
+        cloned.budget = this.budget;
+        cloned.arrKeyWds = (this.arrKeyWds == null) ? null : this.arrKeyWds.clone();
+
+        return cloned;
+    }
+
+
+    /**
+     * Fills this film from the HTML file {@code ./filmes/<fileName>}.
+     *
+     * @param fileName name of the file inside the {@code ./filmes/} folder
+     */
+    public void ler(String fileName){
+        // Getting the right path for each read file
+        String path = "./filmes/" + fileName;
+
+        // Method that will split chunks of the read HTML and will assign the value to each Film's attribute
+        splittingString(path);
+    }
+
+    /**
+     * Reads the next line, turning end-of-file into an exception instead of {@code null}.
+     *
+     * @param reader source being parsed
+     * @return the line that was read, never {@code null}
+     * @throws IOException if the read fails or the file ends
+     */
+    private static String nextLine(BufferedReader reader) throws IOException {
+        String line = reader.readLine();
+        if (line == null) {
+            throw new EOFException("Unexpected end of file while parsing the movie page");
+        }
+        return line;
+    }
+
+    /**
+     * Consumes lines until one containing {@code marker} is found.
+     *
+     * @param reader source being parsed
+     * @param marker text the wanted line must contain
+     * @return the first line that contains {@code marker}
+     * @throws IOException if the read fails or the file ends before the marker
+     */
+    private static String readUntil(BufferedReader reader, String marker) throws IOException {
+        String line = nextLine(reader);
+        while (!line.contains(marker)) {
+            line = nextLine(reader);
+        }
+        return line;
+    }
+
+    /**
+     * Walks the HTML file top to bottom, locating each marker in turn and assigning
+     * the matching attribute. Failures are reported on standard output / standard
+     * error and leave the attributes parsed so far in place.
+     *
+     * @param path path of the HTML file
+     */
+    private void splittingString(String path){
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path),"UTF-8"))) {
+
+            // Film name: the line right after the "h2" that follows "title ott"
+            readUntil(reader, "title ott");
+            readUntil(reader, "h2");
+            this.name = removeTags(nextLine(reader).trim());
+
+            // Film release date: the line right after the "release" marker
+            readUntil(reader, "\"release\"");
+            this.releaseDate = sdf.parse(removeTags(nextLine(reader).trim()));
+
+            // Film genre: first "<a href" line after the "genres" marker
+            readUntil(reader, "genres");
+            this.genre = removeTags(readUntil(reader, "<a href")).trim();
+
+            // Film duration
+            readUntil(reader, "runtime");
+            nextLine(reader); // Needed because an empty line was found
+            this.duration = hoursToMinutes(nextLine(reader).trim());
+
+            // Film original title (if there is) & situation
+            this.ogTitle = this.name;
+            String line = nextLine(reader);
+            while (!line.contains("Situação</bdi>")) {
+                if(line.contains("Título original")){
+                    this.ogTitle = removeTags(line.replace("Título original", " ")).trim();
+                }
+                line = nextLine(reader);
+            }
+            this.situation = removeTags(line.replace("Situação", " ")).trim();
+
+            // Film original language
+            line = readUntil(reader, "Idioma original</bdi>");
+            this.ogLanguage = removeTags(line.replace("Idioma original", " ")).trim();
+
+            // Film budget ("-" means no budget informed)
+            line = readUntil(reader, "Orçamento</bdi>");
+            String aux = removeTags(line.replace("Orçamento", " ")).trim();
+            this.budget = (aux.equals("-")) ? 0.0F : convertBudget(aux);
+
+            // Film key-words: join every trimmed line up to the one holding "</ul>"
+            readUntil(reader, "Palavras-chave</bdi>");
+            StringBuilder keywordsHtml = new StringBuilder();
+            String piece;
+            do {
+                piece = nextLine(reader).trim();
+                keywordsHtml.append(piece).append(' ');
+            } while (!piece.contains("</ul>"));
+
+            String joined = keywordsHtml.toString();
+            if(!joined.contains("Nenhuma palavra-chave foi adicionada")){
+                arrKeyWds = removeTags(joined).trim().split("  ");
+            } else {
+                // Do not keep key-words from a page read earlier into this object
+                arrKeyWds = null;
+            }
+
+        } catch (FileNotFoundException e){
+            System.out.println("File not found");
+        } catch (IOException e){
+            System.out.println("File cannot be read");
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Receives a line that contains an HTML content and removes its tags.
+     *
+     * <p>Everything between a {@code '<'} and the next {@code '>'} (inclusive) is
+     * dropped, and every {@code "&nbsp;"} is removed from what is left. If a tag is
+     * opened but never closed, the rest of the line is discarded.
+     *
+     * @param line text possibly containing HTML tags
+     * @return the text with tags and {@code "&nbsp;"} removed
+     */
+    private String removeTags(String line){
+        StringBuilder resp = new StringBuilder(line.length());
+        int i = 0;
+
+        while (i < line.length()) {
+            char current = line.charAt(i);
+            if (current == '<') {
+                int close = line.indexOf('>', i + 1);
+                if (close < 0) {
+                    break; // unterminated tag: nothing readable remains
+                }
+                i = close;
+            } else {
+                resp.append(current);
+            }
+            i++;
+        }
+        // Returning cleaned line
+        return resp.toString().replace("&nbsp;", "");
+    }
+
+    /**
+     * Receives a String that contains a duration (ex: "1h 49m", "49m" or "2h") and
+     * converts it to minutes.
+     *
+     * @param value duration text
+     * @return total number of minutes
+     * @throws NumberFormatException if no number can be read from {@code value}
+     */
+    private int hoursToMinutes(String value){
+        String[] parts = removeLetters(value).trim().split(" +");
+
+        if (parts.length > 1) {
+            return (60 * Integer.parseInt(parts[0])) + Integer.parseInt(parts[1]);
+        }
+
+        // A single number is hours only when the text carries the 'h' suffix
+        int single = Integer.parseInt(parts[0]);
+        return (value.indexOf('h') >= 0) ? 60 * single : single;
+    }
+
+    /**
+     * Keeps only the digits, blank spaces and '.' of a String (ex: 1h 49m = 1 49).
+     *
+     * @param value text to filter
+     * @return {@code value} without any other character
+     */
+    private String removeLetters(String value){
+        StringBuilder result = new StringBuilder(value.length());
+
+        for(int i = 0; i < value.length(); i++){
+            char c = value.charAt(i);
+            // '.' is kept because convertBudget relies on it
+            if( (c >= '0' && c <= '9') || c == ' ' || c == '.') {
+                result.append(c);
+            }
+        }
+        return result.toString();
+    }
+
+    /**
+     * Receives a String that contains a FLOAT number, and converts it to a FLOAT number
+     * (PS: It's necessary to remove few characters because String has ',' on it)
+     *
+     * @param value budget text
+     * @return the parsed budget
+     * @throws NumberFormatException if the remaining characters are not a number
+     */
+    private Float convertBudget(String value){
+        return Float.parseFloat(removeLetters(value));
+    }
+
+    /**
+     * Renders every attribute on one line, separated by single spaces, followed by
+     * the key-words between brackets ("[]" when there are none). A missing release
+     * date is printed as "null", like the other unset attributes.
+     */
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append(name);
+        sb.append(" ").append(ogTitle);
+        sb.append(" ").append(releaseDate == null ? "null" : sdf.format(releaseDate));
+        sb.append(" ").append(duration);
+        sb.append(" ").append(genre);
+        sb.append(" ").append(ogLanguage);
+        sb.append(" ").append(situation);
+        sb.append(" ").append(budget);
+        sb.append(" ").append(arrKeyWds == null ? "[]" : Arrays.toString(arrKeyWds));
+        return sb.toString();
+    }
+
+    /** Prints {@link #toString()} on standard output. */
+    public void imprimir(){
+        System.out.println(this.toString());
+    }
+}
--- a/tps/gabarito/tp02/Movie.c
+++ b/tps/gabarito/tp02/Movie.c
@ -0,0 +1,334 @@
+// -------------------------------------------------------------------------------- //
+// @author Pedro Henrique Lopes Costa
+// 1/2022
+//
+// -------------------------------------------------------------------------------- //
+// Includes
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+// -------------------------------------------------------------------------------- //
+// Definitions
+// Capacity of the global `movies` array
+#define MAX_MOVIES          100
+// Size in bytes of every text field of a Movie
+#define MAX_FIELD_SIZE      100
+// Number of keyword slots in a Movie
+#define MAX_KEYWORDS        20
+// Size of the buffer main() uses for the folder prefix plus the file name read from stdin
+#define MAX_LINE_SIZE       250
+// Folder prefix main() places in front of every file name
+#define FDR_PREFIX          "/tmp/filmes/"
+
+// -------------------------------------------------------------------------------- //
+// Structs
+// Calendar date kept as three separate integers.
+typedef struct {
+    int year,
+    month,
+    day;
+} Date; 
+
+// One movie as filled by movie_readHtml and printed by movie_print.
+typedef struct {
+    // Text attributes, each a NUL-terminated string of at most MAX_FIELD_SIZE bytes
+    char name[MAX_FIELD_SIZE],
+        original_title[MAX_FIELD_SIZE],
+        genre[MAX_FIELD_SIZE], 
+        original_language[MAX_FIELD_SIZE], 
+        situation[MAX_FIELD_SIZE],
+        // Keyword strings; only the first `count_keywords` entries are in use
+        keywords[MAX_KEYWORDS][MAX_FIELD_SIZE];
+    Date release_date;
+    // `duration` is stored in minutes (hours * 60 + minutes)
+    int duration, count_keywords;
+    float budget;
+} Movie;
+
+// -------------------------------------------------------------------------------- //
+// Global variables
+// Storage for every movie read so far
+Movie movies[MAX_MOVIES];
+// Index of the slot movie_readHtml fills next; main() increments it after printing
+int count_movies = 0;
+
+// -------------------------------------------------------------------------------- //
+// Functions
+// Tells whether `str` begins with the three characters "FIM".
+bool isFim(char *str) {
+    return strncmp(str, "FIM", 3) == 0;
+}
+
+// Cuts `line` at its first '\r' or '\n'.
+// A string without a line break is left unchanged, and NULL is accepted.
+// Returns a pointer to the string terminator (the position where the cut
+// happened), or NULL when `line` is NULL.
+char *remove_line_break(char *line) {
+    if (line == NULL) return NULL;
+    // strcspn stops at the terminator, so the scan never leaves the string
+    line += strcspn(line, "\r\n");
+    *line = '\0';
+    return line;
+}
+
+// Reads one line from `file` into `line` (at most `max_size` bytes including
+// the terminator) and strips its line break.
+// Returns what remove_line_break returns for the line read. On end of file
+// or read error the buffer is emptied and NULL is returned.
+char *freadline(char *line, int max_size, FILE *file) {
+    if (fgets(line, max_size, file) == NULL) {
+        // Leave a defined, empty string instead of stale data
+        if (max_size > 0) line[0] = '\0';
+        return NULL;
+    }
+    return remove_line_break(line);
+}
+// Same as freadline, reading from the standard input.
+char *readline(char *line, int max_size) {
+    FILE *source = stdin;
+    return freadline(line, max_size, source);
+}
+
+// Returns the 0-based position of the first occurrence of the string `search`
+// inside `str`, or -1 when it does not occur (also for a NULL argument or an
+// empty `search`).
+long int indexOf(char *str, char *search) {
+    if (str == NULL || search == NULL || *search == '\0') return -1;
+    // strstr looks for the whole string; strcspn would match any single
+    // character of `search`
+    const char *hit = strstr(str, search);
+    return hit == NULL ? -1 : (long int)(hit - str);
+}
+
+// Returns a newly allocated copy of up to `length` characters of `string`
+// starting at the 1-based `position`. The caller must free() the result.
+// A position below 1 is treated as 1, a negative length as 0, and the copy
+// stops at the end of `string`, so nothing outside the source is read.
+// Exits the program when memory cannot be allocated.
+char *substring(char *string, int position, int length) {
+    size_t available = (string == NULL) ? 0 : strlen(string);
+    size_t offset = (position < 1) ? 0 : (size_t)(position - 1);
+    size_t wanted = (length < 0) ? 0 : (size_t)length;
+    size_t count = 0;
+
+    if (offset < available) {
+        count = available - offset;
+        if (count > wanted) count = wanted;
+    }
+
+    char *p = malloc(count + 1);
+    if(p == NULL) {
+        printf("Unable to allocate memory.\n");
+        exit(1);
+    }
+    if (count > 0) memcpy(p, string + offset, count);
+    p[count] = '\0';
+    return p;
+}
+
+// Replaces, in place, every occurrence of `needle` in `target` by `replacement`.
+// An empty `needle` leaves `target` unchanged.
+// The result is built in a 1024-byte scratch buffer and is truncated to 1023
+// characters. `target` must have room for the result; its capacity cannot be
+// checked from here.
+void str_replace(char *target, const char *needle, const char *replacement) {
+    char buffer[1024];
+    const size_t capacity = sizeof buffer - 1;
+    size_t used = 0;
+    const char *cursor = target;
+    size_t needle_len = strlen(needle);
+    size_t repl_len = strlen(replacement);
+
+    // strstr always matches an empty needle, which would never advance
+    if(needle_len == 0) return;
+
+    while(1) {
+        const char *hit = strstr(cursor, needle);
+
+        // Copy the text in front of the match (or the whole tail when none is left)
+        size_t chunk = (hit != NULL) ? (size_t)(hit - cursor) : strlen(cursor);
+        if(chunk > capacity - used) chunk = capacity - used;
+        memcpy(buffer + used, cursor, chunk);
+        used += chunk;
+        if(hit == NULL || used == capacity) break;
+
+        // Copy the replacement, as much of it as still fits
+        size_t piece = repl_len;
+        if(piece > capacity - used) piece = capacity - used;
+        memcpy(buffer + used, replacement, piece);
+        used += piece;
+        if(used == capacity) break;
+
+        cursor = hit + needle_len;
+    }
+    buffer[used] = '\0';
+    strcpy(target, buffer);
+}
+
+// Returns the 0-based index of the first decimal digit of `str` at or after
+// `start`, or -1 when there is none. A NULL string, a negative `start` or a
+// `start` beyond the end of the string also yields -1.
+int firstDigit(const char *str, int start) {
+    if(str == NULL || start < 0) return -1;
+    size_t length = strlen(str);   // measured once instead of on every iteration
+    for(size_t i = (size_t)start; i < length; i++) {
+        if(str[i] >= '0' && str[i] <= '9') return (int)i;
+    }
+    return -1;
+}
+
+// Remove tags
+// Copies the text of `html` that lies outside tags into `text`, which must be
+// able to hold strlen(html) + 1 bytes.
+//   - everything between '<' and '>' is dropped, as are '"', '\r' and '\n';
+//   - an entity ("&...;") is dropped as a whole; a '&' with no ';' after it
+//     is copied as a plain character;
+//   - copying stops at the first tag that starts with "<p" or "<br", or that
+//     is "</h1", "</th" or "</td".
+// Returns `text`, advanced by one when the extracted text starts with a space.
+char *extractOnlyText(char *html, char *text) {
+    char *start = text;
+    int depth = 0;   // number of tags currently open around the cursor
+    while (*html != '\0') {
+        if (*html == '<') {
+            if (
+                (*(html + 1) == 'p') ||
+                (*(html + 1) == 'b' && *(html + 2) == 'r') ||
+                (*(html + 1) == '/' && *(html + 2) == 'h' && *(html + 3) == '1') ||
+                (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'h') ||
+                (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'd')
+            ) break;
+            else depth++;
+        }
+        else if (*html == '>') {
+            // A stray '>' must not drive the depth negative, which would
+            // suppress the text that follows it
+            if (depth > 0) depth--;
+        }
+        else if (depth == 0 && *html != '"') {
+            if (*html == '&') {
+                char *entity_end = strchr(html, ';');
+                if (entity_end != NULL) html = entity_end;
+                else *text++ = *html;   // not an entity: keep the character
+            }
+            else if (*html != '\r' && *html != '\n') *text++ = *html;
+        }
+        html++;
+    }
+    *text = '\0';
+    return *start == ' ' ? start + 1 : start;
+}
+
+// -------------------------------------------------------------------------------- //
+// Class movie functions
+// Prints one movie on a single line: the attributes separated by spaces,
+// followed by the keywords as a comma-separated list between brackets.
+void movie_print(Movie *movie) {
+    const Date *when = &movie -> release_date;
+    const int total = movie -> count_keywords;
+
+    printf("%s %s %02i/%02i/%04i %i %s %s %s %g [",
+        movie -> name,
+        movie -> original_title,
+        when -> day, when -> month, when -> year,
+        movie -> duration,
+        movie -> genre,
+        movie -> original_language,
+        movie -> situation,
+        movie -> budget);
+
+    // Without keywords only the closing bracket is left to print
+    if(total == 0) {
+        printf("]\n");
+        return;
+    }
+
+    // The last keyword closes the list, the others are followed by a separator
+    for(int k = 0; k < total; k++) {
+        if(k + 1 < total) printf("%s, ", movie -> keywords[k]);
+        else printf("%s]\n", movie -> keywords[k]);
+    }
+}
+
+// Copies `src` into one of the MAX_FIELD_SIZE text fields of a Movie, truncating if needed.
+static void copy_field(char *dst, const char *src) {
+    snprintf(dst, MAX_FIELD_SIZE, "%s", src);
+}
+
+// Returns a heap copy of `line` without its first `start` and last `trailing`
+// bytes (empty when the line is shorter than that). The caller frees it.
+static char *slice_line(const char *line, size_t start, size_t trailing) {
+    size_t length = strlen(line);
+    size_t count = (length >= start + trailing) ? length - start - trailing : 0;
+    char *out = malloc(count + 1);
+    if(out == NULL) {
+        printf("Unable to allocate memory.\n");
+        exit(1);
+    }
+    if(count > 0) memcpy(out, line + start, count);
+    out[count] = '\0';
+    return out;
+}
+
+// Returns `s` advanced by `n` characters, or its terminator when `s` is shorter.
+static const char *skip_chars(const char *s, size_t n) {
+    size_t length = strlen(s);
+    return s + (n < length ? n : length);
+}
+
+// Runs extractOnlyText on `line` into a heap buffer as large as the line. The caller frees it.
+static char *extract_text_alloc(char *line) {
+    char *out = malloc(strlen(line) + 1);
+    if(out == NULL) {
+        printf("Unable to allocate memory.\n");
+        exit(1);
+    }
+    extractOnlyText(line, out);
+    return out;
+}
+
+// Reads `count` more lines into `*line`; returns false when the file ends first.
+static bool advance_lines(char **line, size_t *capacity, FILE *file, int count) {
+    while(count-- > 0) {
+        if(getline(line, capacity, file) == -1) return false;
+    }
+    return true;
+}
+
+// Parses the HTML file `filename` and fills movies[count_movies] with what it
+// finds. Each attribute is taken from the first line that carries its marker;
+// the fixed offsets below are the ones the page layout is assumed to have.
+// Exits the program when the file cannot be opened. Parsing stops quietly if
+// the file ends in the middle of an attribute.
+void movie_readHtml(char *filename) {
+    FILE *html_file;
+    char *line_html = NULL;
+    size_t len = 0;
+    ssize_t read;
+
+    html_file = fopen(filename, "r");
+
+    if(html_file == NULL) exit(EXIT_FAILURE);
+
+    // ------------------------------------ //
+
+    Movie *movie = &movies[count_movies];
+
+    // One flag per attribute, so that only the first matching line is used
+    bool has_name = false,
+        has_original_title = false,
+        has_release_date = false,
+        has_duration = false,
+        has_genre = false,
+        has_original_language = false,
+        has_situation = false,
+        has_budget = false,
+        has_keywords = false;
+
+    // ------------------------------------ //
+
+    // Read HTML line by line
+    while((read = getline(&line_html, &len, html_file)) != -1) {
+
+        // --------------------------- //
+        // Find movie name
+        if(!has_name) {
+            char *title = strstr(line_html, "<title>");
+            if(title != NULL) {
+                char *text = slice_line(title, 7, 0);
+                str_replace(text, "&#8212;", "—");
+                // Drop the fixed 46-byte tail that follows the name in the <title> line
+                size_t text_len = strlen(text);
+                if(text_len >= 46) text[text_len - 46] = '\0';
+                copy_field(movie -> name, text);
+                free(text);
+                has_name = true;
+            }
+        }
+
+        // --------------------------- //
+        // Find movie original title
+        if(!has_original_title) {
+            if(strstr(line_html, "<p class=\"wrap\">") != NULL) {
+                char *after = strstr(line_html, "</strong> ");
+                if(after != NULL) {
+                    after += 10;
+                    // Drop the last 5 bytes of the line
+                    size_t after_len = strlen(after);
+                    if(after_len >= 5) after[after_len - 5] = '\0';
+                    copy_field(movie -> original_title, after);
+                    has_original_title = true;
+                }
+            }
+        }
+
+        // --------------------------- //
+        // Find movie release date
+        if(!has_release_date) {
+            if(strstr(line_html, "<span class=\"release\">") != NULL) {
+                // Skip one line
+                if(!advance_lines(&line_html, &len, html_file, 1)) break;
+                // dd/mm/yyyy is expected at bytes 8..17 of the line
+                if(strlen(line_html) >= 18) {
+                    char day[3] = { line_html[8], line_html[9], '\0' };
+                    char month[3] = { line_html[11], line_html[12], '\0' };
+                    char year[5] = { line_html[14], line_html[15], line_html[16], line_html[17], '\0' };
+                    movie -> release_date.day = atoi(day);
+                    movie -> release_date.month = atoi(month);
+                    movie -> release_date.year = atoi(year);
+                    has_release_date = true;
+                }
+            }
+        }
+
+        // --------------------------- //
+        // Find movie duration
+        if(!has_duration) {
+            if(strstr(line_html, "<span class=\"runtime\">") != NULL) {
+                // Skip two lines
+                if(!advance_lines(&line_html, &len, html_file, 2)) break;
+                long int h_pos = indexOf(line_html, "h");
+                int hours = 0,
+                    minutes = 0;
+                // atoi stops at the first non-digit, so parsing can start right at the digit
+                if(h_pos != -1) {
+                    int hours_at = firstDigit(line_html, 0);
+                    if(hours_at != -1) hours = atoi(line_html + hours_at);
+                }
+                int minutes_at = firstDigit(line_html, h_pos == -1 ? 0 : (int)h_pos);
+                if(minutes_at != -1) minutes = atoi(line_html + minutes_at);
+                movie -> duration = (hours * 60) + minutes;
+                has_duration = true;
+            }
+        }
+
+        // -------------------------- //
+        // Find movie genres
+        if(!has_genre) {
+            if(strstr(line_html, "<span class=\"genres\">") != NULL) {
+                // Skip two lines
+                if(!advance_lines(&line_html, &len, html_file, 2)) break;
+                char *text = extract_text_alloc(line_html);
+                // The first 6 characters of the extracted text are skipped
+                copy_field(movie -> genre, skip_chars(text, 6));
+                free(text);
+                has_genre = true;
+            }
+        }
+
+        // --------------------------- //
+        // Find movie original language
+        if(!has_original_language) {
+            if(strstr(line_html, "<bdi>Idioma original</bdi>") != NULL) {
+                char *value = slice_line(line_html, 49, 5);
+                copy_field(movie -> original_language, value);
+                free(value);
+                has_original_language = true;
+            }
+        }
+
+        // --------------------------- //
+        // Find movie situation
+        if(!has_situation) {
+            if(strstr(line_html, "<bdi>Situação</bdi>") != NULL) {
+                char *value = slice_line(line_html, 43, 1);
+                copy_field(movie -> situation, value);
+                free(value);
+                has_situation = true;
+            }
+        }
+
+        // --------------------------- //
+        // Find movie budget
+        if(!has_budget) {
+            if(strstr(line_html, "<bdi>Orçamento</bdi>") != NULL) {
+                char *value = slice_line(line_html, 44, 5);
+                if(!strcmp(value, "-")) movie -> budget = 0;
+                else{
+                    // Remove '$' and ',' in place before converting
+                    char *out = value;
+                    for(const char *in = value; *in != '\0'; in++) {
+                        if(*in != '$' && *in != ',') *out++ = *in;
+                    }
+                    *out = '\0';
+                    movie -> budget = atof(value);
+                }
+                free(value);
+                has_budget = true;
+            }
+        }
+
+        // --------------------------- //
+        // Find movie keywords
+        if(!has_keywords) {
+            if(strstr(line_html, "<h4><bdi>Palavras-chave</bdi></h4>") != NULL) {
+                has_keywords = true;
+                // Skip two lines until keywords starts
+                if(!advance_lines(&line_html, &len, html_file, 2)) break;
+                char *first = slice_line(line_html, 4, 1);
+                bool any_keyword = strcmp(first, "<p><bdi>Nenhuma palavra-chave foi adicionada.</bdi></p>") != 0;
+                free(first);
+
+                if(any_keyword) {
+                    // Skip more two lines until keywords starts
+                    if(!advance_lines(&line_html, &len, html_file, 2)) break;
+                    read = 0;
+                    while(read != -1 && strstr(line_html, "</ul>") == NULL) {
+                        // Keywords beyond MAX_KEYWORDS are ignored
+                        if(strstr(line_html, "<li>") != NULL && movie -> count_keywords < MAX_KEYWORDS){
+                            char *text = extract_text_alloc(line_html);
+                            // The first 8 characters of the extracted text are skipped
+                            copy_field(movie -> keywords[movie -> count_keywords++], skip_chars(text, 8));
+                            free(text);
+                        }
+                        read = getline(&line_html, &len, html_file);
+                    }
+                    if(read == -1) break;
+                }
+            }
+        }
+    }
+
+    // ------------------------------------ //
+    // Without an original title in the page, the name is used in its place
+    if(!has_original_title) copy_field(movie -> original_title, movie -> name);
+
+    // ------------------------------------ //
+    fclose(html_file);
+    if(line_html) free(line_html);
+}
+
+// -------------------------------------------------------------------------------- //
+// Reads one line from stdin into `dst` (at most `size` bytes including the
+// terminator) and strips its line break. Returns false on end of input.
+static bool read_filename(char *dst, size_t size) {
+    if(fgets(dst, (int)size, stdin) == NULL) return false;
+    dst[strcspn(dst, "\r\n")] = '\0';
+    return true;
+}
+
+// Reads file names from stdin, one per line, until a line starting with "FIM",
+// the end of the input, or MAX_MOVIES movies. Each file is looked up under
+// FDR_PREFIX, parsed and printed.
+int main() {
+    size_t prefix_size = strlen(FDR_PREFIX);
+    char line[MAX_LINE_SIZE];
+    strcpy(line, FDR_PREFIX);
+
+    // The file name is written right after the prefix, in the room that is left
+    char *file_name = line + prefix_size;
+    size_t room = MAX_LINE_SIZE - prefix_size;
+
+    while(count_movies < MAX_MOVIES && read_filename(file_name, room) && !isFim(file_name)) {
+        movie_readHtml(line);
+        movie_print(&movies[count_movies++]);
+    }
+    return EXIT_SUCCESS;
+}