From 7d3067139bc860112cd50dc5c54a8962a7a0fb8f Mon Sep 17 00:00:00 2001 From: Felipe Domingos Date: Fri, 8 Apr 2022 08:22:32 -0300 Subject: [PATCH] gabarito tp02 --- tps/gabarito/tp02/Film.java | 311 +++++++++++++++++++++++++++++++++ tps/gabarito/tp02/Movie.c | 334 ++++++++++++++++++++++++++++++++++++ 2 files changed, 645 insertions(+) create mode 100644 tps/gabarito/tp02/Film.java create mode 100644 tps/gabarito/tp02/Movie.c diff --git a/tps/gabarito/tp02/Film.java b/tps/gabarito/tp02/Film.java new file mode 100644 index 0000000..0b0225d --- /dev/null +++ b/tps/gabarito/tp02/Film.java @@ -0,0 +1,311 @@ +import java.io.*; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; + +/** + * @author Thiago de Campos Ribeiro Nolasco + */ +public class Film { + // Attributes + private String name; + private String ogTitle; + private Date releaseDate; + private Integer duration; + private String genre; + private String ogLanguage; + private String situation; + private Float budget; + private String[] arrKeyWds; + + SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy"); + + // Constructors + public Film() { + this(null, null, null, null, null, null, null, null); + } + + /** + * @param name + * @param ogTitle + * @param releaseDate + * @param duration + * @param genre + * @param ogLanguage + * @param situation + * @param budget + */ + public Film(String name, String ogTitle, Date releaseDate, Integer duration, String genre, String ogLanguage, String situation, Float budget) { + this.name = name; + this.ogTitle = ogTitle; + this.releaseDate = releaseDate; + this.duration = duration; + this.genre = genre; + this.ogLanguage = ogLanguage; + this.situation = situation; + this.budget = budget; + this.arrKeyWds = null; + } + + // Getters and Setters + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getOgTitle() { + return ogTitle; + } + + public void setOgTitle(String ogTitle) { + this.ogTitle = ogTitle; + } + + public Date getReleaseDate() { + return releaseDate; + } + + public void setReleaseDate(Date releaseDate) { + this.releaseDate = releaseDate; + } + + public Integer getDuration() { + return duration; + } + + public void setDuration(Integer duration) { + this.duration = duration; + } + + public String getGenre() { + return genre; + } + + public void setGenre(String genre) { + this.genre = genre; + } + + public String getOgLanguage() { + return ogLanguage; + } + + public void setOgLanguage(String ogLanguage) { + this.ogLanguage = ogLanguage; + } + + public String getSituation() { + return situation; + } + + public void setSituation(String situation) { + this.situation = situation; + } + + public Float getBudget() { + return budget; + } + + public void setBudget(Float budget) { + this.budget = budget; + } + + public String[] getArrKeyWds() { + return arrKeyWds; + } + + public void setArrKeyWds(String[] arrKeyWds) { + this.arrKeyWds = arrKeyWds; + } + + public Film clone(){ + Film cloned = new Film(); + + cloned.name = this.name; + cloned.ogTitle = this.ogTitle; + cloned.releaseDate = this.releaseDate; + cloned.duration = this.duration; + cloned.genre = this.genre; + cloned.ogLanguage = this.ogLanguage; + cloned.situation = this.situation; + cloned.budget = this.budget; + cloned.arrKeyWds = this.arrKeyWds; + + return cloned; + } + + + /** + * @param fileName + */ + public void ler(String fileName){ + // Getting the right path for each read file + String path = "./filmes/" + fileName; + + // Method that will split chunks of the read HTML and will assign the value to each Film's attribute + splittingString(path); + } + + private void splittingString(String path){ + // Data declaration + String line = ""; + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path),"UTF-8"))) { + + // Film name + while(!reader.readLine().contains("title ott")); + while(!reader.readLine().contains("h2")); + this.name = removeTags(reader.readLine().trim()); + + // Film release date + while(!reader.readLine().contains("\"release\"")); + this.releaseDate = sdf.parse(removeTags(reader.readLine().trim())); + + // Film genre + while(!reader.readLine().contains("genres")); + // In this case, will use "line" because the last readLine will have the content that we want + while(!(line = reader.readLine()).contains("") ) { + if(line.contains("Título original")){ + this.ogTitle = removeTags(line.replace("Título original", " ")).trim(); + } + } + this.situation = removeTags(line.replace("Situação", " ")).trim(); + + // Film original language + while( !(line = reader.readLine()).contains("Idioma original") ); + this.ogLanguage = removeTags(line.replace("Idioma original", " ")).trim(); + + // Film budget + while( !(line = reader.readLine()).contains("Orçamento") ); + String aux = removeTags(line.replace("Orçamento", " ")).trim(); + this.budget = (aux.equals("-")) ? 0.0F : convertBudget(aux); + + // Film key-words + line = ""; + while( !reader.readLine().contains("Palavras-chave") ); + while( !(line += reader.readLine().trim() + " ").contains("") ); + if(!line.contains("Nenhuma palavra-chave foi adicionada")){ + arrKeyWds = removeTags(line).trim().split(" "); + } + + + } catch (FileNotFoundException e){ + System.out.println("File not found"); + } catch (IOException e){ + System.out.println("File cannot be read"); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * Receives a line that contains an HTML content and removes its tags + * @param line + * @return + */ + private String removeTags(String line){ + // Data declaration + String resp = ""; + int i = 0; + + /* + The main idea here is to check if the char is equals to '<', if it's, it means that an HTML tag has opened + So, CAN'T read anything until the tag is closed, '>' is found. + + It's also checking if any HTML special character (&....;) or if any "()" is found + IF found, don't read anything until it has ended. + */ + while (i < line.length()) { + if (line.charAt(i) == '<') { + i++; + while (line.charAt(i) != '>') i++; + }else { + resp += line.charAt(i); + } + i++; + } + // Returning cleaned line + return resp.replace(" ", ""); + } + + /** + * Receives a String that contains hours, and convert it to minutes (Integer) + * @param value + * @return + */ + private int hoursToMinutes(String value){ + // Data declaration + value = removeLetters(value); + int result = 0, minutes = 0; + + String[] splitValue = value.split(" "); + if(splitValue.length > 1) { + int hour = Integer.parseInt(splitValue[0]); + minutes = Integer.parseInt(splitValue[1]); + result = (60 * hour) + minutes; + } else { + minutes = Integer.parseInt(splitValue[0]); + result = minutes; + } + + return result; + } + + /** + * Receives a String that contains hours, and leave only the numbers (ex: 1h 49m = 1 49) + * @param value + * @return + */ + private String removeLetters(String value){ + // Data declaration + String result = ""; + + for(int i = 0; i < value.length(); i++){ + // If char is a number, a blank space, or a '.' (Used on convertBudget), will be stored into "result" + if( (value.charAt(i) >= 48 && value.charAt(i) <= 57) || value.charAt(i) == ' ' || value.charAt(i) == '.') + result += value.charAt(i); + } + return result; + } + + /** + * Receives a String that contains a FLOAT number, and converts it to a FLOAT number + * (PS: It's necessary to remove few characters because String has ',' on it) + * @param value + * @return + */ + private Float convertBudget(String value){ + return Float.parseFloat(removeLetters(value)); + } + + @Override + public String toString() { + final StringBuffer sb = new StringBuffer(); + sb.append(name); + sb.append(" ").append(ogTitle); + sb.append(" ").append(sdf.format(getReleaseDate())); + sb.append(" ").append(duration); + sb.append(" ").append(genre); + sb.append(" ").append(ogLanguage); + sb.append(" ").append(situation); + sb.append(" ").append(budget); + sb.append(" ").append(arrKeyWds == null ? "[]" : Arrays.asList(arrKeyWds).toString()); + return sb.toString(); + } + + public void imprimir(){ + System.out.println(this.toString()); + } +} diff --git a/tps/gabarito/tp02/Movie.c b/tps/gabarito/tp02/Movie.c new file mode 100644 index 0000000..e5842f8 --- /dev/null +++ b/tps/gabarito/tp02/Movie.c @@ -0,0 +1,334 @@ +// -------------------------------------------------------------------------------- // +// @author Pedro Henrique Lopes Costa +// 1/2022 +// +// -------------------------------------------------------------------------------- // +// Includes +#include +#include +#include +#include + +// -------------------------------------------------------------------------------- // +// Definitions +#define MAX_MOVIES 100 +#define MAX_FIELD_SIZE 100 +#define MAX_KEYWORDS 20 +#define MAX_LINE_SIZE 250 +#define FDR_PREFIX "/tmp/filmes/" + +// -------------------------------------------------------------------------------- // +// Structs +typedef struct { + int year, + month, + day; +} Date; + +typedef struct { + char name[MAX_FIELD_SIZE], + original_title[MAX_FIELD_SIZE], + genre[MAX_FIELD_SIZE], + original_language[MAX_FIELD_SIZE], + situation[MAX_FIELD_SIZE], + keywords[MAX_KEYWORDS][MAX_FIELD_SIZE]; + Date release_date; + int duration, count_keywords; + float budget; +} Movie; + +// -------------------------------------------------------------------------------- // +// Global variables +Movie movies[MAX_MOVIES]; +int count_movies = 0; + +// -------------------------------------------------------------------------------- // +// Functions +bool isFim(char *str) { return str[0] == 'F' && str[1] == 'I' && str[2] == 'M'; } + +char *remove_line_break(char *line) { + while (*line != '\r' && *line != '\n') line++; + *line = '\0'; + return line; +} + +char *freadline(char *line, int max_size, FILE *file) { return remove_line_break(fgets(line, max_size, file)); } +char *readline(char *line, int max_size) { return freadline(line, max_size, stdin); } + +long int indexOf(char *str, char *search) { + long int pos = strcspn(str, search); + return pos == strlen(str) ? -1 : pos; +} + +char *substring(char *string, int position, int length) { + char *p; + int c; + p = malloc(length+1); + if(p == NULL) { + printf("Unable to allocate memory.\n"); + exit(1); + } + for(c = 0; c < length; c++) { + *(p+c) = *(string+position-1); + string++; + } + *(p+c) = '\0'; + return p; +} + +void str_replace(char *target, const char *needle, const char *replacement) { + char buffer[1024] = { 0 }; + char *insert_point = &buffer[0]; + const char *tmp = target; + size_t needle_len = strlen(needle); + size_t repl_len = strlen(replacement); + + while(1) { + const char *p = strstr(tmp, needle); + if(p == NULL) { + strcpy(insert_point, tmp); + break; + } + memcpy(insert_point, tmp, p - tmp); + insert_point += p - tmp; + memcpy(insert_point, replacement, repl_len); + insert_point += repl_len; + tmp = p + needle_len; + } + strcpy(target, buffer); +} + +int firstDigit(const char *str, int start) { + for(int i = start; i != strlen(str); i++) if(str[i] >= '0' && str[i] <= '9') return i; + return -1; +} + +// Remove tags +char *extractOnlyText(char *html, char *text) { + char *start = text; + int contagem = 0; + while (*html != '\0') { + if (*html == '<') { + if ( + (*(html + 1) == 'p') || + (*(html + 1) == 'b' && *(html + 2) == 'r') || + (*(html + 1) == '/' && *(html + 2) == 'h' && *(html + 3) == '1') || + (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'h') || + (*(html + 1) == '/' && *(html + 2) == 't' && *(html + 3) == 'd') + ) break; + else contagem++; + } + else if (*html == '>') contagem--; + else if (contagem == 0 && *html != '"') { + if (*html == '&') html = strchr(html, ';'); + else if (*html != '\r' && *html != '\n') *text++ = *html; + } + html++; + } + *text = '\0'; + return *start == ' ' ? start + 1 : start; +} + +// -------------------------------------------------------------------------------- // +// Class movie functions +void movie_print(Movie *movie) { + printf("%s %s %02i/%02i/%04i %i %s %s %s %g [", + movie -> name, + movie -> original_title, + movie -> release_date.day, movie -> release_date.month, movie -> release_date.year, + movie -> duration, + movie -> genre, + movie -> original_language, + movie -> situation, + movie -> budget); + for(int i = 0; i < movie -> count_keywords; i++) { + if(i == movie -> count_keywords - 1) printf("%s]\n", movie -> keywords[i]); + else printf("%s, ", movie -> keywords[i]); + } + if(movie -> count_keywords == 0) printf("]\n"); +} + +void movie_readHtml(char *filename) { + FILE *html_file; + char *line_html = NULL; + size_t len = 0; + ssize_t read; + + html_file = fopen(filename, "r"); + + if(html_file == NULL) exit(EXIT_FAILURE); + + // ------------------------------------ // + + // Creating movie variables + char *name = NULL, + *original_title = NULL, + *genre = NULL, + *original_language = NULL, + *situation = NULL, + *keywords = NULL; + + Date release_date; + + release_date.day = 0; + int duration = -1; + float budget = -1; + + // ------------------------------------ // + + // Read HTML line by line + while((read = getline(&line_html, &len, html_file)) != -1) { + + // --------------------------- // + // Find movie name + if(name == NULL) { + if(strstr(line_html, "") != NULL) { + name = strstr(line_html, "<title>") + 7; + strcpy(movies[count_movies].name, name); + str_replace(movies[count_movies].name, "—", "—"); + movies[count_movies].name[strlen(movies[count_movies].name) - 46] = '\0'; + } + } + + // --------------------------- // + // Find movie original title + if(original_title == NULL) { + if(strstr(line_html, "<p class=\"wrap\">") != NULL) { + original_title = strstr(line_html, "</strong> ") + 10; + original_title[strlen(original_title) - 5] = '\0'; + strcpy(movies[count_movies].original_title, original_title); + } + } + + // --------------------------- // + // Find movie release date + if(release_date.day == 0) { + if(strstr(line_html, "<span class=\"release\">") != NULL) { + // Skip one line + read = getline(&line_html, &len, html_file); + char *day, *month, *year; + day = substring(line_html, 9, 2); + month = substring(line_html, 12, 2); + year = substring(line_html, 15, 4); + movies[count_movies].release_date.day = atoi(day); + movies[count_movies].release_date.month = atoi(month); + movies[count_movies].release_date.year = atoi(year); + } + } + + // --------------------------- // + // Find movie duration + if(duration == -1) { + if(strstr(line_html, "<span class=\"runtime\">") != NULL) { + // Skip two lines + read = getline(&line_html, &len, html_file); + read = getline(&line_html, &len, html_file); + int h_pos = indexOf(line_html, "h"), + hours = 0, + minutes = 0; + if(h_pos != -1) hours = atoi(substring(line_html, firstDigit(line_html, 0), h_pos)); + minutes = atoi(substring(line_html, firstDigit(line_html, h_pos == -1 ? 0 : h_pos), strlen(line_html) - 1)); + duration = (hours * 60) + minutes; + movies[count_movies].duration = duration; + } + } + + // -------------------------- // + // Find movie genres + if(genre == NULL) { + if(strstr(line_html, "<span class=\"genres\">") != NULL) { + // Skip two lines + read = getline(&line_html, &len, html_file); + read = getline(&line_html, &len, html_file); + extractOnlyText(line_html, movies[count_movies].genre); + genre = substring(movies[count_movies].genre, 7, strlen(movies[count_movies].genre)); + strcpy(movies[count_movies].genre, genre); + } + } + + // --------------------------- // + // Find movie original language + if(original_language == NULL) { + if(strstr(line_html, "<bdi>Idioma original</bdi>") != NULL) { + strcpy(movies[count_movies].original_language, line_html); + original_language = substring(movies[count_movies].original_language, 50, strlen(line_html) - 54); + strcpy(movies[count_movies].original_language, original_language); + } + } + + // --------------------------- // + // Find movie situation + if(situation == NULL) { + if(strstr(line_html, "<bdi>Situação</bdi>") != NULL) { + strcpy(movies[count_movies].situation, line_html); + situation = substring(movies[count_movies].situation, 44, strlen(line_html) - 44); + strcpy(movies[count_movies].situation, situation); + } + } + + // --------------------------- // + // Find movie budget + if(budget == -1) { + if(strstr(line_html, "<bdi>Orçamento</bdi>") != NULL) { + char *p_budget, e_budget[strlen(line_html)]; + strcpy(e_budget, line_html); + p_budget = substring(e_budget, 45, strlen(line_html) - 49); + if(!strcmp(p_budget, "-")) movies[count_movies].budget = 0; + else{ + strcpy(e_budget, p_budget); + str_replace(e_budget, "$", ""); + str_replace(e_budget, ",", ""); + movies[count_movies].budget = atof(e_budget); + } + } + } + + // --------------------------- // + // Find movie keywords + if(keywords == NULL) { + if(strstr(line_html, "<h4><bdi>Palavras-chave</bdi></h4>") != NULL) { + // Skip two lines until keywords starts + for(int i = 0; i < 2; i++) read = getline(&line_html, &len, html_file); + char tmp_line[strlen(line_html)]; + strcpy(tmp_line, line_html); + keywords = substring(tmp_line, 5, strlen(line_html) - 5); + + if(strcmp(keywords, "<p><bdi>Nenhuma palavra-chave foi adicionada.</bdi></p>")) { + // Skip more two lines until keywords starts + for(int x = 0; x < 2; x++) read = getline(&line_html, &len, html_file); + while(true) { + if(strstr(line_html, "</ul>") != NULL) break; + if(strstr(line_html, "<li>") != NULL){ + extractOnlyText(line_html, tmp_line); + keywords = substring(tmp_line, 9, strlen(line_html) - 8); + strcpy(movies[count_movies].keywords[movies[count_movies].count_keywords++], keywords); + } + read = getline(&line_html, &len, html_file); + } + } + } + } + + // ------------------------------------ // + // Verify variables still "null" + if(original_title == NULL) strcpy(movies[count_movies].original_title, movies[count_movies].name); + } + + // ------------------------------------ // + fclose(html_file); + if(line_html) free(line_html); +} + +// -------------------------------------------------------------------------------- // +int main() { + size_t prefix_size = strlen(FDR_PREFIX); + char line[MAX_LINE_SIZE]; + strcpy(line, FDR_PREFIX); + readline(line + prefix_size, MAX_LINE_SIZE); + while(!isFim(line + prefix_size)) { + movie_readHtml(line); + movie_print(&movies[count_movies++]); + readline(line + prefix_size, MAX_LINE_SIZE); + } + return EXIT_SUCCESS; +} \ No newline at end of file